4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
45 from ganeti import ssh
46 from ganeti import utils
47 from ganeti import errors
48 from ganeti import hypervisor
49 from ganeti import locking
50 from ganeti import constants
51 from ganeti import objects
52 from ganeti import serializer
53 from ganeti import ssconf
54 from ganeti import uidpool
55 from ganeti import compat
56 from ganeti import masterd
57 from ganeti import netutils
58 from ganeti import query
59 from ganeti import qlang
60 from ganeti import opcodes
63 import ganeti.masterd.instance # pylint: disable=W0611
67 """Data container for LU results with jobs.
69 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
70 by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
71 contained in the C{jobs} attribute and include the job IDs in the opcode
75 def __init__(self, jobs, **kwargs):
76 """Initializes this class.
78 Additional return values can be specified as keyword arguments.
80 @type jobs: list of lists of L{opcodes.OpCode}
81 @param jobs: A list of lists of opcode objects
88 class LogicalUnit(object):
89 """Logical Unit base class.
91 Subclasses must follow these rules:
92 - implement ExpandNames
93 - implement CheckPrereq (except when tasklets are used)
94 - implement Exec (except when tasklets are used)
95 - implement BuildHooksEnv
96 - implement BuildHooksNodes
97 - redefine HPATH and HTYPE
98 - optionally redefine their run requirements:
99 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
101 Note that all commands require root permissions.
103 @ivar dry_run_result: the value (if any) that will be returned to the caller
104 in dry-run mode (signalled by opcode dry_run parameter)
111 def __init__(self, processor, op, context, rpc):
112 """Constructor for LogicalUnit.
114 This needs to be overridden in derived classes in order to check op
118 self.proc = processor
120 self.cfg = context.cfg
121 self.glm = context.glm
123 self.owned_locks = context.glm.list_owned
124 self.context = context
126 # Dicts used to declare locking needs to mcpu
127 self.needed_locks = None
128 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
130 self.remove_locks = {}
131 # Used to force good behavior when calling helper functions
132 self.recalculate_locks = {}
134 self.Log = processor.Log # pylint: disable=C0103
135 self.LogWarning = processor.LogWarning # pylint: disable=C0103
136 self.LogInfo = processor.LogInfo # pylint: disable=C0103
137 self.LogStep = processor.LogStep # pylint: disable=C0103
138 # support for dry-run
139 self.dry_run_result = None
140 # support for generic debug attribute
141 if (not hasattr(self.op, "debug_level") or
142 not isinstance(self.op.debug_level, int)):
143 self.op.debug_level = 0
148 # Validate opcode parameters and set defaults
149 self.op.Validate(True)
151 self.CheckArguments()
153 def CheckArguments(self):
154 """Check syntactic validity for the opcode arguments.
156 This method is for doing a simple syntactic check and ensuring the
157 validity of opcode parameters, without any cluster-related
158 checks. While the same can be accomplished in ExpandNames and/or
159 CheckPrereq, doing these separately is better because:
161 - ExpandNames is left as purely a lock-related function
162 - CheckPrereq is run after we have acquired locks (and possible
165 The function is allowed to change the self.op attribute so that
166 later methods no longer need to worry about missing parameters.
171 def ExpandNames(self):
172 """Expand names for this LU.
174 This method is called before starting to execute the opcode, and it should
175 update all the parameters of the opcode to their canonical form (e.g. a
176 short node name must be fully expanded after this method has successfully
177 completed). This way locking, hooks, logging, etc. can work correctly.
179 LUs which implement this method must also populate the self.needed_locks
180 member, as a dict with lock levels as keys, and a list of needed lock names
183 - use an empty dict if you don't need any lock
184 - if you don't need any lock at a particular level omit that level
185 - don't put anything for the BGL level
186 - if you want all locks at a level use locking.ALL_SET as a value
188 If you need to share locks (rather than acquire them exclusively) at one
189 level you can modify self.share_locks, setting a true value (usually 1) for
190 that level. By default locks are not shared.
192 This function can also define a list of tasklets, which then will be
193 executed in order instead of the usual LU-level CheckPrereq and Exec
194 functions, if those are not defined by the LU.
198 # Acquire all nodes and one instance
199 self.needed_locks = {
200 locking.LEVEL_NODE: locking.ALL_SET,
201 locking.LEVEL_INSTANCE: ['instance1.example.com'],
203 # Acquire just two nodes
204 self.needed_locks = {
205 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
208 self.needed_locks = {} # No, you can't leave it to the default value None
211 # The implementation of this method is mandatory only if the new LU is
212 # concurrent, so that old LUs don't need to be changed all at the same
215 self.needed_locks = {} # Exclusive LUs don't need locks.
217 raise NotImplementedError
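# Illustrative sketch only (this LU does not exist in this module): an
# ExpandNames implementation that acquires all node locks in shared mode,
# combining self.needed_locks with self.share_locks as described above.
#
#   def ExpandNames(self):
#     self.needed_locks = {
#       locking.LEVEL_NODE: locking.ALL_SET,
#       }
#     self.share_locks[locking.LEVEL_NODE] = 1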
219 def DeclareLocks(self, level):
220 """Declare LU locking needs for a level
222 While most LUs can just declare their locking needs at ExpandNames time,
223 sometimes there's the need to calculate some locks after having acquired
224 the ones before. This function is called just before acquiring locks at a
225 particular level, but after acquiring the ones at lower levels, and permits
226 such calculations. It can be used to modify self.needed_locks, and by
227 default it does nothing.
229 This function is only called if you have something already set in
230 self.needed_locks for the level.
232 @param level: Locking level which is going to be locked
233 @type level: member of ganeti.locking.LEVELS
237 def CheckPrereq(self):
238 """Check prerequisites for this LU.
240 This method should check that the prerequisites for the execution
241 of this LU are fulfilled. It can do internode communication, but
242 it should be idempotent - no cluster or system changes are
245 The method should raise errors.OpPrereqError in case something is
246 not fulfilled. Its return value is ignored.
248 This method should also update all the parameters of the opcode to
249 their canonical form if it hasn't been done by ExpandNames before.
252 if self.tasklets is not None:
253 for (idx, tl) in enumerate(self.tasklets):
254 logging.debug("Checking prerequisites for tasklet %s/%s",
255 idx + 1, len(self.tasklets))
260 def Exec(self, feedback_fn):
263 This method should implement the actual work. It should raise
264 errors.OpExecError for failures that are somewhat dealt with in
268 if self.tasklets is not None:
269 for (idx, tl) in enumerate(self.tasklets):
270 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
273 raise NotImplementedError
275 def BuildHooksEnv(self):
276 """Build hooks environment for this LU.
279 @return: Dictionary containing the environment that will be used for
280 running the hooks for this LU. The keys of the dict must not be prefixed
281 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
282 will extend the environment with additional variables. If no environment
283 should be defined, an empty dictionary should be returned (not C{None}).
284 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
288 raise NotImplementedError
290 def BuildHooksNodes(self):
291 """Build list of nodes to run LU's hooks.
293 @rtype: tuple; (list, list)
294 @return: Tuple containing a list of node names on which the hook
295 should run before the execution and a list of node names on which the
296 hook should run after the execution. If there are no nodes, an
297 empty list must be returned (and not None).
298 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
302 raise NotImplementedError
304 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
305 """Notify the LU about the results of its hooks.
307 This method is called every time a hooks phase is executed, and notifies
308 the Logical Unit about the hooks' result. The LU can then use it to alter
309 its result based on the hooks. By default the method does nothing and the
310 previous result is passed back unchanged but any LU can define it if it
311 wants to use the local cluster hook-scripts somehow.
313 @param phase: one of L{constants.HOOKS_PHASE_POST} or
314 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
315 @param hook_results: the results of the multi-node hooks rpc call
316 @param feedback_fn: function used to send feedback back to the caller
317 @param lu_result: the previous Exec result this LU had, or None
319 @return: the new Exec result, based on the previous result
323 # API must be kept, thus we ignore the unused-argument and
324 # could-be-a-function warnings
325 # pylint: disable=W0613,R0201
328 def _ExpandAndLockInstance(self):
329 """Helper function to expand and lock an instance.
331 Many LUs that work on an instance take its name in self.op.instance_name
332 and need to expand it and then declare the expanded name for locking. This
333 function does it, and then updates self.op.instance_name to the expanded
334 name. It also initializes needed_locks as a dict, if this hasn't been done
338 if self.needed_locks is None:
339 self.needed_locks = {}
341 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
342 "_ExpandAndLockInstance called with instance-level locks set"
343 self.op.instance_name = _ExpandInstanceName(self.cfg,
344 self.op.instance_name)
345 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
347 def _LockInstancesNodes(self, primary_only=False):
348 """Helper function to declare instances' nodes for locking.
350 This function should be called after locking one or more instances to lock
351 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
352 with all primary or secondary nodes for instances already locked and
353 present in self.needed_locks[locking.LEVEL_INSTANCE].
355 It should be called from DeclareLocks, and for safety only works if
356 self.recalculate_locks[locking.LEVEL_NODE] is set.
358 In the future it may grow parameters to lock only some instances' nodes, or
359 to lock only primary or secondary nodes, if needed.
361 It should be called in DeclareLocks in a way similar to::
363 if level == locking.LEVEL_NODE:
364 self._LockInstancesNodes()
366 @type primary_only: boolean
367 @param primary_only: only lock primary nodes of locked instances
370 assert locking.LEVEL_NODE in self.recalculate_locks, \
371 "_LockInstancesNodes helper function called with no nodes to recalculate"
373 # TODO: check if we've really been called with the instance locks held
375 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
376 # future we might want to have different behaviors depending on the value
377 # of self.recalculate_locks[locking.LEVEL_NODE]
379 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
380 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
381 wanted_nodes.append(instance.primary_node)
383 wanted_nodes.extend(instance.secondary_nodes)
385 if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
386 self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
387 elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
388 self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
390 del self.recalculate_locks[locking.LEVEL_NODE]
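# Usage sketch (an assumed LU shape, not a unit defined here): a typical
# caller locks an instance in ExpandNames, requests node-lock recalculation,
# and then delegates to _LockInstancesNodes from DeclareLocks.
#
#   def ExpandNames(self):
#     self._ExpandAndLockInstance()
#     self.needed_locks[locking.LEVEL_NODE] = []
#     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
#
#   def DeclareLocks(self, level):
#     if level == locking.LEVEL_NODE:
#       self._LockInstancesNodes()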
393 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
394 """Simple LU which runs no hooks.
396 This LU is intended as a parent for other LogicalUnits which will
397 run no hooks, in order to reduce duplicate code.
403 def BuildHooksEnv(self):
404 """Empty BuildHooksEnv for NoHooksLU.
406 This just raises an error.
409 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
411 def BuildHooksNodes(self):
412 """Empty BuildHooksNodes for NoHooksLU.
415 raise AssertionError("BuildHooksNodes called for NoHooksLU")
419 """Tasklet base class.
421 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
422 they can mix legacy code with tasklets. Locking needs to be done in the LU,
423 tasklets know nothing about locks.
425 Subclasses must follow these rules:
426 - Implement CheckPrereq
430 def __init__(self, lu):
437 def CheckPrereq(self):
438 """Check prerequisites for this tasklet.
440 This method should check whether the prerequisites for the execution of
441 this tasklet are fulfilled. It can do internode communication, but it
442 should be idempotent - no cluster or system changes are allowed.
444 The method should raise errors.OpPrereqError in case something is not
445 fulfilled. Its return value is ignored.
447 This method should also update all parameters to their canonical form if it
448 hasn't been done before.
453 def Exec(self, feedback_fn):
454 """Execute the tasklet.
456 This method should implement the actual work. It should raise
457 errors.OpExecError for failures that are somewhat dealt with in code, or
461 raise NotImplementedError
465 """Base for query utility classes.
468 #: Attribute holding field definitions
471 def __init__(self, filter_, fields, use_locking):
472 """Initializes this class.
475 self.use_locking = use_locking
477 self.query = query.Query(self.FIELDS, fields, filter_=filter_,
479 self.requested_data = self.query.RequestedData()
480 self.names = self.query.RequestedNames()
482 # Sort only if no names were requested
483 self.sort_by_name = not self.names
485 self.do_locking = None
488 def _GetNames(self, lu, all_names, lock_level):
489 """Helper function to determine names asked for in the query.
493 names = lu.owned_locks(lock_level)
497 if self.wanted == locking.ALL_SET:
498 assert not self.names
499 # caller didn't specify names, so ordering is not important
500 return utils.NiceSort(names)
502 # caller specified names and we must keep the same order
504 assert not self.do_locking or lu.glm.is_owned(lock_level)
506 missing = set(self.wanted).difference(names)
508 raise errors.OpExecError("Some items were removed before retrieving"
509 " their data: %s" % missing)
511 # Return expanded names
514 def ExpandNames(self, lu):
515 """Expand names for this query.
517 See L{LogicalUnit.ExpandNames}.
520 raise NotImplementedError()
522 def DeclareLocks(self, lu, level):
523 """Declare locks for this query.
525 See L{LogicalUnit.DeclareLocks}.
528 raise NotImplementedError()
530 def _GetQueryData(self, lu):
531 """Collects all data for this query.
533 @return: Query data object
536 raise NotImplementedError()
538 def NewStyleQuery(self, lu):
539 """Collect data and execute query.
542 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
543 sort_by_name=self.sort_by_name)
545 def OldStyleQuery(self, lu):
546 """Collect data and execute query.
549 return self.query.OldStyleQuery(self._GetQueryData(lu),
550 sort_by_name=self.sort_by_name)
554 """Returns a dict declaring all lock levels shared.
557 return dict.fromkeys(locking.LEVELS, 1)
560 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
561 """Checks if the owned node groups are still correct for an instance.
563 @type cfg: L{config.ConfigWriter}
564 @param cfg: The cluster configuration
565 @type instance_name: string
566 @param instance_name: Instance name
567 @type owned_groups: set or frozenset
568 @param owned_groups: List of currently owned node groups
571 inst_groups = cfg.GetInstanceNodeGroups(instance_name)
573 if not owned_groups.issuperset(inst_groups):
574 raise errors.OpPrereqError("Instance %s's node groups changed since"
575 " locks were acquired, current groups"
576 " are '%s', owning groups '%s'; retry the"
579 utils.CommaJoin(inst_groups),
580 utils.CommaJoin(owned_groups)),
586 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
587 """Checks if the instances in a node group are still correct.
589 @type cfg: L{config.ConfigWriter}
590 @param cfg: The cluster configuration
591 @type group_uuid: string
592 @param group_uuid: Node group UUID
593 @type owned_instances: set or frozenset
594 @param owned_instances: List of currently owned instances
597 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
598 if owned_instances != wanted_instances:
599 raise errors.OpPrereqError("Instances in node group '%s' changed since"
600 " locks were acquired, wanted '%s', have '%s';"
601 " retry the operation" %
603 utils.CommaJoin(wanted_instances),
604 utils.CommaJoin(owned_instances)),
607 return wanted_instances
610 def _SupportsOob(cfg, node):
611 """Tells if node supports OOB.
613 @type cfg: L{config.ConfigWriter}
614 @param cfg: The cluster configuration
615 @type node: L{objects.Node}
616 @param node: The node
617 @return: The OOB script if supported or an empty string otherwise
620 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
623 def _GetWantedNodes(lu, nodes):
624 """Returns list of checked and expanded node names.
626 @type lu: L{LogicalUnit}
627 @param lu: the logical unit on whose behalf we execute
629 @param nodes: list of node names or None for all nodes
631 @return: the list of nodes, sorted
632 @raise errors.ProgrammerError: if the nodes parameter is wrong type
636 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
638 return utils.NiceSort(lu.cfg.GetNodeList())
641 def _GetWantedInstances(lu, instances):
642 """Returns list of checked and expanded instance names.
644 @type lu: L{LogicalUnit}
645 @param lu: the logical unit on whose behalf we execute
646 @type instances: list
647 @param instances: list of instance names or None for all instances
649 @return: the list of instances, sorted
650 @raise errors.OpPrereqError: if the instances parameter is wrong type
651 @raise errors.OpPrereqError: if any of the passed instances is not found
655 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
657 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
661 def _GetUpdatedParams(old_params, update_dict,
662 use_default=True, use_none=False):
663 """Return the new version of a parameter dictionary.
665 @type old_params: dict
666 @param old_params: old parameters
667 @type update_dict: dict
668 @param update_dict: dict containing new parameter values, or
669 constants.VALUE_DEFAULT to reset the parameter to its default
671 @type use_default: boolean
672 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
673 values as 'to be deleted' values
674 @type use_none: boolean
675 @param use_none: whether to recognise C{None} values as 'to be
678 @return: the new parameter dictionary
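A small illustrative example (made-up values, default C{use_default=True})::

  # returns {'a': 1, 'c': 3}: 'b' is reset to its default (i.e. removed
  # from the result), 'c' is added
  _GetUpdatedParams({'a': 1, 'b': 2},
                    {'b': constants.VALUE_DEFAULT, 'c': 3})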
681 params_copy = copy.deepcopy(old_params)
682 for key, val in update_dict.iteritems():
683 if ((use_default and val == constants.VALUE_DEFAULT) or
684 (use_none and val is None)):
690 params_copy[key] = val
694 def _ReleaseLocks(lu, level, names=None, keep=None):
695 """Releases locks owned by an LU.
697 @type lu: L{LogicalUnit}
@param lu: the logical unit on whose behalf the locks are released
698 @param level: Lock level
699 @type names: list or None
700 @param names: Names of locks to release
701 @type keep: list or None
702 @param keep: Names of locks to retain
705 assert not (keep is not None and names is not None), \
706 "Only one of the 'names' and the 'keep' parameters can be given"
708 if names is not None:
709 should_release = names.__contains__
711 should_release = lambda name: name not in keep
713 should_release = None
719 # Determine which locks to release
720 for name in lu.owned_locks(level):
721 if should_release(name):
726 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
728 # Release just some locks
729 lu.glm.release(level, names=release)
731 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
734 lu.glm.release(level)
736 assert not lu.glm.is_owned(level), "No locks should be owned"
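# Usage sketch (hedged; the attribute names are placeholders): release the
# node locks an LU no longer needs once the target node is known, or drop a
# whole level after use.
#
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.node_name])
#   _ReleaseLocks(self, locking.LEVEL_INSTANCE)  # releases all instance locks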
739 def _MapInstanceDisksToNodes(instances):
740 """Creates a map from (node, volume) to instance name.
742 @type instances: list of L{objects.Instance}
743 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
746 return dict(((node, vol), inst.name)
747 for inst in instances
748 for (node, vols) in inst.MapLVsByNode().items()
752 def _RunPostHook(lu, node_name):
753 """Runs the post-hook for an opcode on a single node.
756 hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
758 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
760 # pylint: disable=W0702
761 lu.LogWarning("Errors occurred running hooks on %s" % node_name)
764 def _CheckOutputFields(static, dynamic, selected):
765 """Checks whether all selected fields are valid.
767 @type static: L{utils.FieldSet}
768 @param static: static fields set
769 @type dynamic: L{utils.FieldSet}
770 @param dynamic: dynamic fields set
777 delta = f.NonMatching(selected)
779 raise errors.OpPrereqError("Unknown output fields selected: %s"
780 % ",".join(delta), errors.ECODE_INVAL)
783 def _CheckGlobalHvParams(params):
784 """Validates that given hypervisor params are not global ones.
786 This will ensure that instances don't get customised versions of
790 used_globals = constants.HVC_GLOBALS.intersection(params)
792 msg = ("The following hypervisor parameters are global and cannot"
793 " be customized at instance level, please modify them at"
794 " cluster level: %s" % utils.CommaJoin(used_globals))
795 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
798 def _CheckNodeOnline(lu, node, msg=None):
799 """Ensure that a given node is online.
801 @param lu: the LU on behalf of which we make the check
802 @param node: the node to check
803 @param msg: if passed, should be a message to replace the default one
804 @raise errors.OpPrereqError: if the node is offline
808 msg = "Can't use offline node"
809 if lu.cfg.GetNodeInfo(node).offline:
810 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
813 def _CheckNodeNotDrained(lu, node):
814 """Ensure that a given node is not drained.
816 @param lu: the LU on behalf of which we make the check
817 @param node: the node to check
818 @raise errors.OpPrereqError: if the node is drained
821 if lu.cfg.GetNodeInfo(node).drained:
822 raise errors.OpPrereqError("Can't use drained node %s" % node,
826 def _CheckNodeVmCapable(lu, node):
827 """Ensure that a given node is vm capable.
829 @param lu: the LU on behalf of which we make the check
830 @param node: the node to check
831 @raise errors.OpPrereqError: if the node is not vm capable
834 if not lu.cfg.GetNodeInfo(node).vm_capable:
835 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
839 def _CheckNodeHasOS(lu, node, os_name, force_variant):
840 """Ensure that a node supports a given OS.
842 @param lu: the LU on behalf of which we make the check
843 @param node: the node to check
844 @param os_name: the OS to query about
845 @param force_variant: whether to ignore variant errors
846 @raise errors.OpPrereqError: if the node does not support the OS
849 result = lu.rpc.call_os_get(node, os_name)
850 result.Raise("OS '%s' not in supported OS list for node %s" %
852 prereq=True, ecode=errors.ECODE_INVAL)
853 if not force_variant:
854 _CheckOSVariant(result.payload, os_name)
857 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
858 """Ensure that a node has the given secondary ip.
860 @type lu: L{LogicalUnit}
861 @param lu: the LU on behalf of which we make the check
863 @param node: the node to check
864 @type secondary_ip: string
865 @param secondary_ip: the ip to check
866 @type prereq: boolean
867 @param prereq: whether to throw a prerequisite or an execute error
868 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
869 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
872 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
873 result.Raise("Failure checking secondary ip on node %s" % node,
874 prereq=prereq, ecode=errors.ECODE_ENVIRON)
875 if not result.payload:
876 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
877 " please fix and re-run this command" % secondary_ip)
879 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
881 raise errors.OpExecError(msg)
884 def _GetClusterDomainSecret():
885 """Reads the cluster domain secret.
888 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
892 def _CheckInstanceDown(lu, instance, reason):
893 """Ensure that an instance is not running."""
894 if instance.admin_up:
895 raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
896 (instance.name, reason), errors.ECODE_STATE)
898 pnode = instance.primary_node
899 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
900 ins_l.Raise("Can't contact node %s for instance information" % pnode,
901 prereq=True, ecode=errors.ECODE_ENVIRON)
903 if instance.name in ins_l.payload:
904 raise errors.OpPrereqError("Instance %s is running, %s" %
905 (instance.name, reason), errors.ECODE_STATE)
908 def _ExpandItemName(fn, name, kind):
909 """Expand an item name.
911 @param fn: the function to use for expansion
912 @param name: requested item name
913 @param kind: text description ('Node' or 'Instance')
914 @return: the resolved (full) name
915 @raise errors.OpPrereqError: if the item is not found
919 if full_name is None:
920 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
925 def _ExpandNodeName(cfg, name):
926 """Wrapper over L{_ExpandItemName} for nodes."""
927 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
930 def _ExpandInstanceName(cfg, name):
931 """Wrapper over L{_ExpandItemName} for instances."""
932 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
935 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
936 memory, vcpus, nics, disk_template, disks,
937 bep, hvp, hypervisor_name, tags):
938 """Builds instance related env variables for hooks
940 This builds the hook environment from individual variables.
943 @param name: the name of the instance
944 @type primary_node: string
945 @param primary_node: the name of the instance's primary node
946 @type secondary_nodes: list
947 @param secondary_nodes: list of secondary nodes as strings
948 @type os_type: string
949 @param os_type: the name of the instance's OS
950 @type status: boolean
951 @param status: the should_run status of the instance
953 @param memory: the memory size of the instance
955 @param vcpus: the count of VCPUs the instance has
957 @param nics: list of tuples (ip, mac, mode, link) representing
958 the NICs the instance has
959 @type disk_template: string
960 @param disk_template: the disk template of the instance
962 @param disks: the list of (size, mode) pairs
964 @param bep: the backend parameters for the instance
966 @param hvp: the hypervisor parameters for the instance
967 @type hypervisor_name: string
968 @param hypervisor_name: the hypervisor for the instance
970 @param tags: list of instance tags as strings
972 @return: the hook environment for this instance
981 "INSTANCE_NAME": name,
982 "INSTANCE_PRIMARY": primary_node,
983 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
984 "INSTANCE_OS_TYPE": os_type,
985 "INSTANCE_STATUS": str_status,
986 "INSTANCE_MEMORY": memory,
987 "INSTANCE_VCPUS": vcpus,
988 "INSTANCE_DISK_TEMPLATE": disk_template,
989 "INSTANCE_HYPERVISOR": hypervisor_name,
993 nic_count = len(nics)
994 for idx, (ip, mac, mode, link) in enumerate(nics):
997 env["INSTANCE_NIC%d_IP" % idx] = ip
998 env["INSTANCE_NIC%d_MAC" % idx] = mac
999 env["INSTANCE_NIC%d_MODE" % idx] = mode
1000 env["INSTANCE_NIC%d_LINK" % idx] = link
1001 if mode == constants.NIC_MODE_BRIDGED:
1002 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1006 env["INSTANCE_NIC_COUNT"] = nic_count
1009 disk_count = len(disks)
1010 for idx, (size, mode) in enumerate(disks):
1011 env["INSTANCE_DISK%d_SIZE" % idx] = size
1012 env["INSTANCE_DISK%d_MODE" % idx] = mode
1016 env["INSTANCE_DISK_COUNT"] = disk_count
1021 env["INSTANCE_TAGS"] = " ".join(tags)
1023 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1024 for key, value in source.items():
1025 env["INSTANCE_%s_%s" % (kind, key)] = value
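# For illustration (assumed values, not derived from a real cluster): with
# bep={"memory": 128} and hvp={"kernel_path": "/boot/vmlinuz"}, the loop above
# adds INSTANCE_BE_memory=128 and INSTANCE_HV_kernel_path=/boot/vmlinuz to the
# environment, next to the INSTANCE_NIC%d_* and INSTANCE_DISK%d_* variables
# built earlier.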
1030 def _NICListToTuple(lu, nics):
1031 """Build a list of nic information tuples.
1033 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1034 value in LUInstanceQueryData.
1036 @type lu: L{LogicalUnit}
1037 @param lu: the logical unit on whose behalf we execute
1038 @type nics: list of L{objects.NIC}
1039 @param nics: list of nics to convert to hooks tuples
1043 cluster = lu.cfg.GetClusterInfo()
1047 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1048 mode = filled_params[constants.NIC_MODE]
1049 link = filled_params[constants.NIC_LINK]
1050 hooks_nics.append((ip, mac, mode, link))
1054 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1055 """Builds instance related env variables for hooks from an object.
1057 @type lu: L{LogicalUnit}
1058 @param lu: the logical unit on whose behalf we execute
1059 @type instance: L{objects.Instance}
1060 @param instance: the instance for which we should build the
1062 @type override: dict
1063 @param override: dictionary with key/values that will override
1066 @return: the hook environment dictionary
1069 cluster = lu.cfg.GetClusterInfo()
1070 bep = cluster.FillBE(instance)
1071 hvp = cluster.FillHV(instance)
1073 "name": instance.name,
1074 "primary_node": instance.primary_node,
1075 "secondary_nodes": instance.secondary_nodes,
1076 "os_type": instance.os,
1077 "status": instance.admin_up,
1078 "memory": bep[constants.BE_MEMORY],
1079 "vcpus": bep[constants.BE_VCPUS],
1080 "nics": _NICListToTuple(lu, instance.nics),
1081 "disk_template": instance.disk_template,
1082 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1085 "hypervisor_name": instance.hypervisor,
1086 "tags": instance.tags,
1089 args.update(override)
1090 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1093 def _AdjustCandidatePool(lu, exceptions):
1094 """Adjust the candidate pool after node operations.
1097 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1099 lu.LogInfo("Promoted nodes to master candidate role: %s",
1100 utils.CommaJoin(node.name for node in mod_list))
1101 for name in mod_list:
1102 lu.context.ReaddNode(name)
1103 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1105 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1109 def _DecideSelfPromotion(lu, exceptions=None):
1110 """Decide whether I should promote myself as a master candidate.
1113 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1114 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1115 # the new node will increase mc_max with one, so:
1116 mc_should = min(mc_should + 1, cp_size)
1117 return mc_now < mc_should
1120 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1121 """Check that the bridges needed by a list of nics exist.
1124 cluster = lu.cfg.GetClusterInfo()
1125 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1126 brlist = [params[constants.NIC_LINK] for params in paramslist
1127 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1129 result = lu.rpc.call_bridges_exist(target_node, brlist)
1130 result.Raise("Error checking bridges on destination node '%s'" %
1131 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1134 def _CheckInstanceBridgesExist(lu, instance, node=None):
1135 """Check that the bridges needed by an instance exist.
1139 node = instance.primary_node
1140 _CheckNicsBridgesExist(lu, instance.nics, node)
1143 def _CheckOSVariant(os_obj, name):
1144 """Check whether an OS name conforms to the os variants specification.
1146 @type os_obj: L{objects.OS}
1147 @param os_obj: OS object to check
1149 @param name: OS name passed by the user, to check for validity
1152 variant = objects.OS.GetVariant(name)
1153 if not os_obj.supported_variants:
1155 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1156 " passed)" % (os_obj.name, variant),
1160 raise errors.OpPrereqError("OS name must include a variant",
1163 if variant not in os_obj.supported_variants:
1164 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1167 def _GetNodeInstancesInner(cfg, fn):
1168 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1171 def _GetNodeInstances(cfg, node_name):
1172 """Returns a list of all primary and secondary instances on a node.
1176 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1179 def _GetNodePrimaryInstances(cfg, node_name):
1180 """Returns primary instances on a node.
1183 return _GetNodeInstancesInner(cfg,
1184 lambda inst: node_name == inst.primary_node)
1187 def _GetNodeSecondaryInstances(cfg, node_name):
1188 """Returns secondary instances on a node.
1191 return _GetNodeInstancesInner(cfg,
1192 lambda inst: node_name in inst.secondary_nodes)
1195 def _GetStorageTypeArgs(cfg, storage_type):
1196 """Returns the arguments for a storage type.
1199 # Special case for file storage
1200 if storage_type == constants.ST_FILE:
1201 # storage.FileStorage wants a list of storage directories
1202 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1207 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1210 for dev in instance.disks:
1211 cfg.SetDiskID(dev, node_name)
1213 result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1214 result.Raise("Failed to get disk status from node %s" % node_name,
1215 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1217 for idx, bdev_status in enumerate(result.payload):
1218 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1224 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1225 """Check the sanity of iallocator and node arguments and use the
1226 cluster-wide iallocator if appropriate.
1228 Check that at most one of (iallocator, node) is specified. If none is
1229 specified, then the LU's opcode's iallocator slot is filled with the
1230 cluster-wide default iallocator.
1232 @type iallocator_slot: string
1233 @param iallocator_slot: the name of the opcode iallocator slot
1234 @type node_slot: string
1235 @param node_slot: the name of the opcode target node slot
1238 node = getattr(lu.op, node_slot, None)
1239 iallocator = getattr(lu.op, iallocator_slot, None)
1241 if node is not None and iallocator is not None:
1242 raise errors.OpPrereqError("Do not specify both an iallocator and a node",
1244 elif node is None and iallocator is None:
1245 default_iallocator = lu.cfg.GetDefaultIAllocator()
1246 if default_iallocator:
1247 setattr(lu.op, iallocator_slot, default_iallocator)
1249 raise errors.OpPrereqError("No iallocator or node given and no"
1250 " cluster-wide default iallocator found;"
1251 " please specify either an iallocator or a"
1252 " node, or set a cluster-wide default"
1256 def _GetDefaultIAllocator(cfg, iallocator):
1257 """Decides on which iallocator to use.
1259 @type cfg: L{config.ConfigWriter}
1260 @param cfg: Cluster configuration object
1261 @type iallocator: string or None
1262 @param iallocator: Iallocator specified in opcode
1264 @return: Iallocator name
1268 # Use default iallocator
1269 iallocator = cfg.GetDefaultIAllocator()
1272 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1273 " opcode nor as a cluster-wide default",
1279 class LUClusterPostInit(LogicalUnit):
1280 """Logical unit for running hooks after cluster initialization.
1283 HPATH = "cluster-init"
1284 HTYPE = constants.HTYPE_CLUSTER
1286 def BuildHooksEnv(self):
1291 "OP_TARGET": self.cfg.GetClusterName(),
1294 def BuildHooksNodes(self):
1295 """Build hooks nodes.
1298 return ([], [self.cfg.GetMasterNode()])
1300 def Exec(self, feedback_fn):
1307 class LUClusterDestroy(LogicalUnit):
1308 """Logical unit for destroying the cluster.
1311 HPATH = "cluster-destroy"
1312 HTYPE = constants.HTYPE_CLUSTER
1314 def BuildHooksEnv(self):
1319 "OP_TARGET": self.cfg.GetClusterName(),
1322 def BuildHooksNodes(self):
1323 """Build hooks nodes.
1328 def CheckPrereq(self):
1329 """Check prerequisites.
1331 This checks whether the cluster is empty.
1333 Any errors are signaled by raising errors.OpPrereqError.
1336 master = self.cfg.GetMasterNode()
1338 nodelist = self.cfg.GetNodeList()
1339 if len(nodelist) != 1 or nodelist[0] != master:
1340 raise errors.OpPrereqError("There are still %d node(s) in"
1341 " this cluster." % (len(nodelist) - 1),
1343 instancelist = self.cfg.GetInstanceList()
1345 raise errors.OpPrereqError("There are still %d instance(s) in"
1346 " this cluster." % len(instancelist),
1349 def Exec(self, feedback_fn):
1350 """Destroys the cluster.
1353 master = self.cfg.GetMasterNode()
1355 # Run post hooks on master node before it's removed
1356 _RunPostHook(self, master)
1358 result = self.rpc.call_node_deactivate_master_ip(master)
1359 result.Raise("Could not disable the master role")
1364 def _VerifyCertificate(filename):
1365 """Verifies a certificate for L{LUClusterVerifyConfig}.
1367 @type filename: string
1368 @param filename: Path to PEM file
1372 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1373 utils.ReadFile(filename))
1374 except Exception, err: # pylint: disable=W0703
1375 return (LUClusterVerifyConfig.ETYPE_ERROR,
1376 "Failed to load X509 certificate %s: %s" % (filename, err))
1379 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1380 constants.SSL_CERT_EXPIRATION_ERROR)
1383 fnamemsg = "While verifying %s: %s" % (filename, msg)
1388 return (None, fnamemsg)
1389 elif errcode == utils.CERT_WARNING:
1390 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1391 elif errcode == utils.CERT_ERROR:
1392 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1394 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1397 def _GetAllHypervisorParameters(cluster, instances):
1398 """Compute the set of all hypervisor parameters.
1400 @type cluster: L{objects.Cluster}
1401 @param cluster: the cluster object
1402 @type instances: list of L{objects.Instance}
1403 @param instances: additional instances from which to obtain parameters
1404 @rtype: list of (origin, hypervisor, parameters)
1405 @return: a list with all parameters found, indicating the hypervisor they
1406 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1411 for hv_name in cluster.enabled_hypervisors:
1412 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1414 for os_name, os_hvp in cluster.os_hvp.items():
1415 for hv_name, hv_params in os_hvp.items():
1417 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1418 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1420 # TODO: collapse identical parameter values in a single one
1421 for instance in instances:
1422 if instance.hvparams:
1423 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1424 cluster.FillHV(instance)))
1429 class _VerifyErrors(object):
1430 """Mix-in for cluster/group verify LUs.
1432 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1433 self.op and self._feedback_fn to be available.)
1436 TCLUSTER = "cluster"
1438 TINSTANCE = "instance"
1440 ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1441 ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1442 ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
1443 ECLUSTERDANGLINGNODES = (TNODE, "ECLUSTERDANGLINGNODES")
1444 ECLUSTERDANGLINGINST = (TNODE, "ECLUSTERDANGLINGINST")
1445 EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1446 EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1447 EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1448 EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1449 EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1450 EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1451 EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
1452 ENODEDRBD = (TNODE, "ENODEDRBD")
1453 ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1454 ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1455 ENODEHOOKS = (TNODE, "ENODEHOOKS")
1456 ENODEHV = (TNODE, "ENODEHV")
1457 ENODELVM = (TNODE, "ENODELVM")
1458 ENODEN1 = (TNODE, "ENODEN1")
1459 ENODENET = (TNODE, "ENODENET")
1460 ENODEOS = (TNODE, "ENODEOS")
1461 ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1462 ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1463 ENODERPC = (TNODE, "ENODERPC")
1464 ENODESSH = (TNODE, "ENODESSH")
1465 ENODEVERSION = (TNODE, "ENODEVERSION")
1466 ENODESETUP = (TNODE, "ENODESETUP")
1467 ENODETIME = (TNODE, "ENODETIME")
1468 ENODEOOBPATH = (TNODE, "ENODEOOBPATH")
1470 ETYPE_FIELD = "code"
1471 ETYPE_ERROR = "ERROR"
1472 ETYPE_WARNING = "WARNING"
1474 def _Error(self, ecode, item, msg, *args, **kwargs):
1475 """Format an error message.
1477 Based on the opcode's error_codes parameter, either format a
1478 parseable error code, or a simpler error string.
1480 This must be called only from Exec and functions called from Exec.
1483 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1485 # first complete the msg
1488 # then format the whole message
1489 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1490 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1496 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1497 # and finally report it via the feedback_fn
1498 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
1500 def _ErrorIf(self, cond, *args, **kwargs):
1501 """Log an error message if the passed condition is True.
1505 or self.op.debug_simulate_errors) # pylint: disable=E1101
1507 self._Error(*args, **kwargs)
1508 # do not mark the operation as failed for WARN cases only
1509 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1510 self.bad = self.bad or cond
1513 class LUClusterVerify(NoHooksLU):
1514 """Submits all jobs necessary to verify the cluster.
1519 def ExpandNames(self):
1520 self.needed_locks = {}
1522 def Exec(self, feedback_fn):
1525 if self.op.group_name:
1526 groups = [self.op.group_name]
1527 depends_fn = lambda: None
1529 groups = self.cfg.GetNodeGroupList()
1531 # Verify global configuration
1532 jobs.append([opcodes.OpClusterVerifyConfig()])
1534 # Always depend on global verification
1535 depends_fn = lambda: [(-len(jobs), [])]
1537 jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1538 depends=depends_fn())]
1539 for group in groups)
1541 # Fix up all parameters
1542 for op in itertools.chain(*jobs): # pylint: disable=W0142
1543 op.debug_simulate_errors = self.op.debug_simulate_errors
1544 op.verbose = self.op.verbose
1545 op.error_codes = self.op.error_codes
1547 op.skip_checks = self.op.skip_checks
1548 except AttributeError:
1549 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1551 return ResultWithJobs(jobs)
1554 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1555 """Verifies the cluster config.
1560 def _VerifyHVP(self, hvp_data):
1561 """Verifies locally the syntax of the hypervisor parameters.
1564 for item, hv_name, hv_params in hvp_data:
1565 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1568 hv_class = hypervisor.GetHypervisor(hv_name)
1569 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1570 hv_class.CheckParameterSyntax(hv_params)
1571 except errors.GenericError, err:
1572 self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
1574 def ExpandNames(self):
1575 # Information can be safely retrieved as the BGL is acquired in exclusive
1577 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
1578 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1579 self.all_node_info = self.cfg.GetAllNodesInfo()
1580 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1581 self.needed_locks = {}
1583 def Exec(self, feedback_fn):
1584 """Verify integrity of cluster, performing various tests on nodes.
1588 self._feedback_fn = feedback_fn
1590 feedback_fn("* Verifying cluster config")
1592 for msg in self.cfg.VerifyConfig():
1593 self._ErrorIf(True, self.ECLUSTERCFG, None, msg)
1595 feedback_fn("* Verifying cluster certificate files")
1597 for cert_filename in constants.ALL_CERT_FILES:
1598 (errcode, msg) = _VerifyCertificate(cert_filename)
1599 self._ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1601 feedback_fn("* Verifying hypervisor parameters")
1603 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1604 self.all_inst_info.values()))
1606 feedback_fn("* Verifying all nodes belong to an existing group")
1608 # We do this verification here because, should this bogus circumstance
1609 # occur, it would never be caught by VerifyGroup, which only acts on
1610 # nodes/instances reachable from existing node groups.
1612 dangling_nodes = set(node.name for node in self.all_node_info.values()
1613 if node.group not in self.all_group_info)
1615 dangling_instances = {}
1616 no_node_instances = []
1618 for inst in self.all_inst_info.values():
1619 if inst.primary_node in dangling_nodes:
1620 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1621 elif inst.primary_node not in self.all_node_info:
1622 no_node_instances.append(inst.name)
1627 utils.CommaJoin(dangling_instances.get(node.name,
1629 for node in dangling_nodes]
1631 self._ErrorIf(bool(dangling_nodes), self.ECLUSTERDANGLINGNODES, None,
1632 "the following nodes (and their instances) belong to a"
1633 " non-existing group: %s", utils.CommaJoin(pretty_dangling))
1635 self._ErrorIf(bool(no_node_instances), self.ECLUSTERDANGLINGINST, None,
1636 "the following instances have a non-existing primary-node:"
1637 " %s", utils.CommaJoin(no_node_instances))
1642 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1643 """Verifies the status of a node group.
1646 HPATH = "cluster-verify"
1647 HTYPE = constants.HTYPE_CLUSTER
1650 _HOOKS_INDENT_RE = re.compile("^", re.M)
1652 class NodeImage(object):
1653 """A class representing the logical and physical status of a node.
1656 @ivar name: the node name to which this object refers
1657 @ivar volumes: a structure as returned from
1658 L{ganeti.backend.GetVolumeList} (runtime)
1659 @ivar instances: a list of running instances (runtime)
1660 @ivar pinst: list of configured primary instances (config)
1661 @ivar sinst: list of configured secondary instances (config)
1662 @ivar sbp: dictionary of {primary-node: list of instances} for all
1663 instances for which this node is secondary (config)
1664 @ivar mfree: free memory, as reported by hypervisor (runtime)
1665 @ivar dfree: free disk, as reported by the node (runtime)
1666 @ivar offline: the offline status (config)
1667 @type rpc_fail: boolean
1668 @ivar rpc_fail: whether the RPC verify call was successful (overall,
1669 not whether the individual keys were correct) (runtime)
1670 @type lvm_fail: boolean
1671 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1672 @type hyp_fail: boolean
1673 @ivar hyp_fail: whether the RPC call didn't return the instance list
1674 @type ghost: boolean
1675 @ivar ghost: whether this is a known node or not (config)
1676 @type os_fail: boolean
1677 @ivar os_fail: whether the RPC call didn't return valid OS data
1679 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1680 @type vm_capable: boolean
1681 @ivar vm_capable: whether the node can host instances
1684 def __init__(self, offline=False, name=None, vm_capable=True):
1693 self.offline = offline
1694 self.vm_capable = vm_capable
1695 self.rpc_fail = False
1696 self.lvm_fail = False
1697 self.hyp_fail = False
1699 self.os_fail = False
1702 def ExpandNames(self):
1703 # This raises errors.OpPrereqError on its own:
1704 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1706 # Get instances in node group; this is unsafe and needs verification later
1707 inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
1709 self.needed_locks = {
1710 locking.LEVEL_INSTANCE: inst_names,
1711 locking.LEVEL_NODEGROUP: [self.group_uuid],
1712 locking.LEVEL_NODE: [],
1715 self.share_locks = _ShareAll()
1717 def DeclareLocks(self, level):
1718 if level == locking.LEVEL_NODE:
1719 # Get members of node group; this is unsafe and needs verification later
1720 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1722 all_inst_info = self.cfg.GetAllInstancesInfo()
1724 # In Exec(), we warn about mirrored instances that have primary and
1725 # secondary living in separate node groups. To fully verify that
1726 # volumes for these instances are healthy, we will need to do an
1727 # extra call to their secondaries. We ensure here those nodes will
1729 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
1730 # Important: access only the instances whose lock is owned
1731 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1732 nodes.update(all_inst_info[inst].secondary_nodes)
1734 self.needed_locks[locking.LEVEL_NODE] = nodes
1736 def CheckPrereq(self):
1737 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
1738 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
1740 group_nodes = set(self.group_info.members)
1741 group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
1744 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1746 unlocked_instances = \
1747 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
1750 raise errors.OpPrereqError("Missing lock for nodes: %s" %
1751 utils.CommaJoin(unlocked_nodes))
1753 if unlocked_instances:
1754 raise errors.OpPrereqError("Missing lock for instances: %s" %
1755 utils.CommaJoin(unlocked_instances))
1757 self.all_node_info = self.cfg.GetAllNodesInfo()
1758 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1760 self.my_node_names = utils.NiceSort(group_nodes)
1761 self.my_inst_names = utils.NiceSort(group_instances)
1763 self.my_node_info = dict((name, self.all_node_info[name])
1764 for name in self.my_node_names)
1766 self.my_inst_info = dict((name, self.all_inst_info[name])
1767 for name in self.my_inst_names)
1769 # We detect here the nodes that will need the extra RPC calls for verifying
1770 # split LV volumes; they should be locked.
1771 extra_lv_nodes = set()
1773 for inst in self.my_inst_info.values():
1774 if inst.disk_template in constants.DTS_INT_MIRROR:
1775 group = self.my_node_info[inst.primary_node].group
1776 for nname in inst.secondary_nodes:
1777 if self.all_node_info[nname].group != group:
1778 extra_lv_nodes.add(nname)
1780 unlocked_lv_nodes = \
1781 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1783 if unlocked_lv_nodes:
1784 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
1785 utils.CommaJoin(unlocked_lv_nodes))
1786 self.extra_lv_nodes = list(extra_lv_nodes)
1788 def _VerifyNode(self, ninfo, nresult):
1789 """Perform some basic validation on data returned from a node.
1791 - check the result data structure is well formed and has all the
1793 - check ganeti version
1795 @type ninfo: L{objects.Node}
1796 @param ninfo: the node to check
1797 @param nresult: the results from the node
1799 @return: whether overall this call was successful (and we can expect
1800 reasonable values in the response)
1804 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1806 # main result, nresult should be a non-empty dict
1807 test = not nresult or not isinstance(nresult, dict)
1808 _ErrorIf(test, self.ENODERPC, node,
1809 "unable to verify node: no data returned")
1813 # compares ganeti version
1814 local_version = constants.PROTOCOL_VERSION
1815 remote_version = nresult.get("version", None)
1816 test = not (remote_version and
1817 isinstance(remote_version, (list, tuple)) and
1818 len(remote_version) == 2)
1819 _ErrorIf(test, self.ENODERPC, node,
1820 "connection to node returned invalid data")
1824 test = local_version != remote_version[0]
1825 _ErrorIf(test, self.ENODEVERSION, node,
1826 "incompatible protocol versions: master %s,"
1827 " node %s", local_version, remote_version[0])
1831 # node seems compatible, we can actually try to look into its results
1833 # full package version
1834 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1835 self.ENODEVERSION, node,
1836 "software version mismatch: master %s, node %s",
1837 constants.RELEASE_VERSION, remote_version[1],
1838 code=self.ETYPE_WARNING)
1840 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1841 if ninfo.vm_capable and isinstance(hyp_result, dict):
1842 for hv_name, hv_result in hyp_result.iteritems():
1843 test = hv_result is not None
1844 _ErrorIf(test, self.ENODEHV, node,
1845 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1847 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1848 if ninfo.vm_capable and isinstance(hvp_result, list):
1849 for item, hv_name, hv_result in hvp_result:
1850 _ErrorIf(True, self.ENODEHV, node,
1851 "hypervisor %s parameter verify failure (source %s): %s",
1852 hv_name, item, hv_result)
1854 test = nresult.get(constants.NV_NODESETUP,
1855 ["Missing NODESETUP results"])
1856 _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1861 def _VerifyNodeTime(self, ninfo, nresult,
1862 nvinfo_starttime, nvinfo_endtime):
1863 """Check the node time.
1865 @type ninfo: L{objects.Node}
1866 @param ninfo: the node to check
1867 @param nresult: the remote results for the node
1868 @param nvinfo_starttime: the start time of the RPC call
1869 @param nvinfo_endtime: the end time of the RPC call
1873 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1875 ntime = nresult.get(constants.NV_TIME, None)
1877 ntime_merged = utils.MergeTime(ntime)
1878 except (ValueError, TypeError):
1879 _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1882 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1883 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1884 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1885 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1889 _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1890 "Node time diverges by at least %s from master node time",
1893 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1894 """Check the node LVM results.
1896 @type ninfo: L{objects.Node}
1897 @param ninfo: the node to check
1898 @param nresult: the remote results for the node
1899 @param vg_name: the configured VG name
1906 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1908 # checks vg existence and size > 20G
1909 vglist = nresult.get(constants.NV_VGLIST, None)
1911 _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1913 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1914 constants.MIN_VG_SIZE)
1915 _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1918 pvlist = nresult.get(constants.NV_PVLIST, None)
1919 test = pvlist is None
1920 _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1922 # check that ':' is not present in PV names, since it's a
1923 # special character for lvcreate (denotes the range of PEs to
1925 for _, pvname, owner_vg in pvlist:
1926 test = ":" in pvname
1927 _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1928 " '%s' of VG '%s'", pvname, owner_vg)
1930 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
1931 """Check the node bridges.
1933 @type ninfo: L{objects.Node}
1934 @param ninfo: the node to check
1935 @param nresult: the remote results for the node
1936 @param bridges: the expected list of bridges
1943 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1945 missing = nresult.get(constants.NV_BRIDGES, None)
1946 test = not isinstance(missing, list)
1947 _ErrorIf(test, self.ENODENET, node,
1948 "did not return valid bridge information")
1950 _ErrorIf(bool(missing), self.ENODENET, node, "missing bridges: %s" %
1951 utils.CommaJoin(sorted(missing)))
1953 def _VerifyNodeNetwork(self, ninfo, nresult):
1954 """Check the node network connectivity results.
1956 @type ninfo: L{objects.Node}
1957 @param ninfo: the node to check
1958 @param nresult: the remote results for the node
1962 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1964 test = constants.NV_NODELIST not in nresult
1965 _ErrorIf(test, self.ENODESSH, node,
1966 "node hasn't returned node ssh connectivity data")
1968 if nresult[constants.NV_NODELIST]:
1969 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1970 _ErrorIf(True, self.ENODESSH, node,
1971 "ssh communication with node '%s': %s", a_node, a_msg)
1973 test = constants.NV_NODENETTEST not in nresult
1974 _ErrorIf(test, self.ENODENET, node,
1975 "node hasn't returned node tcp connectivity data")
1977 if nresult[constants.NV_NODENETTEST]:
1978 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1980 _ErrorIf(True, self.ENODENET, node,
1981 "tcp communication with node '%s': %s",
1982 anode, nresult[constants.NV_NODENETTEST][anode])
1984 test = constants.NV_MASTERIP not in nresult
1985 _ErrorIf(test, self.ENODENET, node,
1986 "node hasn't returned node master IP reachability data")
1988 if not nresult[constants.NV_MASTERIP]:
1989 if node == self.master_node:
1990 msg = "the master node cannot reach the master IP (not configured?)"
1992 msg = "cannot reach the master IP"
1993 _ErrorIf(True, self.ENODENET, node, msg)
1995 def _VerifyInstance(self, instance, instanceconfig, node_image,
1997 """Verify an instance.
1999 This function checks whether the required block devices are
2000 available on the instance's nodes.
2003 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2004 node_current = instanceconfig.primary_node
2006 node_vol_should = {}
2007 instanceconfig.MapLVsByNode(node_vol_should)
2009 for node in node_vol_should:
2010 n_img = node_image[node]
2011 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2012 # ignore missing volumes on offline or broken nodes
2014 for volume in node_vol_should[node]:
2015 test = volume not in n_img.volumes
2016 _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
2017 "volume %s missing on node %s", volume, node)
2019 if instanceconfig.admin_up:
2020 pri_img = node_image[node_current]
2021 test = instance not in pri_img.instances and not pri_img.offline
2022 _ErrorIf(test, self.EINSTANCEDOWN, instance,
2023 "instance not running on its primary node %s",
2026 diskdata = [(nname, success, status, idx)
2027 for (nname, disks) in diskstatus.items()
2028 for idx, (success, status) in enumerate(disks)]
2030 for nname, success, bdev_status, idx in diskdata:
2031 # the 'ghost node' construction in Exec() ensures that we have a
2033 snode = node_image[nname]
2034 bad_snode = snode.ghost or snode.offline
2035 _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
2036 self.EINSTANCEFAULTYDISK, instance,
2037 "couldn't retrieve status for disk/%s on %s: %s",
2038 idx, nname, bdev_status)
2039 _ErrorIf((instanceconfig.admin_up and success and
2040 bdev_status.ldisk_status == constants.LDS_FAULTY),
2041 self.EINSTANCEFAULTYDISK, instance,
2042 "disk/%s on %s is faulty", idx, nname)
2044 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2045 """Verify if there are any unknown volumes in the cluster.
2047 The .os, .swap and backup volumes are ignored. All other volumes are
2048 reported as unknown.
2050 @type reserved: L{ganeti.utils.FieldSet}
2051 @param reserved: a FieldSet of reserved volume names
2054 for node, n_img in node_image.items():
2055 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2056 # skip non-healthy nodes
2058 for volume in n_img.volumes:
2059 test = ((node not in node_vol_should or
2060 volume not in node_vol_should[node]) and
2061 not reserved.Matches(volume))
2062 self._ErrorIf(test, self.ENODEORPHANLV, node,
2063 "volume %s is unknown", volume)
2065 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2066 """Verify N+1 Memory Resilience.
2068 Check that if one single node dies we can still start all the
2069 instances it was primary for.
2072 cluster_info = self.cfg.GetClusterInfo()
2073 for node, n_img in node_image.items():
2074 # This code checks that every node which is now listed as
2075 # secondary has enough memory to host all instances it is
2076 # supposed to, should a single other node in the cluster fail.
2077 # FIXME: not ready for failover to an arbitrary node
2078 # FIXME: does not support file-backed instances
2079 # WARNING: we currently take into account down instances as well
2080 # as up ones, considering that even if they're down someone
2081 # might want to start them even in the event of a node failure.
2083 # we're skipping offline nodes from the N+1 warning, since
2084 # most likely we don't have good memory information from them;
2085 # we already list instances living on such nodes, and that's
2088 for prinode, instances in n_img.sbp.items():
2090 for instance in instances:
2091 bep = cluster_info.FillBE(instance_cfg[instance])
2092 if bep[constants.BE_AUTO_BALANCE]:
2093 needed_mem += bep[constants.BE_MEMORY]
2094 test = n_img.mfree < needed_mem
2095 self._ErrorIf(test, self.ENODEN1, node,
2096 "not enough memory to accommodate instance failovers"
2097 " should node %s fail (%dMiB needed, %dMiB available)",
2098 prinode, needed_mem, n_img.mfree)
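  # Illustrative sketch, not part of the LU (hypothetical numbers): if this
  # node is secondary for three auto-balanced instances whose primary is node
  # B, with BE_MEMORY of 512, 1024 and 2048 MiB, then needed_mem for prinode B
  # is 3584 MiB; an mfree value below that triggers the ENODEN1 warning above.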
2101 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2102 (files_all, files_all_opt, files_mc, files_vm)):
2103 """Verifies file checksums collected from all nodes.
2105 @param errorif: Callback for reporting errors
2106 @param nodeinfo: List of L{objects.Node} objects
2107 @param master_node: Name of master node
2108 @param all_nvinfo: RPC results
2111 assert (len(files_all | files_all_opt | files_mc | files_vm) ==
2112 sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
2113 "Found file listed in more than one file list"
2115 # Define functions determining which nodes to consider for a file
2118 (files_all_opt, None),
2119 (files_mc, lambda node: (node.master_candidate or
2120 node.name == master_node)),
2121 (files_vm, lambda node: node.vm_capable),
2124 # Build mapping from filename to list of nodes which should have the file
2126 for (files, fn) in files2nodefn:
2128 filenodes = nodeinfo
2130 filenodes = filter(fn, nodeinfo)
2131 nodefiles.update((filename,
2132 frozenset(map(operator.attrgetter("name"), filenodes)))
2133 for filename in files)
2135 assert set(nodefiles) == (files_all | files_all_opt | files_mc | files_vm)
2137 fileinfo = dict((filename, {}) for filename in nodefiles)
2138 ignore_nodes = set()
2140 for node in nodeinfo:
2142 ignore_nodes.add(node.name)
2145 nresult = all_nvinfo[node.name]
2147 if nresult.fail_msg or not nresult.payload:
2150 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2152 test = not (node_files and isinstance(node_files, dict))
2153 errorif(test, cls.ENODEFILECHECK, node.name,
2154 "Node did not return file checksum data")
2156 ignore_nodes.add(node.name)
2159 # Build per-checksum mapping from filename to nodes having it
2160 for (filename, checksum) in node_files.items():
2161 assert filename in nodefiles
2162 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2164 for (filename, checksums) in fileinfo.items():
2165 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2167 # Nodes having the file
2168 with_file = frozenset(node_name
2169 for nodes in fileinfo[filename].values()
2170 for node_name in nodes) - ignore_nodes
2172 expected_nodes = nodefiles[filename] - ignore_nodes
2174 # Nodes missing file
2175 missing_file = expected_nodes - with_file
2177 if filename in files_all_opt:
2179 errorif(missing_file and missing_file != expected_nodes,
2180 cls.ECLUSTERFILECHECK, None,
2181 "File %s is optional, but it must exist on all or no"
2182 " nodes (not found on %s)",
2183 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2185 # Non-optional files
2186 errorif(missing_file, cls.ECLUSTERFILECHECK, None,
2187 "File %s is missing from node(s) %s", filename,
2188 utils.CommaJoin(utils.NiceSort(missing_file)))
2190 # Warn if a node has a file it shouldn't
2191 unexpected = with_file - expected_nodes
2193 cls.ECLUSTERFILECHECK, None,
2194 "File %s should not exist on node(s) %s",
2195 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2197 # See if there are multiple versions of the file
2198 test = len(checksums) > 1
2200 variants = ["variant %s on %s" %
2201 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2202 for (idx, (checksum, nodes)) in
2203 enumerate(sorted(checksums.items()))]
2207 errorif(test, cls.ECLUSTERFILECHECK, None,
2208 "File %s found with %s different checksums (%s)",
2209 filename, len(checksums), "; ".join(variants))
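  # Illustrative sketch, not part of the LU (hypothetical names and checksums):
  # fileinfo maps each filename to a dict of checksum -> set of node names, so
  # a file that differs between nodes would look like
  #
  #   fileinfo["/etc/hosts"] == {
  #     "0db75e03ab9f...": set(["node1", "node2"]),
  #     "9f86d0818856...": set(["node3"]),
  #   }
  #
  # and len(checksums) > 1 above then reports the per-variant node lists.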
2211 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2213 """Verifies the node DRBD status.
2215 @type ninfo: L{objects.Node}
2216 @param ninfo: the node to check
2217 @param nresult: the remote results for the node
2218 @param instanceinfo: the dict of instances
2219 @param drbd_helper: the configured DRBD usermode helper
2220 @param drbd_map: the DRBD map as returned by
2221 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2225 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2228 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2229 test = (helper_result is None)
2230 _ErrorIf(test, self.ENODEDRBDHELPER, node,
2231 "no drbd usermode helper returned")
2233 status, payload = helper_result
2235 _ErrorIf(test, self.ENODEDRBDHELPER, node,
2236 "drbd usermode helper check unsuccessful: %s", payload)
2237 test = status and (payload != drbd_helper)
2238 _ErrorIf(test, self.ENODEDRBDHELPER, node,
2239 "wrong drbd usermode helper: %s", payload)
2241 # compute the DRBD minors
2243 for minor, instance in drbd_map[node].items():
2244 test = instance not in instanceinfo
2245 _ErrorIf(test, self.ECLUSTERCFG, None,
2246 "ghost instance '%s' in temporary DRBD map", instance)
2247 # ghost instance should not be running, but otherwise we
2248 # don't give double warnings (both ghost instance and
2249 # unallocated minor in use)
2251 node_drbd[minor] = (instance, False)
2253 instance = instanceinfo[instance]
2254 node_drbd[minor] = (instance.name, instance.admin_up)
2256 # and now check them
2257 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2258 test = not isinstance(used_minors, (tuple, list))
2259 _ErrorIf(test, self.ENODEDRBD, node,
2260 "cannot parse drbd status file: %s", str(used_minors))
2262 # we cannot check drbd status
2265 for minor, (iname, must_exist) in node_drbd.items():
2266 test = minor not in used_minors and must_exist
2267 _ErrorIf(test, self.ENODEDRBD, node,
2268 "drbd minor %d of instance %s is not active", minor, iname)
2269 for minor in used_minors:
2270 test = minor not in node_drbd
2271 _ErrorIf(test, self.ENODEDRBD, node,
2272 "unallocated drbd minor %d is in use", minor)
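  # Illustrative sketch, not part of the LU (hypothetical names): node_drbd
  # maps each configured minor to (instance_name, must_exist) and is compared
  # against the minors the node actually reports, e.g.
  #
  #   node_drbd = {0: ("instance1.example.com", True),   # must be active
  #                1: ("ghost-instance", False)}         # may be inactive
  #   used_minors = [0, 2]
  #   # minor 1 missing is fine, minor 2 is flagged as an unallocated minor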
2274 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2275 """Builds the node OS structures.
2277 @type ninfo: L{objects.Node}
2278 @param ninfo: the node to check
2279 @param nresult: the remote results for the node
2280 @param nimg: the node image object
2284 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2286 remote_os = nresult.get(constants.NV_OSLIST, None)
2287 test = (not isinstance(remote_os, list) or
2288 not compat.all(isinstance(v, list) and len(v) == 7
2289 for v in remote_os))
2291 _ErrorIf(test, self.ENODEOS, node,
2292 "node hasn't returned valid OS data")
2301 for (name, os_path, status, diagnose,
2302 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2304 if name not in os_dict:
2307 # parameters is a list of lists instead of list of tuples due to
2308 # JSON lacking a real tuple type, fix it:
2309 parameters = [tuple(v) for v in parameters]
2310 os_dict[name].append((os_path, status, diagnose,
2311 set(variants), set(parameters), set(api_ver)))
2313 nimg.oslist = os_dict
2315 def _VerifyNodeOS(self, ninfo, nimg, base):
2316 """Verifies the node OS list.
2318 @type ninfo: L{objects.Node}
2319 @param ninfo: the node to check
2320 @param nimg: the node image object
2321 @param base: the 'template' node we match against (e.g. from the master)
2325 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2327 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2329 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2330 for os_name, os_data in nimg.oslist.items():
2331 assert os_data, "Empty OS status for OS %s?!" % os_name
2332 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2333 _ErrorIf(not f_status, self.ENODEOS, node,
2334 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2335 _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
2336 "OS '%s' has multiple entries (first one shadows the rest): %s",
2337 os_name, utils.CommaJoin([v[0] for v in os_data]))
2338 # comparisons with the 'base' image
2339 test = os_name not in base.oslist
2340 _ErrorIf(test, self.ENODEOS, node,
2341 "Extra OS %s not present on reference node (%s)",
2345 assert base.oslist[os_name], "Base node has empty OS status?"
2346 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2348 # base OS is invalid, skipping
2350 for kind, a, b in [("API version", f_api, b_api),
2351 ("variants list", f_var, b_var),
2352 ("parameters", beautify_params(f_param),
2353 beautify_params(b_param))]:
2354 _ErrorIf(a != b, self.ENODEOS, node,
2355 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2356 kind, os_name, base.name,
2357 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2359 # check any missing OSes
2360 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2361 _ErrorIf(missing, self.ENODEOS, node,
2362 "OSes present on reference node %s but missing on this node: %s",
2363 base.name, utils.CommaJoin(missing))
2365 def _VerifyOob(self, ninfo, nresult):
2366 """Verifies out of band functionality of a node.
2368 @type ninfo: L{objects.Node}
2369 @param ninfo: the node to check
2370 @param nresult: the remote results for the node
2374 # We just have to verify the paths on master and/or master candidates
2375 # as the oob helper is invoked on the master
2376 if ((ninfo.master_candidate or ninfo.master_capable) and
2377 constants.NV_OOB_PATHS in nresult):
2378 for path_result in nresult[constants.NV_OOB_PATHS]:
2379 self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
2381 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2382 """Verifies and updates the node volume data.
2384 This function will update a L{NodeImage}'s internal structures
2385 with data from the remote call.
2387 @type ninfo: L{objects.Node}
2388 @param ninfo: the node to check
2389 @param nresult: the remote results for the node
2390 @param nimg: the node image object
2391 @param vg_name: the configured VG name
2395 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2397 nimg.lvm_fail = True
2398 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2401 elif isinstance(lvdata, basestring):
2402 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
2403 utils.SafeEncode(lvdata))
2404 elif not isinstance(lvdata, dict):
2405 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
2407 nimg.volumes = lvdata
2408 nimg.lvm_fail = False
2410 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2411 """Verifies and updates the node instance list.
2413 If the listing was successful, then updates this node's instance
2414 list. Otherwise, it marks the RPC call as failed for the instance
2417 @type ninfo: L{objects.Node}
2418 @param ninfo: the node to check
2419 @param nresult: the remote results for the node
2420 @param nimg: the node image object
2423 idata = nresult.get(constants.NV_INSTANCELIST, None)
2424 test = not isinstance(idata, list)
2425 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
2426 " (instancelist): %s", utils.SafeEncode(str(idata)))
2428 nimg.hyp_fail = True
2430 nimg.instances = idata
2432 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2433 """Verifies and computes a node information map
2435 @type ninfo: L{objects.Node}
2436 @param ninfo: the node to check
2437 @param nresult: the remote results for the node
2438 @param nimg: the node image object
2439 @param vg_name: the configured VG name
2443 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2445 # try to read free memory (from the hypervisor)
2446 hv_info = nresult.get(constants.NV_HVINFO, None)
2447 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2448 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
2451 nimg.mfree = int(hv_info["memory_free"])
2452 except (ValueError, TypeError):
2453 _ErrorIf(True, self.ENODERPC, node,
2454 "node returned invalid nodeinfo, check hypervisor")
2456 # FIXME: devise a free space model for file based instances as well
2457 if vg_name is not None:
2458 test = (constants.NV_VGLIST not in nresult or
2459 vg_name not in nresult[constants.NV_VGLIST])
2460 _ErrorIf(test, self.ENODELVM, node,
2461 "node didn't return data for the volume group '%s'"
2462 " - it is either missing or broken", vg_name)
2465 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2466 except (ValueError, TypeError):
2467 _ErrorIf(True, self.ENODERPC, node,
2468 "node returned invalid LVM info, check LVM status")
2470 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2471 """Gets per-disk status information for all instances.
2473 @type nodelist: list of strings
2474 @param nodelist: Node names
2475 @type node_image: dict of (name, L{objects.Node})
2476 @param node_image: Node objects
2477 @type instanceinfo: dict of (name, L{objects.Instance})
2478 @param instanceinfo: Instance objects
2479 @rtype: {instance: {node: [(success, payload)]}}
2480 @return: a dictionary of per-instance dictionaries with nodes as
2481 keys and disk information as values; the disk information is a
2482 list of tuples (success, payload)
2485 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2488 node_disks_devonly = {}
2489 diskless_instances = set()
2490 diskless = constants.DT_DISKLESS
2492 for nname in nodelist:
2493 node_instances = list(itertools.chain(node_image[nname].pinst,
2494 node_image[nname].sinst))
2495 diskless_instances.update(inst for inst in node_instances
2496 if instanceinfo[inst].disk_template == diskless)
2497 disks = [(inst, disk)
2498 for inst in node_instances
2499 for disk in instanceinfo[inst].disks]
2502 # No need to collect data
2505 node_disks[nname] = disks
2507 # Creating copies as SetDiskID below will modify the objects and that can
2508 # lead to incorrect data returned from nodes
2509 devonly = [dev.Copy() for (_, dev) in disks]
2512 self.cfg.SetDiskID(dev, nname)
2514 node_disks_devonly[nname] = devonly
2516 assert len(node_disks) == len(node_disks_devonly)
2518 # Collect data from all nodes with disks
2519 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2522 assert len(result) == len(node_disks)
2526 for (nname, nres) in result.items():
2527 disks = node_disks[nname]
2530 # No data from this node
2531 data = len(disks) * [(False, "node offline")]
2534 _ErrorIf(msg, self.ENODERPC, nname,
2535 "while getting disk information: %s", msg)
2537 # No data from this node
2538 data = len(disks) * [(False, msg)]
2541 for idx, i in enumerate(nres.payload):
2542 if isinstance(i, (tuple, list)) and len(i) == 2:
2545 logging.warning("Invalid result from node %s, entry %d: %s",
2547 data.append((False, "Invalid result from the remote node"))
2549 for ((inst, _), status) in zip(disks, data):
2550 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2552 # Add empty entries for diskless instances.
2553 for inst in diskless_instances:
2554 assert inst not in instdisk
2557 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2558 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2559 compat.all(isinstance(s, (tuple, list)) and
2560 len(s) == 2 for s in statuses)
2561 for inst, nnames in instdisk.items()
2562 for nname, statuses in nnames.items())
2563 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
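  # Illustrative sketch, not part of the LU (hypothetical names): the
  # resulting instdisk structure is keyed by instance, then by node, with one
  # (success, payload) tuple per disk, matching the @rtype above:
  #
  #   instdisk == {
  #     "inst1.example.com": {
  #       "node1.example.com": [(True, st0), (True, st1)],
  #       "node2.example.com": [(False, "node offline"),
  #                             (False, "node offline")],
  #     },
  #   }
  #   # st0/st1 stand for the per-disk payload objects returned by the node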
2568 def _SshNodeSelector(group_uuid, all_nodes):
2569 """Create endless iterators for all potential SSH check hosts.
2572 nodes = [node for node in all_nodes
2573 if (node.group != group_uuid and
2575 keyfunc = operator.attrgetter("group")
2577 return map(itertools.cycle,
2578 [sorted(map(operator.attrgetter("name"), names))
2579 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2583 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2584 """Choose which nodes should talk to which other nodes.
2586 We will make nodes contact all nodes in their group, and one node from every other node group.
2589 @warning: This algorithm has a known issue if one node group is much
2590 smaller than others (e.g. just one node). In such a case all other
2591 nodes will talk to the single node.
2594 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2595 sel = cls._SshNodeSelector(group_uuid, all_nodes)
2597 return (online_nodes,
2598 dict((name, sorted([i.next() for i in sel]))
2599 for name in online_nodes))
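  # Illustrative sketch, not part of the LU (hypothetical names):
  # _SshNodeSelector builds one endless, sorted iterator per foreign node
  # group, and _SelectSshCheckNodes draws one peer from each iterator for
  # every online node in this group, so consecutive nodes get consecutive
  # remote peers:
  #
  #   other = itertools.cycle(sorted(["g2-a", "g2-b"]))
  #   peers = dict((name, [other.next()]) for name in ["n1", "n2", "n3"])
  #   # {"n1": ["g2-a"], "n2": ["g2-b"], "n3": ["g2-a"]}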
2601 def BuildHooksEnv(self):
2604 Cluster-Verify hooks are run only in the post phase; if they fail, their
2605 output is logged in the verify output and the verification fails.
2609 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2612 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2613 for node in self.my_node_info.values())
2617 def BuildHooksNodes(self):
2618 """Build hooks nodes.
2621 return ([], self.my_node_names)
2623 def Exec(self, feedback_fn):
2624 """Verify integrity of the node group, performing various tests on nodes.
2627 # This method has too many local variables. pylint: disable=R0914
2628 feedback_fn("* Verifying group '%s'" % self.group_info.name)
2630 if not self.my_node_names:
2632 feedback_fn("* Empty node group, skipping verification")
2636 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2637 verbose = self.op.verbose
2638 self._feedback_fn = feedback_fn
2640 vg_name = self.cfg.GetVGName()
2641 drbd_helper = self.cfg.GetDRBDHelper()
2642 cluster = self.cfg.GetClusterInfo()
2643 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2644 hypervisors = cluster.enabled_hypervisors
2645 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2647 i_non_redundant = [] # Non redundant instances
2648 i_non_a_balanced = [] # Non auto-balanced instances
2649 n_offline = 0 # Count of offline nodes
2650 n_drained = 0 # Count of nodes being drained
2651 node_vol_should = {}
2653 # FIXME: verify OS list
2656 filemap = _ComputeAncillaryFiles(cluster, False)
2658 # do local checksums
2659 master_node = self.master_node = self.cfg.GetMasterNode()
2660 master_ip = self.cfg.GetMasterIP()
2662 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2664 node_verify_param = {
2665 constants.NV_FILELIST:
2666 utils.UniqueSequence(filename
2667 for files in filemap
2668 for filename in files),
2669 constants.NV_NODELIST:
2670 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
2671 self.all_node_info.values()),
2672 constants.NV_HYPERVISOR: hypervisors,
2673 constants.NV_HVPARAMS:
2674 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2675 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2676 for node in node_data_list
2677 if not node.offline],
2678 constants.NV_INSTANCELIST: hypervisors,
2679 constants.NV_VERSION: None,
2680 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2681 constants.NV_NODESETUP: None,
2682 constants.NV_TIME: None,
2683 constants.NV_MASTERIP: (master_node, master_ip),
2684 constants.NV_OSLIST: None,
2685 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2688 if vg_name is not None:
2689 node_verify_param[constants.NV_VGLIST] = None
2690 node_verify_param[constants.NV_LVLIST] = vg_name
2691 node_verify_param[constants.NV_PVLIST] = [vg_name]
2692 node_verify_param[constants.NV_DRBDLIST] = None
2695 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2698 # FIXME: this needs to be changed per node-group, not cluster-wide
2700 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2701 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2702 bridges.add(default_nicpp[constants.NIC_LINK])
2703 for instance in self.my_inst_info.values():
2704 for nic in instance.nics:
2705 full_nic = cluster.SimpleFillNIC(nic.nicparams)
2706 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2707 bridges.add(full_nic[constants.NIC_LINK])
2710 node_verify_param[constants.NV_BRIDGES] = list(bridges)
2712 # Build our expected cluster state
2713 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2715 vm_capable=node.vm_capable))
2716 for node in node_data_list)
2720 for node in self.all_node_info.values():
2721 path = _SupportsOob(self.cfg, node)
2722 if path and path not in oob_paths:
2723 oob_paths.append(path)
2726 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2728 for instance in self.my_inst_names:
2729 inst_config = self.my_inst_info[instance]
2731 for nname in inst_config.all_nodes:
2732 if nname not in node_image:
2733 gnode = self.NodeImage(name=nname)
2734 gnode.ghost = (nname not in self.all_node_info)
2735 node_image[nname] = gnode
2737 inst_config.MapLVsByNode(node_vol_should)
2739 pnode = inst_config.primary_node
2740 node_image[pnode].pinst.append(instance)
2742 for snode in inst_config.secondary_nodes:
2743 nimg = node_image[snode]
2744 nimg.sinst.append(instance)
2745 if pnode not in nimg.sbp:
2746 nimg.sbp[pnode] = []
2747 nimg.sbp[pnode].append(instance)
2749 # At this point, we have the in-memory data structures complete,
2750 # except for the runtime information, which we'll gather next
2752 # Due to the way our RPC system works, exact response times cannot be
2753 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2754 # time before and after executing the request, we can at least have a time window.
2756 nvinfo_starttime = time.time()
2757 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2759 self.cfg.GetClusterName())
2760 nvinfo_endtime = time.time()
2762 if self.extra_lv_nodes and vg_name is not None:
2764 self.rpc.call_node_verify(self.extra_lv_nodes,
2765 {constants.NV_LVLIST: vg_name},
2766 self.cfg.GetClusterName())
2768 extra_lv_nvinfo = {}
2770 all_drbd_map = self.cfg.ComputeDRBDMap()
2772 feedback_fn("* Gathering disk information (%s nodes)" %
2773 len(self.my_node_names))
2774 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2777 feedback_fn("* Verifying configuration file consistency")
2779 # If not all nodes are being checked, we need to make sure the master node
2780 # and a non-checked vm_capable node are in the list.
2781 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2783 vf_nvinfo = all_nvinfo.copy()
2784 vf_node_info = list(self.my_node_info.values())
2785 additional_nodes = []
2786 if master_node not in self.my_node_info:
2787 additional_nodes.append(master_node)
2788 vf_node_info.append(self.all_node_info[master_node])
2789 # Add the first vm_capable node we find which is not included
2790 for node in absent_nodes:
2791 nodeinfo = self.all_node_info[node]
2792 if nodeinfo.vm_capable and not nodeinfo.offline:
2793 additional_nodes.append(node)
2794 vf_node_info.append(self.all_node_info[node])
2796 key = constants.NV_FILELIST
2797 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2798 {key: node_verify_param[key]},
2799 self.cfg.GetClusterName()))
2801 vf_nvinfo = all_nvinfo
2802 vf_node_info = self.my_node_info.values()
2804 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2806 feedback_fn("* Verifying node status")
2810 for node_i in node_data_list:
2812 nimg = node_image[node]
2816 feedback_fn("* Skipping offline node %s" % (node,))
2820 if node == master_node:
2822 elif node_i.master_candidate:
2823 ntype = "master candidate"
2824 elif node_i.drained:
2830 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2832 msg = all_nvinfo[node].fail_msg
2833 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2835 nimg.rpc_fail = True
2838 nresult = all_nvinfo[node].payload
2840 nimg.call_ok = self._VerifyNode(node_i, nresult)
2841 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2842 self._VerifyNodeNetwork(node_i, nresult)
2843 self._VerifyOob(node_i, nresult)
2846 self._VerifyNodeLVM(node_i, nresult, vg_name)
2847 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2850 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2851 self._UpdateNodeInstances(node_i, nresult, nimg)
2852 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2853 self._UpdateNodeOS(node_i, nresult, nimg)
2855 if not nimg.os_fail:
2856 if refos_img is None:
2858 self._VerifyNodeOS(node_i, nimg, refos_img)
2859 self._VerifyNodeBridges(node_i, nresult, bridges)
2861 # Check whether all running instances are primary for the node. (This
2862 # can no longer be done from _VerifyInstance below, since some of the
2863 # wrong instances could be from other node groups.)
2864 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2866 for inst in non_primary_inst:
2867 test = inst in self.all_inst_info
2868 _ErrorIf(test, self.EINSTANCEWRONGNODE, inst,
2869 "instance should not run on node %s", node_i.name)
2870 _ErrorIf(not test, self.ENODEORPHANINSTANCE, node_i.name,
2871 "node is running unknown instance %s", inst)
2873 for node, result in extra_lv_nvinfo.items():
2874 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
2875 node_image[node], vg_name)
2877 feedback_fn("* Verifying instance status")
2878 for instance in self.my_inst_names:
2880 feedback_fn("* Verifying instance %s" % instance)
2881 inst_config = self.my_inst_info[instance]
2882 self._VerifyInstance(instance, inst_config, node_image,
2884 inst_nodes_offline = []
2886 pnode = inst_config.primary_node
2887 pnode_img = node_image[pnode]
2888 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2889 self.ENODERPC, pnode, "instance %s, connection to"
2890 " primary node failed", instance)
2892 _ErrorIf(inst_config.admin_up and pnode_img.offline,
2893 self.EINSTANCEBADNODE, instance,
2894 "instance is marked as running and lives on offline node %s",
2895 inst_config.primary_node)
2897 # If the instance is non-redundant we cannot survive losing its primary
2898 # node, so we are not N+1 compliant. On the other hand we have no disk
2899 # templates with more than one secondary so that situation is not well
2901 # FIXME: does not support file-backed instances
2902 if not inst_config.secondary_nodes:
2903 i_non_redundant.append(instance)
2905 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2906 instance, "instance has multiple secondary nodes: %s",
2907 utils.CommaJoin(inst_config.secondary_nodes),
2908 code=self.ETYPE_WARNING)
2910 if inst_config.disk_template in constants.DTS_INT_MIRROR:
2911 pnode = inst_config.primary_node
2912 instance_nodes = utils.NiceSort(inst_config.all_nodes)
2913 instance_groups = {}
2915 for node in instance_nodes:
2916 instance_groups.setdefault(self.all_node_info[node].group,
2920 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2921 # Sort so that we always list the primary node first.
2922 for group, nodes in sorted(instance_groups.items(),
2923 key=lambda (_, nodes): pnode in nodes,
2926 self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2927 instance, "instance has primary and secondary nodes in"
2928 " different groups: %s", utils.CommaJoin(pretty_list),
2929 code=self.ETYPE_WARNING)
2931 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2932 i_non_a_balanced.append(instance)
2934 for snode in inst_config.secondary_nodes:
2935 s_img = node_image[snode]
2936 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2937 "instance %s, connection to secondary node failed", instance)
2940 inst_nodes_offline.append(snode)
2942 # warn that the instance lives on offline nodes
2943 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2944 "instance has offline secondary node(s) %s",
2945 utils.CommaJoin(inst_nodes_offline))
2946 # ... or ghost/non-vm_capable nodes
2947 for node in inst_config.all_nodes:
2948 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2949 "instance lives on ghost node %s", node)
2950 _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2951 instance, "instance lives on non-vm_capable node %s", node)
2953 feedback_fn("* Verifying orphan volumes")
2954 reserved = utils.FieldSet(*cluster.reserved_lvs)
2956 # We will get spurious "unknown volume" warnings if any node of this group
2957 # is secondary for an instance whose primary is in another group. To avoid
2958 # them, we find these instances and add their volumes to node_vol_should.
2959 for inst in self.all_inst_info.values():
2960 for secondary in inst.secondary_nodes:
2961 if (secondary in self.my_node_info
2962 and inst.name not in self.my_inst_info):
2963 inst.MapLVsByNode(node_vol_should)
2966 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2968 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2969 feedback_fn("* Verifying N+1 Memory redundancy")
2970 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
2972 feedback_fn("* Other Notes")
2974 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
2975 % len(i_non_redundant))
2977 if i_non_a_balanced:
2978 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
2979 % len(i_non_a_balanced))
2982 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
2985 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
2989 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2990 """Analyze the post-hooks' result
2992 This method analyses the hook result, handles it, and sends some
2993 nicely-formatted feedback back to the user.
2995 @param phase: one of L{constants.HOOKS_PHASE_POST} or
2996 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2997 @param hooks_results: the results of the multi-node hooks rpc call
2998 @param feedback_fn: function used to send feedback back to the caller
2999 @param lu_result: previous Exec result
3000 @return: the new Exec result, based on the previous result
3004 # We only really run POST phase hooks, only for non-empty groups,
3005 # and are only interested in their results
3006 if not self.my_node_names:
3009 elif phase == constants.HOOKS_PHASE_POST:
3010 # Used to change hooks' output to proper indentation
3011 feedback_fn("* Hooks Results")
3012 assert hooks_results, "invalid result from hooks"
3014 for node_name in hooks_results:
3015 res = hooks_results[node_name]
3017 test = msg and not res.offline
3018 self._ErrorIf(test, self.ENODEHOOKS, node_name,
3019 "Communication failure in hooks execution: %s", msg)
3020 if res.offline or msg:
3021 # No need to investigate payload if node is offline or gave an error
3024 for script, hkr, output in res.payload:
3025 test = hkr == constants.HKR_FAIL
3026 self._ErrorIf(test, self.ENODEHOOKS, node_name,
3027 "Script %s failed, output:", script)
3029 output = self._HOOKS_INDENT_RE.sub(" ", output)
3030 feedback_fn("%s" % output)
3036 class LUClusterVerifyDisks(NoHooksLU):
3037 """Verifies the status of all disks in the cluster.
3042 def ExpandNames(self):
3043 self.share_locks = _ShareAll()
3044 self.needed_locks = {
3045 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3048 def Exec(self, feedback_fn):
3049 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3051 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3052 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3053 for group in group_names])
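  # Illustrative sketch, not part of the LU (hypothetical group names): with
  # two node groups the Exec above returns the following job list, one job per
  # group:
  #
  #   [[opcodes.OpGroupVerifyDisks(group_name="default")],
  #    [opcodes.OpGroupVerifyDisks(group_name="storage")]]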
3056 class LUGroupVerifyDisks(NoHooksLU):
3057 """Verifies the status of all disks in a node group.
3062 def ExpandNames(self):
3063 # Raises errors.OpPrereqError on its own if group can't be found
3064 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3066 self.share_locks = _ShareAll()
3067 self.needed_locks = {
3068 locking.LEVEL_INSTANCE: [],
3069 locking.LEVEL_NODEGROUP: [],
3070 locking.LEVEL_NODE: [],
3073 def DeclareLocks(self, level):
3074 if level == locking.LEVEL_INSTANCE:
3075 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3077 # Lock instances optimistically, needs verification once node and group
3078 # locks have been acquired
3079 self.needed_locks[locking.LEVEL_INSTANCE] = \
3080 self.cfg.GetNodeGroupInstances(self.group_uuid)
3082 elif level == locking.LEVEL_NODEGROUP:
3083 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3085 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3086 set([self.group_uuid] +
3087 # Lock all groups used by instances optimistically; this requires
3088 # going via the node before it's locked, requiring verification
3091 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3092 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3094 elif level == locking.LEVEL_NODE:
3095 # This will only lock the nodes in the group to be verified which contain actual instances
3097 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3098 self._LockInstancesNodes()
3100 # Lock all nodes in group to be verified
3101 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3102 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3103 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3105 def CheckPrereq(self):
3106 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3107 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3108 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3110 assert self.group_uuid in owned_groups
3112 # Check if locked instances are still correct
3113 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3115 # Get instance information
3116 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3118 # Check if node groups for locked instances are still correct
3119 for (instance_name, inst) in self.instances.items():
3120 assert owned_nodes.issuperset(inst.all_nodes), \
3121 "Instance %s's nodes changed while we kept the lock" % instance_name
3123 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3126 assert self.group_uuid in inst_groups, \
3127 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3129 def Exec(self, feedback_fn):
3130 """Verify integrity of cluster disks.
3132 @rtype: tuple of three items
3133 @return: a tuple of (dict of node-to-node_error, list of instances
3134 which need activate-disks, dict of instance: (node, volume) for missing volumes
3139 res_instances = set()
3142 nv_dict = _MapInstanceDisksToNodes([inst
3143 for inst in self.instances.values()
3147 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3148 set(self.cfg.GetVmCapableNodeList()))
3150 node_lvs = self.rpc.call_lv_list(nodes, [])
3152 for (node, node_res) in node_lvs.items():
3153 if node_res.offline:
3156 msg = node_res.fail_msg
3158 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3159 res_nodes[node] = msg
3162 for lv_name, (_, _, lv_online) in node_res.payload.items():
3163 inst = nv_dict.pop((node, lv_name), None)
3164 if not (lv_online or inst is None):
3165 res_instances.add(inst)
3167 # any leftover items in nv_dict are missing LVs, let's arrange the data
3169 for key, inst in nv_dict.iteritems():
3170 res_missing.setdefault(inst, []).append(key)
3172 return (res_nodes, list(res_instances), res_missing)
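  # Illustrative sketch, not part of the LU (hypothetical names): a possible
  # return value of the Exec above, roughly matching the docstring:
  #
  #   ({"node3.example.com": "error while running lv_list: ..."},
  #    ["inst2.example.com"],                 # needs activate-disks
  #    {"inst5.example.com": [("node1.example.com", "xenvg/disk0")]})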
3175 class LUClusterRepairDiskSizes(NoHooksLU):
3176 """Verifies the cluster disks sizes.
3181 def ExpandNames(self):
3182 if self.op.instances:
3183 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3184 self.needed_locks = {
3185 locking.LEVEL_NODE: [],
3186 locking.LEVEL_INSTANCE: self.wanted_names,
3188 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3190 self.wanted_names = None
3191 self.needed_locks = {
3192 locking.LEVEL_NODE: locking.ALL_SET,
3193 locking.LEVEL_INSTANCE: locking.ALL_SET,
3195 self.share_locks = _ShareAll()
3197 def DeclareLocks(self, level):
3198 if level == locking.LEVEL_NODE and self.wanted_names is not None:
3199 self._LockInstancesNodes(primary_only=True)
3201 def CheckPrereq(self):
3202 """Check prerequisites.
3204 This only checks the optional instance list against the existing names.
3207 if self.wanted_names is None:
3208 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3210 self.wanted_instances = \
3211 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3213 def _EnsureChildSizes(self, disk):
3214 """Ensure children of the disk have the needed disk size.
3216 This is valid mainly for DRBD8 and fixes an issue where the
3217 children have smaller disk size.
3219 @param disk: an L{ganeti.objects.Disk} object
3222 if disk.dev_type == constants.LD_DRBD8:
3223 assert disk.children, "Empty children for DRBD8?"
3224 fchild = disk.children[0]
3225 mismatch = fchild.size < disk.size
3227 self.LogInfo("Child disk has size %d, parent %d, fixing",
3228 fchild.size, disk.size)
3229 fchild.size = disk.size
3231 # and we recurse on this child only, not on the metadev
3232 return self._EnsureChildSizes(fchild) or mismatch
3236 def Exec(self, feedback_fn):
3237 """Verify the size of cluster disks.
3240 # TODO: check child disks too
3241 # TODO: check differences in size between primary/secondary nodes
3243 for instance in self.wanted_instances:
3244 pnode = instance.primary_node
3245 if pnode not in per_node_disks:
3246 per_node_disks[pnode] = []
3247 for idx, disk in enumerate(instance.disks):
3248 per_node_disks[pnode].append((instance, idx, disk))
3251 for node, dskl in per_node_disks.items():
3252 newl = [v[2].Copy() for v in dskl]
3254 self.cfg.SetDiskID(dsk, node)
3255 result = self.rpc.call_blockdev_getsize(node, newl)
3257 self.LogWarning("Failure in blockdev_getsize call to node"
3258 " %s, ignoring", node)
3260 if len(result.payload) != len(dskl):
3261 logging.warning("Invalid result from node %s: len(dskl)=%d,"
3262 " result.payload=%s", node, len(dskl), result.payload)
3263 self.LogWarning("Invalid result from node %s, ignoring node results",
3266 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3268 self.LogWarning("Disk %d of instance %s did not return size"
3269 " information, ignoring", idx, instance.name)
3271 if not isinstance(size, (int, long)):
3272 self.LogWarning("Disk %d of instance %s did not return valid"
3273 " size information, ignoring", idx, instance.name)
3276 if size != disk.size:
3277 self.LogInfo("Disk %d of instance %s has mismatched size,"
3278 " correcting: recorded %d, actual %d", idx,
3279 instance.name, disk.size, size)
3281 self.cfg.Update(instance, feedback_fn)
3282 changed.append((instance.name, idx, size))
3283 if self._EnsureChildSizes(disk):
3284 self.cfg.Update(instance, feedback_fn)
3285 changed.append((instance.name, idx, disk.size))
3289 class LUClusterRename(LogicalUnit):
3290 """Rename the cluster.
3293 HPATH = "cluster-rename"
3294 HTYPE = constants.HTYPE_CLUSTER
3296 def BuildHooksEnv(self):
3301 "OP_TARGET": self.cfg.GetClusterName(),
3302 "NEW_NAME": self.op.name,
3305 def BuildHooksNodes(self):
3306 """Build hooks nodes.
3309 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3311 def CheckPrereq(self):
3312 """Verify that the passed name is a valid one.
3315 hostname = netutils.GetHostname(name=self.op.name,
3316 family=self.cfg.GetPrimaryIPFamily())
3318 new_name = hostname.name
3319 self.ip = new_ip = hostname.ip
3320 old_name = self.cfg.GetClusterName()
3321 old_ip = self.cfg.GetMasterIP()
3322 if new_name == old_name and new_ip == old_ip:
3323 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3324 " cluster has changed",
3326 if new_ip != old_ip:
3327 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3328 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3329 " reachable on the network" %
3330 new_ip, errors.ECODE_NOTUNIQUE)
3332 self.op.name = new_name
3334 def Exec(self, feedback_fn):
3335 """Rename the cluster.
3338 clustername = self.op.name
3341 # shutdown the master IP
3342 master = self.cfg.GetMasterNode()
3343 result = self.rpc.call_node_deactivate_master_ip(master)
3344 result.Raise("Could not disable the master role")
3347 cluster = self.cfg.GetClusterInfo()
3348 cluster.cluster_name = clustername
3349 cluster.master_ip = ip
3350 self.cfg.Update(cluster, feedback_fn)
3352 # update the known hosts file
3353 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3354 node_list = self.cfg.GetOnlineNodeList()
3356 node_list.remove(master)
3359 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3361 result = self.rpc.call_node_activate_master_ip(master)
3362 msg = result.fail_msg
3364 self.LogWarning("Could not re-enable the master role on"
3365 " the master, please restart manually: %s", msg)
3370 class LUClusterSetParams(LogicalUnit):
3371 """Change the parameters of the cluster.
3374 HPATH = "cluster-modify"
3375 HTYPE = constants.HTYPE_CLUSTER
3378 def CheckArguments(self):
3382 if self.op.uid_pool:
3383 uidpool.CheckUidPool(self.op.uid_pool)
3385 if self.op.add_uids:
3386 uidpool.CheckUidPool(self.op.add_uids)
3388 if self.op.remove_uids:
3389 uidpool.CheckUidPool(self.op.remove_uids)
3391 def ExpandNames(self):
3392 # FIXME: in the future maybe other cluster params won't require checking on
3393 # all nodes to be modified.
3394 self.needed_locks = {
3395 locking.LEVEL_NODE: locking.ALL_SET,
3397 self.share_locks[locking.LEVEL_NODE] = 1
3399 def BuildHooksEnv(self):
3404 "OP_TARGET": self.cfg.GetClusterName(),
3405 "NEW_VG_NAME": self.op.vg_name,
3408 def BuildHooksNodes(self):
3409 """Build hooks nodes.
3412 mn = self.cfg.GetMasterNode()
3415 def CheckPrereq(self):
3416 """Check prerequisites.
3418 This checks that the given params don't conflict and that
3419 the given volume group is valid.
3422 if self.op.vg_name is not None and not self.op.vg_name:
3423 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3424 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3425 " instances exist", errors.ECODE_INVAL)
3427 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3428 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3429 raise errors.OpPrereqError("Cannot disable drbd helper while"
3430 " drbd-based instances exist",
3433 node_list = self.owned_locks(locking.LEVEL_NODE)
3435 # if vg_name not None, checks given volume group on all nodes
3437 vglist = self.rpc.call_vg_list(node_list)
3438 for node in node_list:
3439 msg = vglist[node].fail_msg
3441 # ignoring down node
3442 self.LogWarning("Error while gathering data on node %s"
3443 " (ignoring node): %s", node, msg)
3445 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3447 constants.MIN_VG_SIZE)
3449 raise errors.OpPrereqError("Error on node '%s': %s" %
3450 (node, vgstatus), errors.ECODE_ENVIRON)
3452 if self.op.drbd_helper:
3453 # checks given drbd helper on all nodes
3454 helpers = self.rpc.call_drbd_helper(node_list)
3455 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3457 self.LogInfo("Not checking drbd helper on offline node %s", node)
3459 msg = helpers[node].fail_msg
3461 raise errors.OpPrereqError("Error checking drbd helper on node"
3462 " '%s': %s" % (node, msg),
3463 errors.ECODE_ENVIRON)
3464 node_helper = helpers[node].payload
3465 if node_helper != self.op.drbd_helper:
3466 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3467 (node, node_helper), errors.ECODE_ENVIRON)
3469 self.cluster = cluster = self.cfg.GetClusterInfo()
3470 # validate params changes
3471 if self.op.beparams:
3472 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3473 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3475 if self.op.ndparams:
3476 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3477 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3479 # TODO: we need a more general way to handle resetting
3480 # cluster-level parameters to default values
3481 if self.new_ndparams["oob_program"] == "":
3482 self.new_ndparams["oob_program"] = \
3483 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3485 if self.op.nicparams:
3486 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3487 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3488 objects.NIC.CheckParameterSyntax(self.new_nicparams)
3491 # check all instances for consistency
3492 for instance in self.cfg.GetAllInstancesInfo().values():
3493 for nic_idx, nic in enumerate(instance.nics):
3494 params_copy = copy.deepcopy(nic.nicparams)
3495 params_filled = objects.FillDict(self.new_nicparams, params_copy)
3497 # check parameter syntax
3499 objects.NIC.CheckParameterSyntax(params_filled)
3500 except errors.ConfigurationError, err:
3501 nic_errors.append("Instance %s, nic/%d: %s" %
3502 (instance.name, nic_idx, err))
3504 # if we're moving instances to routed, check that they have an ip
3505 target_mode = params_filled[constants.NIC_MODE]
3506 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3507 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3508 " address" % (instance.name, nic_idx))
3510 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3511 "\n".join(nic_errors))
3513 # hypervisor list/parameters
3514 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3515 if self.op.hvparams:
3516 for hv_name, hv_dict in self.op.hvparams.items():
3517 if hv_name not in self.new_hvparams:
3518 self.new_hvparams[hv_name] = hv_dict
3520 self.new_hvparams[hv_name].update(hv_dict)
3522 # os hypervisor parameters
3523 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3525 for os_name, hvs in self.op.os_hvp.items():
3526 if os_name not in self.new_os_hvp:
3527 self.new_os_hvp[os_name] = hvs
3529 for hv_name, hv_dict in hvs.items():
3530 if hv_name not in self.new_os_hvp[os_name]:
3531 self.new_os_hvp[os_name][hv_name] = hv_dict
3533 self.new_os_hvp[os_name][hv_name].update(hv_dict)
3536 self.new_osp = objects.FillDict(cluster.osparams, {})
3537 if self.op.osparams:
3538 for os_name, osp in self.op.osparams.items():
3539 if os_name not in self.new_osp:
3540 self.new_osp[os_name] = {}
3542 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3545 if not self.new_osp[os_name]:
3546 # we removed all parameters
3547 del self.new_osp[os_name]
3549 # check the parameter validity (remote check)
3550 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3551 os_name, self.new_osp[os_name])
3553 # changes to the hypervisor list
3554 if self.op.enabled_hypervisors is not None:
3555 self.hv_list = self.op.enabled_hypervisors
3556 for hv in self.hv_list:
3557 # if the hypervisor doesn't already exist in the cluster
3558 # hvparams, we initialize it to empty, and then (in both
3559 # cases) we make sure to fill the defaults, as we might not
3560 # have a complete defaults list if the hypervisor wasn't
3562 if hv not in new_hvp:
3564 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3565 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3567 self.hv_list = cluster.enabled_hypervisors
3569 if self.op.hvparams or self.op.enabled_hypervisors is not None:
3570 # either the enabled list has changed, or the parameters have, validate
3571 for hv_name, hv_params in self.new_hvparams.items():
3572 if ((self.op.hvparams and hv_name in self.op.hvparams) or
3573 (self.op.enabled_hypervisors and
3574 hv_name in self.op.enabled_hypervisors)):
3575 # either this is a new hypervisor, or its parameters have changed
3576 hv_class = hypervisor.GetHypervisor(hv_name)
3577 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3578 hv_class.CheckParameterSyntax(hv_params)
3579 _CheckHVParams(self, node_list, hv_name, hv_params)
3582 # no need to check any newly-enabled hypervisors, since the
3583 # defaults have already been checked in the above code-block
3584 for os_name, os_hvp in self.new_os_hvp.items():
3585 for hv_name, hv_params in os_hvp.items():
3586 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3587 # we need to fill in the new os_hvp on top of the actual hv_p
3588 cluster_defaults = self.new_hvparams.get(hv_name, {})
3589 new_osp = objects.FillDict(cluster_defaults, hv_params)
3590 hv_class = hypervisor.GetHypervisor(hv_name)
3591 hv_class.CheckParameterSyntax(new_osp)
3592 _CheckHVParams(self, node_list, hv_name, new_osp)
3594 if self.op.default_iallocator:
3595 alloc_script = utils.FindFile(self.op.default_iallocator,
3596 constants.IALLOCATOR_SEARCH_PATH,
3598 if alloc_script is None:
3599 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3600 " specified" % self.op.default_iallocator,
3603 def Exec(self, feedback_fn):
3604 """Change the parameters of the cluster.
3607 if self.op.vg_name is not None:
3608 new_volume = self.op.vg_name
3611 if new_volume != self.cfg.GetVGName():
3612 self.cfg.SetVGName(new_volume)
3614 feedback_fn("Cluster LVM configuration already in desired"
3615 " state, not changing")
3616 if self.op.drbd_helper is not None:
3617 new_helper = self.op.drbd_helper
3620 if new_helper != self.cfg.GetDRBDHelper():
3621 self.cfg.SetDRBDHelper(new_helper)
3623 feedback_fn("Cluster DRBD helper already in desired state,"
3625 if self.op.hvparams:
3626 self.cluster.hvparams = self.new_hvparams
3628 self.cluster.os_hvp = self.new_os_hvp
3629 if self.op.enabled_hypervisors is not None:
3630 self.cluster.hvparams = self.new_hvparams
3631 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3632 if self.op.beparams:
3633 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3634 if self.op.nicparams:
3635 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3636 if self.op.osparams:
3637 self.cluster.osparams = self.new_osp
3638 if self.op.ndparams:
3639 self.cluster.ndparams = self.new_ndparams
3641 if self.op.candidate_pool_size is not None:
3642 self.cluster.candidate_pool_size = self.op.candidate_pool_size
3643 # we need to update the pool size here, otherwise the save will fail
3644 _AdjustCandidatePool(self, [])
3646 if self.op.maintain_node_health is not None:
3647 self.cluster.maintain_node_health = self.op.maintain_node_health
3649 if self.op.prealloc_wipe_disks is not None:
3650 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3652 if self.op.add_uids is not None:
3653 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3655 if self.op.remove_uids is not None:
3656 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3658 if self.op.uid_pool is not None:
3659 self.cluster.uid_pool = self.op.uid_pool
3661 if self.op.default_iallocator is not None:
3662 self.cluster.default_iallocator = self.op.default_iallocator
3664 if self.op.reserved_lvs is not None:
3665 self.cluster.reserved_lvs = self.op.reserved_lvs
3667 def helper_os(aname, mods, desc):
3669 lst = getattr(self.cluster, aname)
3670 for key, val in mods:
3671 if key == constants.DDM_ADD:
3673 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3676 elif key == constants.DDM_REMOVE:
3680 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3682 raise errors.ProgrammerError("Invalid modification '%s'" % key)
3684 if self.op.hidden_os:
3685 helper_os("hidden_os", self.op.hidden_os, "hidden")
3687 if self.op.blacklisted_os:
3688 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
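    # Illustrative sketch, not part of the LU (hypothetical OS names): the
    # hidden_os/blacklisted_os opcode fields are lists of (modification, OS
    # name) pairs consumed by helper_os above, e.g.
    #
    #   op.hidden_os = [(constants.DDM_ADD, "debootstrap+secret"),
    #                   (constants.DDM_REMOVE, "old-os")]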
3690 if self.op.master_netdev:
3691 master = self.cfg.GetMasterNode()
3692 feedback_fn("Shutting down master ip on the current netdev (%s)" %
3693 self.cluster.master_netdev)
3694 result = self.rpc.call_node_deactivate_master_ip(master)
3695 result.Raise("Could not disable the master ip")
3696 feedback_fn("Changing master_netdev from %s to %s" %
3697 (self.cluster.master_netdev, self.op.master_netdev))
3698 self.cluster.master_netdev = self.op.master_netdev
3700 self.cfg.Update(self.cluster, feedback_fn)
3702 if self.op.master_netdev:
3703 feedback_fn("Starting the master ip on the new master netdev (%s)" %
3704 self.op.master_netdev)
3705 result = self.rpc.call_node_activate_master_ip(master)
3707 self.LogWarning("Could not re-enable the master ip on"
3708 " the master, please restart manually: %s",
3712 def _UploadHelper(lu, nodes, fname):
3713 """Helper for uploading a file and showing warnings.
3716 if os.path.exists(fname):
3717 result = lu.rpc.call_upload_file(nodes, fname)
3718 for to_node, to_result in result.items():
3719 msg = to_result.fail_msg
3721 msg = ("Copy of file %s to node %s failed: %s" %
3722 (fname, to_node, msg))
3723 lu.proc.LogWarning(msg)
3726 def _ComputeAncillaryFiles(cluster, redist):
3727 """Compute files external to Ganeti which need to be consistent.
3729 @type redist: boolean
3730 @param redist: Whether to include files which need to be redistributed
3733 # Compute files for all nodes
3735 constants.SSH_KNOWN_HOSTS_FILE,
3736 constants.CONFD_HMAC_KEY,
3737 constants.CLUSTER_DOMAIN_SECRET_FILE,
3741 files_all.update(constants.ALL_CERT_FILES)
3742 files_all.update(ssconf.SimpleStore().GetFileList())
3744 # we need to ship at least the RAPI certificate
3745 files_all.add(constants.RAPI_CERT_FILE)
3747 if cluster.modify_etc_hosts:
3748 files_all.add(constants.ETC_HOSTS)
3750 # Files which must either exist on all nodes or on none
3751 files_all_opt = set([
3752 constants.RAPI_USERS_FILE,
3755 # Files which should only be on master candidates
3758 files_mc.add(constants.CLUSTER_CONF_FILE)
3760 # Files which should only be on VM-capable nodes
3761 files_vm = set(filename
3762 for hv_name in cluster.enabled_hypervisors
3763 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
3765 # Filenames must be unique
3766 assert (len(files_all | files_all_opt | files_mc | files_vm) ==
3767 sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
3768 "Found file listed in more than one file list"
3770 return (files_all, files_all_opt, files_mc, files_vm)
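# For illustration, the tuple returned above groups plain file paths by the
# scope they must be distributed to, roughly:
#   files_all     -> every node (e.g. known_hosts, HMAC key, certificates)
#   files_all_opt -> all nodes or none (e.g. the RAPI users file)
#   files_mc      -> master candidates only (e.g. the cluster config file)
#   files_vm      -> VM-capable nodes only (hypervisor ancillary files)
# _RedistributeAncillaryFiles below consumes exactly this shape.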
3773 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3774 """Distribute additional files which are part of the cluster configuration.
3776 ConfigWriter takes care of distributing the config and ssconf files, but
3777 there are more files which should be distributed to all nodes. This function
3778 makes sure those are copied.
3780 @param lu: calling logical unit
3781 @param additional_nodes: list of nodes not in the config to distribute to
3782 @type additional_vm: boolean
3783 @param additional_vm: whether the additional nodes are vm-capable or not
3786 # Gather target nodes
3787 cluster = lu.cfg.GetClusterInfo()
3788 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3790 online_nodes = lu.cfg.GetOnlineNodeList()
3791 vm_nodes = lu.cfg.GetVmCapableNodeList()
3793 if additional_nodes is not None:
3794 online_nodes.extend(additional_nodes)
3796 vm_nodes.extend(additional_nodes)
3798 # Never distribute to master node
3799 for nodelist in [online_nodes, vm_nodes]:
3800 if master_info.name in nodelist:
3801 nodelist.remove(master_info.name)
3804 (files_all, files_all_opt, files_mc, files_vm) = \
3805 _ComputeAncillaryFiles(cluster, True)
3807 # Never re-distribute configuration file from here
3808 assert not (constants.CLUSTER_CONF_FILE in files_all or
3809 constants.CLUSTER_CONF_FILE in files_vm)
3810 assert not files_mc, "Master candidates not handled in this function"
3813 (online_nodes, files_all),
3814 (online_nodes, files_all_opt),
3815 (vm_nodes, files_vm),
3819 for (node_list, files) in filemap:
3821 _UploadHelper(lu, node_list, fname)
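# Note: the filemap built above pairs each target node list with the file set
# it should receive; master candidate files are deliberately absent here (see
# the assert), since distributing config.data is the job of ConfigWriter.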
3824 class LUClusterRedistConf(NoHooksLU):
3825 """Force the redistribution of cluster configuration.
3827 This is a very simple LU.
3832 def ExpandNames(self):
3833 self.needed_locks = {
3834 locking.LEVEL_NODE: locking.ALL_SET,
3836 self.share_locks[locking.LEVEL_NODE] = 1
3838 def Exec(self, feedback_fn):
3839 """Redistribute the configuration.
3842 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3843 _RedistributeAncillaryFiles(self)
3846 class LUClusterActivateMasterIp(NoHooksLU):
3847 """Activate the master IP on the master node.
3850 def Exec(self, feedback_fn):
3851 """Activate the master IP.
3854 master = self.cfg.GetMasterNode()
3855 self.rpc.call_node_activate_master_ip(master)
3858 class LUClusterDeactivateMasterIp(NoHooksLU):
3859 """Deactivate the master IP on the master node.
3862 def Exec(self, feedback_fn):
3863 """Deactivate the master IP.
3866 master = self.cfg.GetMasterNode()
3867 self.rpc.call_node_deactivate_master_ip(master)
3870 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3871 """Sleep and poll for an instance's disk to sync.
3874 if not instance.disks or disks is not None and not disks:
3877 disks = _ExpandCheckDisks(instance, disks)
3880 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3882 node = instance.primary_node
3885 lu.cfg.SetDiskID(dev, node)
3887 # TODO: Convert to utils.Retry
3890 degr_retries = 10 # in seconds, as we sleep 1 second each time
3894 cumul_degraded = False
3895 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3896 msg = rstats.fail_msg
3898 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3901 raise errors.RemoteError("Can't contact node %s for mirror data,"
3902 " aborting." % node)
3905 rstats = rstats.payload
3907 for i, mstat in enumerate(rstats):
3909 lu.LogWarning("Can't compute data for node %s/%s",
3910 node, disks[i].iv_name)
3913 cumul_degraded = (cumul_degraded or
3914 (mstat.is_degraded and mstat.sync_percent is None))
3915 if mstat.sync_percent is not None:
3917 if mstat.estimated_time is not None:
3918 rem_time = ("%s remaining (estimated)" %
3919 utils.FormatSeconds(mstat.estimated_time))
3920 max_time = mstat.estimated_time
3922 rem_time = "no time estimate"
3923 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3924 (disks[i].iv_name, mstat.sync_percent, rem_time))
3926 # if we're done but degraded, let's do a few small retries, to
3927 # make sure we see a stable and not transient situation; therefore
3928 # we force restart of the loop
3929 if (done or oneshot) and cumul_degraded and degr_retries > 0:
3930 logging.info("Degraded disks found, %d retries left", degr_retries)
3938 time.sleep(min(60, max_time))
3941 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3942 return not cumul_degraded
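# Typical caller pattern (illustrative sketch only):
#
#   disk_abort = not _WaitForSync(self, instance)
#   if disk_abort:
#     raise errors.OpExecError("Disks of instance %s never synced" %
#                              instance.name)
#
# With oneshot=True the function performs a single status pass (plus a few
# short retries while the disks look degraded) instead of blocking until the
# sync has finished.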
3945 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3946 """Check that mirrors are not degraded.
3948 The ldisk parameter, if True, will change the test from the
3949 is_degraded attribute (which represents overall non-ok status for
3950 the device(s)) to the ldisk (representing the local storage status).
3953 lu.cfg.SetDiskID(dev, node)
3957 if on_primary or dev.AssembleOnSecondary():
3958 rstats = lu.rpc.call_blockdev_find(node, dev)
3959 msg = rstats.fail_msg
3961 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3963 elif not rstats.payload:
3964 lu.LogWarning("Can't find disk on node %s", node)
3968 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3970 result = result and not rstats.payload.is_degraded
3973 for child in dev.children:
3974 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
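# Note on the recursion above: for stacked devices (e.g. DRBD over LVM) the
# consistency of every child device is folded into the result as well, so
# callers only need to pass the top-level disk object.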
3979 class LUOobCommand(NoHooksLU):
3980 """Logical unit for OOB handling.
3984 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
3986 def ExpandNames(self):
3987 """Gather locks we need.
3990 if self.op.node_names:
3991 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
3992 lock_names = self.op.node_names
3994 lock_names = locking.ALL_SET
3996 self.needed_locks = {
3997 locking.LEVEL_NODE: lock_names,
4000 def CheckPrereq(self):
4001 """Check prerequisites.
4004 - the node exists in the configuration
4007 Any errors are signaled by raising errors.OpPrereqError.
4011 self.master_node = self.cfg.GetMasterNode()
4013 assert self.op.power_delay >= 0.0
4015 if self.op.node_names:
4016 if (self.op.command in self._SKIP_MASTER and
4017 self.master_node in self.op.node_names):
4018 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4019 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4021 if master_oob_handler:
4022 additional_text = ("run '%s %s %s' if you want to operate on the"
4023 " master regardless") % (master_oob_handler,
4027 additional_text = "it does not support out-of-band operations"
4029 raise errors.OpPrereqError(("Operating on the master node %s is not"
4030 " allowed for %s; %s") %
4031 (self.master_node, self.op.command,
4032 additional_text), errors.ECODE_INVAL)
4034 self.op.node_names = self.cfg.GetNodeList()
4035 if self.op.command in self._SKIP_MASTER:
4036 self.op.node_names.remove(self.master_node)
4038 if self.op.command in self._SKIP_MASTER:
4039 assert self.master_node not in self.op.node_names
4041 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4043 raise errors.OpPrereqError("Node %s not found" % node_name,
4046 self.nodes.append(node)
4048 if (not self.op.ignore_status and
4049 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4050 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4051 " not marked offline") % node_name,
4054 def Exec(self, feedback_fn):
4055 """Execute OOB and return result if we expect any.
4058 master_node = self.master_node
4061 for idx, node in enumerate(utils.NiceSort(self.nodes,
4062 key=lambda node: node.name)):
4063 node_entry = [(constants.RS_NORMAL, node.name)]
4064 ret.append(node_entry)
4066 oob_program = _SupportsOob(self.cfg, node)
4069 node_entry.append((constants.RS_UNAVAIL, None))
4072 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4073 self.op.command, oob_program, node.name)
4074 result = self.rpc.call_run_oob(master_node, oob_program,
4075 self.op.command, node.name,
4079 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4080 node.name, result.fail_msg)
4081 node_entry.append((constants.RS_NODATA, None))
4084 self._CheckPayload(result)
4085 except errors.OpExecError, err:
4086 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4088 node_entry.append((constants.RS_NODATA, None))
4090 if self.op.command == constants.OOB_HEALTH:
4091 # For health we should log important events
4092 for item, status in result.payload:
4093 if status in [constants.OOB_STATUS_WARNING,
4094 constants.OOB_STATUS_CRITICAL]:
4095 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4096 item, node.name, status)
4098 if self.op.command == constants.OOB_POWER_ON:
4100 elif self.op.command == constants.OOB_POWER_OFF:
4101 node.powered = False
4102 elif self.op.command == constants.OOB_POWER_STATUS:
4103 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4104 if powered != node.powered:
4105 logging.warning(("Recorded power state (%s) of node '%s' does not"
4106 " match actual power state (%s)"), node.powered,
4109 # For configuration changing commands we should update the node
4110 if self.op.command in (constants.OOB_POWER_ON,
4111 constants.OOB_POWER_OFF):
4112 self.cfg.Update(node, feedback_fn)
4114 node_entry.append((constants.RS_NORMAL, result.payload))
4116 if (self.op.command == constants.OOB_POWER_ON and
4117 idx < len(self.nodes) - 1):
4118 time.sleep(self.op.power_delay)
4122 def _CheckPayload(self, result):
4123 """Checks if the payload is valid.
4125 @param result: RPC result
4126 @raises errors.OpExecError: If payload is not valid
4130 if self.op.command == constants.OOB_HEALTH:
4131 if not isinstance(result.payload, list):
4132 errs.append("command 'health' is expected to return a list but got %s" %
4133 type(result.payload))
4135 for item, status in result.payload:
4136 if status not in constants.OOB_STATUSES:
4137 errs.append("health item '%s' has invalid status '%s'" %
4140 if self.op.command == constants.OOB_POWER_STATUS:
4141 if not isinstance(result.payload, dict):
4142 errs.append("power-status is expected to return a dict but got %s" %
4143 type(result.payload))
4145 if self.op.command in [
4146 constants.OOB_POWER_ON,
4147 constants.OOB_POWER_OFF,
4148 constants.OOB_POWER_CYCLE,
4150 if result.payload is not None:
4151 errs.append("%s is expected to not return payload but got '%s'" %
4152 (self.op.command, result.payload))
4155 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4156 utils.CommaJoin(errs))
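# For reference, the payload shapes accepted above are roughly:
#   OOB_HEALTH              -> list of (item, status) pairs,
#                              with status in constants.OOB_STATUSES
#   OOB_POWER_STATUS        -> dict, e.g. {OOB_POWER_STATUS_POWERED: True}
#   OOB_POWER_ON/OFF/CYCLE  -> no payload at all (None)
# This is a summary of the checks in _CheckPayload, not a full specification.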
4159 class _OsQuery(_QueryBase):
4160 FIELDS = query.OS_FIELDS
4162 def ExpandNames(self, lu):
4163 # Lock all nodes in shared mode
4164 # Temporary removal of locks, should be reverted later
4165 # TODO: reintroduce locks when they are lighter-weight
4166 lu.needed_locks = {}
4167 #self.share_locks[locking.LEVEL_NODE] = 1
4168 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4170 # The following variables interact with _QueryBase._GetNames
4172 self.wanted = self.names
4174 self.wanted = locking.ALL_SET
4176 self.do_locking = self.use_locking
4178 def DeclareLocks(self, lu, level):
4182 def _DiagnoseByOS(rlist):
4183 """Remaps a per-node return list into an a per-os per-node dictionary
4185 @param rlist: a map with node names as keys and OS objects as values
4188 @return: a dictionary with osnames as keys and as value another
4189 map, with nodes as keys and tuples of (path, status, diagnose,
4190 variants, parameters, api_versions) as values, eg::
4192 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4193 (/srv/..., False, "invalid api")],
4194 "node2": [(/srv/..., True, "", [], [])]}
4199 # here we build the list of nodes that didn't fail the RPC (at RPC
4200 # level), so that nodes with a non-responding node daemon don't
4201 # make all OSes invalid
4202 good_nodes = [node_name for node_name in rlist
4203 if not rlist[node_name].fail_msg]
4204 for node_name, nr in rlist.items():
4205 if nr.fail_msg or not nr.payload:
4207 for (name, path, status, diagnose, variants,
4208 params, api_versions) in nr.payload:
4209 if name not in all_os:
4210 # build a list of nodes for this os containing empty lists
4211 # for each node in node_list
4213 for nname in good_nodes:
4214 all_os[name][nname] = []
4215 # convert params from [name, help] to (name, help)
4216 params = [tuple(v) for v in params]
4217 all_os[name][node_name].append((path, status, diagnose,
4218 variants, params, api_versions))
4221 def _GetQueryData(self, lu):
4222 """Computes the list of nodes and their attributes.
4225 # Locking is not used
4226 assert not (compat.any(lu.glm.is_owned(level)
4227 for level in locking.LEVELS
4228 if level != locking.LEVEL_CLUSTER) or
4229 self.do_locking or self.use_locking)
4231 valid_nodes = [node.name
4232 for node in lu.cfg.GetAllNodesInfo().values()
4233 if not node.offline and node.vm_capable]
4234 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4235 cluster = lu.cfg.GetClusterInfo()
4239 for (os_name, os_data) in pol.items():
4240 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4241 hidden=(os_name in cluster.hidden_os),
4242 blacklisted=(os_name in cluster.blacklisted_os))
4246 api_versions = set()
4248 for idx, osl in enumerate(os_data.values()):
4249 info.valid = bool(info.valid and osl and osl[0][1])
4253 (node_variants, node_params, node_api) = osl[0][3:6]
4256 variants.update(node_variants)
4257 parameters.update(node_params)
4258 api_versions.update(node_api)
4260 # Filter out inconsistent values
4261 variants.intersection_update(node_variants)
4262 parameters.intersection_update(node_params)
4263 api_versions.intersection_update(node_api)
4265 info.variants = list(variants)
4266 info.parameters = list(parameters)
4267 info.api_versions = list(api_versions)
4269 data[os_name] = info
4271 # Prepare data in requested order
4272 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4276 class LUOsDiagnose(NoHooksLU):
4277 """Logical unit for OS diagnose/query.
4283 def _BuildFilter(fields, names):
4284 """Builds a filter for querying OSes.
4287 name_filter = qlang.MakeSimpleFilter("name", names)
4289 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4290 # respective field is not requested
4291 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4292 for fname in ["hidden", "blacklisted"]
4293 if fname not in fields]
4294 if "valid" not in fields:
4295 status_filter.append([qlang.OP_TRUE, "valid"])
4298 status_filter.insert(0, qlang.OP_AND)
4300 status_filter = None
4302 if name_filter and status_filter:
4303 return [qlang.OP_AND, name_filter, status_filter]
4307 return status_filter
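# Sketch of the filter produced above (assuming the usual qlang operator
# encodings, i.e. OP_AND == "&", OP_OR == "|", OP_NOT == "!", OP_TRUE == "?",
# OP_EQUAL == "="):
#
#   _BuildFilter(["name", "variants"], ["lenny-image"]) would yield
#   ["&", ["|", ["=", "name", "lenny-image"]],
#         ["&", ["!", ["?", "hidden"]],
#               ["!", ["?", "blacklisted"]],
#               ["?", "valid"]]]
#
# i.e. "name matches AND not hidden AND not blacklisted AND valid".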
4309 def CheckArguments(self):
4310 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4311 self.op.output_fields, False)
4313 def ExpandNames(self):
4314 self.oq.ExpandNames(self)
4316 def Exec(self, feedback_fn):
4317 return self.oq.OldStyleQuery(self)
4320 class LUNodeRemove(LogicalUnit):
4321 """Logical unit for removing a node.
4324 HPATH = "node-remove"
4325 HTYPE = constants.HTYPE_NODE
4327 def BuildHooksEnv(self):
4330 This doesn't run on the target node in the pre phase as a failed
4331 node would then be impossible to remove.
4335 "OP_TARGET": self.op.node_name,
4336 "NODE_NAME": self.op.node_name,
4339 def BuildHooksNodes(self):
4340 """Build hooks nodes.
4343 all_nodes = self.cfg.GetNodeList()
4345 all_nodes.remove(self.op.node_name)
4347 logging.warning("Node '%s', which is about to be removed, was not found"
4348 " in the list of all nodes", self.op.node_name)
4349 return (all_nodes, all_nodes)
4351 def CheckPrereq(self):
4352 """Check prerequisites.
4355 - the node exists in the configuration
4356 - it does not have primary or secondary instances
4357 - it's not the master
4359 Any errors are signaled by raising errors.OpPrereqError.
4362 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4363 node = self.cfg.GetNodeInfo(self.op.node_name)
4364 assert node is not None
4366 masternode = self.cfg.GetMasterNode()
4367 if node.name == masternode:
4368 raise errors.OpPrereqError("Node is the master node, failover to another"
4369 " node is required", errors.ECODE_INVAL)
4371 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4372 if node.name in instance.all_nodes:
4373 raise errors.OpPrereqError("Instance %s is still running on the node,"
4374 " please remove first" % instance_name,
4376 self.op.node_name = node.name
4379 def Exec(self, feedback_fn):
4380 """Removes the node from the cluster.
4384 logging.info("Stopping the node daemon and removing configs from node %s",
4387 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4389 # Promote nodes to master candidate as needed
4390 _AdjustCandidatePool(self, exceptions=[node.name])
4391 self.context.RemoveNode(node.name)
4393 # Run post hooks on the node before it's removed
4394 _RunPostHook(self, node.name)
4396 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4397 msg = result.fail_msg
4399 self.LogWarning("Errors encountered on the remote node while leaving"
4400 " the cluster: %s", msg)
4402 # Remove node from our /etc/hosts
4403 if self.cfg.GetClusterInfo().modify_etc_hosts:
4404 master_node = self.cfg.GetMasterNode()
4405 result = self.rpc.call_etc_hosts_modify(master_node,
4406 constants.ETC_HOSTS_REMOVE,
4408 result.Raise("Can't update hosts file with new host data")
4409 _RedistributeAncillaryFiles(self)
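# Ordering note for the removal above: the candidate pool is adjusted and the
# node dropped from the cluster context before any RPC is made, so a node
# that cannot clean itself up (call_node_leave_cluster failing) only results
# in a warning instead of aborting the removal.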
4412 class _NodeQuery(_QueryBase):
4413 FIELDS = query.NODE_FIELDS
4415 def ExpandNames(self, lu):
4416 lu.needed_locks = {}
4417 lu.share_locks = _ShareAll()
4420 self.wanted = _GetWantedNodes(lu, self.names)
4422 self.wanted = locking.ALL_SET
4424 self.do_locking = (self.use_locking and
4425 query.NQ_LIVE in self.requested_data)
4428 # If any non-static field is requested we need to lock the nodes
4429 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4431 def DeclareLocks(self, lu, level):
4434 def _GetQueryData(self, lu):
4435 """Computes the list of nodes and their attributes.
4438 all_info = lu.cfg.GetAllNodesInfo()
4440 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4442 # Gather data as requested
4443 if query.NQ_LIVE in self.requested_data:
4444 # filter out non-vm_capable nodes
4445 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4447 node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
4448 lu.cfg.GetHypervisorType())
4449 live_data = dict((name, nresult.payload)
4450 for (name, nresult) in node_data.items()
4451 if not nresult.fail_msg and nresult.payload)
4455 if query.NQ_INST in self.requested_data:
4456 node_to_primary = dict([(name, set()) for name in nodenames])
4457 node_to_secondary = dict([(name, set()) for name in nodenames])
4459 inst_data = lu.cfg.GetAllInstancesInfo()
4461 for inst in inst_data.values():
4462 if inst.primary_node in node_to_primary:
4463 node_to_primary[inst.primary_node].add(inst.name)
4464 for secnode in inst.secondary_nodes:
4465 if secnode in node_to_secondary:
4466 node_to_secondary[secnode].add(inst.name)
4468 node_to_primary = None
4469 node_to_secondary = None
4471 if query.NQ_OOB in self.requested_data:
4472 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4473 for name, node in all_info.iteritems())
4477 if query.NQ_GROUP in self.requested_data:
4478 groups = lu.cfg.GetAllNodeGroupsInfo()
4482 return query.NodeQueryData([all_info[name] for name in nodenames],
4483 live_data, lu.cfg.GetMasterNode(),
4484 node_to_primary, node_to_secondary, groups,
4485 oob_support, lu.cfg.GetClusterInfo())
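# Quick reference for the optional data gathered above (driven by the
# requested query flags):
#   query.NQ_LIVE  -> live_data: per-node node_info RPC payloads
#   query.NQ_INST  -> node_to_primary / node_to_secondary instance-name sets
#   query.NQ_OOB   -> oob_support: whether each node has an OOB helper
#   query.NQ_GROUP -> groups: all node group objects, for group lookups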
4488 class LUNodeQuery(NoHooksLU):
4489 """Logical unit for querying nodes.
4492 # pylint: disable=W0142
4495 def CheckArguments(self):
4496 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4497 self.op.output_fields, self.op.use_locking)
4499 def ExpandNames(self):
4500 self.nq.ExpandNames(self)
4502 def Exec(self, feedback_fn):
4503 return self.nq.OldStyleQuery(self)
4506 class LUNodeQueryvols(NoHooksLU):
4507 """Logical unit for getting volumes on node(s).
4511 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4512 _FIELDS_STATIC = utils.FieldSet("node")
4514 def CheckArguments(self):
4515 _CheckOutputFields(static=self._FIELDS_STATIC,
4516 dynamic=self._FIELDS_DYNAMIC,
4517 selected=self.op.output_fields)
4519 def ExpandNames(self):
4520 self.needed_locks = {}
4521 self.share_locks[locking.LEVEL_NODE] = 1
4522 if not self.op.nodes:
4523 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4525 self.needed_locks[locking.LEVEL_NODE] = \
4526 _GetWantedNodes(self, self.op.nodes)
4528 def Exec(self, feedback_fn):
4529 """Computes the list of nodes and their attributes.
4532 nodenames = self.owned_locks(locking.LEVEL_NODE)
4533 volumes = self.rpc.call_node_volumes(nodenames)
4535 ilist = self.cfg.GetAllInstancesInfo()
4536 vol2inst = _MapInstanceDisksToNodes(ilist.values())
4539 for node in nodenames:
4540 nresult = volumes[node]
4543 msg = nresult.fail_msg
4545 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
4548 node_vols = sorted(nresult.payload,
4549 key=operator.itemgetter("dev"))
4551 for vol in node_vols:
4553 for field in self.op.output_fields:
4556 elif field == "phys":
4560 elif field == "name":
4562 elif field == "size":
4563 val = int(float(vol["size"]))
4564 elif field == "instance":
4565 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
4567 raise errors.ParameterError(field)
4568 node_output.append(str(val))
4570 output.append(node_output)
4575 class LUNodeQueryStorage(NoHooksLU):
4576 """Logical unit for getting information on storage units on node(s).
4579 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4582 def CheckArguments(self):
4583 _CheckOutputFields(static=self._FIELDS_STATIC,
4584 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4585 selected=self.op.output_fields)
4587 def ExpandNames(self):
4588 self.needed_locks = {}
4589 self.share_locks[locking.LEVEL_NODE] = 1
4592 self.needed_locks[locking.LEVEL_NODE] = \
4593 _GetWantedNodes(self, self.op.nodes)
4595 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4597 def Exec(self, feedback_fn):
4598 """Computes the list of nodes and their attributes.
4601 self.nodes = self.owned_locks(locking.LEVEL_NODE)
4603 # Always get name to sort by
4604 if constants.SF_NAME in self.op.output_fields:
4605 fields = self.op.output_fields[:]
4607 fields = [constants.SF_NAME] + self.op.output_fields
4609 # Never ask for node or type as it's only known to the LU
4610 for extra in [constants.SF_NODE, constants.SF_TYPE]:
4611 while extra in fields:
4612 fields.remove(extra)
4614 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4615 name_idx = field_idx[constants.SF_NAME]
4617 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4618 data = self.rpc.call_storage_list(self.nodes,
4619 self.op.storage_type, st_args,
4620 self.op.name, fields)
4624 for node in utils.NiceSort(self.nodes):
4625 nresult = data[node]
4629 msg = nresult.fail_msg
4631 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4634 rows = dict([(row[name_idx], row) for row in nresult.payload])
4636 for name in utils.NiceSort(rows.keys()):
4641 for field in self.op.output_fields:
4642 if field == constants.SF_NODE:
4644 elif field == constants.SF_TYPE:
4645 val = self.op.storage_type
4646 elif field in field_idx:
4647 val = row[field_idx[field]]
4649 raise errors.ParameterError(field)
4658 class _InstanceQuery(_QueryBase):
4659 FIELDS = query.INSTANCE_FIELDS
4661 def ExpandNames(self, lu):
4662 lu.needed_locks = {}
4663 lu.share_locks = _ShareAll()
4666 self.wanted = _GetWantedInstances(lu, self.names)
4668 self.wanted = locking.ALL_SET
4670 self.do_locking = (self.use_locking and
4671 query.IQ_LIVE in self.requested_data)
4673 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4674 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
4675 lu.needed_locks[locking.LEVEL_NODE] = []
4676 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4678 self.do_grouplocks = (self.do_locking and
4679 query.IQ_NODES in self.requested_data)
4681 def DeclareLocks(self, lu, level):
4683 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
4684 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
4686 # Lock all groups used by instances optimistically; this requires going
4687 # via the node before it's locked, requiring verification later on
4688 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
4690 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
4691 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
4692 elif level == locking.LEVEL_NODE:
4693 lu._LockInstancesNodes() # pylint: disable=W0212
4696 def _CheckGroupLocks(lu):
4697 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
4698 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
4700 # Check if node groups for locked instances are still correct
4701 for instance_name in owned_instances:
4702 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
4704 def _GetQueryData(self, lu):
4705 """Computes the list of instances and their attributes.
4708 if self.do_grouplocks:
4709 self._CheckGroupLocks(lu)
4711 cluster = lu.cfg.GetClusterInfo()
4712 all_info = lu.cfg.GetAllInstancesInfo()
4714 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4716 instance_list = [all_info[name] for name in instance_names]
4717 nodes = frozenset(itertools.chain(*(inst.all_nodes
4718 for inst in instance_list)))
4719 hv_list = list(set([inst.hypervisor for inst in instance_list]))
4722 wrongnode_inst = set()
4724 # Gather data as requested
4725 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4727 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4729 result = node_data[name]
4731 # offline nodes will be in both lists
4732 assert result.fail_msg
4733 offline_nodes.append(name)
4735 bad_nodes.append(name)
4736 elif result.payload:
4737 for inst in result.payload:
4738 if inst in all_info:
4739 if all_info[inst].primary_node == name:
4740 live_data.update(result.payload)
4742 wrongnode_inst.add(inst)
4744 # orphan instance; we don't list it here as we don't
4745 # handle this case yet in the output of instance listing
4746 logging.warning("Orphan instance '%s' found on node %s",
4748 # else no instance is alive
4752 if query.IQ_DISKUSAGE in self.requested_data:
4753 disk_usage = dict((inst.name,
4754 _ComputeDiskSize(inst.disk_template,
4755 [{constants.IDISK_SIZE: disk.size}
4756 for disk in inst.disks]))
4757 for inst in instance_list)
4761 if query.IQ_CONSOLE in self.requested_data:
4763 for inst in instance_list:
4764 if inst.name in live_data:
4765 # Instance is running
4766 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4768 consinfo[inst.name] = None
4769 assert set(consinfo.keys()) == set(instance_names)
4773 if query.IQ_NODES in self.requested_data:
4774 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
4776 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
4777 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
4778 for uuid in set(map(operator.attrgetter("group"),
4784 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4785 disk_usage, offline_nodes, bad_nodes,
4786 live_data, wrongnode_inst, consinfo,
4790 class LUQuery(NoHooksLU):
4791 """Query for resources/items of a certain kind.
4794 # pylint: disable=W0142
4797 def CheckArguments(self):
4798 qcls = _GetQueryImplementation(self.op.what)
4800 self.impl = qcls(self.op.filter, self.op.fields, self.op.use_locking)
4802 def ExpandNames(self):
4803 self.impl.ExpandNames(self)
4805 def DeclareLocks(self, level):
4806 self.impl.DeclareLocks(self, level)
4808 def Exec(self, feedback_fn):
4809 return self.impl.NewStyleQuery(self)
4812 class LUQueryFields(NoHooksLU):
4813 """Query for resources/items of a certain kind.
4816 # pylint: disable=W0142
4819 def CheckArguments(self):
4820 self.qcls = _GetQueryImplementation(self.op.what)
4822 def ExpandNames(self):
4823 self.needed_locks = {}
4825 def Exec(self, feedback_fn):
4826 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4829 class LUNodeModifyStorage(NoHooksLU):
4830 """Logical unit for modifying a storage volume on a node.
4835 def CheckArguments(self):
4836 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4838 storage_type = self.op.storage_type
4841 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4843 raise errors.OpPrereqError("Storage units of type '%s' can not be"
4844 " modified" % storage_type,
4847 diff = set(self.op.changes.keys()) - modifiable
4849 raise errors.OpPrereqError("The following fields can not be modified for"
4850 " storage units of type '%s': %r" %
4851 (storage_type, list(diff)),
4854 def ExpandNames(self):
4855 self.needed_locks = {
4856 locking.LEVEL_NODE: self.op.node_name,
4859 def Exec(self, feedback_fn):
4860 """Computes the list of nodes and their attributes.
4863 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4864 result = self.rpc.call_storage_modify(self.op.node_name,
4865 self.op.storage_type, st_args,
4866 self.op.name, self.op.changes)
4867 result.Raise("Failed to modify storage unit '%s' on %s" %
4868 (self.op.name, self.op.node_name))
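# Illustrative opcode usage (a sketch; assumes the LVM physical-volume
# storage type, whose only modifiable field is the allocatable flag):
#
#   opcodes.OpNodeModifyStorage(node_name="node1.example.com",
#                               storage_type=constants.ST_LVM_PV,
#                               name="/dev/sda3",
#                               changes={constants.SF_ALLOCATABLE: False})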
4871 class LUNodeAdd(LogicalUnit):
4872 """Logical unit for adding node to the cluster.
4876 HTYPE = constants.HTYPE_NODE
4877 _NFLAGS = ["master_capable", "vm_capable"]
4879 def CheckArguments(self):
4880 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4881 # validate/normalize the node name
4882 self.hostname = netutils.GetHostname(name=self.op.node_name,
4883 family=self.primary_ip_family)
4884 self.op.node_name = self.hostname.name
4886 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
4887 raise errors.OpPrereqError("Cannot readd the master node",
4890 if self.op.readd and self.op.group:
4891 raise errors.OpPrereqError("Cannot pass a node group when a node is"
4892 " being readded", errors.ECODE_INVAL)
4894 def BuildHooksEnv(self):
4897 This will run on all nodes before, and on all nodes + the new node after.
4901 "OP_TARGET": self.op.node_name,
4902 "NODE_NAME": self.op.node_name,
4903 "NODE_PIP": self.op.primary_ip,
4904 "NODE_SIP": self.op.secondary_ip,
4905 "MASTER_CAPABLE": str(self.op.master_capable),
4906 "VM_CAPABLE": str(self.op.vm_capable),
4909 def BuildHooksNodes(self):
4910 """Build hooks nodes.
4913 # Exclude added node
4914 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
4915 post_nodes = pre_nodes + [self.op.node_name, ]
4917 return (pre_nodes, post_nodes)
4919 def CheckPrereq(self):
4920 """Check prerequisites.
4923 - the new node is not already in the config
4925 - its parameters (single/dual homed) match the cluster
4927 Any errors are signaled by raising errors.OpPrereqError.
4931 hostname = self.hostname
4932 node = hostname.name
4933 primary_ip = self.op.primary_ip = hostname.ip
4934 if self.op.secondary_ip is None:
4935 if self.primary_ip_family == netutils.IP6Address.family:
4936 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
4937 " IPv4 address must be given as secondary",
4939 self.op.secondary_ip = primary_ip
4941 secondary_ip = self.op.secondary_ip
4942 if not netutils.IP4Address.IsValid(secondary_ip):
4943 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4944 " address" % secondary_ip, errors.ECODE_INVAL)
4946 node_list = cfg.GetNodeList()
4947 if not self.op.readd and node in node_list:
4948 raise errors.OpPrereqError("Node %s is already in the configuration" %
4949 node, errors.ECODE_EXISTS)
4950 elif self.op.readd and node not in node_list:
4951 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4954 self.changed_primary_ip = False
4956 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
4957 if self.op.readd and node == existing_node_name:
4958 if existing_node.secondary_ip != secondary_ip:
4959 raise errors.OpPrereqError("Readded node doesn't have the same IP"
4960 " address configuration as before",
4962 if existing_node.primary_ip != primary_ip:
4963 self.changed_primary_ip = True
4967 if (existing_node.primary_ip == primary_ip or
4968 existing_node.secondary_ip == primary_ip or
4969 existing_node.primary_ip == secondary_ip or
4970 existing_node.secondary_ip == secondary_ip):
4971 raise errors.OpPrereqError("New node ip address(es) conflict with"
4972 " existing node %s" % existing_node.name,
4973 errors.ECODE_NOTUNIQUE)
4975 # After this 'if' block, None is no longer a valid value for the
4976 # _capable op attributes
4978 old_node = self.cfg.GetNodeInfo(node)
4979 assert old_node is not None, "Can't retrieve locked node %s" % node
4980 for attr in self._NFLAGS:
4981 if getattr(self.op, attr) is None:
4982 setattr(self.op, attr, getattr(old_node, attr))
4984 for attr in self._NFLAGS:
4985 if getattr(self.op, attr) is None:
4986 setattr(self.op, attr, True)
4988 if self.op.readd and not self.op.vm_capable:
4989 pri, sec = cfg.GetNodeInstances(node)
4991 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
4992 " flag set to false, but it already holds"
4993 " instances" % node,
4996 # check that the type of the node (single versus dual homed) is the
4997 # same as for the master
4998 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
4999 master_singlehomed = myself.secondary_ip == myself.primary_ip
5000 newbie_singlehomed = secondary_ip == primary_ip
5001 if master_singlehomed != newbie_singlehomed:
5002 if master_singlehomed:
5003 raise errors.OpPrereqError("The master has no secondary ip but the"
5004 " new node has one",
5007 raise errors.OpPrereqError("The master has a secondary ip but the"
5008 " new node doesn't have one",
5011 # checks reachability
5012 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5013 raise errors.OpPrereqError("Node not reachable by ping",
5014 errors.ECODE_ENVIRON)
5016 if not newbie_singlehomed:
5017 # check reachability from my secondary ip to newbie's secondary ip
5018 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5019 source=myself.secondary_ip):
5020 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5021 " based ping to node daemon port",
5022 errors.ECODE_ENVIRON)
5029 if self.op.master_capable:
5030 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5032 self.master_candidate = False
5035 self.new_node = old_node
5037 node_group = cfg.LookupNodeGroup(self.op.group)
5038 self.new_node = objects.Node(name=node,
5039 primary_ip=primary_ip,
5040 secondary_ip=secondary_ip,
5041 master_candidate=self.master_candidate,
5042 offline=False, drained=False,
5045 if self.op.ndparams:
5046 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5048 def Exec(self, feedback_fn):
5049 """Adds the new node to the cluster.
5052 new_node = self.new_node
5053 node = new_node.name
5055 # We are adding a new node, so we assume it's powered
5056 new_node.powered = True
5058 # for re-adds, reset the offline/drained/master-candidate flags;
5059 # we need to reset here, otherwise offline would prevent RPC calls
5060 # later in the procedure; this also means that if the re-add
5061 # fails, we are left with a non-offlined, broken node
5063 new_node.drained = new_node.offline = False # pylint: disable=W0201
5064 self.LogInfo("Readding a node, the offline/drained flags were reset")
5065 # if we demote the node, we do cleanup later in the procedure
5066 new_node.master_candidate = self.master_candidate
5067 if self.changed_primary_ip:
5068 new_node.primary_ip = self.op.primary_ip
5070 # copy the master/vm_capable flags
5071 for attr in self._NFLAGS:
5072 setattr(new_node, attr, getattr(self.op, attr))
5074 # notify the user about any possible mc promotion
5075 if new_node.master_candidate:
5076 self.LogInfo("Node will be a master candidate")
5078 if self.op.ndparams:
5079 new_node.ndparams = self.op.ndparams
5081 new_node.ndparams = {}
5083 # check connectivity
5084 result = self.rpc.call_version([node])[node]
5085 result.Raise("Can't get version information from node %s" % node)
5086 if constants.PROTOCOL_VERSION == result.payload:
5087 logging.info("Communication to node %s fine, sw version %s match",
5088 node, result.payload)
5090 raise errors.OpExecError("Version mismatch master version %s,"
5091 " node version %s" %
5092 (constants.PROTOCOL_VERSION, result.payload))
5094 # Add node to our /etc/hosts, and add key to known_hosts
5095 if self.cfg.GetClusterInfo().modify_etc_hosts:
5096 master_node = self.cfg.GetMasterNode()
5097 result = self.rpc.call_etc_hosts_modify(master_node,
5098 constants.ETC_HOSTS_ADD,
5101 result.Raise("Can't update hosts file with new host data")
5103 if new_node.secondary_ip != new_node.primary_ip:
5104 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5107 node_verify_list = [self.cfg.GetMasterNode()]
5108 node_verify_param = {
5109 constants.NV_NODELIST: ([node], {}),
5110 # TODO: do a node-net-test as well?
5113 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5114 self.cfg.GetClusterName())
5115 for verifier in node_verify_list:
5116 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5117 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5119 for failed in nl_payload:
5120 feedback_fn("ssh/hostname verification failed"
5121 " (checking from %s): %s" %
5122 (verifier, nl_payload[failed]))
5123 raise errors.OpExecError("ssh/hostname verification failed")
5126 _RedistributeAncillaryFiles(self)
5127 self.context.ReaddNode(new_node)
5128 # make sure we redistribute the config
5129 self.cfg.Update(new_node, feedback_fn)
5130 # and make sure the new node will not have old files around
5131 if not new_node.master_candidate:
5132 result = self.rpc.call_node_demote_from_mc(new_node.name)
5133 msg = result.fail_msg
5135 self.LogWarning("Node failed to demote itself from master"
5136 " candidate status: %s" % msg)
5138 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5139 additional_vm=self.op.vm_capable)
5140 self.context.AddNode(new_node, self.proc.GetECId())
5143 class LUNodeSetParams(LogicalUnit):
5144 """Modifies the parameters of a node.
5146 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5147 to the node role (as _ROLE_*)
5148 @cvar _R2F: a dictionary from node role to tuples of flags
5149 @cvar _FLAGS: a list of attribute names corresponding to the flags
5152 HPATH = "node-modify"
5153 HTYPE = constants.HTYPE_NODE
5155 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5157 (True, False, False): _ROLE_CANDIDATE,
5158 (False, True, False): _ROLE_DRAINED,
5159 (False, False, True): _ROLE_OFFLINE,
5160 (False, False, False): _ROLE_REGULAR,
5162 _R2F = dict((v, k) for k, v in _F2R.items())
5163 _FLAGS = ["master_candidate", "drained", "offline"]
5165 def CheckArguments(self):
5166 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5167 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5168 self.op.master_capable, self.op.vm_capable,
5169 self.op.secondary_ip, self.op.ndparams]
5170 if all_mods.count(None) == len(all_mods):
5171 raise errors.OpPrereqError("Please pass at least one modification",
5173 if all_mods.count(True) > 1:
5174 raise errors.OpPrereqError("Can't set the node into more than one"
5175 " state at the same time",
5178 # Boolean value that tells us whether we might be demoting from MC
5179 self.might_demote = (self.op.master_candidate == False or
5180 self.op.offline == True or
5181 self.op.drained == True or
5182 self.op.master_capable == False)
5184 if self.op.secondary_ip:
5185 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5186 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5187 " address" % self.op.secondary_ip,
5190 self.lock_all = self.op.auto_promote and self.might_demote
5191 self.lock_instances = self.op.secondary_ip is not None
5193 def ExpandNames(self):
5195 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5197 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5199 if self.lock_instances:
5200 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
5202 def DeclareLocks(self, level):
5203 # If we have locked all instances, before waiting to lock nodes, release
5204 # all the ones living on nodes unrelated to the current operation.
5205 if level == locking.LEVEL_NODE and self.lock_instances:
5206 self.affected_instances = []
5207 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
5210 # Build list of instances to release
5211 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
5212 for instance_name, instance in self.cfg.GetMultiInstanceInfo(locked_i):
5213 if (instance.disk_template in constants.DTS_INT_MIRROR and
5214 self.op.node_name in instance.all_nodes):
5215 instances_keep.append(instance_name)
5216 self.affected_instances.append(instance)
5218 _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)
5220 assert (set(self.owned_locks(locking.LEVEL_INSTANCE)) ==
5221 set(instances_keep))
5223 def BuildHooksEnv(self):
5226 This runs on the master node.
5230 "OP_TARGET": self.op.node_name,
5231 "MASTER_CANDIDATE": str(self.op.master_candidate),
5232 "OFFLINE": str(self.op.offline),
5233 "DRAINED": str(self.op.drained),
5234 "MASTER_CAPABLE": str(self.op.master_capable),
5235 "VM_CAPABLE": str(self.op.vm_capable),
5238 def BuildHooksNodes(self):
5239 """Build hooks nodes.
5242 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5245 def CheckPrereq(self):
5246 """Check prerequisites.
5248 This only checks the instance list against the existing names.
5251 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5253 if (self.op.master_candidate is not None or
5254 self.op.drained is not None or
5255 self.op.offline is not None):
5256 # we can't change the master's node flags
5257 if self.op.node_name == self.cfg.GetMasterNode():
5258 raise errors.OpPrereqError("The master role can be changed"
5259 " only via master-failover",
5262 if self.op.master_candidate and not node.master_capable:
5263 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5264 " it a master candidate" % node.name,
5267 if self.op.vm_capable == False:
5268 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5270 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5271 " the vm_capable flag" % node.name,
5274 if node.master_candidate and self.might_demote and not self.lock_all:
5275 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5276 # check if after removing the current node, we're missing master
5278 (mc_remaining, mc_should, _) = \
5279 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5280 if mc_remaining < mc_should:
5281 raise errors.OpPrereqError("Not enough master candidates, please"
5282 " pass auto promote option to allow"
5283 " promotion", errors.ECODE_STATE)
5285 self.old_flags = old_flags = (node.master_candidate,
5286 node.drained, node.offline)
5287 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5288 self.old_role = old_role = self._F2R[old_flags]
5290 # Check for ineffective changes
5291 for attr in self._FLAGS:
5292 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5293 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5294 setattr(self.op, attr, None)
5296 # Past this point, any flag change to False means a transition
5297 # away from the respective state, as only real changes are kept
5299 # TODO: We might query the real power state if it supports OOB
5300 if _SupportsOob(self.cfg, node):
5301 if self.op.offline is False and not (node.powered or
5302 self.op.powered == True):
5303 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5304 " offline status can be reset") %
5306 elif self.op.powered is not None:
5307 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5308 " as it does not support out-of-band"
5309 " handling") % self.op.node_name)
5311 # If we're being deofflined/drained, we'll MC ourself if needed
5312 if (self.op.drained == False or self.op.offline == False or
5313 (self.op.master_capable and not node.master_capable)):
5314 if _DecideSelfPromotion(self):
5315 self.op.master_candidate = True
5316 self.LogInfo("Auto-promoting node to master candidate")
5318 # If we're no longer master capable, we'll demote ourselves from MC
5319 if self.op.master_capable == False and node.master_candidate:
5320 self.LogInfo("Demoting from master candidate")
5321 self.op.master_candidate = False
5324 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5325 if self.op.master_candidate:
5326 new_role = self._ROLE_CANDIDATE
5327 elif self.op.drained:
5328 new_role = self._ROLE_DRAINED
5329 elif self.op.offline:
5330 new_role = self._ROLE_OFFLINE
5331 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5332 # False is still in new flags, which means we're un-setting (the
5334 new_role = self._ROLE_REGULAR
5335 else: # no new flags, nothing, keep old role
5338 self.new_role = new_role
5340 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5341 # Trying to transition out of offline status
5342 result = self.rpc.call_version([node.name])[node.name]
5344 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5345 " to report its version: %s" %
5346 (node.name, result.fail_msg),
5349 self.LogWarning("Transitioning node from offline to online state"
5350 " without using re-add. Please make sure the node"
5353 if self.op.secondary_ip:
5354 # Ok even without locking, because this can't be changed by any LU
5355 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5356 master_singlehomed = master.secondary_ip == master.primary_ip
5357 if master_singlehomed and self.op.secondary_ip:
5358 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5359 " homed cluster", errors.ECODE_INVAL)
5362 if self.affected_instances:
5363 raise errors.OpPrereqError("Cannot change secondary ip: offline"
5364 " node has instances (%s) configured"
5365 " to use it" % self.affected_instances)
5367 # On online nodes, check that no instances are running, and that
5368 # the node has the new ip and we can reach it.
5369 for instance in self.affected_instances:
5370 _CheckInstanceDown(self, instance, "cannot change secondary ip")
5372 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5373 if master.name != node.name:
5374 # check reachability from master secondary ip to new secondary ip
5375 if not netutils.TcpPing(self.op.secondary_ip,
5376 constants.DEFAULT_NODED_PORT,
5377 source=master.secondary_ip):
5378 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5379 " based ping to node daemon port",
5380 errors.ECODE_ENVIRON)
5382 if self.op.ndparams:
5383 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5384 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5385 self.new_ndparams = new_ndparams
5387 def Exec(self, feedback_fn):
5392 old_role = self.old_role
5393 new_role = self.new_role
5397 if self.op.ndparams:
5398 node.ndparams = self.new_ndparams
5400 if self.op.powered is not None:
5401 node.powered = self.op.powered
5403 for attr in ["master_capable", "vm_capable"]:
5404 val = getattr(self.op, attr)
5406 setattr(node, attr, val)
5407 result.append((attr, str(val)))
5409 if new_role != old_role:
5410 # Tell the node to demote itself, if no longer MC and not offline
5411 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5412 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5414 self.LogWarning("Node failed to demote itself: %s", msg)
5416 new_flags = self._R2F[new_role]
5417 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5419 result.append((desc, str(nf)))
5420 (node.master_candidate, node.drained, node.offline) = new_flags
5422 # we locked all nodes, so we adjust the candidate pool before updating this node
5424 _AdjustCandidatePool(self, [node.name])
5426 if self.op.secondary_ip:
5427 node.secondary_ip = self.op.secondary_ip
5428 result.append(("secondary_ip", self.op.secondary_ip))
5430 # this will trigger configuration file update, if needed
5431 self.cfg.Update(node, feedback_fn)
5433 # this will trigger job queue propagation or cleanup if the mc
5435 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5436 self.context.ReaddNode(node)
5441 class LUNodePowercycle(NoHooksLU):
5442 """Powercycles a node.
5447 def CheckArguments(self):
5448 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5449 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5450 raise errors.OpPrereqError("The node is the master and the force"
5451 " parameter was not set",
5454 def ExpandNames(self):
5455 """Locking for PowercycleNode.
5457 This is a last-resort option and shouldn't block on other
5458 jobs. Therefore, we grab no locks.
5461 self.needed_locks = {}
5463 def Exec(self, feedback_fn):
5467 result = self.rpc.call_node_powercycle(self.op.node_name,
5468 self.cfg.GetHypervisorType())
5469 result.Raise("Failed to schedule the reboot")
5470 return result.payload
5473 class LUClusterQuery(NoHooksLU):
5474 """Query cluster configuration.
5479 def ExpandNames(self):
5480 self.needed_locks = {}
5482 def Exec(self, feedback_fn):
5483 """Return cluster config.
5486 cluster = self.cfg.GetClusterInfo()
5489 # Filter just for enabled hypervisors
5490 for os_name, hv_dict in cluster.os_hvp.items():
5491 os_hvp[os_name] = {}
5492 for hv_name, hv_params in hv_dict.items():
5493 if hv_name in cluster.enabled_hypervisors:
5494 os_hvp[os_name][hv_name] = hv_params
5496 # Convert ip_family to ip_version
5497 primary_ip_version = constants.IP4_VERSION
5498 if cluster.primary_ip_family == netutils.IP6Address.family:
5499 primary_ip_version = constants.IP6_VERSION
5502 "software_version": constants.RELEASE_VERSION,
5503 "protocol_version": constants.PROTOCOL_VERSION,
5504 "config_version": constants.CONFIG_VERSION,
5505 "os_api_version": max(constants.OS_API_VERSIONS),
5506 "export_version": constants.EXPORT_VERSION,
5507 "architecture": (platform.architecture()[0], platform.machine()),
5508 "name": cluster.cluster_name,
5509 "master": cluster.master_node,
5510 "default_hypervisor": cluster.enabled_hypervisors[0],
5511 "enabled_hypervisors": cluster.enabled_hypervisors,
5512 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
5513 for hypervisor_name in cluster.enabled_hypervisors]),
5515 "beparams": cluster.beparams,
5516 "osparams": cluster.osparams,
5517 "nicparams": cluster.nicparams,
5518 "ndparams": cluster.ndparams,
5519 "candidate_pool_size": cluster.candidate_pool_size,
5520 "master_netdev": cluster.master_netdev,
5521 "volume_group_name": cluster.volume_group_name,
5522 "drbd_usermode_helper": cluster.drbd_usermode_helper,
5523 "file_storage_dir": cluster.file_storage_dir,
5524 "shared_file_storage_dir": cluster.shared_file_storage_dir,
5525 "maintain_node_health": cluster.maintain_node_health,
5526 "ctime": cluster.ctime,
5527 "mtime": cluster.mtime,
5528 "uuid": cluster.uuid,
5529 "tags": list(cluster.GetTags()),
5530 "uid_pool": cluster.uid_pool,
5531 "default_iallocator": cluster.default_iallocator,
5532 "reserved_lvs": cluster.reserved_lvs,
5533 "primary_ip_version": primary_ip_version,
5534 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
5535 "hidden_os": cluster.hidden_os,
5536 "blacklisted_os": cluster.blacklisted_os,
5542 class LUClusterConfigQuery(NoHooksLU):
5543 """Return configuration values.
5547 _FIELDS_DYNAMIC = utils.FieldSet()
5548 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
5549 "watcher_pause", "volume_group_name")
5551 def CheckArguments(self):
5552 _CheckOutputFields(static=self._FIELDS_STATIC,
5553 dynamic=self._FIELDS_DYNAMIC,
5554 selected=self.op.output_fields)
5556 def ExpandNames(self):
5557 self.needed_locks = {}
5559 def Exec(self, feedback_fn):
5560 """Dump a representation of the cluster config to the standard output.
5564 for field in self.op.output_fields:
5565 if field == "cluster_name":
5566 entry = self.cfg.GetClusterName()
5567 elif field == "master_node":
5568 entry = self.cfg.GetMasterNode()
5569 elif field == "drain_flag":
5570 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
5571 elif field == "watcher_pause":
5572 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
5573 elif field == "volume_group_name":
5574 entry = self.cfg.GetVGName()
5576 raise errors.ParameterError(field)
5577 values.append(entry)
5581 class LUInstanceActivateDisks(NoHooksLU):
5582 """Bring up an instance's disks.
5587 def ExpandNames(self):
5588 self._ExpandAndLockInstance()
5589 self.needed_locks[locking.LEVEL_NODE] = []
5590 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5592 def DeclareLocks(self, level):
5593 if level == locking.LEVEL_NODE:
5594 self._LockInstancesNodes()
5596 def CheckPrereq(self):
5597 """Check prerequisites.
5599 This checks that the instance is in the cluster.
5602 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5603 assert self.instance is not None, \
5604 "Cannot retrieve locked instance %s" % self.op.instance_name
5605 _CheckNodeOnline(self, self.instance.primary_node)
5607 def Exec(self, feedback_fn):
5608 """Activate the disks.
5611 disks_ok, disks_info = \
5612 _AssembleInstanceDisks(self, self.instance,
5613 ignore_size=self.op.ignore_size)
5615 raise errors.OpExecError("Cannot activate block devices")
5620 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5622 """Prepare the block devices for an instance.
5624 This sets up the block devices on all nodes.
5626 @type lu: L{LogicalUnit}
5627 @param lu: the logical unit on whose behalf we execute
5628 @type instance: L{objects.Instance}
5629 @param instance: the instance for whose disks we assemble
5630 @type disks: list of L{objects.Disk} or None
5631 @param disks: which disks to assemble (or all, if None)
5632 @type ignore_secondaries: boolean
5633 @param ignore_secondaries: if true, errors on secondary nodes
5634 won't result in an error return from the function
5635 @type ignore_size: boolean
5636 @param ignore_size: if true, the current known size of the disk
5637 will not be used during the disk activation, useful for cases
5638 when the size is wrong
5639 @return: False if the operation failed, otherwise a list of
5640 (host, instance_visible_name, node_visible_name)
5641 with the mapping from node devices to instance devices
5646 iname = instance.name
5647 disks = _ExpandCheckDisks(instance, disks)
5649 # With the two-pass mechanism we try to reduce the window of
5650 # opportunity for the race condition of switching DRBD to primary
5651 # before the handshake has occurred, but we do not eliminate it
5653 # The proper fix would be to wait (with some limits) until the
5654 # connection has been made and drbd transitions from WFConnection
5655 # into any other network-connected state (Connected, SyncTarget,
5658 # 1st pass, assemble on all nodes in secondary mode
5659 for idx, inst_disk in enumerate(disks):
5660 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5662 node_disk = node_disk.Copy()
5663 node_disk.UnsetSize()
5664 lu.cfg.SetDiskID(node_disk, node)
5665 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5666 msg = result.fail_msg
5668 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5669 " (is_primary=False, pass=1): %s",
5670 inst_disk.iv_name, node, msg)
5671 if not ignore_secondaries:
5674 # FIXME: race condition on drbd migration to primary
5676 # 2nd pass, do only the primary node
5677 for idx, inst_disk in enumerate(disks):
5680 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5681 if node != instance.primary_node:
5684 node_disk = node_disk.Copy()
5685 node_disk.UnsetSize()
5686 lu.cfg.SetDiskID(node_disk, node)
5687 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5688 msg = result.fail_msg
5690 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5691 " (is_primary=True, pass=2): %s",
5692 inst_disk.iv_name, node, msg)
5695 dev_path = result.payload
5697 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5699 # leave the disks configured for the primary node
5700 # this is a workaround that would be fixed better by
5701 # improving the logical/physical id handling
5703 lu.cfg.SetDiskID(disk, instance.primary_node)
5705 return disks_ok, device_info
5708 def _StartInstanceDisks(lu, instance, force):
5709 """Start the disks of an instance.
5712 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
5713 ignore_secondaries=force)
5715 _ShutdownInstanceDisks(lu, instance)
5716 if force is not None and not force:
5717 lu.proc.LogWarning("", hint="If the message above refers to a"
5719 " secondary node, you can retry the operation using '--force'.")
5720 raise errors.OpExecError("Disk consistency error")
5723 class LUInstanceDeactivateDisks(NoHooksLU):
5724 """Shutdown an instance's disks.
5729 def ExpandNames(self):
5730 self._ExpandAndLockInstance()
5731 self.needed_locks[locking.LEVEL_NODE] = []
5732 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5734 def DeclareLocks(self, level):
5735 if level == locking.LEVEL_NODE:
5736 self._LockInstancesNodes()
5738 def CheckPrereq(self):
5739 """Check prerequisites.
5741 This checks that the instance is in the cluster.
5744 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5745 assert self.instance is not None, \
5746 "Cannot retrieve locked instance %s" % self.op.instance_name
5748 def Exec(self, feedback_fn):
5749 """Deactivate the disks
5752 instance = self.instance
5754 _ShutdownInstanceDisks(self, instance)
5756 _SafeShutdownInstanceDisks(self, instance)
5759 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
5760 """Shutdown block devices of an instance.
5762 This function checks if an instance is running, before calling
5763 _ShutdownInstanceDisks.
5766 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
5767 _ShutdownInstanceDisks(lu, instance, disks=disks)
5770 def _ExpandCheckDisks(instance, disks):
5771 """Return the instance disks selected by the disks list
5773 @type disks: list of L{objects.Disk} or None
5774 @param disks: selected disks
5775 @rtype: list of L{objects.Disk}
5776 @return: selected instance disks to act on
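Illustrative behaviour, assuming inst is an L{objects.Instance} with two
disks (the variable names are assumptions for this sketch):

  _ExpandCheckDisks(inst, None)             # -> inst.disks (all disks)
  _ExpandCheckDisks(inst, [inst.disks[0]])  # -> only the first disk
  # a disk not belonging to inst raises errors.ProgrammerError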
5780 return instance.disks
5782 if not set(disks).issubset(instance.disks):
5783 raise errors.ProgrammerError("Can only act on disks belonging to the"
5788 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
5789 """Shutdown block devices of an instance.
5791 This does the shutdown on all nodes of the instance.
5793 If ignore_primary is false, errors on the primary node are ignored.
5798 disks = _ExpandCheckDisks(instance, disks)
5801 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
5802 lu.cfg.SetDiskID(top_disk, node)
5803 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
5804 msg = result.fail_msg
5806 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
5807 disk.iv_name, node, msg)
5808 if ((node == instance.primary_node and not ignore_primary) or
5809 (node != instance.primary_node and not result.offline)):
5814 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5815 """Checks if a node has enough free memory.
5817 This function checks if a given node has the needed amount of free
5818 memory. In case the node has less memory or we cannot get the
5819 information from the node, this function raises an OpPrereqError exception.
5822 @type lu: C{LogicalUnit}
5823 @param lu: a logical unit from which we get configuration data
5825 @param node: the node to check
5826 @type reason: C{str}
5827 @param reason: string to use in the error message
5828 @type requested: C{int}
5829 @param requested: the amount of memory in MiB to check for
5830 @type hypervisor_name: C{str}
5831 @param hypervisor_name: the hypervisor to ask for memory stats
5832 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
5833 we cannot check the node
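A sketch of the typical call, mirroring the use in LUInstanceStartup below
(the reason string and memory amount are illustrative):

  _CheckNodeFreeMemory(self, instance.primary_node,
                       "starting instance %s" % instance.name,
                       bep[constants.BE_MEMORY], instance.hypervisor)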
5836 nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
5837 nodeinfo[node].Raise("Can't get data from node %s" % node,
5838 prereq=True, ecode=errors.ECODE_ENVIRON)
5839 free_mem = nodeinfo[node].payload.get("memory_free", None)
5840 if not isinstance(free_mem, int):
5841 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
5842 " was '%s'" % (node, free_mem),
5843 errors.ECODE_ENVIRON)
5844 if requested > free_mem:
5845 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
5846 " needed %s MiB, available %s MiB" %
5847 (node, reason, requested, free_mem),
5851 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
5852 """Checks if nodes have enough free disk space in all VGs.
5854 This function checks if all given nodes have the needed amount of
5855 free disk. In case any node has less disk or we cannot get the
5856 information from the node, this function raises an OpPrereqError exception.
5859 @type lu: C{LogicalUnit}
5860 @param lu: a logical unit from which we get configuration data
5861 @type nodenames: C{list}
5862 @param nodenames: the list of node names to check
5863 @type req_sizes: C{dict}
5864 @param req_sizes: the hash of vg and corresponding amount of disk in MiB to check for
5866 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5867 or we cannot check the node
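A minimal sketch of the expected req_sizes layout; the VG names and sizes
are illustrative assumptions:

  req_sizes = {"xenvg": 10240, "fastvg": 2048}  # MiB required per VG
  _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
  # each (vg, size) pair is checked via _CheckNodesFreeDiskOnVG below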
5870 for vg, req_size in req_sizes.items():
5871 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
5874 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5875 """Checks if nodes have enough free disk space in the specified VG.
5877 This function checks if all given nodes have the needed amount of
5878 free disk. In case any node has less disk or we cannot get the
5879 information from the node, this function raises an OpPrereqError exception.
5882 @type lu: C{LogicalUnit}
5883 @param lu: a logical unit from which we get configuration data
5884 @type nodenames: C{list}
5885 @param nodenames: the list of node names to check
5887 @param vg: the volume group to check
5888 @type requested: C{int}
5889 @param requested: the amount of disk in MiB to check for
5890 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5891 or we cannot check the node
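Example call shape; the node list, VG name and size are illustrative
assumptions:

  _CheckNodesFreeDiskOnVG(self, ["node1", "node2"], "xenvg", 10240)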
5894 nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5895 for node in nodenames:
5896 info = nodeinfo[node]
5897 info.Raise("Cannot get current information from node %s" % node,
5898 prereq=True, ecode=errors.ECODE_ENVIRON)
5899 vg_free = info.payload.get("vg_free", None)
5900 if not isinstance(vg_free, int):
5901 raise errors.OpPrereqError("Can't compute free disk space on node"
5902 " %s for vg %s, result was '%s'" %
5903 (node, vg, vg_free), errors.ECODE_ENVIRON)
5904 if requested > vg_free:
5905 raise errors.OpPrereqError("Not enough disk space on target node %s"
5906 " vg %s: required %d MiB, available %d MiB" %
5907 (node, vg, requested, vg_free),
5911 class LUInstanceStartup(LogicalUnit):
5912 """Starts an instance.
5915 HPATH = "instance-start"
5916 HTYPE = constants.HTYPE_INSTANCE
5919 def CheckArguments(self):
5921 if self.op.beparams:
5922 # fill the beparams dict
5923 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5925 def ExpandNames(self):
5926 self._ExpandAndLockInstance()
5928 def BuildHooksEnv(self):
5931 This runs on master, primary and secondary nodes of the instance.
5935 "FORCE": self.op.force,
5938 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5942 def BuildHooksNodes(self):
5943 """Build hooks nodes.
5946 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5949 def CheckPrereq(self):
5950 """Check prerequisites.
5952 This checks that the instance is in the cluster.
5955 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5956 assert self.instance is not None, \
5957 "Cannot retrieve locked instance %s" % self.op.instance_name
5960 if self.op.hvparams:
5961 # check hypervisor parameter syntax (locally)
5962 cluster = self.cfg.GetClusterInfo()
5963 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5964 filled_hvp = cluster.FillHV(instance)
5965 filled_hvp.update(self.op.hvparams)
5966 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5967 hv_type.CheckParameterSyntax(filled_hvp)
5968 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
5970 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5972 if self.primary_offline and self.op.ignore_offline_nodes:
5973 self.proc.LogWarning("Ignoring offline primary node")
5975 if self.op.hvparams or self.op.beparams:
5976 self.proc.LogWarning("Overridden parameters are ignored")
5978 _CheckNodeOnline(self, instance.primary_node)
5980 bep = self.cfg.GetClusterInfo().FillBE(instance)
5982 # check bridges existence
5983 _CheckInstanceBridgesExist(self, instance)
5985 remote_info = self.rpc.call_instance_info(instance.primary_node,
5987 instance.hypervisor)
5988 remote_info.Raise("Error checking node %s" % instance.primary_node,
5989 prereq=True, ecode=errors.ECODE_ENVIRON)
5990 if not remote_info.payload: # not running already
5991 _CheckNodeFreeMemory(self, instance.primary_node,
5992 "starting instance %s" % instance.name,
5993 bep[constants.BE_MEMORY], instance.hypervisor)
5995 def Exec(self, feedback_fn):
5996 """Start the instance.
5999 instance = self.instance
6000 force = self.op.force
6002 if not self.op.no_remember:
6003 self.cfg.MarkInstanceUp(instance.name)
6005 if self.primary_offline:
6006 assert self.op.ignore_offline_nodes
6007 self.proc.LogInfo("Primary node offline, marked instance as started")
6009 node_current = instance.primary_node
6011 _StartInstanceDisks(self, instance, force)
6013 result = self.rpc.call_instance_start(node_current, instance,
6014 self.op.hvparams, self.op.beparams,
6015 self.op.startup_paused)
6016 msg = result.fail_msg
6018 _ShutdownInstanceDisks(self, instance)
6019 raise errors.OpExecError("Could not start instance: %s" % msg)
6022 class LUInstanceReboot(LogicalUnit):
6023 """Reboot an instance.
6026 HPATH = "instance-reboot"
6027 HTYPE = constants.HTYPE_INSTANCE
6030 def ExpandNames(self):
6031 self._ExpandAndLockInstance()
6033 def BuildHooksEnv(self):
6036 This runs on master, primary and secondary nodes of the instance.
6040 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6041 "REBOOT_TYPE": self.op.reboot_type,
6042 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6045 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6049 def BuildHooksNodes(self):
6050 """Build hooks nodes.
6053 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6056 def CheckPrereq(self):
6057 """Check prerequisites.
6059 This checks that the instance is in the cluster.
6062 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6063 assert self.instance is not None, \
6064 "Cannot retrieve locked instance %s" % self.op.instance_name
6066 _CheckNodeOnline(self, instance.primary_node)
6068 # check bridges existence
6069 _CheckInstanceBridgesExist(self, instance)
6071 def Exec(self, feedback_fn):
6072 """Reboot the instance.
6075 instance = self.instance
6076 ignore_secondaries = self.op.ignore_secondaries
6077 reboot_type = self.op.reboot_type
6079 remote_info = self.rpc.call_instance_info(instance.primary_node,
6081 instance.hypervisor)
6082 remote_info.Raise("Error checking node %s" % instance.primary_node)
6083 instance_running = bool(remote_info.payload)
6085 node_current = instance.primary_node
6087 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6088 constants.INSTANCE_REBOOT_HARD]:
6089 for disk in instance.disks:
6090 self.cfg.SetDiskID(disk, node_current)
6091 result = self.rpc.call_instance_reboot(node_current, instance,
6093 self.op.shutdown_timeout)
6094 result.Raise("Could not reboot instance")
6096 if instance_running:
6097 result = self.rpc.call_instance_shutdown(node_current, instance,
6098 self.op.shutdown_timeout)
6099 result.Raise("Could not shutdown instance for full reboot")
6100 _ShutdownInstanceDisks(self, instance)
6102 self.LogInfo("Instance %s was already stopped, starting now",
6104 _StartInstanceDisks(self, instance, ignore_secondaries)
6105 result = self.rpc.call_instance_start(node_current, instance,
6107 msg = result.fail_msg
6109 _ShutdownInstanceDisks(self, instance)
6110 raise errors.OpExecError("Could not start instance for"
6111 " full reboot: %s" % msg)
6113 self.cfg.MarkInstanceUp(instance.name)
6116 class LUInstanceShutdown(LogicalUnit):
6117 """Shutdown an instance.
6120 HPATH = "instance-stop"
6121 HTYPE = constants.HTYPE_INSTANCE
6124 def ExpandNames(self):
6125 self._ExpandAndLockInstance()
6127 def BuildHooksEnv(self):
6130 This runs on master, primary and secondary nodes of the instance.
6133 env = _BuildInstanceHookEnvByObject(self, self.instance)
6134 env["TIMEOUT"] = self.op.timeout
6137 def BuildHooksNodes(self):
6138 """Build hooks nodes.
6141 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6144 def CheckPrereq(self):
6145 """Check prerequisites.
6147 This checks that the instance is in the cluster.
6150 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6151 assert self.instance is not None, \
6152 "Cannot retrieve locked instance %s" % self.op.instance_name
6154 self.primary_offline = \
6155 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6157 if self.primary_offline and self.op.ignore_offline_nodes:
6158 self.proc.LogWarning("Ignoring offline primary node")
6160 _CheckNodeOnline(self, self.instance.primary_node)
6162 def Exec(self, feedback_fn):
6163 """Shutdown the instance.
6166 instance = self.instance
6167 node_current = instance.primary_node
6168 timeout = self.op.timeout
6170 if not self.op.no_remember:
6171 self.cfg.MarkInstanceDown(instance.name)
6173 if self.primary_offline:
6174 assert self.op.ignore_offline_nodes
6175 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6177 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6178 msg = result.fail_msg
6180 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6182 _ShutdownInstanceDisks(self, instance)
6185 class LUInstanceReinstall(LogicalUnit):
6186 """Reinstall an instance.
6189 HPATH = "instance-reinstall"
6190 HTYPE = constants.HTYPE_INSTANCE
6193 def ExpandNames(self):
6194 self._ExpandAndLockInstance()
6196 def BuildHooksEnv(self):
6199 This runs on master, primary and secondary nodes of the instance.
6202 return _BuildInstanceHookEnvByObject(self, self.instance)
6204 def BuildHooksNodes(self):
6205 """Build hooks nodes.
6208 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6211 def CheckPrereq(self):
6212 """Check prerequisites.
6214 This checks that the instance is in the cluster and is not running.
6217 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6218 assert instance is not None, \
6219 "Cannot retrieve locked instance %s" % self.op.instance_name
6220 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6221 " offline, cannot reinstall")
6222 for node in instance.secondary_nodes:
6223 _CheckNodeOnline(self, node, "Instance secondary node offline,"
6224 " cannot reinstall")
6226 if instance.disk_template == constants.DT_DISKLESS:
6227 raise errors.OpPrereqError("Instance '%s' has no disks" %
6228 self.op.instance_name,
6230 _CheckInstanceDown(self, instance, "cannot reinstall")
6232 if self.op.os_type is not None:
6234 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6235 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6236 instance_os = self.op.os_type
6238 instance_os = instance.os
6240 nodelist = list(instance.all_nodes)
6242 if self.op.osparams:
6243 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6244 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6245 self.os_inst = i_osdict # the new dict (without defaults)
6249 self.instance = instance
6251 def Exec(self, feedback_fn):
6252 """Reinstall the instance.
6255 inst = self.instance
6257 if self.op.os_type is not None:
6258 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6259 inst.os = self.op.os_type
6260 # Write to configuration
6261 self.cfg.Update(inst, feedback_fn)
6263 _StartInstanceDisks(self, inst, None)
6265 feedback_fn("Running the instance OS create scripts...")
6266 # FIXME: pass debug option from opcode to backend
6267 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
6268 self.op.debug_level,
6269 osparams=self.os_inst)
6270 result.Raise("Could not install OS for instance %s on node %s" %
6271 (inst.name, inst.primary_node))
6273 _ShutdownInstanceDisks(self, inst)
6276 class LUInstanceRecreateDisks(LogicalUnit):
6277 """Recreate an instance's missing disks.
6280 HPATH = "instance-recreate-disks"
6281 HTYPE = constants.HTYPE_INSTANCE
6284 def CheckArguments(self):
6285 # normalise the disk list
6286 self.op.disks = sorted(frozenset(self.op.disks))
6288 def ExpandNames(self):
6289 self._ExpandAndLockInstance()
6290 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6292 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6293 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6295 self.needed_locks[locking.LEVEL_NODE] = []
6297 def DeclareLocks(self, level):
6298 if level == locking.LEVEL_NODE:
6299 # if we replace the nodes, we only need to lock the old primary,
6300 # otherwise we need to lock all nodes for disk re-creation
6301 primary_only = bool(self.op.nodes)
6302 self._LockInstancesNodes(primary_only=primary_only)
6304 def BuildHooksEnv(self):
6307 This runs on master, primary and secondary nodes of the instance.
6310 return _BuildInstanceHookEnvByObject(self, self.instance)
6312 def BuildHooksNodes(self):
6313 """Build hooks nodes.
6316 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6319 def CheckPrereq(self):
6320 """Check prerequisites.
6322 This checks that the instance is in the cluster and is not running.
6325 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6326 assert instance is not None, \
6327 "Cannot retrieve locked instance %s" % self.op.instance_name
6329 if len(self.op.nodes) != len(instance.all_nodes):
6330 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6331 " %d replacement nodes were specified" %
6332 (instance.name, len(instance.all_nodes),
6333 len(self.op.nodes)),
6335 assert instance.disk_template != constants.DT_DRBD8 or \
6336 len(self.op.nodes) == 2
6337 assert instance.disk_template != constants.DT_PLAIN or \
6338 len(self.op.nodes) == 1
6339 primary_node = self.op.nodes[0]
6341 primary_node = instance.primary_node
6342 _CheckNodeOnline(self, primary_node)
6344 if instance.disk_template == constants.DT_DISKLESS:
6345 raise errors.OpPrereqError("Instance '%s' has no disks" %
6346 self.op.instance_name, errors.ECODE_INVAL)
6347 # if we replace nodes *and* the old primary is offline, we don't
6349 assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
6350 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6351 if not (self.op.nodes and old_pnode.offline):
6352 _CheckInstanceDown(self, instance, "cannot recreate disks")
6354 if not self.op.disks:
6355 self.op.disks = range(len(instance.disks))
6357 for idx in self.op.disks:
6358 if idx >= len(instance.disks):
6359 raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6361 if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6362 raise errors.OpPrereqError("Can't recreate disks partially and"
6363 " change the nodes at the same time",
6365 self.instance = instance
6367 def Exec(self, feedback_fn):
6368 """Recreate the disks.
6371 instance = self.instance
6374 mods = [] # keeps track of needed logical_id changes
6376 for idx, disk in enumerate(instance.disks):
6377 if idx not in self.op.disks: # disk idx has not been passed in
6380 # update secondaries for disks, if needed
6382 if disk.dev_type == constants.LD_DRBD8:
6383 # need to update the nodes and minors
6384 assert len(self.op.nodes) == 2
6385 assert len(disk.logical_id) == 6 # otherwise disk internals
6387 (_, _, old_port, _, _, old_secret) = disk.logical_id
6388 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6389 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6390 new_minors[0], new_minors[1], old_secret)
6391 assert len(disk.logical_id) == len(new_id)
6392 mods.append((idx, new_id))
6394 # now that we have passed all asserts above, we can apply the mods
6395 # in a single run (to avoid partial changes)
6396 for idx, new_id in mods:
6397 instance.disks[idx].logical_id = new_id
6399 # change primary node, if needed
6401 instance.primary_node = self.op.nodes[0]
6402 self.LogWarning("Changing the instance's nodes, you will have to"
6403 " remove any disks left on the older nodes manually")
6406 self.cfg.Update(instance, feedback_fn)
6408 _CreateDisks(self, instance, to_skip=to_skip)
6411 class LUInstanceRename(LogicalUnit):
6412 """Rename an instance.
6415 HPATH = "instance-rename"
6416 HTYPE = constants.HTYPE_INSTANCE
6418 def CheckArguments(self):
6422 if self.op.ip_check and not self.op.name_check:
6423 # TODO: make the ip check more flexible and not depend on the name check
6424 raise errors.OpPrereqError("IP address check requires a name check",
6427 def BuildHooksEnv(self):
6430 This runs on master, primary and secondary nodes of the instance.
6433 env = _BuildInstanceHookEnvByObject(self, self.instance)
6434 env["INSTANCE_NEW_NAME"] = self.op.new_name
6437 def BuildHooksNodes(self):
6438 """Build hooks nodes.
6441 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6444 def CheckPrereq(self):
6445 """Check prerequisites.
6447 This checks that the instance is in the cluster and is not running.
6450 self.op.instance_name = _ExpandInstanceName(self.cfg,
6451 self.op.instance_name)
6452 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6453 assert instance is not None
6454 _CheckNodeOnline(self, instance.primary_node)
6455 _CheckInstanceDown(self, instance, "cannot rename")
6456 self.instance = instance
6458 new_name = self.op.new_name
6459 if self.op.name_check:
6460 hostname = netutils.GetHostname(name=new_name)
6461 if hostname != new_name:
6462 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6464 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6465 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6466 " same as given hostname '%s'") %
6467 (hostname.name, self.op.new_name),
6469 new_name = self.op.new_name = hostname.name
6470 if (self.op.ip_check and
6471 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6472 raise errors.OpPrereqError("IP %s of instance %s already in use" %
6473 (hostname.ip, new_name),
6474 errors.ECODE_NOTUNIQUE)
6476 instance_list = self.cfg.GetInstanceList()
6477 if new_name in instance_list and new_name != instance.name:
6478 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6479 new_name, errors.ECODE_EXISTS)
6481 def Exec(self, feedback_fn):
6482 """Rename the instance.
6485 inst = self.instance
6486 old_name = inst.name
6488 rename_file_storage = False
6489 if (inst.disk_template in constants.DTS_FILEBASED and
6490 self.op.new_name != inst.name):
6491 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6492 rename_file_storage = True
6494 self.cfg.RenameInstance(inst.name, self.op.new_name)
6495 # Change the instance lock. This is definitely safe while we hold the BGL.
6496 # Otherwise the new lock would have to be added in acquired mode.
6498 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6499 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6501 # re-read the instance from the configuration after rename
6502 inst = self.cfg.GetInstanceInfo(self.op.new_name)
6504 if rename_file_storage:
6505 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6506 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6507 old_file_storage_dir,
6508 new_file_storage_dir)
6509 result.Raise("Could not rename on node %s directory '%s' to '%s'"
6510 " (but the instance has been renamed in Ganeti)" %
6511 (inst.primary_node, old_file_storage_dir,
6512 new_file_storage_dir))
6514 _StartInstanceDisks(self, inst, None)
6516 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6517 old_name, self.op.debug_level)
6518 msg = result.fail_msg
6520 msg = ("Could not run OS rename script for instance %s on node %s"
6521 " (but the instance has been renamed in Ganeti): %s" %
6522 (inst.name, inst.primary_node, msg))
6523 self.proc.LogWarning(msg)
6525 _ShutdownInstanceDisks(self, inst)
6530 class LUInstanceRemove(LogicalUnit):
6531 """Remove an instance.
6534 HPATH = "instance-remove"
6535 HTYPE = constants.HTYPE_INSTANCE
6538 def ExpandNames(self):
6539 self._ExpandAndLockInstance()
6540 self.needed_locks[locking.LEVEL_NODE] = []
6541 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6543 def DeclareLocks(self, level):
6544 if level == locking.LEVEL_NODE:
6545 self._LockInstancesNodes()
6547 def BuildHooksEnv(self):
6550 This runs on master, primary and secondary nodes of the instance.
6553 env = _BuildInstanceHookEnvByObject(self, self.instance)
6554 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6557 def BuildHooksNodes(self):
6558 """Build hooks nodes.
6561 nl = [self.cfg.GetMasterNode()]
6562 nl_post = list(self.instance.all_nodes) + nl
6563 return (nl, nl_post)
6565 def CheckPrereq(self):
6566 """Check prerequisites.
6568 This checks that the instance is in the cluster.
6571 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6572 assert self.instance is not None, \
6573 "Cannot retrieve locked instance %s" % self.op.instance_name
6575 def Exec(self, feedback_fn):
6576 """Remove the instance.
6579 instance = self.instance
6580 logging.info("Shutting down instance %s on node %s",
6581 instance.name, instance.primary_node)
6583 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6584 self.op.shutdown_timeout)
6585 msg = result.fail_msg
6587 if self.op.ignore_failures:
6588 feedback_fn("Warning: can't shutdown instance: %s" % msg)
6590 raise errors.OpExecError("Could not shutdown instance %s on"
6592 (instance.name, instance.primary_node, msg))
6594 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6597 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6598 """Utility function to remove an instance.
6601 logging.info("Removing block devices for instance %s", instance.name)
6603 if not _RemoveDisks(lu, instance):
6604 if not ignore_failures:
6605 raise errors.OpExecError("Can't remove instance's disks")
6606 feedback_fn("Warning: can't remove instance's disks")
6608 logging.info("Removing instance %s out of cluster config", instance.name)
6610 lu.cfg.RemoveInstance(instance.name)
6612 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6613 "Instance lock removal conflict"
6615 # Remove lock for the instance
6616 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6619 class LUInstanceQuery(NoHooksLU):
6620 """Logical unit for querying instances.
6623 # pylint: disable=W0142
6626 def CheckArguments(self):
6627 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6628 self.op.output_fields, self.op.use_locking)
6630 def ExpandNames(self):
6631 self.iq.ExpandNames(self)
6633 def DeclareLocks(self, level):
6634 self.iq.DeclareLocks(self, level)
6636 def Exec(self, feedback_fn):
6637 return self.iq.OldStyleQuery(self)
6640 class LUInstanceFailover(LogicalUnit):
6641 """Failover an instance.
6644 HPATH = "instance-failover"
6645 HTYPE = constants.HTYPE_INSTANCE
6648 def CheckArguments(self):
6649 """Check the arguments.
6652 self.iallocator = getattr(self.op, "iallocator", None)
6653 self.target_node = getattr(self.op, "target_node", None)
6655 def ExpandNames(self):
6656 self._ExpandAndLockInstance()
6658 if self.op.target_node is not None:
6659 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6661 self.needed_locks[locking.LEVEL_NODE] = []
6662 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6664 ignore_consistency = self.op.ignore_consistency
6665 shutdown_timeout = self.op.shutdown_timeout
6666 self._migrater = TLMigrateInstance(self, self.op.instance_name,
6669 ignore_consistency=ignore_consistency,
6670 shutdown_timeout=shutdown_timeout)
6671 self.tasklets = [self._migrater]
6673 def DeclareLocks(self, level):
6674 if level == locking.LEVEL_NODE:
6675 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6676 if instance.disk_template in constants.DTS_EXT_MIRROR:
6677 if self.op.target_node is None:
6678 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6680 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6681 self.op.target_node]
6682 del self.recalculate_locks[locking.LEVEL_NODE]
6684 self._LockInstancesNodes()
6686 def BuildHooksEnv(self):
6689 This runs on master, primary and secondary nodes of the instance.
6692 instance = self._migrater.instance
6693 source_node = instance.primary_node
6694 target_node = self.op.target_node
6696 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6697 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6698 "OLD_PRIMARY": source_node,
6699 "NEW_PRIMARY": target_node,
6702 if instance.disk_template in constants.DTS_INT_MIRROR:
6703 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6704 env["NEW_SECONDARY"] = source_node
6706 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6708 env.update(_BuildInstanceHookEnvByObject(self, instance))
6712 def BuildHooksNodes(self):
6713 """Build hooks nodes.
6716 instance = self._migrater.instance
6717 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6718 return (nl, nl + [instance.primary_node])
6721 class LUInstanceMigrate(LogicalUnit):
6722 """Migrate an instance.
6724 This is migration without shutting down, compared to the failover,
6725 which is done with shutdown.
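A rough sketch of how this LU is usually driven through the opcode layer;
the instance name and parameter values are illustrative assumptions:

  op = opcodes.OpInstanceMigrate(instance_name="inst1.example.com",
                                 cleanup=False, allow_failover=True)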
6728 HPATH = "instance-migrate"
6729 HTYPE = constants.HTYPE_INSTANCE
6732 def ExpandNames(self):
6733 self._ExpandAndLockInstance()
6735 if self.op.target_node is not None:
6736 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6738 self.needed_locks[locking.LEVEL_NODE] = []
6739 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6741 self._migrater = TLMigrateInstance(self, self.op.instance_name,
6742 cleanup=self.op.cleanup,
6744 fallback=self.op.allow_failover)
6745 self.tasklets = [self._migrater]
6747 def DeclareLocks(self, level):
6748 if level == locking.LEVEL_NODE:
6749 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6750 if instance.disk_template in constants.DTS_EXT_MIRROR:
6751 if self.op.target_node is None:
6752 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6754 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6755 self.op.target_node]
6756 del self.recalculate_locks[locking.LEVEL_NODE]
6758 self._LockInstancesNodes()
6760 def BuildHooksEnv(self):
6763 This runs on master, primary and secondary nodes of the instance.
6766 instance = self._migrater.instance
6767 source_node = instance.primary_node
6768 target_node = self.op.target_node
6769 env = _BuildInstanceHookEnvByObject(self, instance)
6771 "MIGRATE_LIVE": self._migrater.live,
6772 "MIGRATE_CLEANUP": self.op.cleanup,
6773 "OLD_PRIMARY": source_node,
6774 "NEW_PRIMARY": target_node,
6777 if instance.disk_template in constants.DTS_INT_MIRROR:
6778 env["OLD_SECONDARY"] = target_node
6779 env["NEW_SECONDARY"] = source_node
6781 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6785 def BuildHooksNodes(self):
6786 """Build hooks nodes.
6789 instance = self._migrater.instance
6790 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6791 return (nl, nl + [instance.primary_node])
6794 class LUInstanceMove(LogicalUnit):
6795 """Move an instance by data-copying.
6798 HPATH = "instance-move"
6799 HTYPE = constants.HTYPE_INSTANCE
6802 def ExpandNames(self):
6803 self._ExpandAndLockInstance()
6804 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6805 self.op.target_node = target_node
6806 self.needed_locks[locking.LEVEL_NODE] = [target_node]
6807 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6809 def DeclareLocks(self, level):
6810 if level == locking.LEVEL_NODE:
6811 self._LockInstancesNodes(primary_only=True)
6813 def BuildHooksEnv(self):
6816 This runs on master, primary and secondary nodes of the instance.
6820 "TARGET_NODE": self.op.target_node,
6821 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6823 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6826 def BuildHooksNodes(self):
6827 """Build hooks nodes.
6831 self.cfg.GetMasterNode(),
6832 self.instance.primary_node,
6833 self.op.target_node,
6837 def CheckPrereq(self):
6838 """Check prerequisites.
6840 This checks that the instance is in the cluster.
6843 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6844 assert self.instance is not None, \
6845 "Cannot retrieve locked instance %s" % self.op.instance_name
6847 node = self.cfg.GetNodeInfo(self.op.target_node)
6848 assert node is not None, \
6849 "Cannot retrieve locked node %s" % self.op.target_node
6851 self.target_node = target_node = node.name
6853 if target_node == instance.primary_node:
6854 raise errors.OpPrereqError("Instance %s is already on the node %s" %
6855 (instance.name, target_node),
6858 bep = self.cfg.GetClusterInfo().FillBE(instance)
6860 for idx, dsk in enumerate(instance.disks):
6861 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6862 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6863 " cannot copy" % idx, errors.ECODE_STATE)
6865 _CheckNodeOnline(self, target_node)
6866 _CheckNodeNotDrained(self, target_node)
6867 _CheckNodeVmCapable(self, target_node)
6869 if instance.admin_up:
6870 # check memory requirements on the secondary node
6871 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6872 instance.name, bep[constants.BE_MEMORY],
6873 instance.hypervisor)
6875 self.LogInfo("Not checking memory on the secondary node as"
6876 " instance will not be started")
6878 # check bridge existence
6879 _CheckInstanceBridgesExist(self, instance, node=target_node)
6881 def Exec(self, feedback_fn):
6882 """Move an instance.
6884 The move is done by shutting it down on its present node, copying
6885 the data over (slow) and starting it on the new node.
6888 instance = self.instance
6890 source_node = instance.primary_node
6891 target_node = self.target_node
6893 self.LogInfo("Shutting down instance %s on source node %s",
6894 instance.name, source_node)
6896 result = self.rpc.call_instance_shutdown(source_node, instance,
6897 self.op.shutdown_timeout)
6898 msg = result.fail_msg
6900 if self.op.ignore_consistency:
6901 self.proc.LogWarning("Could not shutdown instance %s on node %s."
6902 " Proceeding anyway. Please make sure node"
6903 " %s is down. Error details: %s",
6904 instance.name, source_node, source_node, msg)
6906 raise errors.OpExecError("Could not shutdown instance %s on"
6908 (instance.name, source_node, msg))
6910 # create the target disks
6912 _CreateDisks(self, instance, target_node=target_node)
6913 except errors.OpExecError:
6914 self.LogWarning("Device creation failed, reverting...")
6916 _RemoveDisks(self, instance, target_node=target_node)
6918 self.cfg.ReleaseDRBDMinors(instance.name)
6921 cluster_name = self.cfg.GetClusterInfo().cluster_name
6924 # activate, get path, copy the data over
6925 for idx, disk in enumerate(instance.disks):
6926 self.LogInfo("Copying data for disk %d", idx)
6927 result = self.rpc.call_blockdev_assemble(target_node, disk,
6928 instance.name, True, idx)
6930 self.LogWarning("Can't assemble newly created disk %d: %s",
6931 idx, result.fail_msg)
6932 errs.append(result.fail_msg)
6934 dev_path = result.payload
6935 result = self.rpc.call_blockdev_export(source_node, disk,
6936 target_node, dev_path,
6939 self.LogWarning("Can't copy data over for disk %d: %s",
6940 idx, result.fail_msg)
6941 errs.append(result.fail_msg)
6945 self.LogWarning("Some disks failed to copy, aborting")
6947 _RemoveDisks(self, instance, target_node=target_node)
6949 self.cfg.ReleaseDRBDMinors(instance.name)
6950 raise errors.OpExecError("Errors during disk copy: %s" %
6953 instance.primary_node = target_node
6954 self.cfg.Update(instance, feedback_fn)
6956 self.LogInfo("Removing the disks on the original node")
6957 _RemoveDisks(self, instance, target_node=source_node)
6959 # Only start the instance if it's marked as up
6960 if instance.admin_up:
6961 self.LogInfo("Starting instance %s on node %s",
6962 instance.name, target_node)
6964 disks_ok, _ = _AssembleInstanceDisks(self, instance,
6965 ignore_secondaries=True)
6967 _ShutdownInstanceDisks(self, instance)
6968 raise errors.OpExecError("Can't activate the instance's disks")
6970 result = self.rpc.call_instance_start(target_node, instance,
6972 msg = result.fail_msg
6974 _ShutdownInstanceDisks(self, instance)
6975 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6976 (instance.name, target_node, msg))
6979 class LUNodeMigrate(LogicalUnit):
6980 """Migrate all instances from a node.
6983 HPATH = "node-migrate"
6984 HTYPE = constants.HTYPE_NODE
6987 def CheckArguments(self):
6990 def ExpandNames(self):
6991 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6993 self.share_locks = _ShareAll()
6994 self.needed_locks = {
6995 locking.LEVEL_NODE: [self.op.node_name],
6998 def BuildHooksEnv(self):
7001 This runs on the master, the primary and all the secondaries.
7005 "NODE_NAME": self.op.node_name,
7008 def BuildHooksNodes(self):
7009 """Build hooks nodes.
7012 nl = [self.cfg.GetMasterNode()]
7015 def CheckPrereq(self):
7018 def Exec(self, feedback_fn):
7019 # Prepare jobs for migration instances
7021 [opcodes.OpInstanceMigrate(instance_name=inst.name,
7024 iallocator=self.op.iallocator,
7025 target_node=self.op.target_node)]
7026 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7029 # TODO: Run iallocator in this opcode and pass correct placement options to
7030 # OpInstanceMigrate. Since other jobs can modify the cluster between
7031 # running the iallocator and the actual migration, a good consistency model
7032 # will have to be found.
7034 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7035 frozenset([self.op.node_name]))
7037 return ResultWithJobs(jobs)
7040 class TLMigrateInstance(Tasklet):
7041 """Tasklet class for instance migration.
7044 @ivar live: whether the migration will be done live or non-live;
7045 this variable is initialized only after CheckPrereq has run
7046 @type cleanup: boolean
7047 @ivar cleanup: Whether we are cleaning up after a failed migration
7048 @type iallocator: string
7049 @ivar iallocator: The iallocator used to determine target_node
7050 @type target_node: string
7051 @ivar target_node: If given, the target_node to reallocate the instance to
7052 @type failover: boolean
7053 @ivar failover: Whether operation results in failover or migration
7054 @type fallback: boolean
7055 @ivar fallback: Whether fallback to failover is allowed if migration is not possible
7057 @type ignore_consistency: boolean
7058 @ivar ignore_consistency: Whether we should ignore consistency between the source and the target node
7060 @type shutdown_timeout: int
7061 @ivar shutdown_timeout: In case of failover, the timeout to use for the instance shutdown
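A minimal construction sketch, close to how LUInstanceFailover wires up the
tasklet (the keyword values are illustrative assumptions):

  self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                     failover=True,
                                     ignore_consistency=False,
                                     shutdown_timeout=120)
  self.tasklets = [self._migrater]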
7064 def __init__(self, lu, instance_name, cleanup=False,
7065 failover=False, fallback=False,
7066 ignore_consistency=False,
7067 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
7068 """Initializes this class.
7071 Tasklet.__init__(self, lu)
7074 self.instance_name = instance_name
7075 self.cleanup = cleanup
7076 self.live = False # will be overridden later
7077 self.failover = failover
7078 self.fallback = fallback
7079 self.ignore_consistency = ignore_consistency
7080 self.shutdown_timeout = shutdown_timeout
7082 def CheckPrereq(self):
7083 """Check prerequisites.
7085 This checks that the instance is in the cluster.
7088 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7089 instance = self.cfg.GetInstanceInfo(instance_name)
7090 assert instance is not None
7091 self.instance = instance
7093 if (not self.cleanup and not instance.admin_up and not self.failover and
7095 self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
7097 self.failover = True
7099 if instance.disk_template not in constants.DTS_MIRRORED:
7104 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7105 " %s" % (instance.disk_template, text),
7108 if instance.disk_template in constants.DTS_EXT_MIRROR:
7109 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7111 if self.lu.op.iallocator:
7112 self._RunAllocator()
7114 # We set self.target_node as it is required by
7116 self.target_node = self.lu.op.target_node
7118 # self.target_node is already populated, either directly or by the
7120 target_node = self.target_node
7121 if self.target_node == instance.primary_node:
7122 raise errors.OpPrereqError("Cannot migrate instance %s"
7123 " to its primary (%s)" %
7124 (instance.name, instance.primary_node))
7126 if len(self.lu.tasklets) == 1:
7127 # It is safe to release locks only when we're the only tasklet
7129 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7130 keep=[instance.primary_node, self.target_node])
7133 secondary_nodes = instance.secondary_nodes
7134 if not secondary_nodes:
7135 raise errors.ConfigurationError("No secondary node but using"
7136 " %s disk template" %
7137 instance.disk_template)
7138 target_node = secondary_nodes[0]
7139 if self.lu.op.iallocator or (self.lu.op.target_node and
7140 self.lu.op.target_node != target_node):
7142 text = "failed over"
7145 raise errors.OpPrereqError("Instances with disk template %s cannot"
7146 " be %s to arbitrary nodes"
7147 " (neither an iallocator nor a target"
7148 " node can be passed)" %
7149 (instance.disk_template, text),
7152 i_be = self.cfg.GetClusterInfo().FillBE(instance)
7154 # check memory requirements on the secondary node
7155 if not self.failover or instance.admin_up:
7156 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7157 instance.name, i_be[constants.BE_MEMORY],
7158 instance.hypervisor)
7160 self.lu.LogInfo("Not checking memory on the secondary node as"
7161 " instance will not be started")
7163 # check bridge existence
7164 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7166 if not self.cleanup:
7167 _CheckNodeNotDrained(self.lu, target_node)
7168 if not self.failover:
7169 result = self.rpc.call_instance_migratable(instance.primary_node,
7171 if result.fail_msg and self.fallback:
7172 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7174 self.failover = True
7176 result.Raise("Can't migrate, please use failover",
7177 prereq=True, ecode=errors.ECODE_STATE)
7179 assert not (self.failover and self.cleanup)
7181 if not self.failover:
7182 if self.lu.op.live is not None and self.lu.op.mode is not None:
7183 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7184 " parameters are accepted",
7186 if self.lu.op.live is not None:
7188 self.lu.op.mode = constants.HT_MIGRATION_LIVE
7190 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7191 # reset the 'live' parameter to None so that repeated
7192 # invocations of CheckPrereq do not raise an exception
7193 self.lu.op.live = None
7194 elif self.lu.op.mode is None:
7195 # read the default value from the hypervisor
7196 i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7198 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7200 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7202 # Failover is never live
7205 def _RunAllocator(self):
7206 """Run the allocator based on input opcode.
7209 ial = IAllocator(self.cfg, self.rpc,
7210 mode=constants.IALLOCATOR_MODE_RELOC,
7211 name=self.instance_name,
7212 # TODO See why hail breaks with a single node below
7213 relocate_from=[self.instance.primary_node,
7214 self.instance.primary_node],
7217 ial.Run(self.lu.op.iallocator)
7220 raise errors.OpPrereqError("Can't compute nodes using"
7221 " iallocator '%s': %s" %
7222 (self.lu.op.iallocator, ial.info),
7224 if len(ial.result) != ial.required_nodes:
7225 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7226 " of nodes (%s), required %s" %
7227 (self.lu.op.iallocator, len(ial.result),
7228 ial.required_nodes), errors.ECODE_FAULT)
7229 self.target_node = ial.result[0]
7230 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7231 self.instance_name, self.lu.op.iallocator,
7232 utils.CommaJoin(ial.result))
7234 def _WaitUntilSync(self):
7235 """Poll with custom rpc for disk sync.
7237 This uses our own step-based rpc call.
7240 self.feedback_fn("* wait until resync is done")
7244 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7246 self.instance.disks)
7248 for node, nres in result.items():
7249 nres.Raise("Cannot resync disks on node %s" % node)
7250 node_done, node_percent = nres.payload
7251 all_done = all_done and node_done
7252 if node_percent is not None:
7253 min_percent = min(min_percent, node_percent)
7255 if min_percent < 100:
7256 self.feedback_fn(" - progress: %.1f%%" % min_percent)
7259 def _EnsureSecondary(self, node):
7260 """Demote a node to secondary.
7263 self.feedback_fn("* switching node %s to secondary mode" % node)
7265 for dev in self.instance.disks:
7266 self.cfg.SetDiskID(dev, node)
7268 result = self.rpc.call_blockdev_close(node, self.instance.name,
7269 self.instance.disks)
7270 result.Raise("Cannot change disk to secondary on node %s" % node)
7272 def _GoStandalone(self):
7273 """Disconnect from the network.
7276 self.feedback_fn("* changing into standalone mode")
7277 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7278 self.instance.disks)
7279 for node, nres in result.items():
7280 nres.Raise("Cannot disconnect disks node %s" % node)
7282 def _GoReconnect(self, multimaster):
7283 """Reconnect to the network.
7289 msg = "single-master"
7290 self.feedback_fn("* changing disks into %s mode" % msg)
7291 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7292 self.instance.disks,
7293 self.instance.name, multimaster)
7294 for node, nres in result.items():
7295 nres.Raise("Cannot change disks config on node %s" % node)
7297 def _ExecCleanup(self):
7298 """Try to cleanup after a failed migration.
7300 The cleanup is done by:
7301 - check that the instance is running only on one node
7302 (and update the config if needed)
7303 - change disks on its secondary node to secondary
7304 - wait until disks are fully synchronized
7305 - disconnect from the network
7306 - change disks into single-master mode
7307 - wait again until disks are fully synchronized
7310 instance = self.instance
7311 target_node = self.target_node
7312 source_node = self.source_node
7314 # check running on only one node
7315 self.feedback_fn("* checking where the instance actually runs"
7316 " (if this hangs, the hypervisor might be in a bad state)")
7318 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7319 for node, result in ins_l.items():
7320 result.Raise("Can't contact node %s" % node)
7322 runningon_source = instance.name in ins_l[source_node].payload
7323 runningon_target = instance.name in ins_l[target_node].payload
7325 if runningon_source and runningon_target:
7326 raise errors.OpExecError("Instance seems to be running on two nodes,"
7327 " or the hypervisor is confused; you will have"
7328 " to ensure manually that it runs only on one"
7329 " and restart this operation")
7331 if not (runningon_source or runningon_target):
7332 raise errors.OpExecError("Instance does not seem to be running at all;"
7333 " in this case it's safer to repair by"
7334 " running 'gnt-instance stop' to ensure disk"
7335 " shutdown, and then restarting it")
7337 if runningon_target:
7338 # the migration has actually succeeded, we need to update the config
7339 self.feedback_fn("* instance running on secondary node (%s),"
7340 " updating config" % target_node)
7341 instance.primary_node = target_node
7342 self.cfg.Update(instance, self.feedback_fn)
7343 demoted_node = source_node
7345 self.feedback_fn("* instance confirmed to be running on its"
7346 " primary node (%s)" % source_node)
7347 demoted_node = target_node
7349 if instance.disk_template in constants.DTS_INT_MIRROR:
7350 self._EnsureSecondary(demoted_node)
7352 self._WaitUntilSync()
7353 except errors.OpExecError:
7354 # we ignore errors here, since if the device is standalone, it
7355 # won't be able to sync
7357 self._GoStandalone()
7358 self._GoReconnect(False)
7359 self._WaitUntilSync()
7361 self.feedback_fn("* done")
7363 def _RevertDiskStatus(self):
7364 """Try to revert the disk status after a failed migration.
7367 target_node = self.target_node
7368 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7372 self._EnsureSecondary(target_node)
7373 self._GoStandalone()
7374 self._GoReconnect(False)
7375 self._WaitUntilSync()
7376 except errors.OpExecError, err:
7377 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7378 " please try to recover the instance manually;"
7379 " error '%s'" % str(err))
7381 def _AbortMigration(self):
7382 """Call the hypervisor code to abort a started migration.
7385 instance = self.instance
7386 target_node = self.target_node
7387 migration_info = self.migration_info
7389 abort_result = self.rpc.call_finalize_migration(target_node,
7393 abort_msg = abort_result.fail_msg
7395 logging.error("Aborting migration failed on target node %s: %s",
7396 target_node, abort_msg)
7397 # Don't raise an exception here, as we still have to try to revert the
7398 # disk status, even if this step failed.
7400 def _ExecMigration(self):
7401 """Migrate an instance.
7403 The migrate is done by:
7404 - change the disks into dual-master mode
7405 - wait until disks are fully synchronized again
7406 - migrate the instance
7407 - change disks on the new secondary node (the old primary) to secondary
7408 - wait until disks are fully synchronized
7409 - change disks into single-master mode
7412 instance = self.instance
7413 target_node = self.target_node
7414 source_node = self.source_node
7416 # Check for hypervisor version mismatch and warn the user.
7417 nodeinfo = self.rpc.call_node_info([source_node, target_node],
7418 None, self.instance.hypervisor)
7419 src_info = nodeinfo[source_node]
7420 dst_info = nodeinfo[target_node]
7422 if ((constants.HV_NODEINFO_KEY_VERSION in src_info.payload) and
7423 (constants.HV_NODEINFO_KEY_VERSION in dst_info.payload)):
7424 src_version = src_info.payload[constants.HV_NODEINFO_KEY_VERSION]
7425 dst_version = dst_info.payload[constants.HV_NODEINFO_KEY_VERSION]
7426 if src_version != dst_version:
7427 self.feedback_fn("* warning: hypervisor version mismatch between"
7428 " source (%s) and target (%s) node" %
7429 (src_version, dst_version))
7431 self.feedback_fn("* checking disk consistency between source and target")
7432 for dev in instance.disks:
7433 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7434 raise errors.OpExecError("Disk %s is degraded or not fully"
7435 " synchronized on target node,"
7436 " aborting migration" % dev.iv_name)
7438 # First get the migration information from the remote node
7439 result = self.rpc.call_migration_info(source_node, instance)
7440 msg = result.fail_msg
7442 log_err = ("Failed fetching source migration information from %s: %s" %
7444 logging.error(log_err)
7445 raise errors.OpExecError(log_err)
7447 self.migration_info = migration_info = result.payload
7449 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7450 # Then switch the disks to master/master mode
7451 self._EnsureSecondary(target_node)
7452 self._GoStandalone()
7453 self._GoReconnect(True)
7454 self._WaitUntilSync()
7456 self.feedback_fn("* preparing %s to accept the instance" % target_node)
7457 result = self.rpc.call_accept_instance(target_node,
7460 self.nodes_ip[target_node])
7462 msg = result.fail_msg
7464 logging.error("Instance pre-migration failed, trying to revert"
7465 " disk status: %s", msg)
7466 self.feedback_fn("Pre-migration failed, aborting")
7467 self._AbortMigration()
7468 self._RevertDiskStatus()
7469 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7470 (instance.name, msg))
7472 self.feedback_fn("* migrating instance to %s" % target_node)
7473 result = self.rpc.call_instance_migrate(source_node, instance,
7474 self.nodes_ip[target_node],
7476 msg = result.fail_msg
7478 logging.error("Instance migration failed, trying to revert"
7479 " disk status: %s", msg)
7480 self.feedback_fn("Migration failed, aborting")
7481 self._AbortMigration()
7482 self._RevertDiskStatus()
7483 raise errors.OpExecError("Could not migrate instance %s: %s" %
7484 (instance.name, msg))
7486 instance.primary_node = target_node
7487 # distribute new instance config to the other nodes
7488 self.cfg.Update(instance, self.feedback_fn)
7490 result = self.rpc.call_finalize_migration(target_node,
7494 msg = result.fail_msg
7496 logging.error("Instance migration succeeded, but finalization failed:"
7498 raise errors.OpExecError("Could not finalize instance migration: %s" %
7501 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7502 self._EnsureSecondary(source_node)
7503 self._WaitUntilSync()
7504 self._GoStandalone()
7505 self._GoReconnect(False)
7506 self._WaitUntilSync()
7508 self.feedback_fn("* done")
7510 def _ExecFailover(self):
7511 """Failover an instance.
7513 The failover is done by shutting it down on its present node and
7514 starting it on the secondary.
7517 instance = self.instance
7518 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7520 source_node = instance.primary_node
7521 target_node = self.target_node
7523 if instance.admin_up:
7524 self.feedback_fn("* checking disk consistency between source and target")
7525 for dev in instance.disks:
7526 # for drbd, these are drbd over lvm
7527 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7528 if primary_node.offline:
7529 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7531 (primary_node.name, dev.iv_name, target_node))
7532 elif not self.ignore_consistency:
7533 raise errors.OpExecError("Disk %s is degraded on target node,"
7534 " aborting failover" % dev.iv_name)
7536 self.feedback_fn("* not checking disk consistency as instance is not"
7539 self.feedback_fn("* shutting down instance on source node")
7540 logging.info("Shutting down instance %s on node %s",
7541 instance.name, source_node)
7543 result = self.rpc.call_instance_shutdown(source_node, instance,
7544 self.shutdown_timeout)
7545 msg = result.fail_msg
7547 if self.ignore_consistency or primary_node.offline:
7548 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7549 " proceeding anyway; please make sure node"
7550 " %s is down; error details: %s",
7551 instance.name, source_node, source_node, msg)
7553 raise errors.OpExecError("Could not shutdown instance %s on"
7555 (instance.name, source_node, msg))
7557 self.feedback_fn("* deactivating the instance's disks on source node")
7558 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
7559 raise errors.OpExecError("Can't shut down the instance's disks")
7561 instance.primary_node = target_node
7562 # distribute new instance config to the other nodes
7563 self.cfg.Update(instance, self.feedback_fn)
7565 # Only start the instance if it's marked as up
7566 if instance.admin_up:
7567 self.feedback_fn("* activating the instance's disks on target node %s" %
7569 logging.info("Starting instance %s on node %s",
7570 instance.name, target_node)
7572 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
7573 ignore_secondaries=True)
7575 _ShutdownInstanceDisks(self.lu, instance)
7576 raise errors.OpExecError("Can't activate the instance's disks")
7578 self.feedback_fn("* starting the instance on the target node %s" %
7580 result = self.rpc.call_instance_start(target_node, instance, None, None,
7582 msg = result.fail_msg
7584 _ShutdownInstanceDisks(self.lu, instance)
7585 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7586 (instance.name, target_node, msg))
7588 def Exec(self, feedback_fn):
7589 """Perform the migration.
7592 self.feedback_fn = feedback_fn
7593 self.source_node = self.instance.primary_node
7595 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7596 if self.instance.disk_template in constants.DTS_INT_MIRROR:
7597 self.target_node = self.instance.secondary_nodes[0]
7598 # Otherwise self.target_node has been populated either
7599 # directly, or through an iallocator.
7601 self.all_nodes = [self.source_node, self.target_node]
7602 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
7603 in self.cfg.GetMultiNodeInfo(self.all_nodes))
7606 feedback_fn("Failover instance %s" % self.instance.name)
7607 self._ExecFailover()
7609 feedback_fn("Migrating instance %s" % self.instance.name)
7612 return self._ExecCleanup()
7614 return self._ExecMigration()
7617 def _CreateBlockDev(lu, node, instance, device, force_create,
7619 """Create a tree of block devices on a given node.
7621 If this device type has to be created on secondaries, create it and all its children.
7624 If not, just recurse to children keeping the same 'force' value.
7626 @param lu: the lu on whose behalf we execute
7627 @param node: the node on which to create the device
7628 @type instance: L{objects.Instance}
7629 @param instance: the instance which owns the device
7630 @type device: L{objects.Disk}
7631 @param device: the device to create
7632 @type force_create: boolean
7633 @param force_create: whether to force creation of this device; this
7634 will be changed to True whenever we find a device for which
7635 CreateOnSecondary() is true
7636 @param info: the extra 'metadata' we should attach to the device
7637 (this will be represented as a LVM tag)
7638 @type force_open: boolean
7639 @param force_open: this parameter will be passed to the
7640 L{backend.BlockdevCreate} function where it specifies
7641 whether we run on primary or not, and it affects both
7642 the child assembly and the device's own Open() execution
7645 if device.CreateOnSecondary():
7649 for child in device.children:
7650 _CreateBlockDev(lu, node, instance, child, force_create,
7653 if not force_create:
7656 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
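# Illustrative sketch (added comment, not original code): how force_create
# propagates while recursing. Once a device reports CreateOnSecondary()
# (a DRBD8 device, for instance), the flag is flipped to True and all of
# its children are created as well on the node we were asked to handle:
#
#   # hypothetical top-level call with force_create=False
#   _CreateBlockDev(lu, node, instance, drbd_disk, False, info, force_open)
#   # drbd_disk.CreateOnSecondary() is true, so the recursive calls for its
#   # data/meta LV children run with force_create=True and each of them
#   # reaches _CreateSingleBlockDev() below.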
7659 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
7660 """Create a single block device on a given node.
7662 This will not recurse over children of the device, so they must be created in advance.
7665 @param lu: the lu on whose behalf we execute
7666 @param node: the node on which to create the device
7667 @type instance: L{objects.Instance}
7668 @param instance: the instance which owns the device
7669 @type device: L{objects.Disk}
7670 @param device: the device to create
7671 @param info: the extra 'metadata' we should attach to the device
7672 (this will be represented as a LVM tag)
7673 @type force_open: boolean
7674 @param force_open: this parameter will be passed to the
7675 L{backend.BlockdevCreate} function where it specifies
7676 whether we run on primary or not, and it affects both
7677 the child assembly and the device's own Open() execution
7680 lu.cfg.SetDiskID(device, node)
7681 result = lu.rpc.call_blockdev_create(node, device, device.size,
7682 instance.name, force_open, info)
7683 result.Raise("Can't create block device %s on"
7684 " node %s for instance %s" % (device, node, instance.name))
7685 if device.physical_id is None:
7686 device.physical_id = result.payload
7689 def _GenerateUniqueNames(lu, exts):
7690 """Generate suitable LV names.
7692 This will generate one unique logical volume name for each of the given extensions.
7697 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
7698 results.append("%s%s" % (new_id, val))
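# Example of the generated names (added comment; the unique IDs shown are
# invented):
#   _GenerateUniqueNames(lu, [".disk0", ".disk1"])
#   -> ["4f2b1c3a-....disk0", "9d7e5a10-....disk1"]
# i.e. one configuration-unique ID per requested extension.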
7702 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
7703 iv_name, p_minor, s_minor):
7704 """Generate a drbd8 device complete with its children.
7707 assert len(vgnames) == len(names) == 2
7708 port = lu.cfg.AllocatePort()
7709 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
7710 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
7711 logical_id=(vgnames[0], names[0]))
7712 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7713 logical_id=(vgnames[1], names[1]))
7714 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
7715 logical_id=(primary, secondary, port,
7718 children=[dev_data, dev_meta],
7723 def _GenerateDiskTemplate(lu, template_name,
7724 instance_name, primary_node,
7725 secondary_nodes, disk_info,
7726 file_storage_dir, file_driver,
7727 base_index, feedback_fn):
7728 """Generate the entire disk layout for a given template type.
7731 # TODO: compute space requirements
7733 vgname = lu.cfg.GetVGName()
7734 disk_count = len(disk_info)
7736 if template_name == constants.DT_DISKLESS:
7738 elif template_name == constants.DT_PLAIN:
7739 if len(secondary_nodes) != 0:
7740 raise errors.ProgrammerError("Wrong template configuration")
7742 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7743 for i in range(disk_count)])
7744 for idx, disk in enumerate(disk_info):
7745 disk_index = idx + base_index
7746 vg = disk.get(constants.IDISK_VG, vgname)
7747 feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7748 disk_dev = objects.Disk(dev_type=constants.LD_LV,
7749 size=disk[constants.IDISK_SIZE],
7750 logical_id=(vg, names[idx]),
7751 iv_name="disk/%d" % disk_index,
7752 mode=disk[constants.IDISK_MODE])
7753 disks.append(disk_dev)
7754 elif template_name == constants.DT_DRBD8:
7755 if len(secondary_nodes) != 1:
7756 raise errors.ProgrammerError("Wrong template configuration")
7757 remote_node = secondary_nodes[0]
7758 minors = lu.cfg.AllocateDRBDMinor(
7759 [primary_node, remote_node] * len(disk_info), instance_name)
7762 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7763 for i in range(disk_count)]):
7764 names.append(lv_prefix + "_data")
7765 names.append(lv_prefix + "_meta")
7766 for idx, disk in enumerate(disk_info):
7767 disk_index = idx + base_index
7768 data_vg = disk.get(constants.IDISK_VG, vgname)
7769 meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
7770 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7771 disk[constants.IDISK_SIZE],
7773 names[idx * 2:idx * 2 + 2],
7774 "disk/%d" % disk_index,
7775 minors[idx * 2], minors[idx * 2 + 1])
7776 disk_dev.mode = disk[constants.IDISK_MODE]
7777 disks.append(disk_dev)
7778 elif template_name == constants.DT_FILE:
7779 if len(secondary_nodes) != 0:
7780 raise errors.ProgrammerError("Wrong template configuration")
7782 opcodes.RequireFileStorage()
7784 for idx, disk in enumerate(disk_info):
7785 disk_index = idx + base_index
7786 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7787 size=disk[constants.IDISK_SIZE],
7788 iv_name="disk/%d" % disk_index,
7789 logical_id=(file_driver,
7790 "%s/disk%d" % (file_storage_dir,
7792 mode=disk[constants.IDISK_MODE])
7793 disks.append(disk_dev)
7794 elif template_name == constants.DT_SHARED_FILE:
7795 if len(secondary_nodes) != 0:
7796 raise errors.ProgrammerError("Wrong template configuration")
7798 opcodes.RequireSharedFileStorage()
7800 for idx, disk in enumerate(disk_info):
7801 disk_index = idx + base_index
7802 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7803 size=disk[constants.IDISK_SIZE],
7804 iv_name="disk/%d" % disk_index,
7805 logical_id=(file_driver,
7806 "%s/disk%d" % (file_storage_dir,
7808 mode=disk[constants.IDISK_MODE])
7809 disks.append(disk_dev)
7810 elif template_name == constants.DT_BLOCK:
7811 if len(secondary_nodes) != 0:
7812 raise errors.ProgrammerError("Wrong template configuration")
7814 for idx, disk in enumerate(disk_info):
7815 disk_index = idx + base_index
7816 disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7817 size=disk[constants.IDISK_SIZE],
7818 logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7819 disk[constants.IDISK_ADOPT]),
7820 iv_name="disk/%d" % disk_index,
7821 mode=disk[constants.IDISK_MODE])
7822 disks.append(disk_dev)
7825 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
7829 def _GetInstanceInfoText(instance):
7830 """Compute the text that should be added to the disk's metadata.
7833 return "originstname+%s" % instance.name
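# For example (added comment): an instance named "inst1.example.com" gets
# "originstname+inst1.example.com" attached to its disks (stored as an LVM
# tag for LV-backed disk templates).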
7836 def _CalcEta(time_taken, written, total_size):
7837 """Calculates the ETA based on size written and total size.
7839 @param time_taken: The time taken so far
7840 @param written: amount written so far
7841 @param total_size: The total size of data to be written
7842 @return: The remaining time in seconds
7845 avg_time = time_taken / float(written)
7846 return (total_size - written) * avg_time
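# Worked example (added comment): if 256 out of 1024 units were written in
# 30 seconds, avg_time is 30/256 seconds per unit and the ETA is
# (1024 - 256) * 30 / 256 = 90 seconds.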
7849 def _WipeDisks(lu, instance):
7850 """Wipes instance disks.
7852 @type lu: L{LogicalUnit}
7853 @param lu: the logical unit on whose behalf we execute
7854 @type instance: L{objects.Instance}
7855 @param instance: the instance whose disks we should wipe
7856 @return: the success of the wipe
7859 node = instance.primary_node
7861 for device in instance.disks:
7862 lu.cfg.SetDiskID(device, node)
7864 logging.info("Pause sync of instance %s disks", instance.name)
7865 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
7867 for idx, success in enumerate(result.payload):
7869 logging.warn("pause-sync of instance %s for disk %d failed",
7873 for idx, device in enumerate(instance.disks):
7874 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
7875 # MAX_WIPE_CHUNK at max
7876 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
7877 constants.MIN_WIPE_CHUNK_PERCENT)
7878 # we _must_ make this an int, otherwise rounding errors will occur
7880 wipe_chunk_size = int(wipe_chunk_size)
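# Numeric example (added comment; the constant values below are assumed):
# with MAX_WIPE_CHUNK = 1024 MiB and MIN_WIPE_CHUNK_PERCENT = 10, a
# 20 GiB disk gives min(1024, 20480 / 100.0 * 10) = 1024 MiB chunks, while
# a 5 GiB disk gives min(1024, 512) = 512 MiB chunks.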
7882 lu.LogInfo("* Wiping disk %d", idx)
7883 logging.info("Wiping disk %d for instance %s, node %s using"
7884 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
7889 start_time = time.time()
7891 while offset < size:
7892 wipe_size = min(wipe_chunk_size, size - offset)
7893 logging.debug("Wiping disk %d, offset %s, chunk %s",
7894 idx, offset, wipe_size)
7895 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
7896 result.Raise("Could not wipe disk %d at offset %d for size %d" %
7897 (idx, offset, wipe_size))
7900 if now - last_output >= 60:
7901 eta = _CalcEta(now - start_time, offset, size)
7902 lu.LogInfo(" - done: %.1f%% ETA: %s" %
7903 (offset / float(size) * 100, utils.FormatSeconds(eta)))
7906 logging.info("Resume sync of instance %s disks", instance.name)
7908 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
7910 for idx, success in enumerate(result.payload):
7912 lu.LogWarning("Resume sync of disk %d failed, please have a"
7913 " look at the status and troubleshoot the issue", idx)
7914 logging.warn("resume-sync of instance %s for disk %d failed",
7918 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
7919 """Create all disks for an instance.
7921 This abstracts away some work from AddInstance.
7923 @type lu: L{LogicalUnit}
7924 @param lu: the logical unit on whose behalf we execute
7925 @type instance: L{objects.Instance}
7926 @param instance: the instance whose disks we should create
7928 @param to_skip: list of indices to skip
7929 @type target_node: string
7930 @param target_node: if passed, overrides the target node for creation
7932 @return: the success of the creation
7935 info = _GetInstanceInfoText(instance)
7936 if target_node is None:
7937 pnode = instance.primary_node
7938 all_nodes = instance.all_nodes
7943 if instance.disk_template in constants.DTS_FILEBASED:
7944 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7945 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
7947 result.Raise("Failed to create directory '%s' on"
7948 " node %s" % (file_storage_dir, pnode))
7950 # Note: this needs to be kept in sync with adding of disks in
7951 # LUInstanceSetParams
7952 for idx, device in enumerate(instance.disks):
7953 if to_skip and idx in to_skip:
7955 logging.info("Creating volume %s for instance %s",
7956 device.iv_name, instance.name)
7958 for node in all_nodes:
7959 f_create = node == pnode
7960 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
7963 def _RemoveDisks(lu, instance, target_node=None):
7964 """Remove all disks for an instance.
7966 This abstracts away some work from `AddInstance()` and
7967 `RemoveInstance()`. Note that in case some of the devices couldn't
7968 be removed, the removal will continue with the other ones (compare
7969 with `_CreateDisks()`).
7971 @type lu: L{LogicalUnit}
7972 @param lu: the logical unit on whose behalf we execute
7973 @type instance: L{objects.Instance}
7974 @param instance: the instance whose disks we should remove
7975 @type target_node: string
7976 @param target_node: used to override the node on which to remove the disks
7978 @return: the success of the removal
7981 logging.info("Removing block devices for instance %s", instance.name)
7984 for device in instance.disks:
7986 edata = [(target_node, device)]
7988 edata = device.ComputeNodeTree(instance.primary_node)
7989 for node, disk in edata:
7990 lu.cfg.SetDiskID(disk, node)
7991 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
7993 lu.LogWarning("Could not remove block device %s on node %s,"
7994 " continuing anyway: %s", device.iv_name, node, msg)
7997 if instance.disk_template == constants.DT_FILE:
7998 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8002 tgt = instance.primary_node
8003 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
8005 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
8006 file_storage_dir, instance.primary_node, result.fail_msg)
8012 def _ComputeDiskSizePerVG(disk_template, disks):
8013 """Compute disk size requirements in the volume group
8016 def _compute(disks, payload):
8017 """Sum disk sizes per volume group, adding C{payload} MiB per disk.
8022 vgs[disk[constants.IDISK_VG]] = \
8023 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
8027 # Required free disk space as a function of disk and swap space
8029 constants.DT_DISKLESS: {},
8030 constants.DT_PLAIN: _compute(disks, 0),
8031 # 128 MB are added for drbd metadata for each disk
8032 constants.DT_DRBD8: _compute(disks, 128),
8033 constants.DT_FILE: {},
8034 constants.DT_SHARED_FILE: {},
8037 if disk_template not in req_size_dict:
8038 raise errors.ProgrammerError("Disk template '%s' size requirement"
8039 " is unknown" % disk_template)
8041 return req_size_dict[disk_template]
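# Example (added comment, with illustrative VG names): two 10240 MiB disks,
# one in VG "xenvg" and one in VG "fastvg", yield
# {"xenvg": 10240, "fastvg": 10240} for DT_PLAIN and
# {"xenvg": 10368, "fastvg": 10368} for DT_DRBD8, because of the 128 MiB of
# metadata added per disk.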
8044 def _ComputeDiskSize(disk_template, disks):
8045 """Compute disk size requirements in the volume group
8048 # Required free disk space as a function of disk and swap space
8050 constants.DT_DISKLESS: None,
8051 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
8052 # 128 MB are added for drbd metadata for each disk
8053 constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
8054 constants.DT_FILE: None,
8055 constants.DT_SHARED_FILE: 0,
8056 constants.DT_BLOCK: 0,
8059 if disk_template not in req_size_dict:
8060 raise errors.ProgrammerError("Disk template '%s' size requirement"
8061 " is unknown" % disk_template)
8063 return req_size_dict[disk_template]
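# Example (added comment): for disks of 1024 and 2048 MiB, DT_PLAIN needs
# 1024 + 2048 = 3072 MiB in the volume group and DT_DRBD8 needs
# (1024 + 128) + (2048 + 128) = 3328 MiB, while the file-based, block and
# diskless templates report no LVM space requirement.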
8066 def _FilterVmNodes(lu, nodenames):
8067 """Filters out non-vm_capable nodes from a list.
8069 @type lu: L{LogicalUnit}
8070 @param lu: the logical unit for which we check
8071 @type nodenames: list
8072 @param nodenames: the list of nodes on which we should check
8074 @return: the list of vm-capable nodes
8077 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
8078 return [name for name in nodenames if name not in vm_nodes]
8081 def _CheckHVParams(lu, nodenames, hvname, hvparams):
8082 """Hypervisor parameter validation.
8084 This function abstracts the hypervisor parameter validation to be
8085 used in both instance create and instance modify.
8087 @type lu: L{LogicalUnit}
8088 @param lu: the logical unit for which we check
8089 @type nodenames: list
8090 @param nodenames: the list of nodes on which we should check
8091 @type hvname: string
8092 @param hvname: the name of the hypervisor we should use
8093 @type hvparams: dict
8094 @param hvparams: the parameters which we need to check
8095 @raise errors.OpPrereqError: if the parameters are not valid
8098 nodenames = _FilterVmNodes(lu, nodenames)
8099 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
8102 for node in nodenames:
8106 info.Raise("Hypervisor parameter validation failed on node %s" % node)
8109 def _CheckOSParams(lu, required, nodenames, osname, osparams):
8110 """OS parameters validation.
8112 @type lu: L{LogicalUnit}
8113 @param lu: the logical unit for which we check
8114 @type required: boolean
8115 @param required: whether the validation should fail if the OS is not found
8117 @type nodenames: list
8118 @param nodenames: the list of nodes on which we should check
8119 @type osname: string
8120 @param osname: the name of the OS we should use
8121 @type osparams: dict
8122 @param osparams: the parameters which we need to check
8123 @raise errors.OpPrereqError: if the parameters are not valid
8126 nodenames = _FilterVmNodes(lu, nodenames)
8127 result = lu.rpc.call_os_validate(required, nodenames, osname,
8128 [constants.OS_VALIDATE_PARAMETERS],
8130 for node, nres in result.items():
8131 # we don't check for offline cases since this should be run only
8132 # against the master node and/or an instance's nodes
8133 nres.Raise("OS Parameters validation failed on node %s" % node)
8134 if not nres.payload:
8135 lu.LogInfo("OS %s not found on node %s, validation skipped",
8139 class LUInstanceCreate(LogicalUnit):
8140 """Create an instance.
8143 HPATH = "instance-add"
8144 HTYPE = constants.HTYPE_INSTANCE
8147 def CheckArguments(self):
8151 # do not require name_check to ease forward/backward compatibility
8153 if self.op.no_install and self.op.start:
8154 self.LogInfo("No-installation mode selected, disabling startup")
8155 self.op.start = False
8156 # validate/normalize the instance name
8157 self.op.instance_name = \
8158 netutils.Hostname.GetNormalizedName(self.op.instance_name)
8160 if self.op.ip_check and not self.op.name_check:
8161 # TODO: make the ip check more flexible and not depend on the name check
8162 raise errors.OpPrereqError("Cannot do IP address check without a name"
8163 " check", errors.ECODE_INVAL)
8165 # check nics' parameter names
8166 for nic in self.op.nics:
8167 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8169 # check disks. parameter names and consistent adopt/no-adopt strategy
8170 has_adopt = has_no_adopt = False
8171 for disk in self.op.disks:
8172 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8173 if constants.IDISK_ADOPT in disk:
8177 if has_adopt and has_no_adopt:
8178 raise errors.OpPrereqError("Either all disks are adopted or none is",
8181 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8182 raise errors.OpPrereqError("Disk adoption is not supported for the"
8183 " '%s' disk template" %
8184 self.op.disk_template,
8186 if self.op.iallocator is not None:
8187 raise errors.OpPrereqError("Disk adoption not allowed with an"
8188 " iallocator script", errors.ECODE_INVAL)
8189 if self.op.mode == constants.INSTANCE_IMPORT:
8190 raise errors.OpPrereqError("Disk adoption not allowed for"
8191 " instance import", errors.ECODE_INVAL)
8193 if self.op.disk_template in constants.DTS_MUST_ADOPT:
8194 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8195 " but no 'adopt' parameter given" %
8196 self.op.disk_template,
8199 self.adopt_disks = has_adopt
8201 # instance name verification
8202 if self.op.name_check:
8203 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8204 self.op.instance_name = self.hostname1.name
8205 # used in CheckPrereq for ip ping check
8206 self.check_ip = self.hostname1.ip
8208 self.check_ip = None
8210 # file storage checks
8211 if (self.op.file_driver and
8212 not self.op.file_driver in constants.FILE_DRIVER):
8213 raise errors.OpPrereqError("Invalid file driver name '%s'" %
8214 self.op.file_driver, errors.ECODE_INVAL)
8216 if self.op.disk_template == constants.DT_FILE:
8217 opcodes.RequireFileStorage()
8218 elif self.op.disk_template == constants.DT_SHARED_FILE:
8219 opcodes.RequireSharedFileStorage()
8221 ### Node/iallocator related checks
8222 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8224 if self.op.pnode is not None:
8225 if self.op.disk_template in constants.DTS_INT_MIRROR:
8226 if self.op.snode is None:
8227 raise errors.OpPrereqError("The networked disk templates need"
8228 " a mirror node", errors.ECODE_INVAL)
8230 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8232 self.op.snode = None
8234 self._cds = _GetClusterDomainSecret()
8236 if self.op.mode == constants.INSTANCE_IMPORT:
8237 # On import force_variant must be True, because if we forced it at
8238 # initial install, our only chance when importing it back is that it works again
8240 self.op.force_variant = True
8242 if self.op.no_install:
8243 self.LogInfo("No-installation mode has no effect during import")
8245 elif self.op.mode == constants.INSTANCE_CREATE:
8246 if self.op.os_type is None:
8247 raise errors.OpPrereqError("No guest OS specified",
8249 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
8250 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
8251 " installation" % self.op.os_type,
8253 if self.op.disk_template is None:
8254 raise errors.OpPrereqError("No disk template specified",
8257 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8258 # Check handshake to ensure both clusters have the same domain secret
8259 src_handshake = self.op.source_handshake
8260 if not src_handshake:
8261 raise errors.OpPrereqError("Missing source handshake",
8264 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
8267 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
8270 # Load and check source CA
8271 self.source_x509_ca_pem = self.op.source_x509_ca
8272 if not self.source_x509_ca_pem:
8273 raise errors.OpPrereqError("Missing source X509 CA",
8277 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
8279 except OpenSSL.crypto.Error, err:
8280 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
8281 (err, ), errors.ECODE_INVAL)
8283 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
8284 if errcode is not None:
8285 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
8288 self.source_x509_ca = cert
8290 src_instance_name = self.op.source_instance_name
8291 if not src_instance_name:
8292 raise errors.OpPrereqError("Missing source instance name",
8295 self.source_instance_name = \
8296 netutils.GetHostname(name=src_instance_name).name
8299 raise errors.OpPrereqError("Invalid instance creation mode %r" %
8300 self.op.mode, errors.ECODE_INVAL)
8302 def ExpandNames(self):
8303 """ExpandNames for CreateInstance.
8305 Figure out the right locks for instance creation.
8308 self.needed_locks = {}
8310 instance_name = self.op.instance_name
8311 # this is just a preventive check, but someone might still add this
8312 # instance in the meantime, and creation will fail at lock-add time
8313 if instance_name in self.cfg.GetInstanceList():
8314 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8315 instance_name, errors.ECODE_EXISTS)
8317 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
8319 if self.op.iallocator:
8320 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8322 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8323 nodelist = [self.op.pnode]
8324 if self.op.snode is not None:
8325 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8326 nodelist.append(self.op.snode)
8327 self.needed_locks[locking.LEVEL_NODE] = nodelist
8329 # in case of import lock the source node too
8330 if self.op.mode == constants.INSTANCE_IMPORT:
8331 src_node = self.op.src_node
8332 src_path = self.op.src_path
8334 if src_path is None:
8335 self.op.src_path = src_path = self.op.instance_name
8337 if src_node is None:
8338 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8339 self.op.src_node = None
8340 if os.path.isabs(src_path):
8341 raise errors.OpPrereqError("Importing an instance from a path"
8342 " requires a source node option",
8345 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8346 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8347 self.needed_locks[locking.LEVEL_NODE].append(src_node)
8348 if not os.path.isabs(src_path):
8349 self.op.src_path = src_path = \
8350 utils.PathJoin(constants.EXPORT_DIR, src_path)
8352 def _RunAllocator(self):
8353 """Run the allocator based on input opcode.
8356 nics = [n.ToDict() for n in self.nics]
8357 ial = IAllocator(self.cfg, self.rpc,
8358 mode=constants.IALLOCATOR_MODE_ALLOC,
8359 name=self.op.instance_name,
8360 disk_template=self.op.disk_template,
8363 vcpus=self.be_full[constants.BE_VCPUS],
8364 memory=self.be_full[constants.BE_MEMORY],
8367 hypervisor=self.op.hypervisor,
8370 ial.Run(self.op.iallocator)
8373 raise errors.OpPrereqError("Can't compute nodes using"
8374 " iallocator '%s': %s" %
8375 (self.op.iallocator, ial.info),
8377 if len(ial.result) != ial.required_nodes:
8378 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8379 " of nodes (%s), required %s" %
8380 (self.op.iallocator, len(ial.result),
8381 ial.required_nodes), errors.ECODE_FAULT)
8382 self.op.pnode = ial.result[0]
8383 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8384 self.op.instance_name, self.op.iallocator,
8385 utils.CommaJoin(ial.result))
8386 if ial.required_nodes == 2:
8387 self.op.snode = ial.result[1]
8389 def BuildHooksEnv(self):
8392 This runs on master, primary and secondary nodes of the instance.
8396 "ADD_MODE": self.op.mode,
8398 if self.op.mode == constants.INSTANCE_IMPORT:
8399 env["SRC_NODE"] = self.op.src_node
8400 env["SRC_PATH"] = self.op.src_path
8401 env["SRC_IMAGES"] = self.src_images
8403 env.update(_BuildInstanceHookEnv(
8404 name=self.op.instance_name,
8405 primary_node=self.op.pnode,
8406 secondary_nodes=self.secondaries,
8407 status=self.op.start,
8408 os_type=self.op.os_type,
8409 memory=self.be_full[constants.BE_MEMORY],
8410 vcpus=self.be_full[constants.BE_VCPUS],
8411 nics=_NICListToTuple(self, self.nics),
8412 disk_template=self.op.disk_template,
8413 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8414 for d in self.disks],
8417 hypervisor_name=self.op.hypervisor,
8423 def BuildHooksNodes(self):
8424 """Build hooks nodes.
8427 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8430 def _ReadExportInfo(self):
8431 """Reads the export information from disk.
8433 It will override the opcode source node and path with the actual
8434 information, if these two were not specified before.
8436 @return: the export information
8439 assert self.op.mode == constants.INSTANCE_IMPORT
8441 src_node = self.op.src_node
8442 src_path = self.op.src_path
8444 if src_node is None:
8445 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
8446 exp_list = self.rpc.call_export_list(locked_nodes)
8448 for node in exp_list:
8449 if exp_list[node].fail_msg:
8451 if src_path in exp_list[node].payload:
8453 self.op.src_node = src_node = node
8454 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
8458 raise errors.OpPrereqError("No export found for relative path %s" %
8459 src_path, errors.ECODE_INVAL)
8461 _CheckNodeOnline(self, src_node)
8462 result = self.rpc.call_export_info(src_node, src_path)
8463 result.Raise("No export or invalid export found in dir %s" % src_path)
8465 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8466 if not export_info.has_section(constants.INISECT_EXP):
8467 raise errors.ProgrammerError("Corrupted export config",
8468 errors.ECODE_ENVIRON)
8470 ei_version = export_info.get(constants.INISECT_EXP, "version")
8471 if (int(ei_version) != constants.EXPORT_VERSION):
8472 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8473 (ei_version, constants.EXPORT_VERSION),
8474 errors.ECODE_ENVIRON)
8477 def _ReadExportParams(self, einfo):
8478 """Use export parameters as defaults.
8480 In case the opcode doesn't specify (as in override) some instance
8481 parameters, then try to use them from the export information, if present there.
8485 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
8487 if self.op.disk_template is None:
8488 if einfo.has_option(constants.INISECT_INS, "disk_template"):
8489 self.op.disk_template = einfo.get(constants.INISECT_INS,
8492 raise errors.OpPrereqError("No disk template specified and the export"
8493 " is missing the disk_template information",
8496 if not self.op.disks:
8497 if einfo.has_option(constants.INISECT_INS, "disk_count"):
8499 # TODO: import the disk iv_name too
8500 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
8501 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
8502 disks.append({constants.IDISK_SIZE: disk_sz})
8503 self.op.disks = disks
8505 raise errors.OpPrereqError("No disk info specified and the export"
8506 " is missing the disk information",
8509 if (not self.op.nics and
8510 einfo.has_option(constants.INISECT_INS, "nic_count")):
8512 for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
8514 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
8515 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
8520 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
8521 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
8523 if (self.op.hypervisor is None and
8524 einfo.has_option(constants.INISECT_INS, "hypervisor")):
8525 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
8527 if einfo.has_section(constants.INISECT_HYP):
8528 # use the export parameters but do not override the ones
8529 # specified by the user
8530 for name, value in einfo.items(constants.INISECT_HYP):
8531 if name not in self.op.hvparams:
8532 self.op.hvparams[name] = value
8534 if einfo.has_section(constants.INISECT_BEP):
8535 # use the parameters, without overriding
8536 for name, value in einfo.items(constants.INISECT_BEP):
8537 if name not in self.op.beparams:
8538 self.op.beparams[name] = value
8540 # try to read the parameters old style, from the main section
8541 for name in constants.BES_PARAMETERS:
8542 if (name not in self.op.beparams and
8543 einfo.has_option(constants.INISECT_INS, name)):
8544 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
8546 if einfo.has_section(constants.INISECT_OSP):
8547 # use the parameters, without overriding
8548 for name, value in einfo.items(constants.INISECT_OSP):
8549 if name not in self.op.osparams:
8550 self.op.osparams[name] = value
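# Rough sketch (added comment) of the export file layout this method falls
# back to; the option names match the lookups above, the section headers
# are the assumed values of constants.INISECT_INS/_HYP/_BEP and the values
# are purely illustrative:
#
#   [instance]
#   disk_template = drbd
#   disk_count = 1
#   disk0_size = 10240
#   nic_count = 1
#   nic0_mac = aa:00:00:12:34:56
#   hypervisor = xen-pvm
#   [hypervisor]
#   kernel_path = /boot/vmlinuz
#   [backend]
#   memory = 512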
8552 def _RevertToDefaults(self, cluster):
8553 """Revert the instance parameters to the default values.
8557 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
8558 for name in self.op.hvparams.keys():
8559 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
8560 del self.op.hvparams[name]
8562 be_defs = cluster.SimpleFillBE({})
8563 for name in self.op.beparams.keys():
8564 if name in be_defs and be_defs[name] == self.op.beparams[name]:
8565 del self.op.beparams[name]
8567 nic_defs = cluster.SimpleFillNIC({})
8568 for nic in self.op.nics:
8569 for name in constants.NICS_PARAMETERS:
8570 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
8573 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
8574 for name in self.op.osparams.keys():
8575 if name in os_defs and os_defs[name] == self.op.osparams[name]:
8576 del self.op.osparams[name]
8578 def _CalculateFileStorageDir(self):
8579 """Calculate final instance file storage dir.
8582 # file storage dir calculation/check
8583 self.instance_file_storage_dir = None
8584 if self.op.disk_template in constants.DTS_FILEBASED:
8585 # build the full file storage dir path
8588 if self.op.disk_template == constants.DT_SHARED_FILE:
8589 get_fsd_fn = self.cfg.GetSharedFileStorageDir
8591 get_fsd_fn = self.cfg.GetFileStorageDir
8593 cfg_storagedir = get_fsd_fn()
8594 if not cfg_storagedir:
8595 raise errors.OpPrereqError("Cluster file storage dir not defined")
8596 joinargs.append(cfg_storagedir)
8598 if self.op.file_storage_dir is not None:
8599 joinargs.append(self.op.file_storage_dir)
8601 joinargs.append(self.op.instance_name)
8603 # pylint: disable=W0142
8604 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
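# Example (added comment, illustrative values): with a cluster file storage
# dir of "/srv/ganeti/file-storage", op.file_storage_dir "customers" and
# instance name "inst1.example.com", the result is
# "/srv/ganeti/file-storage/customers/inst1.example.com"; without an
# explicit file_storage_dir the middle component is simply omitted.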
8606 def CheckPrereq(self):
8607 """Check prerequisites.
8610 self._CalculateFileStorageDir()
8612 if self.op.mode == constants.INSTANCE_IMPORT:
8613 export_info = self._ReadExportInfo()
8614 self._ReadExportParams(export_info)
8616 if (not self.cfg.GetVGName() and
8617 self.op.disk_template not in constants.DTS_NOT_LVM):
8618 raise errors.OpPrereqError("Cluster does not support lvm-based"
8619 " instances", errors.ECODE_STATE)
8621 if self.op.hypervisor is None:
8622 self.op.hypervisor = self.cfg.GetHypervisorType()
8624 cluster = self.cfg.GetClusterInfo()
8625 enabled_hvs = cluster.enabled_hypervisors
8626 if self.op.hypervisor not in enabled_hvs:
8627 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
8628 " cluster (%s)" % (self.op.hypervisor,
8629 ",".join(enabled_hvs)),
8632 # Check tag validity
8633 for tag in self.op.tags:
8634 objects.TaggableObject.ValidateTag(tag)
8636 # check hypervisor parameter syntax (locally)
8637 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
8638 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
8640 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
8641 hv_type.CheckParameterSyntax(filled_hvp)
8642 self.hv_full = filled_hvp
8643 # check that we don't specify global parameters on an instance
8644 _CheckGlobalHvParams(self.op.hvparams)
8646 # fill and remember the beparams dict
8647 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
8648 self.be_full = cluster.SimpleFillBE(self.op.beparams)
8650 # build os parameters
8651 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
8653 # now that hvp/bep are in final format, let's reset to defaults,
8655 if self.op.identify_defaults:
8656 self._RevertToDefaults(cluster)
8660 for idx, nic in enumerate(self.op.nics):
8661 nic_mode_req = nic.get(constants.INIC_MODE, None)
8662 nic_mode = nic_mode_req
8663 if nic_mode is None:
8664 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
8666 # in routed mode, for the first nic, the default ip is 'auto'
8667 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
8668 default_ip_mode = constants.VALUE_AUTO
8670 default_ip_mode = constants.VALUE_NONE
8672 # ip validity checks
8673 ip = nic.get(constants.INIC_IP, default_ip_mode)
8674 if ip is None or ip.lower() == constants.VALUE_NONE:
8676 elif ip.lower() == constants.VALUE_AUTO:
8677 if not self.op.name_check:
8678 raise errors.OpPrereqError("IP address set to auto but name checks"
8679 " have been skipped",
8681 nic_ip = self.hostname1.ip
8683 if not netutils.IPAddress.IsValid(ip):
8684 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
8688 # TODO: check the ip address for uniqueness
8689 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
8690 raise errors.OpPrereqError("Routed nic mode requires an ip address",
8693 # MAC address verification
8694 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
8695 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8696 mac = utils.NormalizeAndValidateMac(mac)
8699 self.cfg.ReserveMAC(mac, self.proc.GetECId())
8700 except errors.ReservationError:
8701 raise errors.OpPrereqError("MAC address %s already in use"
8702 " in cluster" % mac,
8703 errors.ECODE_NOTUNIQUE)
8705 # Build nic parameters
8706 link = nic.get(constants.INIC_LINK, None)
8709 nicparams[constants.NIC_MODE] = nic_mode_req
8711 nicparams[constants.NIC_LINK] = link
8713 check_params = cluster.SimpleFillNIC(nicparams)
8714 objects.NIC.CheckParameterSyntax(check_params)
8715 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
8717 # disk checks/pre-build
8718 default_vg = self.cfg.GetVGName()
8720 for disk in self.op.disks:
8721 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
8722 if mode not in constants.DISK_ACCESS_SET:
8723 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
8724 mode, errors.ECODE_INVAL)
8725 size = disk.get(constants.IDISK_SIZE, None)
8727 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
8730 except (TypeError, ValueError):
8731 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
8734 data_vg = disk.get(constants.IDISK_VG, default_vg)
8736 constants.IDISK_SIZE: size,
8737 constants.IDISK_MODE: mode,
8738 constants.IDISK_VG: data_vg,
8739 constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
8741 if constants.IDISK_ADOPT in disk:
8742 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
8743 self.disks.append(new_disk)
8745 if self.op.mode == constants.INSTANCE_IMPORT:
8747 # Check that the new instance doesn't have less disks than the export
8748 instance_disks = len(self.disks)
8749 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
8750 if instance_disks < export_disks:
8751 raise errors.OpPrereqError("Not enough disks to import."
8752 " (instance: %d, export: %d)" %
8753 (instance_disks, export_disks),
8757 for idx in range(export_disks):
8758 option = "disk%d_dump" % idx
8759 if export_info.has_option(constants.INISECT_INS, option):
8760 # FIXME: are the old os-es, disk sizes, etc. useful?
8761 export_name = export_info.get(constants.INISECT_INS, option)
8762 image = utils.PathJoin(self.op.src_path, export_name)
8763 disk_images.append(image)
8765 disk_images.append(False)
8767 self.src_images = disk_images
8769 old_name = export_info.get(constants.INISECT_INS, "name")
8771 exp_nic_count = export_info.getint(constants.INISECT_INS, "nic_count")
8772 except (TypeError, ValueError), err:
8773 raise errors.OpPrereqError("Invalid export file, nic_count is not"
8774 " an integer: %s" % str(err),
8776 if self.op.instance_name == old_name:
8777 for idx, nic in enumerate(self.nics):
8778 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
8779 nic_mac_ini = "nic%d_mac" % idx
8780 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8782 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8784 # ip ping checks (we use the same ip that was resolved in ExpandNames)
8785 if self.op.ip_check:
8786 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8787 raise errors.OpPrereqError("IP %s of instance %s already in use" %
8788 (self.check_ip, self.op.instance_name),
8789 errors.ECODE_NOTUNIQUE)
8791 #### mac address generation
8792 # By generating here the mac address both the allocator and the hooks get
8793 # the real final mac address rather than the 'auto' or 'generate' value.
8794 # There is a race condition between the generation and the instance object
8795 # creation, which means that we know the mac is valid now, but we're not
8796 # sure it will be when we actually add the instance. If things go bad
8797 # adding the instance will abort because of a duplicate mac, and the
8798 # creation job will fail.
8799 for nic in self.nics:
8800 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8801 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
8805 if self.op.iallocator is not None:
8806 self._RunAllocator()
8808 #### node related checks
8810 # check primary node
8811 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8812 assert self.pnode is not None, \
8813 "Cannot retrieve locked node %s" % self.op.pnode
8815 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8816 pnode.name, errors.ECODE_STATE)
8818 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8819 pnode.name, errors.ECODE_STATE)
8820 if not pnode.vm_capable:
8821 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8822 " '%s'" % pnode.name, errors.ECODE_STATE)
8824 self.secondaries = []
8826 # mirror node verification
8827 if self.op.disk_template in constants.DTS_INT_MIRROR:
8828 if self.op.snode == pnode.name:
8829 raise errors.OpPrereqError("The secondary node cannot be the"
8830 " primary node", errors.ECODE_INVAL)
8831 _CheckNodeOnline(self, self.op.snode)
8832 _CheckNodeNotDrained(self, self.op.snode)
8833 _CheckNodeVmCapable(self, self.op.snode)
8834 self.secondaries.append(self.op.snode)
8836 nodenames = [pnode.name] + self.secondaries
8838 if not self.adopt_disks:
8839 # Check lv size requirements, if not adopting
8840 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
8841 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
8843 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
8844 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
8845 disk[constants.IDISK_ADOPT])
8846 for disk in self.disks])
8847 if len(all_lvs) != len(self.disks):
8848 raise errors.OpPrereqError("Duplicate volume names given for adoption",
8850 for lv_name in all_lvs:
8852 # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
8853 # to ReserveLV use the same syntax
8854 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
8855 except errors.ReservationError:
8856 raise errors.OpPrereqError("LV named %s used by another instance" %
8857 lv_name, errors.ECODE_NOTUNIQUE)
8859 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
8860 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
8862 node_lvs = self.rpc.call_lv_list([pnode.name],
8863 vg_names.payload.keys())[pnode.name]
8864 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
8865 node_lvs = node_lvs.payload
8867 delta = all_lvs.difference(node_lvs.keys())
8869 raise errors.OpPrereqError("Missing logical volume(s): %s" %
8870 utils.CommaJoin(delta),
8872 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
8874 raise errors.OpPrereqError("Online logical volumes found, cannot"
8875 " adopt: %s" % utils.CommaJoin(online_lvs),
8877 # update the size of disk based on what is found
8878 for dsk in self.disks:
8879 dsk[constants.IDISK_SIZE] = \
8880 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
8881 dsk[constants.IDISK_ADOPT])][0]))
8883 elif self.op.disk_template == constants.DT_BLOCK:
8884 # Normalize and de-duplicate device paths
8885 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
8886 for disk in self.disks])
8887 if len(all_disks) != len(self.disks):
8888 raise errors.OpPrereqError("Duplicate disk names given for adoption",
8890 baddisks = [d for d in all_disks
8891 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
8893 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
8894 " cannot be adopted" %
8895 (", ".join(baddisks),
8896 constants.ADOPTABLE_BLOCKDEV_ROOT),
8899 node_disks = self.rpc.call_bdev_sizes([pnode.name],
8900 list(all_disks))[pnode.name]
8901 node_disks.Raise("Cannot get block device information from node %s" %
8903 node_disks = node_disks.payload
8904 delta = all_disks.difference(node_disks.keys())
8906 raise errors.OpPrereqError("Missing block device(s): %s" %
8907 utils.CommaJoin(delta),
8909 for dsk in self.disks:
8910 dsk[constants.IDISK_SIZE] = \
8911 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
8913 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
8915 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
8916 # check OS parameters (remotely)
8917 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
8919 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
8921 # memory check on primary node
8923 _CheckNodeFreeMemory(self, self.pnode.name,
8924 "creating instance %s" % self.op.instance_name,
8925 self.be_full[constants.BE_MEMORY],
8928 self.dry_run_result = list(nodenames)
8930 def Exec(self, feedback_fn):
8931 """Create and add the instance to the cluster.
8934 instance = self.op.instance_name
8935 pnode_name = self.pnode.name
8937 ht_kind = self.op.hypervisor
8938 if ht_kind in constants.HTS_REQ_PORT:
8939 network_port = self.cfg.AllocatePort()
8943 disks = _GenerateDiskTemplate(self,
8944 self.op.disk_template,
8945 instance, pnode_name,
8948 self.instance_file_storage_dir,
8949 self.op.file_driver,
8953 iobj = objects.Instance(name=instance, os=self.op.os_type,
8954 primary_node=pnode_name,
8955 nics=self.nics, disks=disks,
8956 disk_template=self.op.disk_template,
8958 network_port=network_port,
8959 beparams=self.op.beparams,
8960 hvparams=self.op.hvparams,
8961 hypervisor=self.op.hypervisor,
8962 osparams=self.op.osparams,
8966 for tag in self.op.tags:
8969 if self.adopt_disks:
8970 if self.op.disk_template == constants.DT_PLAIN:
8971 # rename LVs to the newly-generated names; we need to construct
8972 # 'fake' LV disks with the old data, plus the new unique_id
8973 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
8975 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
8976 rename_to.append(t_dsk.logical_id)
8977 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
8978 self.cfg.SetDiskID(t_dsk, pnode_name)
8979 result = self.rpc.call_blockdev_rename(pnode_name,
8980 zip(tmp_disks, rename_to))
8981 result.Raise("Failed to rename adopted LVs")
8983 feedback_fn("* creating instance disks...")
8985 _CreateDisks(self, iobj)
8986 except errors.OpExecError:
8987 self.LogWarning("Device creation failed, reverting...")
8989 _RemoveDisks(self, iobj)
8991 self.cfg.ReleaseDRBDMinors(instance)
8994 feedback_fn("adding instance %s to cluster config" % instance)
8996 self.cfg.AddInstance(iobj, self.proc.GetECId())
8998 # Declare that we don't want to remove the instance lock anymore, as we've
8999 # added the instance to the config
9000 del self.remove_locks[locking.LEVEL_INSTANCE]
9002 if self.op.mode == constants.INSTANCE_IMPORT:
9003 # Release unused nodes
9004 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
9007 _ReleaseLocks(self, locking.LEVEL_NODE)
9010 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
9011 feedback_fn("* wiping instance disks...")
9013 _WipeDisks(self, iobj)
9014 except errors.OpExecError, err:
9015 logging.exception("Wiping disks failed")
9016 self.LogWarning("Wiping instance disks failed (%s)", err)
9020 # Something is already wrong with the disks, don't do anything else
9022 elif self.op.wait_for_sync:
9023 disk_abort = not _WaitForSync(self, iobj)
9024 elif iobj.disk_template in constants.DTS_INT_MIRROR:
9025 # make sure the disks are not degraded (still sync-ing is ok)
9026 feedback_fn("* checking mirrors status")
9027 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
9032 _RemoveDisks(self, iobj)
9033 self.cfg.RemoveInstance(iobj.name)
9034 # Make sure the instance lock gets removed
9035 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
9036 raise errors.OpExecError("There are some degraded disks for"
9039 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
9040 if self.op.mode == constants.INSTANCE_CREATE:
9041 if not self.op.no_install:
9042 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
9043 not self.op.wait_for_sync)
9045 feedback_fn("* pausing disk sync to install instance OS")
9046 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9048 for idx, success in enumerate(result.payload):
9050 logging.warn("pause-sync of instance %s for disk %d failed",
9053 feedback_fn("* running the instance OS create scripts...")
9054 # FIXME: pass debug option from opcode to backend
9056 self.rpc.call_instance_os_add(pnode_name, iobj, False,
9057 self.op.debug_level)
9059 feedback_fn("* resuming disk sync")
9060 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9062 for idx, success in enumerate(result.payload):
9064 logging.warn("resume-sync of instance %s for disk %d failed",
9067 os_add_result.Raise("Could not add os for instance %s"
9068 " on node %s" % (instance, pnode_name))
9070 elif self.op.mode == constants.INSTANCE_IMPORT:
9071 feedback_fn("* running the instance OS import scripts...")
9075 for idx, image in enumerate(self.src_images):
9079 # FIXME: pass debug option from opcode to backend
9080 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
9081 constants.IEIO_FILE, (image, ),
9082 constants.IEIO_SCRIPT,
9083 (iobj.disks[idx], idx),
9085 transfers.append(dt)
9088 masterd.instance.TransferInstanceData(self, feedback_fn,
9089 self.op.src_node, pnode_name,
9090 self.pnode.secondary_ip,
9092 if not compat.all(import_result):
9093 self.LogWarning("Some disks for instance %s on node %s were not"
9094 " imported successfully" % (instance, pnode_name))
9096 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9097 feedback_fn("* preparing remote import...")
9098 # The source cluster will stop the instance before attempting to make a
9099 # connection. In some cases stopping an instance can take a long time,
9100 # hence the shutdown timeout is added to the connection timeout.
9101 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
9102 self.op.source_shutdown_timeout)
9103 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
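# Arithmetic example (added comment; the constant's value is assumed):
# with RIE_CONNECT_TIMEOUT = 60 seconds and a requested source shutdown
# timeout of 120 seconds, the remote end gets 180 seconds to establish the
# import connection.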
9105 assert iobj.primary_node == self.pnode.name
9107 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
9108 self.source_x509_ca,
9109 self._cds, timeouts)
9110 if not compat.all(disk_results):
9111 # TODO: Should the instance still be started, even if some disks
9112 # failed to import (valid for local imports, too)?
9113 self.LogWarning("Some disks for instance %s on node %s were not"
9114 " imported successfully" % (instance, pnode_name))
9116 # Run rename script on newly imported instance
9117 assert iobj.name == instance
9118 feedback_fn("Running rename script for %s" % instance)
9119 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
9120 self.source_instance_name,
9121 self.op.debug_level)
9123 self.LogWarning("Failed to run rename script for %s on node"
9124 " %s: %s" % (instance, pnode_name, result.fail_msg))
9127 # also checked in the prereq part
9128 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
9132 iobj.admin_up = True
9133 self.cfg.Update(iobj, feedback_fn)
9134 logging.info("Starting instance %s on node %s", instance, pnode_name)
9135 feedback_fn("* starting instance...")
9136 result = self.rpc.call_instance_start(pnode_name, iobj,
9138 result.Raise("Could not start instance")
9140 return list(iobj.all_nodes)
9143 class LUInstanceConsole(NoHooksLU):
9144 """Connect to an instance's console.
9146 This is somewhat special in that it returns the command line that
9147 you need to run on the master node in order to connect to the console.
9153 def ExpandNames(self):
9154 self._ExpandAndLockInstance()
9156 def CheckPrereq(self):
9157 """Check prerequisites.
9159 This checks that the instance is in the cluster.
9162 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9163 assert self.instance is not None, \
9164 "Cannot retrieve locked instance %s" % self.op.instance_name
9165 _CheckNodeOnline(self, self.instance.primary_node)
9167 def Exec(self, feedback_fn):
9168 """Connect to the console of an instance
9171 instance = self.instance
9172 node = instance.primary_node
9174 node_insts = self.rpc.call_instance_list([node],
9175 [instance.hypervisor])[node]
9176 node_insts.Raise("Can't get node information from %s" % node)
9178 if instance.name not in node_insts.payload:
9179 if instance.admin_up:
9180 state = constants.INSTST_ERRORDOWN
9182 state = constants.INSTST_ADMINDOWN
9183 raise errors.OpExecError("Instance %s is not running (state %s)" %
9184 (instance.name, state))
9186 logging.debug("Connecting to console of %s on %s", instance.name, node)
9188 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
9191 def _GetInstanceConsole(cluster, instance):
9192 """Returns console information for an instance.
9194 @type cluster: L{objects.Cluster}
9195 @type instance: L{objects.Instance}
9199 hyper = hypervisor.GetHypervisor(instance.hypervisor)
9200 # beparams and hvparams are passed separately, to avoid editing the
9201 # instance and then saving the defaults in the instance itself.
9202 hvparams = cluster.FillHV(instance)
9203 beparams = cluster.FillBE(instance)
9204 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
9206 assert console.instance == instance.name
9207 assert console.Validate()
9209 return console.ToDict()
9212 class LUInstanceReplaceDisks(LogicalUnit):
9213 """Replace the disks of an instance.
9216 HPATH = "mirrors-replace"
9217 HTYPE = constants.HTYPE_INSTANCE
9220 def CheckArguments(self):
9221 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
9224 def ExpandNames(self):
9225 self._ExpandAndLockInstance()
9227 assert locking.LEVEL_NODE not in self.needed_locks
9228 assert locking.LEVEL_NODEGROUP not in self.needed_locks
9230 assert self.op.iallocator is None or self.op.remote_node is None, \
9231 "Conflicting options"
9233 if self.op.remote_node is not None:
9234 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9236 # Warning: do not remove the locking of the new secondary here
9237 # unless DRBD8.AddChildren is changed to work in parallel;
9238 # currently it doesn't since parallel invocations of
9239 # FindUnusedMinor will conflict
9240 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
9241 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
9243 self.needed_locks[locking.LEVEL_NODE] = []
9244 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9246 if self.op.iallocator is not None:
9247 # iallocator will select a new node in the same group
9248 self.needed_locks[locking.LEVEL_NODEGROUP] = []
9250 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
9251 self.op.iallocator, self.op.remote_node,
9252 self.op.disks, False, self.op.early_release)
9254 self.tasklets = [self.replacer]
9256 def DeclareLocks(self, level):
9257 if level == locking.LEVEL_NODEGROUP:
9258 assert self.op.remote_node is None
9259 assert self.op.iallocator is not None
9260 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
9262 self.share_locks[locking.LEVEL_NODEGROUP] = 1
9263 self.needed_locks[locking.LEVEL_NODEGROUP] = \
9264 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9266 elif level == locking.LEVEL_NODE:
9267 if self.op.iallocator is not None:
9268 assert self.op.remote_node is None
9269 assert not self.needed_locks[locking.LEVEL_NODE]
9271 # Lock member nodes of all locked groups
9272 self.needed_locks[locking.LEVEL_NODE] = [node_name
9273 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
9274 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
9276 self._LockInstancesNodes()
9278 def BuildHooksEnv(self):
9281 This runs on the master, the primary and all the secondaries.
9284 instance = self.replacer.instance
9286 "MODE": self.op.mode,
9287 "NEW_SECONDARY": self.op.remote_node,
9288 "OLD_SECONDARY": instance.secondary_nodes[0],
9290 env.update(_BuildInstanceHookEnvByObject(self, instance))
9293 def BuildHooksNodes(self):
9294 """Build hooks nodes.
9297 instance = self.replacer.instance
9299 self.cfg.GetMasterNode(),
9300 instance.primary_node,
9302 if self.op.remote_node is not None:
9303 nl.append(self.op.remote_node)
9306 def CheckPrereq(self):
9307 """Check prerequisites.
9310 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
9311 self.op.iallocator is None)
9313 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
9315 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
9317 return LogicalUnit.CheckPrereq(self)
9320 class TLReplaceDisks(Tasklet):
9321 """Replaces disks for an instance.
9323 Note: Locking is not within the scope of this class.
9326 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9327 disks, delay_iallocator, early_release):
9328 """Initializes this class.
9331 Tasklet.__init__(self, lu)
9334 self.instance_name = instance_name
9336 self.iallocator_name = iallocator_name
9337 self.remote_node = remote_node
9339 self.delay_iallocator = delay_iallocator
9340 self.early_release = early_release
9343 self.instance = None
9344 self.new_node = None
9345 self.target_node = None
9346 self.other_node = None
9347 self.remote_node_info = None
9348 self.node_secondary_ip = None
9351 def CheckArguments(mode, remote_node, iallocator):
9352 """Helper function for users of this class.
9355 # check for valid parameter combination
9356 if mode == constants.REPLACE_DISK_CHG:
9357 if remote_node is None and iallocator is None:
9358 raise errors.OpPrereqError("When changing the secondary either an"
9359 " iallocator script must be used or the"
9360 " new node given", errors.ECODE_INVAL)
9362 if remote_node is not None and iallocator is not None:
9363 raise errors.OpPrereqError("Give either the iallocator or the new"
9364 " secondary, not both", errors.ECODE_INVAL)
9366 elif remote_node is not None or iallocator is not None:
9367 # Not replacing the secondary
9368 raise errors.OpPrereqError("The iallocator and new node options can"
9369 " only be used when changing the"
9370 " secondary node", errors.ECODE_INVAL)
9373 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9374 """Compute a new secondary node using an IAllocator.
9377 ial = IAllocator(lu.cfg, lu.rpc,
9378 mode=constants.IALLOCATOR_MODE_RELOC,
9380 relocate_from=list(relocate_from))
9382 ial.Run(iallocator_name)
9385 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9386 " %s" % (iallocator_name, ial.info),
9389 if len(ial.result) != ial.required_nodes:
9390 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9391 " of nodes (%s), required %s" %
9393 len(ial.result), ial.required_nodes),
9396 remote_node_name = ial.result[0]
9398 lu.LogInfo("Selected new secondary for instance '%s': %s",
9399 instance_name, remote_node_name)
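# Note: ial.result is the list of node names chosen by the allocator; the
# length check above guarantees it matches ial.required_nodes, and only the
# first entry is used as the new secondary.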
9401 return remote_node_name
9403 def _FindFaultyDisks(self, node_name):
9404 """Wrapper for L{_FindFaultyInstanceDisks}.
9407 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
9410 def _CheckDisksActivated(self, instance):
9411 """Checks if the instance disks are activated.
9413 @param instance: The instance to check disks
9414 @return: True if they are activated, False otherwise
9417 nodes = instance.all_nodes
9419 for idx, dev in enumerate(instance.disks):
9421 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
9422 self.cfg.SetDiskID(dev, node)
9424 result = self.rpc.call_blockdev_find(node, dev)
9428 elif result.fail_msg or not result.payload:
9433 def CheckPrereq(self):
9434 """Check prerequisites.
9436 This checks that the instance is in the cluster.
9439 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9440 assert instance is not None, \
9441 "Cannot retrieve locked instance %s" % self.instance_name
9443 if instance.disk_template != constants.DT_DRBD8:
9444 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9445 " instances", errors.ECODE_INVAL)
9447 if len(instance.secondary_nodes) != 1:
9448 raise errors.OpPrereqError("The instance has a strange layout,"
9449 " expected one secondary but found %d" %
9450 len(instance.secondary_nodes),
9453 if not self.delay_iallocator:
9454 self._CheckPrereq2()
9456 def _CheckPrereq2(self):
9457 """Check prerequisites, second part.
9459 This function should always be part of CheckPrereq. It was separated and is
9460 now called from Exec because during node evacuation iallocator was only
9461 called with an unmodified cluster model, not taking planned changes into
account.

"""
9465 instance = self.instance
9466 secondary_node = instance.secondary_nodes[0]
9468 if self.iallocator_name is None:
9469 remote_node = self.remote_node
9471 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
9472 instance.name, instance.secondary_nodes)
9474 if remote_node is None:
9475 self.remote_node_info = None
9477 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
9478 "Remote node '%s' is not locked" % remote_node
9480 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
9481 assert self.remote_node_info is not None, \
9482 "Cannot retrieve locked node %s" % remote_node
9484 if remote_node == self.instance.primary_node:
9485 raise errors.OpPrereqError("The specified node is the primary node of"
9486 " the instance", errors.ECODE_INVAL)
9488 if remote_node == secondary_node:
9489 raise errors.OpPrereqError("The specified node is already the"
9490 " secondary node of the instance",
9493 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
9494 constants.REPLACE_DISK_CHG):
9495 raise errors.OpPrereqError("Cannot specify disks to be replaced",
9498 if self.mode == constants.REPLACE_DISK_AUTO:
9499 if not self._CheckDisksActivated(instance):
9500 raise errors.OpPrereqError("Please run activate-disks on instance %s"
9501 " first" % self.instance_name,
9503 faulty_primary = self._FindFaultyDisks(instance.primary_node)
9504 faulty_secondary = self._FindFaultyDisks(secondary_node)
9506 if faulty_primary and faulty_secondary:
9507 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
9508 " one node and can not be repaired"
9509 " automatically" % self.instance_name,
9513 self.disks = faulty_primary
9514 self.target_node = instance.primary_node
9515 self.other_node = secondary_node
9516 check_nodes = [self.target_node, self.other_node]
9517 elif faulty_secondary:
9518 self.disks = faulty_secondary
9519 self.target_node = secondary_node
9520 self.other_node = instance.primary_node
9521 check_nodes = [self.target_node, self.other_node]
9527 # Non-automatic modes
9528 if self.mode == constants.REPLACE_DISK_PRI:
9529 self.target_node = instance.primary_node
9530 self.other_node = secondary_node
9531 check_nodes = [self.target_node, self.other_node]
9533 elif self.mode == constants.REPLACE_DISK_SEC:
9534 self.target_node = secondary_node
9535 self.other_node = instance.primary_node
9536 check_nodes = [self.target_node, self.other_node]
9538 elif self.mode == constants.REPLACE_DISK_CHG:
9539 self.new_node = remote_node
9540 self.other_node = instance.primary_node
9541 self.target_node = secondary_node
9542 check_nodes = [self.new_node, self.other_node]
9544 _CheckNodeNotDrained(self.lu, remote_node)
9545 _CheckNodeVmCapable(self.lu, remote_node)
9547 old_node_info = self.cfg.GetNodeInfo(secondary_node)
9548 assert old_node_info is not None
9549 if old_node_info.offline and not self.early_release:
9550 # doesn't make sense to delay the release
9551 self.early_release = True
9552 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
9553 " early-release mode", secondary_node)
9556 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
9559 # If not specified all disks should be replaced
9561 self.disks = range(len(self.instance.disks))
9563 for node in check_nodes:
9564 _CheckNodeOnline(self.lu, node)
9566 touched_nodes = frozenset(node_name for node_name in [self.new_node,
9569 if node_name is not None)
9571 # Release unneeded node locks
9572 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
9574 # Release any owned node group
9575 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
9576 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
9578 # Check whether disks are valid
9579 for disk_idx in self.disks:
9580 instance.FindDisk(disk_idx)
9582 # Get secondary node IP addresses
9583 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
9584 in self.cfg.GetMultiNodeInfo(touched_nodes))
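# node_secondary_ip maps each touched node name to that node's secondary
# (replication) IP address; it is used later by the drbd_disconnect_net and
# drbd_attach_net RPC calls when the secondary is changed.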
9586 def Exec(self, feedback_fn):
9587 """Execute disk replacement.
9589 This dispatches the disk replacement to the appropriate handler.
9592 if self.delay_iallocator:
9593 self._CheckPrereq2()
9596 # Verify owned locks before starting operation
9597 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9598 assert set(owned_nodes) == set(self.node_secondary_ip), \
9599 ("Incorrect node locks, owning %s, expected %s" %
9600 (owned_nodes, self.node_secondary_ip.keys()))
9602 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
9603 assert list(owned_instances) == [self.instance_name], \
9604 "Instance '%s' not locked" % self.instance_name
9606 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
9607 "Should not own any node group lock at this point"
9610 feedback_fn("No disks need replacement")
9613 feedback_fn("Replacing disk(s) %s for %s" %
9614 (utils.CommaJoin(self.disks), self.instance.name))
9616 activate_disks = (not self.instance.admin_up)
9618 # Activate the instance disks if we're replacing them on a down instance
if activate_disks:
9620 _StartInstanceDisks(self.lu, self.instance, True)
9623 # Should we replace the secondary node?
9624 if self.new_node is not None:
9625 fn = self._ExecDrbd8Secondary
9627 fn = self._ExecDrbd8DiskOnly
9629 result = fn(feedback_fn)
9631 # Deactivate the instance disks if we're replacing them on a
# down instance
if activate_disks:
9634 _SafeShutdownInstanceDisks(self.lu, self.instance)
9637 # Verify owned locks
9638 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9639 nodes = frozenset(self.node_secondary_ip)
9640 assert ((self.early_release and not owned_nodes) or
9641 (not self.early_release and not (set(owned_nodes) - nodes))), \
9642 ("Not owning the correct locks, early_release=%s, owned=%r,"
9643 " nodes=%r" % (self.early_release, owned_nodes, nodes))
9647 def _CheckVolumeGroup(self, nodes):
9648 self.lu.LogInfo("Checking volume groups")
9650 vgname = self.cfg.GetVGName()
9652 # Make sure volume group exists on all involved nodes
9653 results = self.rpc.call_vg_list(nodes)
9655 raise errors.OpExecError("Can't list volume groups on the nodes")
9659 res.Raise("Error checking node %s" % node)
9660 if vgname not in res.payload:
9661 raise errors.OpExecError("Volume group '%s' not found on node %s" %
9664 def _CheckDisksExistence(self, nodes):
9665 # Check disk existence
9666 for idx, dev in enumerate(self.instance.disks):
9667 if idx not in self.disks:
9671 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
9672 self.cfg.SetDiskID(dev, node)
9674 result = self.rpc.call_blockdev_find(node, dev)
9676 msg = result.fail_msg
9677 if msg or not result.payload:
9679 msg = "disk not found"
9680 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
9683 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
9684 for idx, dev in enumerate(self.instance.disks):
9685 if idx not in self.disks:
9688 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
9691 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
9693 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
9694 " replace disks for instance %s" %
9695 (node_name, self.instance.name))
9697 def _CreateNewStorage(self, node_name):
9698 """Create new storage on the primary or secondary node.
9700 This is only used for same-node replaces, not for changing the
9701 secondary node, hence we don't want to modify the existing disk.
9706 for idx, dev in enumerate(self.instance.disks):
9707 if idx not in self.disks:
9710 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
9712 self.cfg.SetDiskID(dev, node_name)
9714 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
9715 names = _GenerateUniqueNames(self.lu, lv_names)
9717 vg_data = dev.children[0].logical_id[0]
9718 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
9719 logical_id=(vg_data, names[0]))
9720 vg_meta = dev.children[1].logical_id[0]
9721 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
9722 logical_id=(vg_meta, names[1]))
9724 new_lvs = [lv_data, lv_meta]
9725 old_lvs = [child.Copy() for child in dev.children]
9726 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
9728 # we pass force_create=True to force the LVM creation
9729 for new_lv in new_lvs:
9730 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
9731 _GetInstanceInfoText(self.instance), False)

return iv_names
9735 def _CheckDevices(self, node_name, iv_names):
9736 for name, (dev, _, _) in iv_names.iteritems():
9737 self.cfg.SetDiskID(dev, node_name)
9739 result = self.rpc.call_blockdev_find(node_name, dev)
9741 msg = result.fail_msg
9742 if msg or not result.payload:
9744 msg = "disk not found"
9745 raise errors.OpExecError("Can't find DRBD device %s: %s" %
9748 if result.payload.is_degraded:
9749 raise errors.OpExecError("DRBD device %s is degraded!" % name)
9751 def _RemoveOldStorage(self, node_name, iv_names):
9752 for name, (_, old_lvs, _) in iv_names.iteritems():
9753 self.lu.LogInfo("Remove logical volumes for %s" % name)
9756 self.cfg.SetDiskID(lv, node_name)
9758 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
9760 self.lu.LogWarning("Can't remove old LV: %s" % msg,
9761 hint="remove unused LVs manually")
9763 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
9764 """Replace a disk on the primary or secondary for DRBD 8.
9766 The algorithm for replace is quite complicated:
9768 1. for each disk to be replaced:
9770 1. create new LVs on the target node with unique names
9771 1. detach old LVs from the drbd device
9772 1. rename old LVs to name_replaced.<time_t>
9773 1. rename new LVs to old LVs
9774 1. attach the new LVs (with the old names now) to the drbd device
9776 1. wait for sync across all devices
9778 1. for each modified disk:
9780 1. remove old LVs (which have the name name_replaced.<time_t>)
9782 Failures are not very well handled.

"""
steps_total = 6
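# Outline of the rename dance performed below for each replaced disk:
#   1. new data/meta LVs are created under unique temporary names
#   2. the old LVs are detached and renamed to <old_name>_replaced-<time_t>
#   3. the new LVs are renamed to the now-free old names and re-attached
#   4. once the device has resynced, the "_replaced" LVs are removed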
9787 # Step: check device activation
9788 self.lu.LogStep(1, steps_total, "Check device existence")
9789 self._CheckDisksExistence([self.other_node, self.target_node])
9790 self._CheckVolumeGroup([self.target_node, self.other_node])
9792 # Step: check other node consistency
9793 self.lu.LogStep(2, steps_total, "Check peer consistency")
9794 self._CheckDisksConsistency(self.other_node,
9795 self.other_node == self.instance.primary_node,
9798 # Step: create new storage
9799 self.lu.LogStep(3, steps_total, "Allocate new storage")
9800 iv_names = self._CreateNewStorage(self.target_node)
9802 # Step: for each lv, detach+rename*2+attach
9803 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9804 for dev, old_lvs, new_lvs in iv_names.itervalues():
9805 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
9807 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
9809 result.Raise("Can't detach drbd from local storage on node"
9810 " %s for device %s" % (self.target_node, dev.iv_name))
9812 #cfg.Update(instance)
9814 # ok, we created the new LVs, so now we know we have the needed
9815 # storage; as such, we proceed on the target node to rename
9816 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
9817 # using the assumption that logical_id == physical_id (which in
9818 # turn is the unique_id on that node)
9820 # FIXME(iustin): use a better name for the replaced LVs
9821 temp_suffix = int(time.time())
9822 ren_fn = lambda d, suff: (d.physical_id[0],
9823 d.physical_id[1] + "_replaced-%s" % suff)
9825 # Build the rename list based on what LVs exist on the node
9826 rename_old_to_new = []
9827 for to_ren in old_lvs:
9828 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
9829 if not result.fail_msg and result.payload:
9831 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
9833 self.lu.LogInfo("Renaming the old LVs on the target node")
9834 result = self.rpc.call_blockdev_rename(self.target_node,
9836 result.Raise("Can't rename old LVs on node %s" % self.target_node)
9838 # Now we rename the new LVs to the old LVs
9839 self.lu.LogInfo("Renaming the new LVs on the target node")
9840 rename_new_to_old = [(new, old.physical_id)
9841 for old, new in zip(old_lvs, new_lvs)]
9842 result = self.rpc.call_blockdev_rename(self.target_node,
9844 result.Raise("Can't rename new LVs on node %s" % self.target_node)
9846 # Intermediate steps of in memory modifications
9847 for old, new in zip(old_lvs, new_lvs):
9848 new.logical_id = old.logical_id
9849 self.cfg.SetDiskID(new, self.target_node)
9851 # We need to modify old_lvs so that removal later removes the
9852 # right LVs, not the newly added ones; note that old_lvs is a
9854 for disk in old_lvs:
9855 disk.logical_id = ren_fn(disk, temp_suffix)
9856 self.cfg.SetDiskID(disk, self.target_node)
9858 # Now that the new lvs have the old name, we can add them to the device
9859 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
9860 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
9862 msg = result.fail_msg
9864 for new_lv in new_lvs:
9865 msg2 = self.rpc.call_blockdev_remove(self.target_node,
9868 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
9869 hint=("cleanup manually the unused logical"
9871 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
9874 if self.early_release:
9875 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9877 self._RemoveOldStorage(self.target_node, iv_names)
9878 # WARNING: we release both node locks here, do not do other RPCs
9879 # than WaitForSync to the primary node
9880 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9881 names=[self.target_node, self.other_node])
9884 # This can fail as the old devices are degraded and _WaitForSync
9885 # does a combined result over all disks, so we don't check its return value
9886 self.lu.LogStep(cstep, steps_total, "Sync devices")
9888 _WaitForSync(self.lu, self.instance)
9890 # Check all devices manually
9891 self._CheckDevices(self.instance.primary_node, iv_names)
9893 # Step: remove old storage
9894 if not self.early_release:
9895 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9897 self._RemoveOldStorage(self.target_node, iv_names)
9899 def _ExecDrbd8Secondary(self, feedback_fn):
9900 """Replace the secondary node for DRBD 8.
9902 The algorithm for replace is quite complicated:
9903 - for all disks of the instance:
9904 - create new LVs on the new node with same names
9905 - shutdown the drbd device on the old secondary
9906 - disconnect the drbd network on the primary
9907 - create the drbd device on the new secondary
9908 - network attach the drbd on the primary, using an artifice:
9909 the drbd code for Attach() will connect to the network if it
9910 finds a device which is connected to the good local disks but
not network enabled
9912 - wait for sync across all devices
9913 - remove all disks from the old secondary
9915 Failures are not very well handled.

"""
steps_total = 6
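# The "artifice" mentioned above works roughly as follows: the DRBD device is
# first created on the new secondary with port=None (no networking), the
# primary's disks are switched to standalone via drbd_disconnect_net, the
# instance configuration is updated to point at the new secondary, and finally
# drbd_attach_net re-enables networking so the primary connects to the new
# node and resyncs.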
9920 pnode = self.instance.primary_node
9922 # Step: check device activation
9923 self.lu.LogStep(1, steps_total, "Check device existence")
9924 self._CheckDisksExistence([self.instance.primary_node])
9925 self._CheckVolumeGroup([self.instance.primary_node])
9927 # Step: check other node consistency
9928 self.lu.LogStep(2, steps_total, "Check peer consistency")
9929 self._CheckDisksConsistency(self.instance.primary_node, True, True)
9931 # Step: create new storage
9932 self.lu.LogStep(3, steps_total, "Allocate new storage")
9933 for idx, dev in enumerate(self.instance.disks):
9934 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
9935 (self.new_node, idx))
9936 # we pass force_create=True to force LVM creation
9937 for new_lv in dev.children:
9938 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
9939 _GetInstanceInfoText(self.instance), False)
9941 # Step 4: drbd minors and drbd setup changes
9942 # after this, we must manually remove the drbd minors on both the
9943 # error and the success paths
9944 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9945 minors = self.cfg.AllocateDRBDMinor([self.new_node
9946 for dev in self.instance.disks],
9948 logging.debug("Allocated minors %r", minors)
9951 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
9952 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
9953 (self.new_node, idx))
9954 # create new devices on new_node; note that we create two IDs:
9955 # one without port, so the drbd will be activated without
9956 # networking information on the new node at this stage, and one
9957 # with network, for the latter activation in step 4
9958 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
9959 if self.instance.primary_node == o_node1:
9962 assert self.instance.primary_node == o_node2, "Three-node instance?"
9965 new_alone_id = (self.instance.primary_node, self.new_node, None,
9966 p_minor, new_minor, o_secret)
9967 new_net_id = (self.instance.primary_node, self.new_node, o_port,
9968 p_minor, new_minor, o_secret)
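# A DRBD8 logical_id is (node_A, node_B, port, minor_A, minor_B, secret);
# new_alone_id uses port=None so the device can be brought up on the new node
# without any networking yet, while new_net_id carries the real port for the
# later network attach.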
9970 iv_names[idx] = (dev, dev.children, new_net_id)
9971 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
9973 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
9974 logical_id=new_alone_id,
9975 children=dev.children,
9978 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
9979 _GetInstanceInfoText(self.instance), False)
9980 except errors.GenericError:
9981 self.cfg.ReleaseDRBDMinors(self.instance.name)
9984 # We have new devices, shutdown the drbd on the old secondary
9985 for idx, dev in enumerate(self.instance.disks):
9986 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
9987 self.cfg.SetDiskID(dev, self.target_node)
9988 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
9990 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
9991 "node: %s" % (idx, msg),
9992 hint=("Please cleanup this device manually as"
9993 " soon as possible"))
9995 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
9996 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
9997 self.instance.disks)[pnode]
9999 msg = result.fail_msg
10001 # detaches didn't succeed (unlikely)
10002 self.cfg.ReleaseDRBDMinors(self.instance.name)
10003 raise errors.OpExecError("Can't detach the disks from the network on"
10004 " old node: %s" % (msg,))
10006 # if we managed to detach at least one, we update all the disks of
10007 # the instance to point to the new secondary
10008 self.lu.LogInfo("Updating instance configuration")
10009 for dev, _, new_logical_id in iv_names.itervalues():
10010 dev.logical_id = new_logical_id
10011 self.cfg.SetDiskID(dev, self.instance.primary_node)
10013 self.cfg.Update(self.instance, feedback_fn)
10015 # and now perform the drbd attach
10016 self.lu.LogInfo("Attaching primary drbds to new secondary"
10017 " (standalone => connected)")
10018 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
10020 self.node_secondary_ip,
10021 self.instance.disks,
10022 self.instance.name,
10024 for to_node, to_result in result.items():
10025 msg = to_result.fail_msg
10027 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
10029 hint=("please do a gnt-instance info to see the"
10030 " status of disks"))
10032 if self.early_release:
10033 self.lu.LogStep(cstep, steps_total, "Removing old storage")
10035 self._RemoveOldStorage(self.target_node, iv_names)
10036 # WARNING: we release all node locks here, do not do other RPCs
10037 # than WaitForSync to the primary node
10038 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
10039 names=[self.instance.primary_node,
10044 # This can fail as the old devices are degraded and _WaitForSync
10045 # does a combined result over all disks, so we don't check its return value
10046 self.lu.LogStep(cstep, steps_total, "Sync devices")
10048 _WaitForSync(self.lu, self.instance)
10050 # Check all devices manually
10051 self._CheckDevices(self.instance.primary_node, iv_names)
10053 # Step: remove old storage
10054 if not self.early_release:
10055 self.lu.LogStep(cstep, steps_total, "Removing old storage")
10056 self._RemoveOldStorage(self.target_node, iv_names)
10059 class LURepairNodeStorage(NoHooksLU):
10060 """Repairs the volume group on a node.
10065 def CheckArguments(self):
10066 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10068 storage_type = self.op.storage_type
10070 if (constants.SO_FIX_CONSISTENCY not in
10071 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
10072 raise errors.OpPrereqError("Storage units of type '%s' can not be"
10073 " repaired" % storage_type,
10074 errors.ECODE_INVAL)
10076 def ExpandNames(self):
10077 self.needed_locks = {
10078 locking.LEVEL_NODE: [self.op.node_name],
10081 def _CheckFaultyDisks(self, instance, node_name):
10082 """Ensure faulty disks abort the opcode or at least warn."""
10084 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
10086 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
10087 " node '%s'" % (instance.name, node_name),
10088 errors.ECODE_STATE)
10089 except errors.OpPrereqError, err:
10090 if self.op.ignore_consistency:
10091 self.proc.LogWarning(str(err.args[0]))
10095 def CheckPrereq(self):
10096 """Check prerequisites.
10099 # Check whether any instance on this node has faulty disks
10100 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
10101 if not inst.admin_up:
continue
10103 check_nodes = set(inst.all_nodes)
10104 check_nodes.discard(self.op.node_name)
10105 for inst_node_name in check_nodes:
10106 self._CheckFaultyDisks(inst, inst_node_name)
10108 def Exec(self, feedback_fn):
10109 feedback_fn("Repairing storage unit '%s' on %s ..." %
10110 (self.op.name, self.op.node_name))
10112 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
10113 result = self.rpc.call_storage_execute(self.op.node_name,
10114 self.op.storage_type, st_args,
10116 constants.SO_FIX_CONSISTENCY)
10117 result.Raise("Failed to repair storage unit '%s' on %s" %
10118 (self.op.name, self.op.node_name))
10121 class LUNodeEvacuate(NoHooksLU):
10122 """Evacuates instances off a list of nodes.
10127 def CheckArguments(self):
10128 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10130 def ExpandNames(self):
10131 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10133 if self.op.remote_node is not None:
10134 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10135 assert self.op.remote_node
10137 if self.op.remote_node == self.op.node_name:
10138 raise errors.OpPrereqError("Can not use evacuated node as a new"
10139 " secondary node", errors.ECODE_INVAL)
10141 if self.op.mode != constants.IALLOCATOR_NEVAC_SEC:
10142 raise errors.OpPrereqError("Without the use of an iallocator only"
10143 " secondary instances can be evacuated",
10144 errors.ECODE_INVAL)
10147 self.share_locks = _ShareAll()
10148 self.needed_locks = {
10149 locking.LEVEL_INSTANCE: [],
10150 locking.LEVEL_NODEGROUP: [],
10151 locking.LEVEL_NODE: [],
10154 if self.op.remote_node is None:
10155 # Iallocator will choose any node(s) in the same group
10156 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
10158 group_nodes = frozenset([self.op.remote_node])
10160 # Determine nodes to be locked
10161 self.lock_nodes = set([self.op.node_name]) | group_nodes
10163 def _DetermineInstances(self):
10164 """Builds list of instances to operate on.
10167 assert self.op.mode in constants.IALLOCATOR_NEVAC_MODES
10169 if self.op.mode == constants.IALLOCATOR_NEVAC_PRI:
10170 # Primary instances only
10171 inst_fn = _GetNodePrimaryInstances
10172 assert self.op.remote_node is None, \
10173 "Evacuating primary instances requires iallocator"
10174 elif self.op.mode == constants.IALLOCATOR_NEVAC_SEC:
10175 # Secondary instances only
10176 inst_fn = _GetNodeSecondaryInstances
10179 assert self.op.mode == constants.IALLOCATOR_NEVAC_ALL
10180 inst_fn = _GetNodeInstances
10182 return inst_fn(self.cfg, self.op.node_name)
10184 def DeclareLocks(self, level):
10185 if level == locking.LEVEL_INSTANCE:
10186 # Lock instances optimistically, needs verification once node and group
10187 # locks have been acquired
10188 self.needed_locks[locking.LEVEL_INSTANCE] = \
10189 set(i.name for i in self._DetermineInstances())
10191 elif level == locking.LEVEL_NODEGROUP:
10192 # Lock node groups optimistically, needs verification once nodes have
10194 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10195 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
10197 elif level == locking.LEVEL_NODE:
10198 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
10200 def CheckPrereq(self):
10202 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
10203 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
10204 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10206 assert owned_nodes == self.lock_nodes
10208 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
10209 if owned_groups != wanted_groups:
10210 raise errors.OpExecError("Node groups changed since locks were acquired,"
10211 " current groups are '%s', used to be '%s'" %
10212 (utils.CommaJoin(wanted_groups),
10213 utils.CommaJoin(owned_groups)))
10215 # Determine affected instances
10216 self.instances = self._DetermineInstances()
10217 self.instance_names = [i.name for i in self.instances]
10219 if set(self.instance_names) != owned_instances:
10220 raise errors.OpExecError("Instances on node '%s' changed since locks"
10221 " were acquired, current instances are '%s',"
10222 " used to be '%s'" %
10223 (self.op.node_name,
10224 utils.CommaJoin(self.instance_names),
10225 utils.CommaJoin(owned_instances)))
10227 if self.instance_names:
10228 self.LogInfo("Evacuating instances from node '%s': %s",
10230 utils.CommaJoin(utils.NiceSort(self.instance_names)))
10232 self.LogInfo("No instances to evacuate from node '%s'",
10235 if self.op.remote_node is not None:
10236 for i in self.instances:
10237 if i.primary_node == self.op.remote_node:
10238 raise errors.OpPrereqError("Node %s is the primary node of"
10239 " instance %s, cannot use it as"
10241 (self.op.remote_node, i.name),
10242 errors.ECODE_INVAL)
10244 def Exec(self, feedback_fn):
10245 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10247 if not self.instance_names:
10248 # No instances to evacuate
10251 elif self.op.iallocator is not None:
10252 # TODO: Implement relocation to other group
10253 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10254 evac_mode=self.op.mode,
10255 instances=list(self.instance_names))
10257 ial.Run(self.op.iallocator)
10259 if not ial.success:
10260 raise errors.OpPrereqError("Can't compute node evacuation using"
10261 " iallocator '%s': %s" %
10262 (self.op.iallocator, ial.info),
10263 errors.ECODE_NORES)
10265 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10267 elif self.op.remote_node is not None:
10268 assert self.op.mode == constants.IALLOCATOR_NEVAC_SEC
10270 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
10271 remote_node=self.op.remote_node,
10273 mode=constants.REPLACE_DISK_CHG,
10274 early_release=self.op.early_release)]
10275 for instance_name in self.instance_names
10279 raise errors.ProgrammerError("No iallocator or remote node")
10281 return ResultWithJobs(jobs)
10284 def _SetOpEarlyRelease(early_release, op):
10285 """Sets C{early_release} flag on opcodes if available.
10289 op.early_release = early_release
10290 except AttributeError:
10291 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
10296 def _NodeEvacDest(use_nodes, group, nodes):
10297 """Returns group or nodes depending on caller's choice.
10301 return utils.CommaJoin(nodes)
10306 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
10307 """Unpacks the result of change-group and node-evacuate iallocator requests.
10309 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
10310 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
10312 @type lu: L{LogicalUnit}
10313 @param lu: Logical unit instance
10314 @type alloc_result: tuple/list
10315 @param alloc_result: Result from iallocator
10316 @type early_release: bool
10317 @param early_release: Whether to release locks early if possible
10318 @type use_nodes: bool
10319 @param use_nodes: Whether to display node names instead of groups
"""
10322 (moved, failed, jobs) = alloc_result

if failed:
10325 lu.LogWarning("Unable to evacuate instances %s",
10326 utils.CommaJoin("%s (%s)" % (name, reason)
10327 for (name, reason) in failed))

if moved:
10330 lu.LogInfo("Instances to be moved: %s",
10331 utils.CommaJoin("%s (to %s)" %
10332 (name, _NodeEvacDest(use_nodes, group, nodes))
10333 for (name, group, nodes) in moved))
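# The iallocator returns each job as a list of serialized opcodes; they are
# re-instantiated via opcodes.OpCode.LoadOpCode below and the caller's
# early_release preference is applied to every opcode that supports it.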
10335 return [map(compat.partial(_SetOpEarlyRelease, early_release),
10336 map(opcodes.OpCode.LoadOpCode, ops))
for ops in jobs]
10340 class LUInstanceGrowDisk(LogicalUnit):
10341 """Grow a disk of an instance.
10344 HPATH = "disk-grow"
10345 HTYPE = constants.HTYPE_INSTANCE
10348 def ExpandNames(self):
10349 self._ExpandAndLockInstance()
10350 self.needed_locks[locking.LEVEL_NODE] = []
10351 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10353 def DeclareLocks(self, level):
10354 if level == locking.LEVEL_NODE:
10355 self._LockInstancesNodes()
10357 def BuildHooksEnv(self):
10358 """Build hooks env.
10360 This runs on the master, the primary and all the secondaries.

"""
env = {
10364 "DISK": self.op.disk,
10365 "AMOUNT": self.op.amount,
}
10367 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
return env
10370 def BuildHooksNodes(self):
10371 """Build hooks nodes.
10374 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10377 def CheckPrereq(self):
10378 """Check prerequisites.
10380 This checks that the instance is in the cluster.
10383 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10384 assert instance is not None, \
10385 "Cannot retrieve locked instance %s" % self.op.instance_name
10386 nodenames = list(instance.all_nodes)
10387 for node in nodenames:
10388 _CheckNodeOnline(self, node)
10390 self.instance = instance
10392 if instance.disk_template not in constants.DTS_GROWABLE:
10393 raise errors.OpPrereqError("Instance's disk layout does not support"
10394 " growing", errors.ECODE_INVAL)
10396 self.disk = instance.FindDisk(self.op.disk)
10398 if instance.disk_template not in (constants.DT_FILE,
10399 constants.DT_SHARED_FILE):
10400 # TODO: check the free disk space for file, when that feature will be
10402 _CheckNodesFreeDiskPerVG(self, nodenames,
10403 self.disk.ComputeGrowth(self.op.amount))
10405 def Exec(self, feedback_fn):
10406 """Execute disk grow.
10409 instance = self.instance
10412 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
10414 raise errors.OpExecError("Cannot activate block device to grow")
10416 # First run all grow ops in dry-run mode
10417 for node in instance.all_nodes:
10418 self.cfg.SetDiskID(disk, node)
10419 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
10420 result.Raise("Grow request failed to node %s" % node)
10422 # We know that (as far as we can test) operations across different
10423 # nodes will succeed, time to run it for real
10424 for node in instance.all_nodes:
10425 self.cfg.SetDiskID(disk, node)
10426 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
10427 result.Raise("Grow request failed to node %s" % node)
10429 # TODO: Rewrite code to work properly
10430 # DRBD goes into sync mode for a short amount of time after executing the
10431 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
10432 # calling "resize" in sync mode fails. Sleeping for a short amount of
10433 # time is a work-around.
time.sleep(5)
10436 disk.RecordGrow(self.op.amount)
10437 self.cfg.Update(instance, feedback_fn)
10438 if self.op.wait_for_sync:
10439 disk_abort = not _WaitForSync(self, instance, disks=[disk])
10441 self.proc.LogWarning("Disk sync-ing has not returned a good"
10442 " status; please check the instance")
10443 if not instance.admin_up:
10444 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
10445 elif not instance.admin_up:
10446 self.proc.LogWarning("Not shutting down the disk even if the instance is"
10447 " not supposed to be running because no wait for"
10448 " sync mode was requested")
10451 class LUInstanceQueryData(NoHooksLU):
10452 """Query runtime instance data.
10457 def ExpandNames(self):
10458 self.needed_locks = {}
10460 # Use locking if requested or when non-static information is wanted
10461 if not (self.op.static or self.op.use_locking):
10462 self.LogWarning("Non-static data requested, locks need to be acquired")
10463 self.op.use_locking = True
10465 if self.op.instances or not self.op.use_locking:
10466 # Expand instance names right here
10467 self.wanted_names = _GetWantedInstances(self, self.op.instances)
10469 # Will use acquired locks
10470 self.wanted_names = None
10472 if self.op.use_locking:
10473 self.share_locks = _ShareAll()
10475 if self.wanted_names is None:
10476 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
10478 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
10480 self.needed_locks[locking.LEVEL_NODE] = []
10481 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10483 def DeclareLocks(self, level):
10484 if self.op.use_locking and level == locking.LEVEL_NODE:
10485 self._LockInstancesNodes()
10487 def CheckPrereq(self):
10488 """Check prerequisites.
10490 This only checks the optional instance list against the existing names.
10493 if self.wanted_names is None:
10494 assert self.op.use_locking, "Locking was not used"
10495 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
10497 self.wanted_instances = \
10498 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
10500 def _ComputeBlockdevStatus(self, node, instance_name, dev):
10501 """Returns the status of a block device
10504 if self.op.static or not node:
10507 self.cfg.SetDiskID(dev, node)
10509 result = self.rpc.call_blockdev_find(node, dev)
10513 result.Raise("Can't compute disk status for %s" % instance_name)
10515 status = result.payload
10519 return (status.dev_path, status.major, status.minor,
10520 status.sync_percent, status.estimated_time,
10521 status.is_degraded, status.ldisk_status)
10523 def _ComputeDiskStatus(self, instance, snode, dev):
10524 """Compute block device status.
10527 if dev.dev_type in constants.LDS_DRBD:
10528 # we change the snode then (otherwise we use the one passed in)
10529 if dev.logical_id[0] == instance.primary_node:
10530 snode = dev.logical_id[1]
10532 snode = dev.logical_id[0]
10534 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
10535 instance.name, dev)
10536 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
10539 dev_children = map(compat.partial(self._ComputeDiskStatus,
10546 "iv_name": dev.iv_name,
10547 "dev_type": dev.dev_type,
10548 "logical_id": dev.logical_id,
10549 "physical_id": dev.physical_id,
10550 "pstatus": dev_pstatus,
10551 "sstatus": dev_sstatus,
10552 "children": dev_children,
10557 def Exec(self, feedback_fn):
10558 """Gather and return data"""
10561 cluster = self.cfg.GetClusterInfo()
10563 pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
10564 for i in self.wanted_instances)
10565 for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
10566 if self.op.static or pnode.offline:
10567 remote_state = None
10569 self.LogWarning("Primary node %s is marked offline, returning static"
10570 " information only for instance %s" %
10571 (pnode.name, instance.name))
10573 remote_info = self.rpc.call_instance_info(instance.primary_node,
10575 instance.hypervisor)
10576 remote_info.Raise("Error checking node %s" % instance.primary_node)
10577 remote_info = remote_info.payload
10578 if remote_info and "state" in remote_info:
10579 remote_state = "up"
10581 remote_state = "down"
10583 if instance.admin_up:
10584 config_state = "up"
10586 config_state = "down"
10588 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
10591 result[instance.name] = {
10592 "name": instance.name,
10593 "config_state": config_state,
10594 "run_state": remote_state,
10595 "pnode": instance.primary_node,
10596 "snodes": instance.secondary_nodes,
10598 # this happens to be the same format used for hooks
10599 "nics": _NICListToTuple(self, instance.nics),
10600 "disk_template": instance.disk_template,
10602 "hypervisor": instance.hypervisor,
10603 "network_port": instance.network_port,
10604 "hv_instance": instance.hvparams,
10605 "hv_actual": cluster.FillHV(instance, skip_globals=True),
10606 "be_instance": instance.beparams,
10607 "be_actual": cluster.FillBE(instance),
10608 "os_instance": instance.osparams,
10609 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
10610 "serial_no": instance.serial_no,
10611 "mtime": instance.mtime,
10612 "ctime": instance.ctime,
10613 "uuid": instance.uuid,
10619 class LUInstanceSetParams(LogicalUnit):
10620 """Modifies an instances's parameters.
10623 HPATH = "instance-modify"
10624 HTYPE = constants.HTYPE_INSTANCE
10627 def CheckArguments(self):
10628 if not (self.op.nics or self.op.disks or self.op.disk_template or
10629 self.op.hvparams or self.op.beparams or self.op.os_name):
10630 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
10632 if self.op.hvparams:
10633 _CheckGlobalHvParams(self.op.hvparams)
10637 for disk_op, disk_dict in self.op.disks:
10638 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
10639 if disk_op == constants.DDM_REMOVE:
10640 disk_addremove += 1
10642 elif disk_op == constants.DDM_ADD:
10643 disk_addremove += 1
10645 if not isinstance(disk_op, int):
10646 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
10647 if not isinstance(disk_dict, dict):
10648 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
10649 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10651 if disk_op == constants.DDM_ADD:
10652 mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
10653 if mode not in constants.DISK_ACCESS_SET:
10654 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
10655 errors.ECODE_INVAL)
10656 size = disk_dict.get(constants.IDISK_SIZE, None)
10658 raise errors.OpPrereqError("Required disk parameter size missing",
10659 errors.ECODE_INVAL)
10662 except (TypeError, ValueError), err:
10663 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
10664 str(err), errors.ECODE_INVAL)
10665 disk_dict[constants.IDISK_SIZE] = size
10667 # modification of disk
10668 if constants.IDISK_SIZE in disk_dict:
10669 raise errors.OpPrereqError("Disk size change not possible, use"
10670 " grow-disk", errors.ECODE_INVAL)
10672 if disk_addremove > 1:
10673 raise errors.OpPrereqError("Only one disk add or remove operation"
10674 " supported at a time", errors.ECODE_INVAL)
10676 if self.op.disks and self.op.disk_template is not None:
10677 raise errors.OpPrereqError("Disk template conversion and other disk"
10678 " changes not supported at the same time",
10679 errors.ECODE_INVAL)
10681 if (self.op.disk_template and
10682 self.op.disk_template in constants.DTS_INT_MIRROR and
10683 self.op.remote_node is None):
10684 raise errors.OpPrereqError("Changing the disk template to a mirrored"
10685 " one requires specifying a secondary node",
10686 errors.ECODE_INVAL)
10690 for nic_op, nic_dict in self.op.nics:
10691 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
10692 if nic_op == constants.DDM_REMOVE:
10695 elif nic_op == constants.DDM_ADD:
10698 if not isinstance(nic_op, int):
10699 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
10700 if not isinstance(nic_dict, dict):
10701 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
10702 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10704 # nic_dict should be a dict
10705 nic_ip = nic_dict.get(constants.INIC_IP, None)
10706 if nic_ip is not None:
10707 if nic_ip.lower() == constants.VALUE_NONE:
10708 nic_dict[constants.INIC_IP] = None
10710 if not netutils.IPAddress.IsValid(nic_ip):
10711 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
10712 errors.ECODE_INVAL)
10714 nic_bridge = nic_dict.get("bridge", None)
10715 nic_link = nic_dict.get(constants.INIC_LINK, None)
10716 if nic_bridge and nic_link:
10717 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
10718 " at the same time", errors.ECODE_INVAL)
10719 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
10720 nic_dict["bridge"] = None
10721 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
10722 nic_dict[constants.INIC_LINK] = None
10724 if nic_op == constants.DDM_ADD:
10725 nic_mac = nic_dict.get(constants.INIC_MAC, None)
10726 if nic_mac is None:
10727 nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
10729 if constants.INIC_MAC in nic_dict:
10730 nic_mac = nic_dict[constants.INIC_MAC]
10731 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10732 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
10734 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
10735 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
10736 " modifying an existing nic",
10737 errors.ECODE_INVAL)
10739 if nic_addremove > 1:
10740 raise errors.OpPrereqError("Only one NIC add or remove operation"
10741 " supported at a time", errors.ECODE_INVAL)
10743 def ExpandNames(self):
10744 self._ExpandAndLockInstance()
10745 self.needed_locks[locking.LEVEL_NODE] = []
10746 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10748 def DeclareLocks(self, level):
10749 if level == locking.LEVEL_NODE:
10750 self._LockInstancesNodes()
10751 if self.op.disk_template and self.op.remote_node:
10752 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10753 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
10755 def BuildHooksEnv(self):
10756 """Build hooks env.
10758 This runs on the master, primary and secondaries.
10762 if constants.BE_MEMORY in self.be_new:
10763 args["memory"] = self.be_new[constants.BE_MEMORY]
10764 if constants.BE_VCPUS in self.be_new:
10765 args["vcpus"] = self.be_new[constants.BE_VCPUS]
10766 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
10767 # information at all.
10770 nic_override = dict(self.op.nics)
10771 for idx, nic in enumerate(self.instance.nics):
10772 if idx in nic_override:
10773 this_nic_override = nic_override[idx]
10775 this_nic_override = {}
10776 if constants.INIC_IP in this_nic_override:
10777 ip = this_nic_override[constants.INIC_IP]
10780 if constants.INIC_MAC in this_nic_override:
10781 mac = this_nic_override[constants.INIC_MAC]
10784 if idx in self.nic_pnew:
10785 nicparams = self.nic_pnew[idx]
10787 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
10788 mode = nicparams[constants.NIC_MODE]
10789 link = nicparams[constants.NIC_LINK]
10790 args["nics"].append((ip, mac, mode, link))
10791 if constants.DDM_ADD in nic_override:
10792 ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
10793 mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
10794 nicparams = self.nic_pnew[constants.DDM_ADD]
10795 mode = nicparams[constants.NIC_MODE]
10796 link = nicparams[constants.NIC_LINK]
10797 args["nics"].append((ip, mac, mode, link))
10798 elif constants.DDM_REMOVE in nic_override:
10799 del args["nics"][-1]
10801 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
10802 if self.op.disk_template:
10803 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
10807 def BuildHooksNodes(self):
10808 """Build hooks nodes.
10811 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10814 def CheckPrereq(self):
10815 """Check prerequisites.
10817 This only checks the instance list against the existing names.
10820 # checking the new params on the primary/secondary nodes
10822 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10823 cluster = self.cluster = self.cfg.GetClusterInfo()
10824 assert self.instance is not None, \
10825 "Cannot retrieve locked instance %s" % self.op.instance_name
10826 pnode = instance.primary_node
10827 nodelist = list(instance.all_nodes)
10830 if self.op.os_name and not self.op.force:
10831 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
10832 self.op.force_variant)
10833 instance_os = self.op.os_name
10835 instance_os = instance.os
10837 if self.op.disk_template:
10838 if instance.disk_template == self.op.disk_template:
10839 raise errors.OpPrereqError("Instance already has disk template %s" %
10840 instance.disk_template, errors.ECODE_INVAL)
10842 if (instance.disk_template,
10843 self.op.disk_template) not in self._DISK_CONVERSIONS:
10844 raise errors.OpPrereqError("Unsupported disk template conversion from"
10845 " %s to %s" % (instance.disk_template,
10846 self.op.disk_template),
10847 errors.ECODE_INVAL)
10848 _CheckInstanceDown(self, instance, "cannot change disk template")
10849 if self.op.disk_template in constants.DTS_INT_MIRROR:
10850 if self.op.remote_node == pnode:
10851 raise errors.OpPrereqError("Given new secondary node %s is the same"
10852 " as the primary node of the instance" %
10853 self.op.remote_node, errors.ECODE_STATE)
10854 _CheckNodeOnline(self, self.op.remote_node)
10855 _CheckNodeNotDrained(self, self.op.remote_node)
10856 # FIXME: here we assume that the old instance type is DT_PLAIN
10857 assert instance.disk_template == constants.DT_PLAIN
10858 disks = [{constants.IDISK_SIZE: d.size,
10859 constants.IDISK_VG: d.logical_id[0]}
10860 for d in instance.disks]
10861 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
10862 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
10864 # hvparams processing
10865 if self.op.hvparams:
10866 hv_type = instance.hypervisor
10867 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
10868 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
10869 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
10872 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
10873 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
10874 self.hv_new = hv_new # the new actual values
10875 self.hv_inst = i_hvdict # the new dict (without defaults)
10877 self.hv_new = self.hv_inst = {}
10879 # beparams processing
10880 if self.op.beparams:
10881 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
10883 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
10884 be_new = cluster.SimpleFillBE(i_bedict)
10885 self.be_new = be_new # the new actual values
10886 self.be_inst = i_bedict # the new dict (without defaults)
10888 self.be_new = self.be_inst = {}
10889 be_old = cluster.FillBE(instance)
10891 # osparams processing
10892 if self.op.osparams:
10893 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
10894 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
10895 self.os_inst = i_osdict # the new dict (without defaults)
10901 if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
10902 be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
10903 mem_check_list = [pnode]
10904 if be_new[constants.BE_AUTO_BALANCE]:
10905 # either we changed auto_balance to yes or it was from before
10906 mem_check_list.extend(instance.secondary_nodes)
10907 instance_info = self.rpc.call_instance_info(pnode, instance.name,
10908 instance.hypervisor)
10909 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
10910 instance.hypervisor)
10911 pninfo = nodeinfo[pnode]
10912 msg = pninfo.fail_msg
10914 # Assume the primary node is unreachable and go ahead
10915 self.warn.append("Can't get info from primary node %s: %s" %
10917 elif not isinstance(pninfo.payload.get("memory_free", None), int):
10918 self.warn.append("Node data from primary node %s doesn't contain"
10919 " free memory information" % pnode)
10920 elif instance_info.fail_msg:
10921 self.warn.append("Can't get instance runtime information: %s" %
10922 instance_info.fail_msg)
10924 if instance_info.payload:
10925 current_mem = int(instance_info.payload["memory"])
10927 # Assume instance not running
10928 # (there is a slight race condition here, but it's not very probable,
10929 # and we have no other way to check)
10931 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
10932 pninfo.payload["memory_free"])
10934 raise errors.OpPrereqError("This change will prevent the instance"
10935 " from starting, due to %d MB of memory"
10936 " missing on its primary node" % miss_mem,
10937 errors.ECODE_NORES)
10939 if be_new[constants.BE_AUTO_BALANCE]:
10940 for node, nres in nodeinfo.items():
10941 if node not in instance.secondary_nodes:
10943 nres.Raise("Can't get info from secondary node %s" % node,
10944 prereq=True, ecode=errors.ECODE_STATE)
10945 if not isinstance(nres.payload.get("memory_free", None), int):
10946 raise errors.OpPrereqError("Secondary node %s didn't return free"
10947 " memory information" % node,
10948 errors.ECODE_STATE)
10949 elif be_new[constants.BE_MEMORY] > nres.payload["memory_free"]:
10950 raise errors.OpPrereqError("This change will prevent the instance"
10951 " from failover to its secondary node"
10952 " %s, due to not enough memory" % node,
10953 errors.ECODE_STATE)
10957 self.nic_pinst = {}
10958 for nic_op, nic_dict in self.op.nics:
10959 if nic_op == constants.DDM_REMOVE:
10960 if not instance.nics:
10961 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
10962 errors.ECODE_INVAL)
10964 if nic_op != constants.DDM_ADD:
10966 if not instance.nics:
10967 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
10968 " no NICs" % nic_op,
10969 errors.ECODE_INVAL)
10970 if nic_op < 0 or nic_op >= len(instance.nics):
10971 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
10973 (nic_op, len(instance.nics) - 1),
10974 errors.ECODE_INVAL)
10975 old_nic_params = instance.nics[nic_op].nicparams
10976 old_nic_ip = instance.nics[nic_op].ip
10978 old_nic_params = {}
10981 update_params_dict = dict([(key, nic_dict[key])
10982 for key in constants.NICS_PARAMETERS
10983 if key in nic_dict])
10985 if "bridge" in nic_dict:
10986 update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
10988 new_nic_params = _GetUpdatedParams(old_nic_params,
10989 update_params_dict)
10990 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
10991 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
10992 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
10993 self.nic_pinst[nic_op] = new_nic_params
10994 self.nic_pnew[nic_op] = new_filled_nic_params
10995 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
10997 if new_nic_mode == constants.NIC_MODE_BRIDGED:
10998 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
10999 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
11001 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
11003 self.warn.append(msg)
11005 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
11006 if new_nic_mode == constants.NIC_MODE_ROUTED:
11007 if constants.INIC_IP in nic_dict:
11008 nic_ip = nic_dict[constants.INIC_IP]
11010 nic_ip = old_nic_ip
11012 raise errors.OpPrereqError("Cannot set the nic ip to None"
11013 " on a routed nic", errors.ECODE_INVAL)
11014 if constants.INIC_MAC in nic_dict:
11015 nic_mac = nic_dict[constants.INIC_MAC]
11016 if nic_mac is None:
11017 raise errors.OpPrereqError("Cannot set the nic mac to None",
11018 errors.ECODE_INVAL)
11019 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11020 # otherwise generate the mac
11021 nic_dict[constants.INIC_MAC] = \
11022 self.cfg.GenerateMAC(self.proc.GetECId())
11024 # or validate/reserve the current one
11026 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
11027 except errors.ReservationError:
11028 raise errors.OpPrereqError("MAC address %s already in use"
11029 " in cluster" % nic_mac,
11030 errors.ECODE_NOTUNIQUE)
11033 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
11034 raise errors.OpPrereqError("Disk operations not supported for"
11035 " diskless instances",
11036 errors.ECODE_INVAL)
11037 for disk_op, _ in self.op.disks:
11038 if disk_op == constants.DDM_REMOVE:
11039 if len(instance.disks) == 1:
11040 raise errors.OpPrereqError("Cannot remove the last disk of"
11041 " an instance", errors.ECODE_INVAL)
11042 _CheckInstanceDown(self, instance, "cannot remove disks")
11044 if (disk_op == constants.DDM_ADD and
11045 len(instance.disks) >= constants.MAX_DISKS):
11046 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
11047 " add more" % constants.MAX_DISKS,
11048 errors.ECODE_STATE)
11049 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
11051 if disk_op < 0 or disk_op >= len(instance.disks):
11052 raise errors.OpPrereqError("Invalid disk index %s, valid values"
11054 (disk_op, len(instance.disks)),
11055 errors.ECODE_INVAL)
11059 def _ConvertPlainToDrbd(self, feedback_fn):
11060 """Converts an instance from plain to drbd.
11063 feedback_fn("Converting template to drbd")
11064 instance = self.instance
11065 pnode = instance.primary_node
11066 snode = self.op.remote_node
11068 # create a fake disk info for _GenerateDiskTemplate
11069 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
11070 constants.IDISK_VG: d.logical_id[0]}
11071 for d in instance.disks]
11072 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
11073 instance.name, pnode, [snode],
11074 disk_info, None, None, 0, feedback_fn)
11075 info = _GetInstanceInfoText(instance)
11076 feedback_fn("Creating additional volumes...")
11077 # first, create the missing data and meta devices
11078 for disk in new_disks:
11079 # unfortunately this is... not too nice
11080 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
11081 info, True)
11082 for child in disk.children:
11083 _CreateSingleBlockDev(self, snode, instance, child, info, True)
11084 # at this stage, all new LVs have been created, we can rename the
11085 # old ones
11086 feedback_fn("Renaming original volumes...")
11087 rename_list = [(o, n.children[0].logical_id)
11088 for (o, n) in zip(instance.disks, new_disks)]
11089 result = self.rpc.call_blockdev_rename(pnode, rename_list)
11090 result.Raise("Failed to rename original LVs")
11092 feedback_fn("Initializing DRBD devices...")
11093 # all child devices are in place, we can now create the DRBD devices
11094 for disk in new_disks:
11095 for node in [pnode, snode]:
11096 f_create = node == pnode
11097 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
11099 # at this point, the instance has been modified
11100 instance.disk_template = constants.DT_DRBD8
11101 instance.disks = new_disks
11102 self.cfg.Update(instance, feedback_fn)
11104 # disks are created, waiting for sync
11105 disk_abort = not _WaitForSync(self, instance,
11106 oneshot=not self.op.wait_for_sync)
11107 if disk_abort:
11108 raise errors.OpExecError("There are some degraded disks for"
11109 " this instance, please cleanup manually")
11111 def _ConvertDrbdToPlain(self, feedback_fn):
11112 """Converts an instance from drbd to plain.
11115 instance = self.instance
11116 assert len(instance.secondary_nodes) == 1
11117 pnode = instance.primary_node
11118 snode = instance.secondary_nodes[0]
11119 feedback_fn("Converting template to plain")
11121 old_disks = instance.disks
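# each DRBD8 disk object has two children, the data LV and the metadata LV;
# keeping only children[0] reduces every disk to its underlying plain data
# volume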
11122 new_disks = [d.children[0] for d in old_disks]
11124 # copy over size and mode
11125 for parent, child in zip(old_disks, new_disks):
11126 child.size = parent.size
11127 child.mode = parent.mode
11129 # update instance structure
11130 instance.disks = new_disks
11131 instance.disk_template = constants.DT_PLAIN
11132 self.cfg.Update(instance, feedback_fn)
11134 feedback_fn("Removing volumes on the secondary node...")
11135 for disk in old_disks:
11136 self.cfg.SetDiskID(disk, snode)
11137 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
11138 if msg:
11139 self.LogWarning("Could not remove block device %s on node %s,"
11140 " continuing anyway: %s", disk.iv_name, snode, msg)
11142 feedback_fn("Removing unneeded volumes on the primary node...")
11143 for idx, disk in enumerate(old_disks):
11144 meta = disk.children[1]
11145 self.cfg.SetDiskID(meta, pnode)
11146 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
11147 if msg:
11148 self.LogWarning("Could not remove metadata for disk %d on node %s,"
11149 " continuing anyway: %s", idx, pnode, msg)
11151 def Exec(self, feedback_fn):
11152 """Modifies an instance.
11154 All parameters take effect only at the next restart of the instance.
11157 # Process here the warnings from CheckPrereq, as we don't have a
11158 # feedback_fn there.
11159 for warn in self.warn:
11160 feedback_fn("WARNING: %s" % warn)
11163 instance = self.instance
11165 for disk_op, disk_dict in self.op.disks:
11166 if disk_op == constants.DDM_REMOVE:
11167 # remove the last disk
11168 device = instance.disks.pop()
11169 device_idx = len(instance.disks)
11170 for node, disk in device.ComputeNodeTree(instance.primary_node):
11171 self.cfg.SetDiskID(disk, node)
11172 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
11173 if msg:
11174 self.LogWarning("Could not remove disk/%d on node %s: %s,"
11175 " continuing anyway", device_idx, node, msg)
11176 result.append(("disk/%d" % device_idx, "remove"))
11177 elif disk_op == constants.DDM_ADD:
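# for file-based templates the new disk must use the same driver and live in
# the same directory as the existing disks, both taken from the first disk's
# logical_id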
11179 if instance.disk_template in (constants.DT_FILE,
11180 constants.DT_SHARED_FILE):
11181 file_driver, file_path = instance.disks[0].logical_id
11182 file_path = os.path.dirname(file_path)
11184 file_driver = file_path = None
11185 disk_idx_base = len(instance.disks)
11186 new_disk = _GenerateDiskTemplate(self,
11187 instance.disk_template,
11188 instance.name, instance.primary_node,
11189 instance.secondary_nodes,
11193 disk_idx_base, feedback_fn)[0]
11194 instance.disks.append(new_disk)
11195 info = _GetInstanceInfoText(instance)
11197 logging.info("Creating volume %s for instance %s",
11198 new_disk.iv_name, instance.name)
11199 # Note: this needs to be kept in sync with _CreateDisks
11201 for node in instance.all_nodes:
11202 f_create = node == instance.primary_node
11203 try:
11204 _CreateBlockDev(self, node, instance, new_disk,
11205 f_create, info, f_create)
11206 except errors.OpExecError, err:
11207 self.LogWarning("Failed to create volume %s (%s) on"
11208 " node %s: %s",
11209 new_disk.iv_name, new_disk, node, err)
11210 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
11211 (new_disk.size, new_disk.mode)))
11213 # change a given disk
11214 instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
11215 result.append(("disk.mode/%d" % disk_op,
11216 disk_dict[constants.IDISK_MODE]))
11218 if self.op.disk_template:
11219 r_shut = _ShutdownInstanceDisks(self, instance)
11220 if not r_shut:
11221 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
11222 " proceed with disk template conversion")
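# dispatch to the conversion helper registered in the _DISK_CONVERSIONS
# table (defined at the end of this class), keyed by the
# (current template, requested template) pair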
11223 mode = (instance.disk_template, self.op.disk_template)
11225 self._DISK_CONVERSIONS[mode](self, feedback_fn)
11227 self.cfg.ReleaseDRBDMinors(instance.name)
11229 result.append(("disk_template", self.op.disk_template))
11232 for nic_op, nic_dict in self.op.nics:
11233 if nic_op == constants.DDM_REMOVE:
11234 # remove the last nic
11235 del instance.nics[-1]
11236 result.append(("nic.%d" % len(instance.nics), "remove"))
11237 elif nic_op == constants.DDM_ADD:
11238 # mac and bridge should be set, by now
11239 mac = nic_dict[constants.INIC_MAC]
11240 ip = nic_dict.get(constants.INIC_IP, None)
11241 nicparams = self.nic_pinst[constants.DDM_ADD]
11242 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
11243 instance.nics.append(new_nic)
11244 result.append(("nic.%d" % (len(instance.nics) - 1),
11245 "add:mac=%s,ip=%s,mode=%s,link=%s" %
11246 (new_nic.mac, new_nic.ip,
11247 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
11248 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
11251 for key in (constants.INIC_MAC, constants.INIC_IP):
11252 if key in nic_dict:
11253 setattr(instance.nics[nic_op], key, nic_dict[key])
11254 if nic_op in self.nic_pinst:
11255 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
11256 for key, val in nic_dict.iteritems():
11257 result.append(("nic.%s/%d" % (key, nic_op), val))
11260 if self.op.hvparams:
11261 instance.hvparams = self.hv_inst
11262 for key, val in self.op.hvparams.iteritems():
11263 result.append(("hv/%s" % key, val))
11266 if self.op.beparams:
11267 instance.beparams = self.be_inst
11268 for key, val in self.op.beparams.iteritems():
11269 result.append(("be/%s" % key, val))
11272 if self.op.os_name:
11273 instance.os = self.op.os_name
11276 if self.op.osparams:
11277 instance.osparams = self.os_inst
11278 for key, val in self.op.osparams.iteritems():
11279 result.append(("os/%s" % key, val))
11281 self.cfg.Update(instance, feedback_fn)
11285 _DISK_CONVERSIONS = {
11286 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
11287 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
11291 class LUInstanceChangeGroup(LogicalUnit):
11292 HPATH = "instance-change-group"
11293 HTYPE = constants.HTYPE_INSTANCE
11296 def ExpandNames(self):
11297 self.share_locks = _ShareAll()
11298 self.needed_locks = {
11299 locking.LEVEL_NODEGROUP: [],
11300 locking.LEVEL_NODE: [],
11303 self._ExpandAndLockInstance()
11305 if self.op.target_groups:
11306 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
11307 self.op.target_groups)
11309 self.req_target_uuids = None
11311 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
11313 def DeclareLocks(self, level):
11314 if level == locking.LEVEL_NODEGROUP:
11315 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11317 if self.req_target_uuids:
11318 lock_groups = set(self.req_target_uuids)
11320 # Lock all groups used by instance optimistically; this requires going
11321 # via the node before it's locked, requiring verification later on
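# (CheckPrereq re-checks this: it asserts that the instance's nodes, and
# therefore its groups, did not change while the locks were being acquired)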
11322 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11323 lock_groups.update(instance_groups)
11325 # No target groups, need to lock all of them
11326 lock_groups = locking.ALL_SET
11328 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
11330 elif level == locking.LEVEL_NODE:
11331 if self.req_target_uuids:
11332 # Lock all nodes used by instances
11333 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11334 self._LockInstancesNodes()
11336 # Lock all nodes in all potential target groups
11337 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
11338 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
11339 member_nodes = [node_name
11340 for group in lock_groups
11341 for node_name in self.cfg.GetNodeGroup(group).members]
11342 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
11344 # Lock all nodes as all groups are potential targets
11345 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11347 def CheckPrereq(self):
11348 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11349 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11350 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11352 assert (self.req_target_uuids is None or
11353 owned_groups.issuperset(self.req_target_uuids))
11354 assert owned_instances == set([self.op.instance_name])
11356 # Get instance information
11357 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11359 # Check if node groups for locked instance are still correct
11360 assert owned_nodes.issuperset(self.instance.all_nodes), \
11361 ("Instance %s's nodes changed while we kept the lock" %
11362 self.op.instance_name)
11364 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
11367 if self.req_target_uuids:
11368 # User requested specific target groups
11369 self.target_uuids = self.req_target_uuids
11371 # All groups except those used by the instance are potential targets
11372 self.target_uuids = owned_groups - inst_groups
11374 conflicting_groups = self.target_uuids & inst_groups
11375 if conflicting_groups:
11376 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
11377 " used by the instance '%s'" %
11378 (utils.CommaJoin(conflicting_groups),
11379 self.op.instance_name),
11380 errors.ECODE_INVAL)
11382 if not self.target_uuids:
11383 raise errors.OpPrereqError("There are no possible target groups",
11384 errors.ECODE_INVAL)
11386 def BuildHooksEnv(self):
11387 """Build hooks env.
11390 assert self.target_uuids
11393 "TARGET_GROUPS": " ".join(self.target_uuids),
11396 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11400 def BuildHooksNodes(self):
11401 """Build hooks nodes.
11404 mn = self.cfg.GetMasterNode()
11405 return ([mn], [mn])
11407 def Exec(self, feedback_fn):
11408 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
11410 assert instances == [self.op.instance_name], "Instance not locked"
11412 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
11413 instances=instances, target_groups=list(self.target_uuids))
11415 ial.Run(self.op.iallocator)
11417 if not ial.success:
11418 raise errors.OpPrereqError("Can't compute solution for changing group of"
11419 " instance '%s' using iallocator '%s': %s" %
11420 (self.op.instance_name, self.op.iallocator,
11422 errors.ECODE_NORES)
11424 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
11426 self.LogInfo("Iallocator returned %s job(s) for changing group of"
11427 " instance '%s'", len(jobs), self.op.instance_name)
11429 return ResultWithJobs(jobs)
11432 class LUBackupQuery(NoHooksLU):
11433 """Query the exports list
11438 def ExpandNames(self):
11439 self.needed_locks = {}
11440 self.share_locks[locking.LEVEL_NODE] = 1
11441 if not self.op.nodes:
11442 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11444 self.needed_locks[locking.LEVEL_NODE] = \
11445 _GetWantedNodes(self, self.op.nodes)
11447 def Exec(self, feedback_fn):
11448 """Compute the list of all the exported system images.
11451 @return: a dictionary with the structure node->(export-list)
11452 where export-list is a list of the instances exported on
11453 that node
11456 self.nodes = self.owned_locks(locking.LEVEL_NODE)
11457 rpcresult = self.rpc.call_export_list(self.nodes)
11458 result = {}
11459 for node in rpcresult:
11460 if rpcresult[node].fail_msg:
11461 result[node] = False
11462 else:
11463 result[node] = rpcresult[node].payload
11465 return result
11468 class LUBackupPrepare(NoHooksLU):
11469 """Prepares an instance for an export and returns useful information.
11474 def ExpandNames(self):
11475 self._ExpandAndLockInstance()
11477 def CheckPrereq(self):
11478 """Check prerequisites.
11481 instance_name = self.op.instance_name
11483 self.instance = self.cfg.GetInstanceInfo(instance_name)
11484 assert self.instance is not None, \
11485 "Cannot retrieve locked instance %s" % self.op.instance_name
11486 _CheckNodeOnline(self, self.instance.primary_node)
11488 self._cds = _GetClusterDomainSecret()
11490 def Exec(self, feedback_fn):
11491 """Prepares an instance for an export.
11494 instance = self.instance
11496 if self.op.mode == constants.EXPORT_MODE_REMOTE:
11497 salt = utils.GenerateSecret(8)
11499 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
11500 result = self.rpc.call_x509_cert_create(instance.primary_node,
11501 constants.RIE_CERT_VALIDITY)
11502 result.Raise("Can't create X509 key and certificate on %s" % result.node)
11504 (name, cert_pem) = result.payload
11506 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
11510 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
11511 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
11513 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
11519 class LUBackupExport(LogicalUnit):
11520 """Export an instance to an image in the cluster.
11523 HPATH = "instance-export"
11524 HTYPE = constants.HTYPE_INSTANCE
11527 def CheckArguments(self):
11528 """Check the arguments.
11531 self.x509_key_name = self.op.x509_key_name
11532 self.dest_x509_ca_pem = self.op.destination_x509_ca
11534 if self.op.mode == constants.EXPORT_MODE_REMOTE:
11535 if not self.x509_key_name:
11536 raise errors.OpPrereqError("Missing X509 key name for encryption",
11537 errors.ECODE_INVAL)
11539 if not self.dest_x509_ca_pem:
11540 raise errors.OpPrereqError("Missing destination X509 CA",
11541 errors.ECODE_INVAL)
11543 def ExpandNames(self):
11544 self._ExpandAndLockInstance()
11546 # Lock all nodes for local exports
11547 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11548 # FIXME: lock only instance primary and destination node
11550 # Sad but true, for now we have to lock all nodes, as we don't know where
11551 # the previous export might be, and in this LU we search for it and
11552 # remove it from its current node. In the future we could fix this by:
11553 # - making a tasklet to search (share-lock all), then create the
11554 # new one, then one to remove, after
11555 # - removing the removal operation altogether
11556 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11558 def DeclareLocks(self, level):
11559 """Last minute lock declaration."""
11560 # All nodes are locked anyway, so nothing to do here.
11562 def BuildHooksEnv(self):
11563 """Build hooks env.
11565 This will run on the master, primary node and target node.
11569 "EXPORT_MODE": self.op.mode,
11570 "EXPORT_NODE": self.op.target_node,
11571 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
11572 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
11573 # TODO: Generic function for boolean env variables
11574 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
11577 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11581 def BuildHooksNodes(self):
11582 """Build hooks nodes.
11585 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
11587 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11588 nl.append(self.op.target_node)
11592 def CheckPrereq(self):
11593 """Check prerequisites.
11595 This checks that the instance and node names are valid.
11598 instance_name = self.op.instance_name
11600 self.instance = self.cfg.GetInstanceInfo(instance_name)
11601 assert self.instance is not None, \
11602 "Cannot retrieve locked instance %s" % self.op.instance_name
11603 _CheckNodeOnline(self, self.instance.primary_node)
11605 if (self.op.remove_instance and self.instance.admin_up and
11606 not self.op.shutdown):
11607 raise errors.OpPrereqError("Can not remove instance without shutting it"
11610 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11611 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
11612 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
11613 assert self.dst_node is not None
11615 _CheckNodeOnline(self, self.dst_node.name)
11616 _CheckNodeNotDrained(self, self.dst_node.name)
11619 self.dest_disk_info = None
11620 self.dest_x509_ca = None
11622 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11623 self.dst_node = None
11625 if len(self.op.target_node) != len(self.instance.disks):
11626 raise errors.OpPrereqError(("Received destination information for %s"
11627 " disks, but instance %s has %s disks") %
11628 (len(self.op.target_node), instance_name,
11629 len(self.instance.disks)),
11630 errors.ECODE_INVAL)
11632 cds = _GetClusterDomainSecret()
11634 # Check X509 key name
11636 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
11637 except (TypeError, ValueError), err:
11638 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
11640 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
11641 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
11642 errors.ECODE_INVAL)
11644 # Load and verify CA
11646 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
11647 except OpenSSL.crypto.Error, err:
11648 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
11649 (err, ), errors.ECODE_INVAL)
11651 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
11652 if errcode is not None:
11653 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
11654 (msg, ), errors.ECODE_INVAL)
11656 self.dest_x509_ca = cert
11658 # Verify target information
11660 for idx, disk_data in enumerate(self.op.target_node):
11662 (host, port, magic) = \
11663 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
11664 except errors.GenericError, err:
11665 raise errors.OpPrereqError("Target info for disk %s: %s" %
11666 (idx, err), errors.ECODE_INVAL)
11668 disk_info.append((host, port, magic))
11670 assert len(disk_info) == len(self.op.target_node)
11671 self.dest_disk_info = disk_info
11674 raise errors.ProgrammerError("Unhandled export mode %r" %
11677 # instance disk type verification
11678 # TODO: Implement export support for file-based disks
11679 for disk in self.instance.disks:
11680 if disk.dev_type == constants.LD_FILE:
11681 raise errors.OpPrereqError("Export not supported for instances with"
11682 " file-based disks", errors.ECODE_INVAL)
11684 def _CleanupExports(self, feedback_fn):
11685 """Removes exports of current instance from all other nodes.
11687 If an instance in a cluster with nodes A..D was exported to node C, its
11688 exports will be removed from the nodes A, B and D.
11691 assert self.op.mode != constants.EXPORT_MODE_REMOTE
11693 nodelist = self.cfg.GetNodeList()
11694 nodelist.remove(self.dst_node.name)
11696 # on one-node clusters nodelist will be empty after the removal;
11697 # if we proceeded, the backup would be removed because OpBackupQuery
11698 # substitutes an empty list with the full cluster node list.
11699 iname = self.instance.name
11700 if nodelist:
11701 feedback_fn("Removing old exports for instance %s" % iname)
11702 exportlist = self.rpc.call_export_list(nodelist)
11703 for node in exportlist:
11704 if exportlist[node].fail_msg:
11705 continue
11706 if iname in exportlist[node].payload:
11707 msg = self.rpc.call_export_remove(node, iname).fail_msg
11708 if msg:
11709 self.LogWarning("Could not remove older export for instance %s"
11710 " on node %s: %s", iname, node, msg)
11712 def Exec(self, feedback_fn):
11713 """Export an instance to an image in the cluster.
11716 assert self.op.mode in constants.EXPORT_MODES
11718 instance = self.instance
11719 src_node = instance.primary_node
11721 if self.op.shutdown:
11722 # shutdown the instance, but not the disks
11723 feedback_fn("Shutting down instance %s" % instance.name)
11724 result = self.rpc.call_instance_shutdown(src_node, instance,
11725 self.op.shutdown_timeout)
11726 # TODO: Maybe ignore failures if ignore_remove_failures is set
11727 result.Raise("Could not shutdown instance %s on"
11728 " node %s" % (instance.name, src_node))
11730 # set the disk IDs correctly since call_instance_start needs the
11731 # correct drbd minor to create the symlinks
11732 for disk in instance.disks:
11733 self.cfg.SetDiskID(disk, src_node)
11735 activate_disks = (not instance.admin_up)
11738 # Activate the instance disks if we're exporting a stopped instance
11739 feedback_fn("Activating disks for %s" % instance.name)
11740 _StartInstanceDisks(self, instance, None)
11743 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
11746 helper.CreateSnapshots()
11748 if (self.op.shutdown and instance.admin_up and
11749 not self.op.remove_instance):
11750 assert not activate_disks
11751 feedback_fn("Starting instance %s" % instance.name)
11752 result = self.rpc.call_instance_start(src_node, instance,
11754 msg = result.fail_msg
11755 if msg:
11756 feedback_fn("Failed to start instance: %s" % msg)
11757 _ShutdownInstanceDisks(self, instance)
11758 raise errors.OpExecError("Could not start instance: %s" % msg)
11760 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11761 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
11762 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11763 connect_timeout = constants.RIE_CONNECT_TIMEOUT
11764 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11766 (key_name, _, _) = self.x509_key_name
11769 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
11772 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
11773 key_name, dest_ca_pem,
11778 # Check for backwards compatibility
11779 assert len(dresults) == len(instance.disks)
11780 assert compat.all(isinstance(i, bool) for i in dresults), \
11781 "Not all results are boolean: %r" % dresults
11785 feedback_fn("Deactivating disks for %s" % instance.name)
11786 _ShutdownInstanceDisks(self, instance)
11788 if not (compat.all(dresults) and fin_resu):
11791 failures.append("export finalization")
11792 if not compat.all(dresults):
11793 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
11795 failures.append("disk export: disk(s) %s" % fdsk)
11797 raise errors.OpExecError("Export failed, errors in %s" %
11798 utils.CommaJoin(failures))
11800 # At this point, the export was successful, we can clean up/finish
11802 # Remove instance if requested
11803 if self.op.remove_instance:
11804 feedback_fn("Removing instance %s" % instance.name)
11805 _RemoveInstance(self, feedback_fn, instance,
11806 self.op.ignore_remove_failures)
11808 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11809 self._CleanupExports(feedback_fn)
11811 return fin_resu, dresults
11814 class LUBackupRemove(NoHooksLU):
11815 """Remove exports related to the named instance.
11820 def ExpandNames(self):
11821 self.needed_locks = {}
11822 # We need all nodes to be locked in order for RemoveExport to work, but we
11823 # don't need to lock the instance itself, as nothing will happen to it (and
11824 # we can remove exports also for a removed instance)
11825 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11827 def Exec(self, feedback_fn):
11828 """Remove any export.
11831 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
11832 # If the instance was not found we'll try with the name that was passed in.
11833 # This will only work if it was an FQDN, though.
11835 if not instance_name:
11837 instance_name = self.op.instance_name
11839 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
11840 exportlist = self.rpc.call_export_list(locked_nodes)
11842 for node in exportlist:
11843 msg = exportlist[node].fail_msg
11844 if msg:
11845 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
11846 continue
11847 if instance_name in exportlist[node].payload:
11849 result = self.rpc.call_export_remove(node, instance_name)
11850 msg = result.fail_msg
11852 logging.error("Could not remove export for instance %s"
11853 " on node %s: %s", instance_name, node, msg)
11855 if fqdn_warn and not found:
11856 feedback_fn("Export not found. If trying to remove an export belonging"
11857 " to a deleted instance please use its Fully Qualified"
11858 " Domain Name.")
11861 class LUGroupAdd(LogicalUnit):
11862 """Logical unit for creating node groups.
11865 HPATH = "group-add"
11866 HTYPE = constants.HTYPE_GROUP
11869 def ExpandNames(self):
11870 # We need the new group's UUID here so that we can create and acquire the
11871 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
11872 # that it should not check whether the UUID exists in the configuration.
11873 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
11874 self.needed_locks = {}
11875 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11877 def CheckPrereq(self):
11878 """Check prerequisites.
11880 This checks that the given group name is not an existing node group
11885 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11886 except errors.OpPrereqError:
11889 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
11890 " node group (UUID: %s)" %
11891 (self.op.group_name, existing_uuid),
11892 errors.ECODE_EXISTS)
11894 if self.op.ndparams:
11895 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
11897 def BuildHooksEnv(self):
11898 """Build hooks env.
11902 "GROUP_NAME": self.op.group_name,
11905 def BuildHooksNodes(self):
11906 """Build hooks nodes.
11909 mn = self.cfg.GetMasterNode()
11910 return ([mn], [mn])
11912 def Exec(self, feedback_fn):
11913 """Add the node group to the cluster.
11916 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
11917 uuid=self.group_uuid,
11918 alloc_policy=self.op.alloc_policy,
11919 ndparams=self.op.ndparams)
11921 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
11922 del self.remove_locks[locking.LEVEL_NODEGROUP]
11925 class LUGroupAssignNodes(NoHooksLU):
11926 """Logical unit for assigning nodes to groups.
11931 def ExpandNames(self):
11932 # These raise errors.OpPrereqError on their own:
11933 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11934 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
11936 # We want to lock all the affected nodes and groups. We have readily
11937 # available the list of nodes, and the *destination* group. To gather the
11938 # list of "source" groups, we need to fetch node information later on.
11939 self.needed_locks = {
11940 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
11941 locking.LEVEL_NODE: self.op.nodes,
11944 def DeclareLocks(self, level):
11945 if level == locking.LEVEL_NODEGROUP:
11946 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
11948 # Try to get all affected nodes' groups without having the group or node
11949 # lock yet. Needs verification later in the code flow.
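# (CheckPrereq recomputes the affected groups and aborts the operation if
# nodes changed groups in the meantime)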
11950 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
11952 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
11954 def CheckPrereq(self):
11955 """Check prerequisites.
11958 assert self.needed_locks[locking.LEVEL_NODEGROUP]
11959 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
11960 frozenset(self.op.nodes))
11962 expected_locks = (set([self.group_uuid]) |
11963 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
11964 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
11965 if actual_locks != expected_locks:
11966 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
11967 " current groups are '%s', used to be '%s'" %
11968 (utils.CommaJoin(expected_locks),
11969 utils.CommaJoin(actual_locks)))
11971 self.node_data = self.cfg.GetAllNodesInfo()
11972 self.group = self.cfg.GetNodeGroup(self.group_uuid)
11973 instance_data = self.cfg.GetAllInstancesInfo()
11975 if self.group is None:
11976 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11977 (self.op.group_name, self.group_uuid))
11979 (new_splits, previous_splits) = \
11980 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
11981 for node in self.op.nodes],
11982 self.node_data, instance_data)
11985 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
11987 if not self.op.force:
11988 raise errors.OpExecError("The following instances get split by this"
11989 " change and --force was not given: %s" %
11992 self.LogWarning("This operation will split the following instances: %s",
11995 if previous_splits:
11996 self.LogWarning("In addition, these already-split instances continue"
11997 " to be split across groups: %s",
11998 utils.CommaJoin(utils.NiceSort(previous_splits)))
12000 def Exec(self, feedback_fn):
12001 """Assign nodes to a new group.
12004 for node in self.op.nodes:
12005 self.node_data[node].group = self.group_uuid
12007 # FIXME: Depends on side-effects of modifying the result of
12008 # C{cfg.GetAllNodesInfo}
12010 self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
12013 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
12014 """Check for split instances after a node assignment.
12016 This method considers a series of node assignments as an atomic operation,
12017 and returns information about split instances after applying the set of
12018 changes.
12020 In particular, it returns information about newly split instances, and
12021 instances that were already split, and remain so after the change.
12023 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
12026 @type changes: list of (node_name, new_group_uuid) pairs.
12027 @param changes: list of node assignments to consider.
12028 @param node_data: a dict with data for all nodes
12029 @param instance_data: a dict with all instances to consider
12030 @rtype: a two-tuple
12031 @return: a list of instances that were previously okay and end up split as a
12032 consequence of this change, and a list of instances that were previously
12033 split and this change does not fix.
12036 changed_nodes = dict((node, group) for node, group in changes
12037 if node_data[node].group != group)
12039 all_split_instances = set()
12040 previously_split_instances = set()
12042 def InstanceNodes(instance):
12043 return [instance.primary_node] + list(instance.secondary_nodes)
12045 for inst in instance_data.values():
12046 if inst.disk_template not in constants.DTS_INT_MIRROR:
12049 instance_nodes = InstanceNodes(inst)
12051 if len(set(node_data[node].group for node in instance_nodes)) > 1:
12052 previously_split_instances.add(inst.name)
12054 if len(set(changed_nodes.get(node, node_data[node].group)
12055 for node in instance_nodes)) > 1:
12056 all_split_instances.add(inst.name)
12058 return (list(all_split_instances - previously_split_instances),
12059 list(previously_split_instances & all_split_instances))
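# Minimal illustration (hypothetical data): with nodes "n1" and "n2" both in
# group "g1" and a DRBD instance "inst1" using n1 as primary and n2 as
# secondary, the assignment [("n2", "g2")] makes the instance span two
# groups, so the method returns (["inst1"], []) -- newly split, nothing
# previously split.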
12062 class _GroupQuery(_QueryBase):
12063 FIELDS = query.GROUP_FIELDS
12065 def ExpandNames(self, lu):
12066 lu.needed_locks = {}
12068 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
12069 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
12072 self.wanted = [name_to_uuid[name]
12073 for name in utils.NiceSort(name_to_uuid.keys())]
12075 # Accept names to be either names or UUIDs.
12078 all_uuid = frozenset(self._all_groups.keys())
12080 for name in self.names:
12081 if name in all_uuid:
12082 self.wanted.append(name)
12083 elif name in name_to_uuid:
12084 self.wanted.append(name_to_uuid[name])
12086 missing.append(name)
12089 raise errors.OpPrereqError("Some groups do not exist: %s" %
12090 utils.CommaJoin(missing),
12091 errors.ECODE_NOENT)
12093 def DeclareLocks(self, lu, level):
12096 def _GetQueryData(self, lu):
12097 """Computes the list of node groups and their attributes.
12100 do_nodes = query.GQ_NODE in self.requested_data
12101 do_instances = query.GQ_INST in self.requested_data
12103 group_to_nodes = None
12104 group_to_instances = None
12106 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
12107 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
12108 # latter GetAllInstancesInfo() is not enough, for we have to go through
12109 # instance->node. Hence, we will need to process nodes even if we only need
12110 # instance information.
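# e.g. an instance whose primary node belongs to group G is listed under
# group_to_instances[G]; secondary nodes are not taken into account here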
12111 if do_nodes or do_instances:
12112 all_nodes = lu.cfg.GetAllNodesInfo()
12113 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
12116 for node in all_nodes.values():
12117 if node.group in group_to_nodes:
12118 group_to_nodes[node.group].append(node.name)
12119 node_to_group[node.name] = node.group
12122 all_instances = lu.cfg.GetAllInstancesInfo()
12123 group_to_instances = dict((uuid, []) for uuid in self.wanted)
12125 for instance in all_instances.values():
12126 node = instance.primary_node
12127 if node in node_to_group:
12128 group_to_instances[node_to_group[node]].append(instance.name)
12131 # Do not pass on node information if it was not requested.
12132 group_to_nodes = None
12134 return query.GroupQueryData([self._all_groups[uuid]
12135 for uuid in self.wanted],
12136 group_to_nodes, group_to_instances)
12139 class LUGroupQuery(NoHooksLU):
12140 """Logical unit for querying node groups.
12145 def CheckArguments(self):
12146 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
12147 self.op.output_fields, False)
12149 def ExpandNames(self):
12150 self.gq.ExpandNames(self)
12152 def DeclareLocks(self, level):
12153 self.gq.DeclareLocks(self, level)
12155 def Exec(self, feedback_fn):
12156 return self.gq.OldStyleQuery(self)
12159 class LUGroupSetParams(LogicalUnit):
12160 """Modifies the parameters of a node group.
12163 HPATH = "group-modify"
12164 HTYPE = constants.HTYPE_GROUP
12167 def CheckArguments(self):
12170 self.op.alloc_policy,
12173 if all_changes.count(None) == len(all_changes):
12174 raise errors.OpPrereqError("Please pass at least one modification",
12175 errors.ECODE_INVAL)
12177 def ExpandNames(self):
12178 # This raises errors.OpPrereqError on its own:
12179 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12181 self.needed_locks = {
12182 locking.LEVEL_NODEGROUP: [self.group_uuid],
12185 def CheckPrereq(self):
12186 """Check prerequisites.
12189 self.group = self.cfg.GetNodeGroup(self.group_uuid)
12191 if self.group is None:
12192 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12193 (self.op.group_name, self.group_uuid))
12195 if self.op.ndparams:
12196 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
12197 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12198 self.new_ndparams = new_ndparams
12200 def BuildHooksEnv(self):
12201 """Build hooks env.
12205 "GROUP_NAME": self.op.group_name,
12206 "NEW_ALLOC_POLICY": self.op.alloc_policy,
12209 def BuildHooksNodes(self):
12210 """Build hooks nodes.
12213 mn = self.cfg.GetMasterNode()
12214 return ([mn], [mn])
12216 def Exec(self, feedback_fn):
12217 """Modifies the node group.
12222 if self.op.ndparams:
12223 self.group.ndparams = self.new_ndparams
12224 result.append(("ndparams", str(self.group.ndparams)))
12226 if self.op.alloc_policy:
12227 self.group.alloc_policy = self.op.alloc_policy
12229 self.cfg.Update(self.group, feedback_fn)
12233 class LUGroupRemove(LogicalUnit):
12234 HPATH = "group-remove"
12235 HTYPE = constants.HTYPE_GROUP
12238 def ExpandNames(self):
12239 # This raises errors.OpPrereqError on its own:
12240 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12241 self.needed_locks = {
12242 locking.LEVEL_NODEGROUP: [self.group_uuid],
12245 def CheckPrereq(self):
12246 """Check prerequisites.
12248 This checks that the given group name exists as a node group, that it is
12249 empty (i.e., contains no nodes), and that it is not the last group of the
12250 cluster.
12253 # Verify that the group is empty.
12254 group_nodes = [node.name
12255 for node in self.cfg.GetAllNodesInfo().values()
12256 if node.group == self.group_uuid]
12259 raise errors.OpPrereqError("Group '%s' not empty, has the following"
12261 (self.op.group_name,
12262 utils.CommaJoin(utils.NiceSort(group_nodes))),
12263 errors.ECODE_STATE)
12265 # Verify the cluster would not be left group-less.
12266 if len(self.cfg.GetNodeGroupList()) == 1:
12267 raise errors.OpPrereqError("Group '%s' is the only group,"
12268 " cannot be removed" %
12269 self.op.group_name,
12270 errors.ECODE_STATE)
12272 def BuildHooksEnv(self):
12273 """Build hooks env.
12277 "GROUP_NAME": self.op.group_name,
12280 def BuildHooksNodes(self):
12281 """Build hooks nodes.
12284 mn = self.cfg.GetMasterNode()
12285 return ([mn], [mn])
12287 def Exec(self, feedback_fn):
12288 """Remove the node group.
12292 self.cfg.RemoveNodeGroup(self.group_uuid)
12293 except errors.ConfigurationError:
12294 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
12295 (self.op.group_name, self.group_uuid))
12297 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12300 class LUGroupRename(LogicalUnit):
12301 HPATH = "group-rename"
12302 HTYPE = constants.HTYPE_GROUP
12305 def ExpandNames(self):
12306 # This raises errors.OpPrereqError on its own:
12307 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12309 self.needed_locks = {
12310 locking.LEVEL_NODEGROUP: [self.group_uuid],
12313 def CheckPrereq(self):
12314 """Check prerequisites.
12316 Ensures requested new name is not yet used.
12320 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
12321 except errors.OpPrereqError:
12324 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
12325 " node group (UUID: %s)" %
12326 (self.op.new_name, new_name_uuid),
12327 errors.ECODE_EXISTS)
12329 def BuildHooksEnv(self):
12330 """Build hooks env.
12334 "OLD_NAME": self.op.group_name,
12335 "NEW_NAME": self.op.new_name,
12338 def BuildHooksNodes(self):
12339 """Build hooks nodes.
12342 mn = self.cfg.GetMasterNode()
12344 all_nodes = self.cfg.GetAllNodesInfo()
12345 all_nodes.pop(mn, None)
12347 run_nodes = [mn]
12348 run_nodes.extend(node.name for node in all_nodes.values()
12349 if node.group == self.group_uuid)
12351 return (run_nodes, run_nodes)
12353 def Exec(self, feedback_fn):
12354 """Rename the node group.
12357 group = self.cfg.GetNodeGroup(self.group_uuid)
12359 if group is None:
12360 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12361 (self.op.group_name, self.group_uuid))
12363 group.name = self.op.new_name
12364 self.cfg.Update(group, feedback_fn)
12366 return self.op.new_name
12369 class LUGroupEvacuate(LogicalUnit):
12370 HPATH = "group-evacuate"
12371 HTYPE = constants.HTYPE_GROUP
12374 def ExpandNames(self):
12375 # This raises errors.OpPrereqError on its own:
12376 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12378 if self.op.target_groups:
12379 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12380 self.op.target_groups)
12382 self.req_target_uuids = []
12384 if self.group_uuid in self.req_target_uuids:
12385 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
12386 " as a target group (targets are %s)" %
12388 utils.CommaJoin(self.req_target_uuids)),
12389 errors.ECODE_INVAL)
12391 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12393 self.share_locks = _ShareAll()
12394 self.needed_locks = {
12395 locking.LEVEL_INSTANCE: [],
12396 locking.LEVEL_NODEGROUP: [],
12397 locking.LEVEL_NODE: [],
12400 def DeclareLocks(self, level):
12401 if level == locking.LEVEL_INSTANCE:
12402 assert not self.needed_locks[locking.LEVEL_INSTANCE]
12404 # Lock instances optimistically, needs verification once node and group
12405 # locks have been acquired
12406 self.needed_locks[locking.LEVEL_INSTANCE] = \
12407 self.cfg.GetNodeGroupInstances(self.group_uuid)
12409 elif level == locking.LEVEL_NODEGROUP:
12410 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12412 if self.req_target_uuids:
12413 lock_groups = set([self.group_uuid] + self.req_target_uuids)
12415 # Lock all groups used by instances optimistically; this requires going
12416 # via the node before it's locked, requiring verification later on
12417 lock_groups.update(group_uuid
12418 for instance_name in
12419 self.owned_locks(locking.LEVEL_INSTANCE)
12421 self.cfg.GetInstanceNodeGroups(instance_name))
12423 # No target groups, need to lock all of them
12424 lock_groups = locking.ALL_SET
12426 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12428 elif level == locking.LEVEL_NODE:
12429 # This will only lock the nodes in the group to be evacuated which
12430 # contain actual instances
12431 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12432 self._LockInstancesNodes()
12434 # Lock all nodes in group to be evacuated and target groups
12435 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12436 assert self.group_uuid in owned_groups
12437 member_nodes = [node_name
12438 for group in owned_groups
12439 for node_name in self.cfg.GetNodeGroup(group).members]
12440 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12442 def CheckPrereq(self):
12443 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12444 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12445 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12447 assert owned_groups.issuperset(self.req_target_uuids)
12448 assert self.group_uuid in owned_groups
12450 # Check if locked instances are still correct
12451 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
12453 # Get instance information
12454 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
12456 # Check if node groups for locked instances are still correct
12457 for instance_name in owned_instances:
12458 inst = self.instances[instance_name]
12459 assert owned_nodes.issuperset(inst.all_nodes), \
12460 "Instance %s's nodes changed while we kept the lock" % instance_name
12462 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
12465 assert self.group_uuid in inst_groups, \
12466 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
12468 if self.req_target_uuids:
12469 # User requested specific target groups
12470 self.target_uuids = self.req_target_uuids
12472 # All groups except the one to be evacuated are potential targets
12473 self.target_uuids = [group_uuid for group_uuid in owned_groups
12474 if group_uuid != self.group_uuid]
12476 if not self.target_uuids:
12477 raise errors.OpPrereqError("There are no possible target groups",
12478 errors.ECODE_INVAL)
12480 def BuildHooksEnv(self):
12481 """Build hooks env.
12485 "GROUP_NAME": self.op.group_name,
12486 "TARGET_GROUPS": " ".join(self.target_uuids),
12489 def BuildHooksNodes(self):
12490 """Build hooks nodes.
12493 mn = self.cfg.GetMasterNode()
12495 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
12497 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
12499 return (run_nodes, run_nodes)
12501 def Exec(self, feedback_fn):
12502 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12504 assert self.group_uuid not in self.target_uuids
12506 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12507 instances=instances, target_groups=self.target_uuids)
12509 ial.Run(self.op.iallocator)
12511 if not ial.success:
12512 raise errors.OpPrereqError("Can't compute group evacuation using"
12513 " iallocator '%s': %s" %
12514 (self.op.iallocator, ial.info),
12515 errors.ECODE_NORES)
12517 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12519 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
12520 len(jobs), self.op.group_name)
12522 return ResultWithJobs(jobs)
12525 class TagsLU(NoHooksLU): # pylint: disable=W0223
12526 """Generic tags LU.
12528 This is an abstract class which is the parent of all the other tags LUs.
12531 def ExpandNames(self):
12532 self.group_uuid = None
12533 self.needed_locks = {}
12534 if self.op.kind == constants.TAG_NODE:
12535 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
12536 self.needed_locks[locking.LEVEL_NODE] = self.op.name
12537 elif self.op.kind == constants.TAG_INSTANCE:
12538 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
12539 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
12540 elif self.op.kind == constants.TAG_NODEGROUP:
12541 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
12543 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
12544 # not possible to acquire the BGL based on opcode parameters)
12546 def CheckPrereq(self):
12547 """Check prerequisites.
12550 if self.op.kind == constants.TAG_CLUSTER:
12551 self.target = self.cfg.GetClusterInfo()
12552 elif self.op.kind == constants.TAG_NODE:
12553 self.target = self.cfg.GetNodeInfo(self.op.name)
12554 elif self.op.kind == constants.TAG_INSTANCE:
12555 self.target = self.cfg.GetInstanceInfo(self.op.name)
12556 elif self.op.kind == constants.TAG_NODEGROUP:
12557 self.target = self.cfg.GetNodeGroup(self.group_uuid)
12559 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
12560 str(self.op.kind), errors.ECODE_INVAL)
12563 class LUTagsGet(TagsLU):
12564 """Returns the tags of a given object.
12569 def ExpandNames(self):
12570 TagsLU.ExpandNames(self)
12572 # Share locks as this is only a read operation
12573 self.share_locks = _ShareAll()
12575 def Exec(self, feedback_fn):
12576 """Returns the tag list.
12579 return list(self.target.GetTags())
12582 class LUTagsSearch(NoHooksLU):
12583 """Searches the tags for a given pattern.
12588 def ExpandNames(self):
12589 self.needed_locks = {}
12591 def CheckPrereq(self):
12592 """Check prerequisites.
12594 This checks the pattern passed for validity by compiling it.
12598 self.re = re.compile(self.op.pattern)
12599 except re.error, err:
12600 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
12601 (self.op.pattern, err), errors.ECODE_INVAL)
12603 def Exec(self, feedback_fn):
12604 """Returns the tag list.
12607 cfg = self.cfg
12608 tgts = [("/cluster", cfg.GetClusterInfo())]
12609 ilist = cfg.GetAllInstancesInfo().values()
12610 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
12611 nlist = cfg.GetAllNodesInfo().values()
12612 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
12613 tgts.extend(("/nodegroup/%s" % n.name, n)
12614 for n in cfg.GetAllNodeGroupsInfo().values())
12615 results = []
12616 for path, target in tgts:
12617 for tag in target.GetTags():
12618 if self.re.search(tag):
12619 results.append((path, tag))
12621 return results
12623 class LUTagsSet(TagsLU):
12624 """Sets a tag on a given object.
12629 def CheckPrereq(self):
12630 """Check prerequisites.
12632 This checks the type and length of the tag name and value.
12635 TagsLU.CheckPrereq(self)
12636 for tag in self.op.tags:
12637 objects.TaggableObject.ValidateTag(tag)
12639 def Exec(self, feedback_fn):
12643 try:
12644 for tag in self.op.tags:
12645 self.target.AddTag(tag)
12646 except errors.TagError, err:
12647 raise errors.OpExecError("Error while setting tag: %s" % str(err))
12648 self.cfg.Update(self.target, feedback_fn)
12651 class LUTagsDel(TagsLU):
12652 """Delete a list of tags from a given object.
12657 def CheckPrereq(self):
12658 """Check prerequisites.
12660 This checks that we have the given tag.
12663 TagsLU.CheckPrereq(self)
12664 for tag in self.op.tags:
12665 objects.TaggableObject.ValidateTag(tag)
12666 del_tags = frozenset(self.op.tags)
12667 cur_tags = self.target.GetTags()
12669 diff_tags = del_tags - cur_tags
12670 if diff_tags:
12671 diff_names = ("'%s'" % i for i in sorted(diff_tags))
12672 raise errors.OpPrereqError("Tag(s) %s not found" %
12673 (utils.CommaJoin(diff_names), ),
12674 errors.ECODE_NOENT)
12676 def Exec(self, feedback_fn):
12677 """Remove the tag from the object.
12680 for tag in self.op.tags:
12681 self.target.RemoveTag(tag)
12682 self.cfg.Update(self.target, feedback_fn)
12685 class LUTestDelay(NoHooksLU):
12686 """Sleep for a specified amount of time.
12688 This LU sleeps on the master and/or nodes for a specified amount of
12689 time.
12694 def ExpandNames(self):
12695 """Expand names and set required locks.
12697 This expands the node list, if any.
12700 self.needed_locks = {}
12701 if self.op.on_nodes:
12702 # _GetWantedNodes can be used here, but is not always appropriate to use
12703 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
12704 # more information.
12705 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
12706 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
12708 def _TestDelay(self):
12709 """Do the actual sleep.
12712 if self.op.on_master:
12713 if not utils.TestDelay(self.op.duration):
12714 raise errors.OpExecError("Error during master delay test")
12715 if self.op.on_nodes:
12716 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
12717 for node, node_result in result.items():
12718 node_result.Raise("Failure during rpc call to node %s" % node)
12720 def Exec(self, feedback_fn):
12721 """Execute the test delay opcode, with the wanted repetitions.
12724 if self.op.repeat == 0:
12727 top_value = self.op.repeat - 1
12728 for i in range(self.op.repeat):
12729 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
12733 class LUTestJqueue(NoHooksLU):
12734 """Utility LU to test some aspects of the job queue.
12739 # Must be lower than default timeout for WaitForJobChange to see whether it
12740 # notices changed jobs
12741 _CLIENT_CONNECT_TIMEOUT = 20.0
12742 _CLIENT_CONFIRM_TIMEOUT = 60.0
12745 def _NotifyUsingSocket(cls, cb, errcls):
12746 """Opens a Unix socket and waits for another program to connect.
12749 @param cb: Callback to send socket name to client
12750 @type errcls: class
12751 @param errcls: Exception class to use for errors
12754 # Using a temporary directory as there's no easy way to create temporary
12755 # sockets without writing a custom loop around tempfile.mktemp and
12756 # socket.bind
12757 tmpdir = tempfile.mkdtemp()
12759 tmpsock = utils.PathJoin(tmpdir, "sock")
12761 logging.debug("Creating temporary socket at %s", tmpsock)
12762 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
12767 # Send details to client
12770 # Wait for client to connect before continuing
12771 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
12772 try:
12773 (conn, _) = sock.accept()
12774 except socket.error, err:
12775 raise errcls("Client didn't connect in time (%s)" % err)
12779 # Remove as soon as client is connected
12780 shutil.rmtree(tmpdir)
12782 # Wait for client to close
12785 # pylint: disable=E1101
12786 # Instance of '_socketobject' has no ... member
12787 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
12789 except socket.error, err:
12790 raise errcls("Client failed to confirm notification (%s)" % err)
12794 def _SendNotification(self, test, arg, sockname):
12795 """Sends a notification to the client.
12798 @param test: Test name
12799 @param arg: Test argument (depends on test)
12800 @type sockname: string
12801 @param sockname: Socket path
12804 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
12806 def _Notify(self, prereq, test, arg):
12807 """Notifies the client of a test.
12810 @param prereq: Whether this is a prereq-phase test
12812 @param test: Test name
12813 @param arg: Test argument (depends on test)
12817 errcls = errors.OpPrereqError
12819 errcls = errors.OpExecError
12821 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
12825 def CheckArguments(self):
12826 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
12827 self.expandnames_calls = 0
12829 def ExpandNames(self):
12830 checkargs_calls = getattr(self, "checkargs_calls", 0)
12831 if checkargs_calls < 1:
12832 raise errors.ProgrammerError("CheckArguments was not called")
12834 self.expandnames_calls += 1
12836 if self.op.notify_waitlock:
12837 self._Notify(True, constants.JQT_EXPANDNAMES, None)
12839 self.LogInfo("Expanding names")
12841 # Get lock on master node (just to get a lock, not for a particular reason)
12842 self.needed_locks = {
12843 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
12846 def Exec(self, feedback_fn):
12847 if self.expandnames_calls < 1:
12848 raise errors.ProgrammerError("ExpandNames was not called")
12850 if self.op.notify_exec:
12851 self._Notify(False, constants.JQT_EXEC, None)
12853 self.LogInfo("Executing")
12855 if self.op.log_messages:
12856 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
12857 for idx, msg in enumerate(self.op.log_messages):
12858 self.LogInfo("Sending log message %s", idx + 1)
12859 feedback_fn(constants.JQT_MSGPREFIX + msg)
12860 # Report how many test messages have been sent
12861 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
12864 raise errors.OpExecError("Opcode failure was requested")
12869 class IAllocator(object):
12870 """IAllocator framework.
12872 An IAllocator instance has four sets of attributes:
12873 - cfg that is needed to query the cluster
12874 - input data (all members of the _KEYS class attribute are required)
12875 - four buffer attributes (in|out_data|text), that represent the
12876 input (to the external script) in text and data structure format,
12877 and the output from it, again in two formats
12878 - the result variables from the script (success, info, nodes) for
12882 # pylint: disable=R0902
12883 # lots of instance attributes
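# Typical usage within this module (sketch, mirroring LUGroupEvacuate.Exec):
#   ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
#                    instances=instances, target_groups=self.target_uuids)
#   ial.Run(self.op.iallocator)
#   if not ial.success:
#     raise errors.OpPrereqError(...)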
12885 def __init__(self, cfg, rpc, mode, **kwargs):
12888 # init buffer variables
12889 self.in_text = self.out_text = self.in_data = self.out_data = None
12890 # init all input fields so that pylint is happy
12892 self.memory = self.disks = self.disk_template = None
12893 self.os = self.tags = self.nics = self.vcpus = None
12894 self.hypervisor = None
12895 self.relocate_from = None
12897 self.instances = None
12898 self.evac_mode = None
12899 self.target_groups = []
12901 self.required_nodes = None
12902 # init result fields
12903 self.success = self.info = self.result = None
12905 try:
12906 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
12907 except KeyError:
12908 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
12909 " IAllocator" % self.mode)
12911 keyset = [n for (n, _) in keydata]
12914 if key not in keyset:
12915 raise errors.ProgrammerError("Invalid input parameter '%s' to"
12916 " IAllocator" % key)
12917 setattr(self, key, kwargs[key])
12920 if key not in kwargs:
12921 raise errors.ProgrammerError("Missing input parameter '%s' to"
12922 " IAllocator" % key)
12923 self._BuildInputData(compat.partial(fn, self), keydata)
12925 def _ComputeClusterData(self):
12926 """Compute the generic allocator input data.
12928 This is the data that is independent of the actual operation.
12932 cluster_info = cfg.GetClusterInfo()
12935 "version": constants.IALLOCATOR_VERSION,
12936 "cluster_name": cfg.GetClusterName(),
12937 "cluster_tags": list(cluster_info.GetTags()),
12938 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
12939 # we don't have job IDs
12941 ninfo = cfg.GetAllNodesInfo()
12942 iinfo = cfg.GetAllInstancesInfo().values()
12943 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
12946 node_list = [n.name for n in ninfo.values() if n.vm_capable]
12948 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
12949 hypervisor_name = self.hypervisor
12950 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
12951 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
12953 hypervisor_name = cluster_info.enabled_hypervisors[0]
12955 node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
12958 self.rpc.call_all_instances_info(node_list,
12959 cluster_info.enabled_hypervisors)
12961 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
12963 config_ndata = self._ComputeBasicNodeData(ninfo)
12964 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
12965 i_list, config_ndata)
12966 assert len(data["nodes"]) == len(ninfo), \
12967 "Incomplete node data computed"
12969 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
12971 self.in_data = data
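# Editor's note: for orientation only, self.in_data as assembled above has
# roughly this top-level shape (values illustrative; the "request" key is
# added later by _BuildInputData):
#
#   {"version": constants.IALLOCATOR_VERSION,
#    "cluster_name": "cluster.example.com",
#    "cluster_tags": [],
#    "enabled_hypervisors": ["xen-pvm"],
#    "nodegroups": {...},   # from _ComputeNodeGroupData
#    "nodes": {...},        # from _ComputeBasicNodeData/_ComputeDynamicNodeData
#    "instances": {...}}    # from _ComputeInstanceData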
12974 def _ComputeNodeGroupData(cfg):
12975 """Compute node groups data.
12978 ng = dict((guuid, {
12979 "name": gdata.name,
12980 "alloc_policy": gdata.alloc_policy,
12981 })
12982 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
12984 return ng
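# Editor's note: the resulting mapping is keyed by group UUID, e.g.
# (illustrative): {"dcf27b29-...": {"name": "default",
#                                   "alloc_policy": "preferred"}}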
12987 def _ComputeBasicNodeData(node_cfg):
12988 """Compute static node data from the cluster configuration.
12991 @returns: a dict mapping each node name to its config-derived node dict
12994 # fill in static (config-based) values
12995 node_results = dict((ninfo.name, {
12996 "tags": list(ninfo.GetTags()),
12997 "primary_ip": ninfo.primary_ip,
12998 "secondary_ip": ninfo.secondary_ip,
12999 "offline": ninfo.offline,
13000 "drained": ninfo.drained,
13001 "master_candidate": ninfo.master_candidate,
13002 "group": ninfo.group,
13003 "master_capable": ninfo.master_capable,
13004 "vm_capable": ninfo.vm_capable,
13005 })
13006 for ninfo in node_cfg.values())
13008 return node_results
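# Editor's note: each returned entry holds only configuration-derived fields,
# e.g. (illustrative):
#   "node1.example.com": {"tags": [], "primary_ip": "192.0.2.1",
#                         "secondary_ip": "198.51.100.1", "offline": False,
#                         "drained": False, "master_candidate": True,
#                         "group": "<group UUID>", "master_capable": True,
#                         "vm_capable": True}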
13011 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
13012 node_results):
13013 """Compute dynamic node data, extending the config-derived node dicts.
13015 @param node_results: the basic node structures as filled from the config
13018 # make a copy of the current dict
13019 node_results = dict(node_results)
13020 for nname, nresult in node_data.items():
13021 assert nname in node_results, "Missing basic data for node %s" % nname
13022 ninfo = node_cfg[nname]
13024 if not (ninfo.offline or ninfo.drained):
13025 nresult.Raise("Can't get data for node %s" % nname)
13026 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
13027 nname)
13028 remote_info = nresult.payload
13030 for attr in ["memory_total", "memory_free", "memory_dom0",
13031 "vg_size", "vg_free", "cpu_total"]:
13032 if attr not in remote_info:
13033 raise errors.OpExecError("Node '%s' didn't return attribute"
13034 " '%s'" % (nname, attr))
13035 if not isinstance(remote_info[attr], int):
13036 raise errors.OpExecError("Node '%s' returned invalid value"
13037 " for '%s': %s" %
13038 (nname, attr, remote_info[attr]))
13039 # compute memory used by primary instances
13040 i_p_mem = i_p_up_mem = 0
13041 for iinfo, beinfo in i_list:
13042 if iinfo.primary_node == nname:
13043 i_p_mem += beinfo[constants.BE_MEMORY]
13044 if iinfo.name not in node_iinfo[nname].payload:
13045 i_used_mem = 0
13046 else:
13047 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
13048 i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
13049 remote_info["memory_free"] -= max(0, i_mem_diff)
13051 if iinfo.admin_up:
13052 i_p_up_mem += beinfo[constants.BE_MEMORY]
13054 # compute memory used by instances
13055 pnr_dyn = {
13056 "total_memory": remote_info["memory_total"],
13057 "reserved_memory": remote_info["memory_dom0"],
13058 "free_memory": remote_info["memory_free"],
13059 "total_disk": remote_info["vg_size"],
13060 "free_disk": remote_info["vg_free"],
13061 "total_cpus": remote_info["cpu_total"],
13062 "i_pri_memory": i_p_mem,
13063 "i_pri_up_memory": i_p_up_mem,
13064 }
13065 pnr_dyn.update(node_results[nname])
13066 node_results[nname] = pnr_dyn
13068 return node_results
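# Editor's note: for online, vm_capable nodes the entries above are extended
# with the runtime fields computed here, roughly (values illustrative):
#   {"total_memory": 16384, "reserved_memory": 1024, "free_memory": 8192,
#    "total_disk": 512000, "free_disk": 256000, "total_cpus": 8,
#    "i_pri_memory": 4096, "i_pri_up_memory": 2048, ...}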
13071 def _ComputeInstanceData(cluster_info, i_list):
13072 """Compute global instance data.
13075 instance_data = {}
13076 for iinfo, beinfo in i_list:
13077 nic_data = []
13078 for nic in iinfo.nics:
13079 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
13080 nic_dict = {
13081 "mac": nic.mac,
13082 "ip": nic.ip,
13083 "mode": filled_params[constants.NIC_MODE],
13084 "link": filled_params[constants.NIC_LINK],
13085 }
13086 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
13087 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
13088 nic_data.append(nic_dict)
13089 pir = {
13090 "tags": list(iinfo.GetTags()),
13091 "admin_up": iinfo.admin_up,
13092 "vcpus": beinfo[constants.BE_VCPUS],
13093 "memory": beinfo[constants.BE_MEMORY],
13094 "os": iinfo.os,
13095 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
13096 "nics": nic_data,
13097 "disks": [{constants.IDISK_SIZE: dsk.size,
13098 constants.IDISK_MODE: dsk.mode}
13099 for dsk in iinfo.disks],
13100 "disk_template": iinfo.disk_template,
13101 "hypervisor": iinfo.hypervisor,
13102 }
13103 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
13104 pir["disks"])
13105 instance_data[iinfo.name] = pir
13107 return instance_data
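# Editor's note: one per-instance entry produced above looks roughly like this
# (values illustrative; disk_space_total includes template-dependent overhead):
#   "inst1.example.com": {"tags": [], "admin_up": True, "vcpus": 2,
#                         "memory": 1024, "nics": [...],
#                         "nodes": ["node1.example.com", "node2.example.com"],
#                         "disks": [{"size": 10240, "mode": "rw"}],
#                         "disk_template": "drbd", "hypervisor": "xen-pvm",
#                         "disk_space_total": 10368}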
13109 def _AddNewInstance(self):
13110 """Add new instance data to allocator structure.
13112 This in combination with _ComputeClusterData will create the
13113 correct structure needed as input for the allocator.
13115 The checks for the completeness of the opcode must have already been
13116 done.
13119 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
13121 if self.disk_template in constants.DTS_INT_MIRROR:
13122 self.required_nodes = 2
13123 else:
13124 self.required_nodes = 1
13126 request = {
13127 "name": self.name,
13128 "disk_template": self.disk_template,
13129 "tags": self.tags,
13130 "os": self.os,
13131 "vcpus": self.vcpus,
13132 "memory": self.memory,
13133 "disks": self.disks,
13134 "disk_space_total": disk_space,
13135 "nics": self.nics,
13136 "required_nodes": self.required_nodes,
13137 "hypervisor": self.hypervisor,
13142 def _AddRelocateInstance(self):
13143 """Add relocate instance data to allocator structure.
13145 This in combination with _ComputeClusterData will create the
13146 correct structure needed as input for the allocator.
13148 The checks for the completeness of the opcode must have already been
13149 done.
13152 instance = self.cfg.GetInstanceInfo(self.name)
13153 if instance is None:
13154 raise errors.ProgrammerError("Unknown instance '%s' passed to"
13155 " IAllocator" % self.name)
13157 if instance.disk_template not in constants.DTS_MIRRORED:
13158 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
13159 errors.ECODE_INVAL)
13161 if instance.disk_template in constants.DTS_INT_MIRROR and \
13162 len(instance.secondary_nodes) != 1:
13163 raise errors.OpPrereqError("Instance does not have exactly one secondary node",
13164 errors.ECODE_STATE)
13166 self.required_nodes = 1
13167 disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
13168 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
13170 request = {
13171 "name": self.name,
13172 "disk_space_total": disk_space,
13173 "required_nodes": self.required_nodes,
13174 "relocate_from": self.relocate_from,
13178 def _AddNodeEvacuate(self):
13179 """Get data for node-evacuate requests.
13182 return {
13183 "instances": self.instances,
13184 "evac_mode": self.evac_mode,
13187 def _AddChangeGroup(self):
13188 """Get data for change-group requests.
13191 return {
13192 "instances": self.instances,
13193 "target_groups": self.target_groups,
13196 def _BuildInputData(self, fn, keydata):
13197 """Build input data structures.
13200 self._ComputeClusterData()
13202 request = fn()
13203 request["type"] = self.mode
13204 for keyname, keytype in keydata:
13205 if keyname not in request:
13206 raise errors.ProgrammerError("Request parameter %s is missing" %
13207 keyname)
13208 val = request[keyname]
13209 if not keytype(val):
13210 raise errors.ProgrammerError("Request parameter %s doesn't pass"
13211 " validation, value %s, expected"
13212 " type %s" % (keyname, val, keytype))
13213 self.in_data["request"] = request
13215 self.in_text = serializer.Dump(self.in_data)
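# Editor's note: after _BuildInputData, self.in_text contains the serialized
# JSON document passed to the external script; for a relocation it would look
# roughly like this (heavily abbreviated, values illustrative):
#   {"version": 2, "cluster_name": "...", "nodegroups": {...}, "nodes": {...},
#    "instances": {...},
#    "request": {"type": "relocate", "name": "inst1.example.com",
#                "required_nodes": 1, "disk_space_total": 10368,
#                "relocate_from": ["node2.example.com"]}}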
13217 _STRING_LIST = ht.TListOf(ht.TString)
13218 _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
13219 # pylint: disable=E1101
13220 # Class '...' has no 'OP_ID' member
13221 "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
13222 opcodes.OpInstanceMigrate.OP_ID,
13223 opcodes.OpInstanceReplaceDisks.OP_ID])
13224 })))
13226 _NEVAC_MOVED = \
13227 ht.TListOf(ht.TAnd(ht.TIsLength(3),
13228 ht.TItems([ht.TNonEmptyString,
13229 ht.TNonEmptyString,
13230 ht.TListOf(ht.TNonEmptyString),
13231 ])))
13232 _NEVAC_FAILED = \
13233 ht.TListOf(ht.TAnd(ht.TIsLength(2),
13234 ht.TItems([ht.TNonEmptyString,
13235 ht.TMaybeString,
13236 ])))
13237 _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
13238 ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
13240 _MODE_DATA = {
13241 constants.IALLOCATOR_MODE_ALLOC:
13242 (_AddNewInstance,
13243 [
13244 ("name", ht.TString),
13245 ("memory", ht.TInt),
13246 ("disks", ht.TListOf(ht.TDict)),
13247 ("disk_template", ht.TString),
13248 ("os", ht.TString),
13249 ("tags", _STRING_LIST),
13250 ("nics", ht.TListOf(ht.TDict)),
13251 ("vcpus", ht.TInt),
13252 ("hypervisor", ht.TString),
13254 constants.IALLOCATOR_MODE_RELOC:
13255 (_AddRelocateInstance,
13256 [("name", ht.TString), ("relocate_from", _STRING_LIST)],
13258 constants.IALLOCATOR_MODE_NODE_EVAC:
13259 (_AddNodeEvacuate, [
13260 ("instances", _STRING_LIST),
13261 ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
13263 constants.IALLOCATOR_MODE_CHG_GROUP:
13264 (_AddChangeGroup, [
13265 ("instances", _STRING_LIST),
13266 ("target_groups", _STRING_LIST),
13270 def Run(self, name, validate=True, call_fn=None):
13271 """Run an instance allocator and return the results.
13274 if call_fn is None:
13275 call_fn = self.rpc.call_iallocator_runner
13277 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
13278 result.Raise("Failure while running the iallocator script")
13280 self.out_text = result.payload
13282 self._ValidateResult()
13284 def _ValidateResult(self):
13285 """Process the allocator results.
13287 This will process and, if successful, save the result in
13288 self.out_data and the other result attributes.
13291 try:
13292 rdict = serializer.Load(self.out_text)
13293 except Exception, err:
13294 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
13296 if not isinstance(rdict, dict):
13297 raise errors.OpExecError("Can't parse iallocator results: not a dict")
13299 # TODO: remove backwards compatibility in later versions
13300 if "nodes" in rdict and "result" not in rdict:
13301 rdict["result"] = rdict["nodes"]
13302 del rdict["nodes"]
13304 for key in "success", "info", "result":
13305 if key not in rdict:
13306 raise errors.OpExecError("Can't parse iallocator results:"
13307 " missing key '%s'" % key)
13308 setattr(self, key, rdict[key])
13310 if not self._result_check(self.result):
13311 raise errors.OpExecError("Iallocator returned invalid result,"
13312 " expected %s, got %s" %
13313 (self._result_check, self.result),
13314 errors.ECODE_INVAL)
13316 if self.mode == constants.IALLOCATOR_MODE_RELOC:
13317 assert self.relocate_from is not None
13318 assert self.required_nodes == 1
13320 node2group = dict((name, ndata["group"])
13321 for (name, ndata) in self.in_data["nodes"].items())
13323 fn = compat.partial(self._NodesToGroups, node2group,
13324 self.in_data["nodegroups"])
13326 instance = self.cfg.GetInstanceInfo(self.name)
13327 request_groups = fn(self.relocate_from + [instance.primary_node])
13328 result_groups = fn(rdict["result"] + [instance.primary_node])
13330 if self.success and not set(result_groups).issubset(request_groups):
13331 raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
13332 " differ from original groups (%s)" %
13333 (utils.CommaJoin(result_groups),
13334 utils.CommaJoin(request_groups)))
13336 elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
13337 assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
13339 self.out_data = rdict
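# Editor's note: a minimal reply that passes the checks above for an
# allocation request (illustrative):
#   {"success": true, "info": "allocation successful",
#    "result": ["node1.example.com", "node2.example.com"]}
# With "success": false, callers typically report "info" as the error text.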
13342 def _NodesToGroups(node2group, groups, nodes):
13343 """Returns a list of unique group names for a list of nodes.
13345 @type node2group: dict
13346 @param node2group: Map from node name to group UUID
13348 @param groups: Group information
13350 @param nodes: Node names
13353 result = set()
13355 for node in nodes:
13356 try:
13357 group_uuid = node2group[node]
13358 except KeyError:
13359 # Ignore unknown node
13360 pass
13361 else:
13362 try:
13363 group = groups[group_uuid]
13364 except KeyError:
13365 # Can't find group, let's use UUID
13366 group_name = group_uuid
13367 else:
13368 group_name = group["name"]
13370 result.add(group_name)
13372 return sorted(result)
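# Editor's note: an illustrative call of the helper above:
#   IAllocator._NodesToGroups({"node1": "uuid-a", "node2": "uuid-b"},
#                             {"uuid-a": {"name": "default"}},
#                             ["node1", "node2", "node3"])
#   => ["default", "uuid-b"]  (unknown "node3" is skipped; "uuid-b" has no
#      group entry, so the UUID itself is used as the group name)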
13375 class LUTestAllocator(NoHooksLU):
13376 """Run allocator tests.
13378 This LU runs the allocator tests
13381 def CheckPrereq(self):
13382 """Check prerequisites.
13384 This checks the opcode parameters depending on the direction and mode of the test.
13387 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
13388 for attr in ["memory", "disks", "disk_template",
13389 "os", "tags", "nics", "vcpus"]:
13390 if not hasattr(self.op, attr):
13391 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
13392 attr, errors.ECODE_INVAL)
13393 iname = self.cfg.ExpandInstanceName(self.op.name)
13394 if iname is not None:
13395 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
13396 iname, errors.ECODE_EXISTS)
13397 if not isinstance(self.op.nics, list):
13398 raise errors.OpPrereqError("Invalid parameter 'nics'",
13399 errors.ECODE_INVAL)
13400 if not isinstance(self.op.disks, list):
13401 raise errors.OpPrereqError("Invalid parameter 'disks'",
13402 errors.ECODE_INVAL)
13403 for row in self.op.disks:
13404 if (not isinstance(row, dict) or
13405 constants.IDISK_SIZE not in row or
13406 not isinstance(row[constants.IDISK_SIZE], int) or
13407 constants.IDISK_MODE not in row or
13408 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
13409 raise errors.OpPrereqError("Invalid contents of the 'disks'"
13410 " parameter", errors.ECODE_INVAL)
13411 if self.op.hypervisor is None:
13412 self.op.hypervisor = self.cfg.GetHypervisorType()
13413 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
13414 fname = _ExpandInstanceName(self.cfg, self.op.name)
13415 self.op.name = fname
13416 self.relocate_from = \
13417 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
13418 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
13419 constants.IALLOCATOR_MODE_NODE_EVAC):
13420 if not self.op.instances:
13421 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
13422 self.op.instances = _GetWantedInstances(self, self.op.instances)
13423 else:
13424 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
13425 self.op.mode, errors.ECODE_INVAL)
13427 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
13428 if self.op.allocator is None:
13429 raise errors.OpPrereqError("Missing allocator name",
13430 errors.ECODE_INVAL)
13431 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
13432 raise errors.OpPrereqError("Wrong allocator test '%s'" %
13433 self.op.direction, errors.ECODE_INVAL)
13435 def Exec(self, feedback_fn):
13436 """Run the allocator test.
13439 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
13440 ial = IAllocator(self.cfg, self.rpc,
13443 memory=self.op.memory,
13444 disks=self.op.disks,
13445 disk_template=self.op.disk_template,
13449 vcpus=self.op.vcpus,
13450 hypervisor=self.op.hypervisor,
13452 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
13453 ial = IAllocator(self.cfg, self.rpc,
13456 relocate_from=list(self.relocate_from),
13458 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
13459 ial = IAllocator(self.cfg, self.rpc,
13461 instances=self.op.instances,
13462 target_groups=self.op.target_groups)
13463 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
13464 ial = IAllocator(self.cfg, self.rpc,
13466 instances=self.op.instances,
13467 evac_mode=self.op.evac_mode)
13468 else:
13469 raise errors.ProgrammerError("Unhandled mode '%s' in"
13470 " LUTestAllocator.Exec" % self.op.mode)
13472 if self.op.direction == constants.IALLOCATOR_DIR_IN:
13473 result = ial.in_text
13474 else:
13475 ial.Run(self.op.allocator, validate=False)
13476 result = ial.out_text
13477 return result
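# Editor's note: this LU backs the "gnt-debug allocator" command; with
# direction "in" it returns the generated allocator input text, with "out" it
# returns the raw output of the named allocator script.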
13480 #: Query type implementations
13481 _QUERY_IMPL = {
13482 constants.QR_INSTANCE: _InstanceQuery,
13483 constants.QR_NODE: _NodeQuery,
13484 constants.QR_GROUP: _GroupQuery,
13485 constants.QR_OS: _OsQuery,
13486 }
13488 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
13491 def _GetQueryImplementation(name):
13492 """Returns the implementation for a query type.
13494 @param name: Query type, must be one of L{constants.QR_VIA_OP}
13497 try:
13498 return _QUERY_IMPL[name]
13499 except KeyError:
13500 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
13501 errors.ECODE_INVAL)
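# Editor's note: illustrative use of the lookup above, e.g. from a query
# opcode handler:
#   impl = _GetQueryImplementation(constants.QR_INSTANCE)   # -> _InstanceQuery
#   # an unknown resource name raises OpPrereqError with errors.ECODE_INVAL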