# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay to many lines in this module

import copy
import itertools
import logging
import re

import OpenSSL

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import opcodes

import ganeti.masterd.instance # pylint: disable=W0611
67 """Data container for LU results with jobs.
69 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
70 by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
71 contained in the C{jobs} attribute and include the job IDs in the opcode
75 def __init__(self, jobs, **kwargs):
76 """Initializes this class.
78 Additional return values can be specified as keyword arguments.
80 @type jobs: list of lists of L{opcode.OpCode}
81 @param jobs: A list of lists of opcode objects


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.glm = context.glm
    # readability alias
    self.owned_locks = context.glm.list_owned
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    # logging
    self.Log = processor.Log # pylint: disable=C0103
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
    self.LogStep = processor.LogStep # pylint: disable=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    self.CheckArguments()

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.

    """

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    #
    self.needed_locks = {} # Exclusive LUs don't need locks.

    raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level.

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    @rtype: dict
    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
      will extend the environment with additional variables. If no environment
      should be defined, an empty dictionary should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.

    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. No nodes should be returned as an
      empty list (and not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the unused-argument and
    # "could be a function" warnings
    # pylint: disable=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]
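
# Illustrative sketch (not part of the original module): an LU that locks
# instances first and their nodes afterwards typically wires the helper above
# into its locking methods like this, assuming it selected the recalculation
# mode in ExpandNames:
#
#   def ExpandNames(self):
#     self._ExpandAndLockInstance()
#     self.needed_locks[locking.LEVEL_NODE] = []
#     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
#
#   def DeclareLocks(self, level):
#     if level == locking.LEVEL_NODE:
#       self._LockInstancesNodes()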


class NoHooksLU(LogicalUnit): # pylint: disable=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.

    This just raises an error.

    """
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")

  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.

    """
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
419 """Tasklet base class.
421 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
422 they can mix legacy code with tasklets. Locking needs to be done in the LU,
423 tasklets know nothing about locks.
425 Subclasses must follow these rules:
426 - Implement CheckPrereq
430 def __init__(self, lu):
437 def CheckPrereq(self):
438 """Check prerequisites for this tasklets.
440 This method should check whether the prerequisites for the execution of
441 this tasklet are fulfilled. It can do internode communication, but it
442 should be idempotent - no cluster or system changes are allowed.
444 The method should raise errors.OpPrereqError in case something is not
445 fulfilled. Its return value is ignored.
447 This method should also update all parameters to their canonical form if it
448 hasn't been done before.
453 def Exec(self, feedback_fn):
454 """Execute the tasklet.
456 This method should implement the actual work. It should raise
457 errors.OpExecError for failures that are somewhat dealt with in code, or
461 raise NotImplementedError
465 """Base for query utility classes.
468 #: Attribute holding field definitions
471 def __init__(self, filter_, fields, use_locking):
472 """Initializes this class.
475 self.use_locking = use_locking
477 self.query = query.Query(self.FIELDS, fields, filter_=filter_,
479 self.requested_data = self.query.RequestedData()
480 self.names = self.query.RequestedNames()
482 # Sort only if no names were requested
483 self.sort_by_name = not self.names
485 self.do_locking = None
488 def _GetNames(self, lu, all_names, lock_level):
489 """Helper function to determine names asked for in the query.
493 names = lu.owned_locks(lock_level)
497 if self.wanted == locking.ALL_SET:
498 assert not self.names
499 # caller didn't specify names, so ordering is not important
500 return utils.NiceSort(names)
502 # caller specified names and we must keep the same order
504 assert not self.do_locking or lu.glm.is_owned(lock_level)
506 missing = set(self.wanted).difference(names)
508 raise errors.OpExecError("Some items were removed before retrieving"
509 " their data: %s" % missing)
511 # Return expanded names
514 def ExpandNames(self, lu):
515 """Expand names for this query.
517 See L{LogicalUnit.ExpandNames}.
520 raise NotImplementedError()
522 def DeclareLocks(self, lu, level):
523 """Declare locks for this query.
525 See L{LogicalUnit.DeclareLocks}.
528 raise NotImplementedError()
530 def _GetQueryData(self, lu):
531 """Collects all data for this query.
533 @return: Query data object
536 raise NotImplementedError()
538 def NewStyleQuery(self, lu):
539 """Collect data and execute query.
542 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
543 sort_by_name=self.sort_by_name)
545 def OldStyleQuery(self, lu):
546 """Collect data and execute query.
549 return self.query.OldStyleQuery(self._GetQueryData(lu),
550 sort_by_name=self.sort_by_name)
554 """Returns a dict declaring all lock levels shared.
557 return dict.fromkeys(locking.LEVELS, 1)
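
# Illustrative usage (as done by LUClusterVerifyGroup.ExpandNames further
# below): a read-mostly LU declares every lock level shared with
#
#   self.share_locks = _ShareAll()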


def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
  """Checks if the owned node groups are still correct for an instance.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type instance_name: string
  @param instance_name: Instance name
  @type owned_groups: set or frozenset
  @param owned_groups: List of currently owned node groups

  """
  inst_groups = cfg.GetInstanceNodeGroups(instance_name)

  if not owned_groups.issuperset(inst_groups):
    raise errors.OpPrereqError("Instance %s's node groups changed since"
                               " locks were acquired, current groups are"
                               " '%s', owning groups '%s'; retry the"
                               " operation" %
                               (instance_name,
                                utils.CommaJoin(inst_groups),
                                utils.CommaJoin(owned_groups)),
                               errors.ECODE_STATE)

  return inst_groups


def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
  """Checks if the instances in a node group are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type group_uuid: string
  @param group_uuid: Node group UUID
  @type owned_instances: set or frozenset
  @param owned_instances: List of currently owned instances

  """
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
  if owned_instances != wanted_instances:
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
                               " locks were acquired, wanted '%s', have '%s';"
                               " retry the operation" %
                               (group_uuid,
                                utils.CommaJoin(wanted_instances),
                                utils.CommaJoin(owned_instances)),
                               errors.ECODE_STATE)

  return wanted_instances


def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise

  """
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy
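
# Worked example (illustrative, hypothetical keys): with use_default=True,
#
#   _GetUpdatedParams({"mem": 512, "vcpus": 2},
#                     {"mem": constants.VALUE_DEFAULT, "boot": "cdrom"})
#
# deletes "mem" (reset to its default), keeps "vcpus" untouched and adds
# "boot", returning {"vcpus": 2, "boot": "cdrom"}.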


def _ReleaseLocks(lu, level, names=None, keep=None):
  """Releases locks owned by an LU.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf the locks were acquired
  @type level: member of ganeti.locking.LEVELS
  @param level: Lock level
  @type names: list or None
  @param names: Names of locks to release
  @type keep: list or None
  @param keep: Names of locks to retain

  """
  assert not (keep is not None and names is not None), \
    "Only one of the 'names' and the 'keep' parameters can be given"

  if names is not None:
    should_release = names.__contains__
  elif keep:
    should_release = lambda name: name not in keep
  else:
    should_release = None

  if should_release:
    retain = []
    release = []

    # Determine which locks to release
    for name in lu.owned_locks(level):
      if should_release(name):
        release.append(name)
      else:
        retain.append(name)

    assert len(lu.owned_locks(level)) == (len(retain) + len(release))

    # Release just some locks
    lu.glm.release(level, names=release)

    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
  else:
    # Release everything
    lu.glm.release(level)

    assert not lu.glm.is_owned(level), "No locks should be owned"
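
# Illustrative call (hypothetical context): once an operation has been
# narrowed down to a single node, an LU can drop every other node lock it
# still holds:
#
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.node_name])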


def _MapInstanceDisksToNodes(instances):
  """Creates a map from (node, volume) to instance name.

  @type instances: list of L{objects.Instance}
  @rtype: dict; tuple of (node name, volume name) as key, instance name as value

  """
  return dict(((node, vol), inst.name)
              for inst in instances
              for (node, vols) in inst.MapLVsByNode().items()
              for vol in vols)
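
# Worked example (illustrative): if instance "inst1" reports its LVs as
# {"node1": ["xenvg/disk0"]}, the resulting map is
# {("node1", "xenvg/disk0"): "inst1"}.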


def _RunPostHook(lu, node_name):
  """Runs the post-hook for an opcode on a single node.

  """
  hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
  try:
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
  except:
    # pylint: disable=W0702
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = static
  f.Extend(dynamic)
  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceDown(lu, instance, reason):
  """Ensure that an instance is not running."""
  if instance.admin_up:
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
                               (instance.name, reason), errors.ECODE_STATE)

  pnode = instance.primary_node
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
              prereq=True, ecode=errors.ECODE_ENVIRON)

  if instance.name in ins_l.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, reason), errors.ECODE_STATE)


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instances."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name, tags):
  """Builds instance related env variables for hooks.

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @type memory: string
  @param memory: the memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @type tags: list
  @param tags: list of instance tags as strings
  @rtype: dict
  @return: the hook environment for this instance

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"

  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  if not tags:
    tags = []

  env["INSTANCE_TAGS"] = " ".join(tags)

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    "name": instance.name,
    "primary_node": instance.primary_node,
    "secondary_nodes": instance.secondary_nodes,
    "os_type": instance.os,
    "status": instance.admin_up,
    "memory": bep[constants.BE_MEMORY],
    "vcpus": bep[constants.BE_VCPUS],
    "nics": _NICListToTuple(lu, instance.nics),
    "disk_template": instance.disk_template,
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
    "bep": bep,
    "hvp": hvp,
    "hypervisor_name": instance.hypervisor,
    "tags": instance.tags,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max with one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should
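
# Worked example (illustrative): with candidate_pool_size = 10 and
# GetMasterCandidateStats() reporting mc_now = 3, mc_should = 3, the node
# being added bumps mc_should to min(3 + 1, 10) = 4; since 3 < 4, the new
# node should promote itself.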


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  variant = objects.OS.GetVariant(name)
  if not os_obj.supported_variants:
    if variant:
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                                 " passed)" % (os_obj.name, variant),
                                 errors.ECODE_INVAL)
    return
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
                               errors.ECODE_INVAL)
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found;"
                                 " please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator", errors.ECODE_INVAL)


def _GetDefaultIAllocator(cfg, iallocator):
  """Decides on which iallocator to use.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration object
  @type iallocator: string or None
  @param iallocator: Iallocator specified in opcode
  @rtype: string
  @return: Iallocator name

  """
  if not iallocator:
    # Use default iallocator
    iallocator = cfg.GetDefaultIAllocator()

  if not iallocator:
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
                               " opcode nor as a cluster-wide default",
                               errors.ECODE_INVAL)

  return iallocator


class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)

    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master)

    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    return master


def _VerifyCertificate(filename):
  """Verifies a certificate for L{LUClusterVerifyConfig}.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable=W0703
    return (LUClusterVerifyConfig.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)


def _GetAllHypervisorParameters(cluster, instances):
  """Compute the set of all hypervisor parameters.

  @type cluster: L{objects.Cluster}
  @param cluster: the cluster object
  @type instances: list of L{objects.Instance}
  @param instances: additional instances from which to obtain parameters
  @rtype: list of (origin, hypervisor, parameters)
  @return: a list with all parameters found, indicating the hypervisor they
           apply to, and the origin (can be "cluster", "os X", or "instance Y")

  """
  hvp_data = []

  for hv_name in cluster.enabled_hypervisors:
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))

  for os_name, os_hvp in cluster.os_hvp.items():
    for hv_name, hv_params in os_hvp.items():
      if hv_params:
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
        hvp_data.append(("os %s" % os_name, hv_name, full_params))

  # TODO: collapse identical parameter values in a single one
  for instance in instances:
    if instance.hvparams:
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
                       cluster.FillHV(instance)))

  return hvp_data


class _VerifyErrors(object):
  """Mix-in for cluster/group verify LUs.

  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
  self.op and self._feedback_fn to be available.)

  """

  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
  ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
  ECLUSTERDANGLINGNODES = (TNODE, "ECLUSTERDANGLINGNODES")
  ECLUSTERDANGLINGINST = (TNODE, "ECLUSTERDANGLINGINST")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEOS = (TNODE, "ENODEOS")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")
  ENODEOOBPATH = (TNODE, "ENODEOOBPATH")

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item is not None:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101

  def _ErrorIf(self, cond, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = (bool(cond)
            or self.op.debug_simulate_errors) # pylint: disable=E1101
    if cond:
      self._Error(*args, **kwargs)
    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond


class LUClusterVerify(NoHooksLU):
  """Submits all jobs necessary to verify the cluster.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    jobs = []

    if self.op.group_name:
      groups = [self.op.group_name]
      depends_fn = lambda: None
    else:
      groups = self.cfg.GetNodeGroupList()

      # Verify global configuration
      jobs.append([opcodes.OpClusterVerifyConfig()])

      # Always depend on global verification
      depends_fn = lambda: [(-len(jobs), [])]
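      # Explanatory note (assuming the relative job dependency semantics of
      # the job queue): -len(jobs) is a relative job ID pointing at the
      # OpClusterVerifyConfig job appended above, and the empty list stands
      # for the default set of required finalized job statuses.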

    jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
                                              depends=depends_fn())]
                for group in groups)

    # Fix up all parameters
    for op in itertools.chain(*jobs): # pylint: disable=W0142
      op.debug_simulate_errors = self.op.debug_simulate_errors
      op.verbose = self.op.verbose
      op.error_codes = self.op.error_codes
      try:
        op.skip_checks = self.op.skip_checks
      except AttributeError:
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)

    return ResultWithJobs(jobs)


class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
  """Verifies the cluster config.

  """
  REQ_BGL = True

  def _VerifyHVP(self, hvp_data):
    """Verifies locally the syntax of the hypervisor parameters.

    """
    for item, hv_name, hv_params in hvp_data:
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
             (hv_name, item))
      try:
        hv_class = hypervisor.GetHypervisor(hv_name)
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
        hv_class.CheckParameterSyntax(hv_params)
      except errors.GenericError, err:
        self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))

  def ExpandNames(self):
    # Information can be safely retrieved as the BGL is acquired in exclusive
    # mode
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    """
    self.bad = False
    self._feedback_fn = feedback_fn

    feedback_fn("* Verifying cluster config")

    for msg in self.cfg.VerifyConfig():
      self._ErrorIf(True, self.ECLUSTERCFG, None, msg)

    feedback_fn("* Verifying cluster certificate files")

    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      self._ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)

    feedback_fn("* Verifying hypervisor parameters")

    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
                                                self.all_inst_info.values()))
1606 feedback_fn("* Verifying all nodes belong to an existing group")
1608 # We do this verification here because, should this bogus circumstance
1609 # occur, it would never be caught by VerifyGroup, which only acts on
1610 # nodes/instances reachable from existing node groups.
1612 dangling_nodes = set(node.name for node in self.all_node_info.values()
1613 if node.group not in self.all_group_info)
1615 dangling_instances = {}
1616 no_node_instances = []
1618 for inst in self.all_inst_info.values():
1619 if inst.primary_node in dangling_nodes:
1620 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1621 elif inst.primary_node not in self.all_node_info:
1622 no_node_instances.append(inst.name)
1627 utils.CommaJoin(dangling_instances.get(node.name,
1629 for node in dangling_nodes]
1631 self._ErrorIf(bool(dangling_nodes), self.ECLUSTERDANGLINGNODES, None,
1632 "the following nodes (and their instances) belong to a non"
1633 " existing group: %s", utils.CommaJoin(pretty_dangling))
1635 self._ErrorIf(bool(no_node_instances), self.ECLUSTERDANGLINGINST, None,
1636 "the following instances have a non-existing primary-node:"
1637 " %s", utils.CommaJoin(no_node_instances))


class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
  """Verifies the status of a node group.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  _HOOKS_INDENT_RE = re.compile("^", re.M)

  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @type name: string
    @ivar name: the node name to which this object refers
    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dictionary of {primary-node: list of instances} for all
        instances for which this node is secondary (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)
    @type os_fail: boolean
    @ivar os_fail: whether the RPC call didn't return valid OS data
    @type oslist: list
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
    @type vm_capable: boolean
    @ivar vm_capable: whether the node can host instances

    """
    def __init__(self, offline=False, name=None, vm_capable=True):
      self.name = name
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.vm_capable = vm_capable
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
      self.os_fail = False
      self.oslist = {}

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    # Get instances in node group; this is unsafe and needs verification later
    inst_names = \
      self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: inst_names,
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      locking.LEVEL_NODE: [],
      }

    self.share_locks = _ShareAll()

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      # Get members of node group; this is unsafe and needs verification later
      nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)

      all_inst_info = self.cfg.GetAllInstancesInfo()

      # In Exec(), we warn about mirrored instances that have primary and
      # secondary living in separate node groups. To fully verify that
      # volumes for these instances are healthy, we will need to do an
      # extra call to their secondaries. We ensure here those nodes will
      # be locked.
      for inst in self.owned_locks(locking.LEVEL_INSTANCE):
        # Important: access only the instances whose lock is owned
        if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
          nodes.update(all_inst_info[inst].secondary_nodes)

      self.needed_locks[locking.LEVEL_NODE] = nodes

  def CheckPrereq(self):
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
    self.group_info = self.cfg.GetNodeGroup(self.group_uuid)

    group_nodes = set(self.group_info.members)
    group_instances = \
      self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)

    unlocked_nodes = \
      group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))

    unlocked_instances = \
      group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))

    if unlocked_nodes:
      raise errors.OpPrereqError("Missing lock for nodes: %s" %
                                 utils.CommaJoin(unlocked_nodes),
                                 errors.ECODE_STATE)

    if unlocked_instances:
      raise errors.OpPrereqError("Missing lock for instances: %s" %
                                 utils.CommaJoin(unlocked_instances),
                                 errors.ECODE_STATE)

    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()

    self.my_node_names = utils.NiceSort(group_nodes)
    self.my_inst_names = utils.NiceSort(group_instances)

    self.my_node_info = dict((name, self.all_node_info[name])
                             for name in self.my_node_names)

    self.my_inst_info = dict((name, self.all_inst_info[name])
                             for name in self.my_inst_names)

    # We detect here the nodes that will need the extra RPC calls for verifying
    # split LV volumes; they should be locked.
    extra_lv_nodes = set()

    for inst in self.my_inst_info.values():
      if inst.disk_template in constants.DTS_INT_MIRROR:
        for nname in inst.all_nodes:
          if self.all_node_info[nname].group != self.group_uuid:
            extra_lv_nodes.add(nname)

    unlocked_lv_nodes = \
      extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))

    if unlocked_lv_nodes:
      raise errors.OpPrereqError("Missing node locks for LV check: %s" %
                                 utils.CommaJoin(unlocked_lv_nodes),
                                 errors.ECODE_STATE)
    self.extra_lv_nodes = list(extra_lv_nodes)

  def _VerifyNode(self, ninfo, nresult):
    """Perform some basic validation on data returned from a node.

      - check the result data structure is well formed and has all the
        mandatory fields
      - check ganeti version

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @rtype: boolean
    @return: whether overall this call was successful (and we can expect
         reasonable values in the response)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, self.ENODERPC, node,
             "unable to verify node: no data returned")
    if test:
      return False

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, self.ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return False

    test = local_version != remote_version[0]
    _ErrorIf(test, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return False

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  self.ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if ninfo.vm_capable and isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
    if ninfo.vm_capable and isinstance(hvp_result, list):
      for item, hv_name, hv_result in hvp_result:
        _ErrorIf(True, self.ENODEHV, node,
                 "hypervisor %s parameter verify failure (source %s): %s",
                 hv_name, item, hv_result)

    test = nresult.get(constants.NV_NODESETUP,
                       ["Missing NODESETUP results"])
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True

  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
      return

    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)

  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    if vg_name is None:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = not vglist
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    # check pv names
    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
    if not test:
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PV)
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
                 " '%s' of VG '%s'", pvname, owner_vg)

  def _VerifyNodeBridges(self, ninfo, nresult, bridges):
    """Check the node bridges.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param bridges: the expected list of bridges

    """
    if not bridges:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    missing = nresult.get(constants.NV_BRIDGES, None)
    test = not isinstance(missing, list)
    _ErrorIf(test, self.ENODENET, node,
             "did not return valid bridge information")
    if not test:
      _ErrorIf(bool(missing), self.ENODENET, node, "missing bridges: %s" %
               utils.CommaJoin(sorted(missing)))
1957 def _VerifyNodeNetwork(self, ninfo, nresult):
1958 """Check the node network connectivity results.
1960 @type ninfo: L{objects.Node}
1961 @param ninfo: the node to check
1962 @param nresult: the remote results for the node
1964 """
1965 node = ninfo.name
1966 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1968 test = constants.NV_NODELIST not in nresult
1969 _ErrorIf(test, self.ENODESSH, node,
1970 "node hasn't returned node ssh connectivity data")
1972 if nresult[constants.NV_NODELIST]:
1973 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1974 _ErrorIf(True, self.ENODESSH, node,
1975 "ssh communication with node '%s': %s", a_node, a_msg)
1977 test = constants.NV_NODENETTEST not in nresult
1978 _ErrorIf(test, self.ENODENET, node,
1979 "node hasn't returned node tcp connectivity data")
1981 if nresult[constants.NV_NODENETTEST]:
1982 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1983 for anode in nlist:
1984 _ErrorIf(True, self.ENODENET, node,
1985 "tcp communication with node '%s': %s",
1986 anode, nresult[constants.NV_NODENETTEST][anode])
1988 test = constants.NV_MASTERIP not in nresult
1989 _ErrorIf(test, self.ENODENET, node,
1990 "node hasn't returned node master IP reachability data")
1992 if not nresult[constants.NV_MASTERIP]:
1993 if node == self.master_node:
1994 msg = "the master node cannot reach the master IP (not configured?)"
1996 msg = "cannot reach the master IP"
1997 _ErrorIf(True, self.ENODENET, node, msg)
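# Illustrative payload shapes assumed by the checks above (hypothetical):
# NV_NODELIST and NV_NODENETTEST map a remote node name to an error and
# are empty on full success, e.g.
#   nresult[constants.NV_NODELIST] == {"node2.example.com": "ssh timeout"}
# while NV_MASTERIP is a bare boolean for master IP reachability.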
1999 def _VerifyInstance(self, instance, instanceconfig, node_image,
2000 diskstatus):
2001 """Verify an instance.
2003 This function checks to see if the required block devices are
2004 available on the instance's node.
2006 """
2007 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2008 node_current = instanceconfig.primary_node
2010 node_vol_should = {}
2011 instanceconfig.MapLVsByNode(node_vol_should)
2013 for node in node_vol_should:
2014 n_img = node_image[node]
2015 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2016 # ignore missing volumes on offline or broken nodes
2017 continue
2018 for volume in node_vol_should[node]:
2019 test = volume not in n_img.volumes
2020 _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
2021 "volume %s missing on node %s", volume, node)
2023 if instanceconfig.admin_up:
2024 pri_img = node_image[node_current]
2025 test = instance not in pri_img.instances and not pri_img.offline
2026 _ErrorIf(test, self.EINSTANCEDOWN, instance,
2027 "instance not running on its primary node %s",
2030 diskdata = [(nname, success, status, idx)
2031 for (nname, disks) in diskstatus.items()
2032 for idx, (success, status) in enumerate(disks)]
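# e.g. (illustrative): diskstatus == {"node1": [(True, st0), (False, "e")]}
# flattens to [("node1", True, st0, 0), ("node1", False, "e", 1)], i.e.
# one tuple per (node, disk index) pair.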
2034 for nname, success, bdev_status, idx in diskdata:
2035 # the 'ghost node' construction in Exec() ensures that we have a
2036 # node here
2037 snode = node_image[nname]
2038 bad_snode = snode.ghost or snode.offline
2039 _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
2040 self.EINSTANCEFAULTYDISK, instance,
2041 "couldn't retrieve status for disk/%s on %s: %s",
2042 idx, nname, bdev_status)
2043 _ErrorIf((instanceconfig.admin_up and success and
2044 bdev_status.ldisk_status == constants.LDS_FAULTY),
2045 self.EINSTANCEFAULTYDISK, instance,
2046 "disk/%s on %s is faulty", idx, nname)
2048 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2049 """Verify if there are any unknown volumes in the cluster.
2051 The .os, .swap and backup volumes are ignored. All other volumes are
2052 reported as unknown.
2054 @type reserved: L{ganeti.utils.FieldSet}
2055 @param reserved: a FieldSet of reserved volume names
2057 """
2058 for node, n_img in node_image.items():
2059 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2060 self.all_node_info[node].group != self.group_uuid):
2061 # skip non-healthy nodes
2062 continue
2063 for volume in n_img.volumes:
2064 test = ((node not in node_vol_should or
2065 volume not in node_vol_should[node]) and
2066 not reserved.Matches(volume))
2067 self._ErrorIf(test, self.ENODEORPHANLV, node,
2068 "volume %s is unknown", volume)
2070 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2071 """Verify N+1 Memory Resilience.
2073 Check that if one single node dies we can still start all the
2074 instances it was primary for.
2076 """
2077 cluster_info = self.cfg.GetClusterInfo()
2078 for node, n_img in node_image.items():
2079 # This code checks that every node which is now listed as
2080 # secondary has enough memory to host all instances it is
2081 # supposed to should a single other node in the cluster fail.
2082 # FIXME: not ready for failover to an arbitrary node
2083 # FIXME: does not support file-backed instances
2084 # WARNING: we currently take into account down instances as well
2085 # as up ones, considering that even if they're down someone
2086 # might want to start them even in the event of a node failure.
2087 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2088 # we're skipping nodes marked offline and nodes in other groups from
2089 # the N+1 warning, since most likely we don't have good memory
2090 # information from them; we already list instances living on such
2091 # nodes, and that's enough warning
2092 continue
2093 for prinode, instances in n_img.sbp.items():
2094 needed_mem = 0
2095 for instance in instances:
2096 bep = cluster_info.FillBE(instance_cfg[instance])
2097 if bep[constants.BE_AUTO_BALANCE]:
2098 needed_mem += bep[constants.BE_MEMORY]
2099 test = n_img.mfree < needed_mem
2100 self._ErrorIf(test, self.ENODEN1, node,
2101 "not enough memory to accomodate instance failovers"
2102 " should node %s fail (%dMiB needed, %dMiB available)",
2103 prinode, needed_mem, n_img.mfree)
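# Worked example (hypothetical numbers): if this node has mfree == 4096
# MiB and is secondary for two auto-balanced instances of prinode A
# needing 2048 and 3072 MiB, then needed_mem == 5120 > 4096 and an
# ENODEN1 error is reported for the failover of A onto this node.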
2105 @classmethod
2106 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2107 (files_all, files_all_opt, files_mc, files_vm)):
2108 """Verifies file checksums collected from all nodes.
2110 @param errorif: Callback for reporting errors
2111 @param nodeinfo: List of L{objects.Node} objects
2112 @param master_node: Name of master node
2113 @param all_nvinfo: RPC results
2115 """
2116 assert (len(files_all | files_all_opt | files_mc | files_vm) ==
2117 sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
2118 "Found file listed in more than one file list"
2120 # Define functions determining which nodes to consider for a file
2121 files2nodefn = [
2122 (files_all, None),
2123 (files_all_opt, None),
2124 (files_mc, lambda node: (node.master_candidate or
2125 node.name == master_node)),
2126 (files_vm, lambda node: node.vm_capable),
2127 ]
2129 # Build mapping from filename to list of nodes which should have the file
2130 nodefiles = {}
2131 for (files, fn) in files2nodefn:
2132 if fn is None:
2133 filenodes = nodeinfo
2134 else:
2135 filenodes = filter(fn, nodeinfo)
2136 nodefiles.update((filename,
2137 frozenset(map(operator.attrgetter("name"), filenodes)))
2138 for filename in files)
2140 assert set(nodefiles) == (files_all | files_all_opt | files_mc | files_vm)
2142 fileinfo = dict((filename, {}) for filename in nodefiles)
2143 ignore_nodes = set()
2145 for node in nodeinfo:
2146 if node.offline:
2147 ignore_nodes.add(node.name)
2148 continue
2150 nresult = all_nvinfo[node.name]
2152 if nresult.fail_msg or not nresult.payload:
2153 node_files = None
2154 else:
2155 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2157 test = not (node_files and isinstance(node_files, dict))
2158 errorif(test, cls.ENODEFILECHECK, node.name,
2159 "Node did not return file checksum data")
2161 ignore_nodes.add(node.name)
2162 continue
2164 # Build per-checksum mapping from filename to nodes having it
2165 for (filename, checksum) in node_files.items():
2166 assert filename in nodefiles
2167 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2169 for (filename, checksums) in fileinfo.items():
2170 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2172 # Nodes having the file
2173 with_file = frozenset(node_name
2174 for nodes in fileinfo[filename].values()
2175 for node_name in nodes) - ignore_nodes
2177 expected_nodes = nodefiles[filename] - ignore_nodes
2179 # Nodes missing file
2180 missing_file = expected_nodes - with_file
2182 if filename in files_all_opt:
2183 # All or no nodes
2184 errorif(missing_file and missing_file != expected_nodes,
2185 cls.ECLUSTERFILECHECK, None,
2186 "File %s is optional, but it must exist on all or no"
2187 " nodes (not found on %s)",
2188 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2189 else:
2190 # Non-optional files
2191 errorif(missing_file, cls.ECLUSTERFILECHECK, None,
2192 "File %s is missing from node(s) %s", filename,
2193 utils.CommaJoin(utils.NiceSort(missing_file)))
2195 # Warn if a node has a file it shouldn't
2196 unexpected = with_file - expected_nodes
2197 errorif(unexpected,
2198 cls.ECLUSTERFILECHECK, None,
2199 "File %s should not exist on node(s) %s",
2200 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2202 # See if there are multiple versions of the file
2203 test = len(checksums) > 1
2204 if test:
2205 variants = ["variant %s on %s" %
2206 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2207 for (idx, (checksum, nodes)) in
2208 enumerate(sorted(checksums.items()))]
2209 else:
2210 variants = []
2212 errorif(test, cls.ECLUSTERFILECHECK, None,
2213 "File %s found with %s different checksums (%s)",
2214 filename, len(checksums), "; ".join(variants))
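# Illustrative shape of the mapping walked above (hypothetical sums):
#   fileinfo == {"/etc/hosts": {"abc...": set(["node1"]),
#                               "def...": set(["node2", "node3"])}}
# would report "/etc/hosts" with 2 different checksums, one
# "variant N on ..." entry per checksum.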
2216 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2217 drbd_map):
2218 """Verifies and the node DRBD status.
2220 @type ninfo: L{objects.Node}
2221 @param ninfo: the node to check
2222 @param nresult: the remote results for the node
2223 @param instanceinfo: the dict of instances
2224 @param drbd_helper: the configured DRBD usermode helper
2225 @param drbd_map: the DRBD map as returned by
2226 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2228 """
2229 node = ninfo.name
2230 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2232 if drbd_helper:
2233 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2234 test = (helper_result is None)
2235 _ErrorIf(test, self.ENODEDRBDHELPER, node,
2236 "no drbd usermode helper returned")
2238 status, payload = helper_result
2239 test = not status
2240 _ErrorIf(test, self.ENODEDRBDHELPER, node,
2241 "drbd usermode helper check unsuccessful: %s", payload)
2242 test = status and (payload != drbd_helper)
2243 _ErrorIf(test, self.ENODEDRBDHELPER, node,
2244 "wrong drbd usermode helper: %s", payload)
2246 # compute the DRBD minors
2247 node_drbd = {}
2248 for minor, instance in drbd_map[node].items():
2249 test = instance not in instanceinfo
2250 _ErrorIf(test, self.ECLUSTERCFG, None,
2251 "ghost instance '%s' in temporary DRBD map", instance)
2252 # ghost instance should not be running, but otherwise we
2253 # don't give double warnings (both ghost instance and
2254 # unallocated minor in use)
2255 if test:
2256 node_drbd[minor] = (instance, False)
2257 else:
2258 instance = instanceinfo[instance]
2259 node_drbd[minor] = (instance.name, instance.admin_up)
2261 # and now check them
2262 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2263 test = not isinstance(used_minors, (tuple, list))
2264 _ErrorIf(test, self.ENODEDRBD, node,
2265 "cannot parse drbd status file: %s", str(used_minors))
2267 # we cannot check drbd status
2268 return
2270 for minor, (iname, must_exist) in node_drbd.items():
2271 test = minor not in used_minors and must_exist
2272 _ErrorIf(test, self.ENODEDRBD, node,
2273 "drbd minor %d of instance %s is not active", minor, iname)
2274 for minor in used_minors:
2275 test = minor not in node_drbd
2276 _ErrorIf(test, self.ENODEDRBD, node,
2277 "unallocated drbd minor %d is in use", minor)
2279 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2280 """Builds the node OS structures.
2282 @type ninfo: L{objects.Node}
2283 @param ninfo: the node to check
2284 @param nresult: the remote results for the node
2285 @param nimg: the node image object
2287 """
2288 node = ninfo.name
2289 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2291 remote_os = nresult.get(constants.NV_OSLIST, None)
2292 test = (not isinstance(remote_os, list) or
2293 not compat.all(isinstance(v, list) and len(v) == 7
2294 for v in remote_os))
2296 _ErrorIf(test, self.ENODEOS, node,
2297 "node hasn't returned valid OS data")
2306 for (name, os_path, status, diagnose,
2307 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2309 if name not in os_dict:
2310 os_dict[name] = []
2312 # parameters is a list of lists instead of list of tuples due to
2313 # JSON lacking a real tuple type, fix it:
2314 parameters = [tuple(v) for v in parameters]
2315 os_dict[name].append((os_path, status, diagnose,
2316 set(variants), set(parameters), set(api_ver)))
2318 nimg.oslist = os_dict
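# Illustrative entry (hypothetical paths): nimg.oslist could end up as
#   {"debootstrap": [("/srv/ganeti/os/debootstrap", True, "",
#                     set(["default"]), set(), set([20]))]}
# one (path, status, diagnose, variants, parameters, api_versions)
# tuple per occurrence of the OS on the node.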
2320 def _VerifyNodeOS(self, ninfo, nimg, base):
2321 """Verifies the node OS list.
2323 @type ninfo: L{objects.Node}
2324 @param ninfo: the node to check
2325 @param nimg: the node image object
2326 @param base: the 'template' node we match against (e.g. from the master)
2328 """
2329 node = ninfo.name
2330 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2332 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2334 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2335 for os_name, os_data in nimg.oslist.items():
2336 assert os_data, "Empty OS status for OS %s?!" % os_name
2337 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2338 _ErrorIf(not f_status, self.ENODEOS, node,
2339 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2340 _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
2341 "OS '%s' has multiple entries (first one shadows the rest): %s",
2342 os_name, utils.CommaJoin([v[0] for v in os_data]))
2343 # comparisons with the 'base' image
2344 test = os_name not in base.oslist
2345 _ErrorIf(test, self.ENODEOS, node,
2346 "Extra OS %s not present on reference node (%s)",
2350 assert base.oslist[os_name], "Base node has empty OS status?"
2351 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2352 if not b_status:
2353 # base OS is invalid, skipping
2354 continue
2355 for kind, a, b in [("API version", f_api, b_api),
2356 ("variants list", f_var, b_var),
2357 ("parameters", beautify_params(f_param),
2358 beautify_params(b_param))]:
2359 _ErrorIf(a != b, self.ENODEOS, node,
2360 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2361 kind, os_name, base.name,
2362 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2364 # check any missing OSes
2365 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2366 _ErrorIf(missing, self.ENODEOS, node,
2367 "OSes present on reference node %s but missing on this node: %s",
2368 base.name, utils.CommaJoin(missing))
2370 def _VerifyOob(self, ninfo, nresult):
2371 """Verifies out of band functionality of a node.
2373 @type ninfo: L{objects.Node}
2374 @param ninfo: the node to check
2375 @param nresult: the remote results for the node
2377 """
2378 node = ninfo.name
2379 # We just have to verify the paths on master and/or master candidates
2380 # as the oob helper is invoked on the master
2381 if ((ninfo.master_candidate or ninfo.master_capable) and
2382 constants.NV_OOB_PATHS in nresult):
2383 for path_result in nresult[constants.NV_OOB_PATHS]:
2384 self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
2386 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2387 """Verifies and updates the node volume data.
2389 This function will update a L{NodeImage}'s internal structures
2390 with data from the remote call.
2392 @type ninfo: L{objects.Node}
2393 @param ninfo: the node to check
2394 @param nresult: the remote results for the node
2395 @param nimg: the node image object
2396 @param vg_name: the configured VG name
2398 """
2399 node = ninfo.name
2400 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2402 nimg.lvm_fail = True
2403 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2404 if vg_name is None:
2405 pass
2406 elif isinstance(lvdata, basestring):
2407 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
2408 utils.SafeEncode(lvdata))
2409 elif not isinstance(lvdata, dict):
2410 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
2412 nimg.volumes = lvdata
2413 nimg.lvm_fail = False
2415 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2416 """Verifies and updates the node instance list.
2418 If the listing was successful, then updates this node's instance
2419 list. Otherwise, it marks the RPC call as failed for the instance
2420 list.
2422 @type ninfo: L{objects.Node}
2423 @param ninfo: the node to check
2424 @param nresult: the remote results for the node
2425 @param nimg: the node image object
2427 """
2428 idata = nresult.get(constants.NV_INSTANCELIST, None)
2429 test = not isinstance(idata, list)
2430 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
2431 " (instancelist): %s", utils.SafeEncode(str(idata)))
2433 nimg.hyp_fail = True
2434 else:
2435 nimg.instances = idata
2437 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2438 """Verifies and computes a node information map
2440 @type ninfo: L{objects.Node}
2441 @param ninfo: the node to check
2442 @param nresult: the remote results for the node
2443 @param nimg: the node image object
2444 @param vg_name: the configured VG name
2446 """
2447 node = ninfo.name
2448 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2450 # try to read free memory (from the hypervisor)
2451 hv_info = nresult.get(constants.NV_HVINFO, None)
2452 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2453 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
2454 if not test:
2455 try:
2456 nimg.mfree = int(hv_info["memory_free"])
2457 except (ValueError, TypeError):
2458 _ErrorIf(True, self.ENODERPC, node,
2459 "node returned invalid nodeinfo, check hypervisor")
2461 # FIXME: devise a free space model for file based instances as well
2462 if vg_name is not None:
2463 test = (constants.NV_VGLIST not in nresult or
2464 vg_name not in nresult[constants.NV_VGLIST])
2465 _ErrorIf(test, self.ENODELVM, node,
2466 "node didn't return data for the volume group '%s'"
2467 " - it is either missing or broken", vg_name)
2470 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2471 except (ValueError, TypeError):
2472 _ErrorIf(True, self.ENODERPC, node,
2473 "node returned invalid LVM info, check LVM status")
2475 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2476 """Gets per-disk status information for all instances.
2478 @type nodelist: list of strings
2479 @param nodelist: Node names
2480 @type node_image: dict of (name, L{objects.Node})
2481 @param node_image: Node objects
2482 @type instanceinfo: dict of (name, L{objects.Instance})
2483 @param instanceinfo: Instance objects
2484 @rtype: {instance: {node: [(success, payload)]}}
2485 @return: a dictionary of per-instance dictionaries with nodes as
2486 keys and disk information as values; the disk information is a
2487 list of tuples (success, payload)
2489 """
2490 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2492 node_disks = {}
2493 node_disks_devonly = {}
2494 diskless_instances = set()
2495 diskless = constants.DT_DISKLESS
2497 for nname in nodelist:
2498 node_instances = list(itertools.chain(node_image[nname].pinst,
2499 node_image[nname].sinst))
2500 diskless_instances.update(inst for inst in node_instances
2501 if instanceinfo[inst].disk_template == diskless)
2502 disks = [(inst, disk)
2503 for inst in node_instances
2504 for disk in instanceinfo[inst].disks]
2506 if not disks:
2507 # No need to collect data
2508 continue
2510 node_disks[nname] = disks
2512 # Creating copies as SetDiskID below will modify the objects and that can
2513 # lead to incorrect data returned from nodes
2514 devonly = [dev.Copy() for (_, dev) in disks]
2516 for dev in devonly:
2517 self.cfg.SetDiskID(dev, nname)
2519 node_disks_devonly[nname] = devonly
2521 assert len(node_disks) == len(node_disks_devonly)
2523 # Collect data from all nodes with disks
2524 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2525 node_disks_devonly)
2527 assert len(result) == len(node_disks)
2529 instdisk = {}
2531 for (nname, nres) in result.items():
2532 disks = node_disks[nname]
2534 if nres.offline:
2535 # No data from this node
2536 data = len(disks) * [(False, "node offline")]
2537 else:
2538 msg = nres.fail_msg
2539 _ErrorIf(msg, self.ENODERPC, nname,
2540 "while getting disk information: %s", msg)
2542 # No data from this node
2543 data = len(disks) * [(False, msg)]
2544 else:
2545 data = []
2546 for idx, i in enumerate(nres.payload):
2547 if isinstance(i, (tuple, list)) and len(i) == 2:
2548 data.append(i)
2549 else:
2550 logging.warning("Invalid result from node %s, entry %d: %s",
2551 nname, idx, i)
2552 data.append((False, "Invalid result from the remote node"))
2554 for ((inst, _), status) in zip(disks, data):
2555 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2557 # Add empty entries for diskless instances.
2558 for inst in diskless_instances:
2559 assert inst not in instdisk
2560 instdisk[inst] = {}
2562 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2563 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2564 compat.all(isinstance(s, (tuple, list)) and
2565 len(s) == 2 for s in statuses)
2566 for inst, nnames in instdisk.items()
2567 for nname, statuses in nnames.items())
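# Illustrative shape verified by these assertions (hypothetical names):
#   instdisk == {"inst1": {"node1": [(True, st0), (True, st1)],
#                          "node2": [(False, "node offline")]}}
# exactly one (success, payload) pair per disk and per relevant node.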
2568 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2570 return instdisk
2572 @staticmethod
2573 def _SshNodeSelector(group_uuid, all_nodes):
2574 """Create endless iterators for all potential SSH check hosts.
2577 nodes = [node for node in all_nodes
2578 if (node.group != group_uuid and
2579 not node.offline)]
2580 keyfunc = operator.attrgetter("group")
2582 return map(itertools.cycle,
2583 [sorted(map(operator.attrgetter("name"), names))
2584 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2585 keyfunc)])
2587 @classmethod
2588 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2589 """Choose which nodes should talk to which other nodes.
2591 We will make nodes contact all nodes in their group, and one node from
2592 every other group.
2594 @warning: This algorithm has a known issue if one node group is much
2595 smaller than others (e.g. just one node). In such a case all other
2596 nodes will talk to the single node.
2598 """
2599 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2600 sel = cls._SshNodeSelector(group_uuid, all_nodes)
2602 return (online_nodes,
2603 dict((name, sorted([i.next() for i in sel]))
2604 for name in online_nodes))
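# Example behaviour (hypothetical groups): if this group's online nodes
# are ["a", "b"] and the other groups contribute cycling selectors over
# ["n1"] and ["m1", "m2"], the result could be
#   (["a", "b"], {"a": ["m1", "n1"], "b": ["m2", "n1"]})
# note how the single-node group "n1" is contacted by every node, the
# known issue mentioned in the docstring above.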
2606 def BuildHooksEnv(self):
2607 """Build hooks env.
2609 Cluster-Verify hooks are run only in the post phase; if they fail, their
2610 output is logged in the verify output and the verification fails.
2612 """
2613 env = {
2614 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2617 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2618 for node in self.my_node_info.values())
2620 return env
2622 def BuildHooksNodes(self):
2623 """Build hooks nodes.
2626 return ([], self.my_node_names)
2628 def Exec(self, feedback_fn):
2629 """Verify integrity of the node group, performing various test on nodes.
2632 # This method has too many local variables. pylint: disable=R0914
2633 feedback_fn("* Verifying group '%s'" % self.group_info.name)
2635 if not self.my_node_names:
2636 # empty node group
2637 feedback_fn("* Empty node group, skipping verification")
2641 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2642 verbose = self.op.verbose
2643 self._feedback_fn = feedback_fn
2645 vg_name = self.cfg.GetVGName()
2646 drbd_helper = self.cfg.GetDRBDHelper()
2647 cluster = self.cfg.GetClusterInfo()
2648 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2649 hypervisors = cluster.enabled_hypervisors
2650 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2652 i_non_redundant = [] # Non redundant instances
2653 i_non_a_balanced = [] # Non auto-balanced instances
2654 n_offline = 0 # Count of offline nodes
2655 n_drained = 0 # Count of nodes being drained
2656 node_vol_should = {}
2658 # FIXME: verify OS list
2661 filemap = _ComputeAncillaryFiles(cluster, False)
2663 # do local checksums
2664 master_node = self.master_node = self.cfg.GetMasterNode()
2665 master_ip = self.cfg.GetMasterIP()
2667 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2669 node_verify_param = {
2670 constants.NV_FILELIST:
2671 utils.UniqueSequence(filename
2672 for files in filemap
2673 for filename in files),
2674 constants.NV_NODELIST:
2675 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
2676 self.all_node_info.values()),
2677 constants.NV_HYPERVISOR: hypervisors,
2678 constants.NV_HVPARAMS:
2679 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2680 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2681 for node in node_data_list
2682 if not node.offline],
2683 constants.NV_INSTANCELIST: hypervisors,
2684 constants.NV_VERSION: None,
2685 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2686 constants.NV_NODESETUP: None,
2687 constants.NV_TIME: None,
2688 constants.NV_MASTERIP: (master_node, master_ip),
2689 constants.NV_OSLIST: None,
2690 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2691 }
2693 if vg_name is not None:
2694 node_verify_param[constants.NV_VGLIST] = None
2695 node_verify_param[constants.NV_LVLIST] = vg_name
2696 node_verify_param[constants.NV_PVLIST] = [vg_name]
2697 node_verify_param[constants.NV_DRBDLIST] = None
2699 if drbd_helper:
2700 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2703 # FIXME: this needs to be changed per node-group, not cluster-wide
2704 bridges = set()
2705 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2706 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2707 bridges.add(default_nicpp[constants.NIC_LINK])
2708 for instance in self.my_inst_info.values():
2709 for nic in instance.nics:
2710 full_nic = cluster.SimpleFillNIC(nic.nicparams)
2711 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2712 bridges.add(full_nic[constants.NIC_LINK])
2714 if bridges:
2715 node_verify_param[constants.NV_BRIDGES] = list(bridges)
2717 # Build our expected cluster state
2718 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2719 name=node.name,
2720 vm_capable=node.vm_capable))
2721 for node in node_data_list)
2723 # Gather OOB paths
2724 oob_paths = []
2725 for node in self.all_node_info.values():
2726 path = _SupportsOob(self.cfg, node)
2727 if path and path not in oob_paths:
2728 oob_paths.append(path)
2730 if oob_paths:
2731 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2733 for instance in self.my_inst_names:
2734 inst_config = self.my_inst_info[instance]
2736 for nname in inst_config.all_nodes:
2737 if nname not in node_image:
2738 gnode = self.NodeImage(name=nname)
2739 gnode.ghost = (nname not in self.all_node_info)
2740 node_image[nname] = gnode
2742 inst_config.MapLVsByNode(node_vol_should)
2744 pnode = inst_config.primary_node
2745 node_image[pnode].pinst.append(instance)
2747 for snode in inst_config.secondary_nodes:
2748 nimg = node_image[snode]
2749 nimg.sinst.append(instance)
2750 if pnode not in nimg.sbp:
2751 nimg.sbp[pnode] = []
2752 nimg.sbp[pnode].append(instance)
2754 # At this point, we have the in-memory data structures complete,
2755 # except for the runtime information, which we'll gather next
2757 # Due to the way our RPC system works, exact response times cannot be
2758 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2759 # time before and after executing the request, we can at least have a time
2760 # window.
2761 nvinfo_starttime = time.time()
2762 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2763 node_verify_param,
2764 self.cfg.GetClusterName())
2765 nvinfo_endtime = time.time()
2767 if self.extra_lv_nodes and vg_name is not None:
2768 extra_lv_nvinfo = \
2769 self.rpc.call_node_verify(self.extra_lv_nodes,
2770 {constants.NV_LVLIST: vg_name},
2771 self.cfg.GetClusterName())
2772 else:
2773 extra_lv_nvinfo = {}
2775 all_drbd_map = self.cfg.ComputeDRBDMap()
2777 feedback_fn("* Gathering disk information (%s nodes)" %
2778 len(self.my_node_names))
2779 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2780 self.my_inst_info)
2782 feedback_fn("* Verifying configuration file consistency")
2784 # If not all nodes are being checked, we need to make sure the master node
2785 # and a non-checked vm_capable node are in the list.
2786 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2787 if absent_nodes:
2788 vf_nvinfo = all_nvinfo.copy()
2789 vf_node_info = list(self.my_node_info.values())
2790 additional_nodes = []
2791 if master_node not in self.my_node_info:
2792 additional_nodes.append(master_node)
2793 vf_node_info.append(self.all_node_info[master_node])
2794 # Add the first vm_capable node we find which is not included
2795 for node in absent_nodes:
2796 nodeinfo = self.all_node_info[node]
2797 if nodeinfo.vm_capable and not nodeinfo.offline:
2798 additional_nodes.append(node)
2799 vf_node_info.append(self.all_node_info[node])
2800 break
2801 key = constants.NV_FILELIST
2802 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2803 {key: node_verify_param[key]},
2804 self.cfg.GetClusterName()))
2805 else:
2806 vf_nvinfo = all_nvinfo
2807 vf_node_info = self.my_node_info.values()
2809 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2811 feedback_fn("* Verifying node status")
2815 for node_i in node_data_list:
2816 node = node_i.name
2817 nimg = node_image[node]
2819 if node_i.offline:
2820 if verbose:
2821 feedback_fn("* Skipping offline node %s" % (node,))
2825 if node == master_node:
2826 ntype = "master"
2827 elif node_i.master_candidate:
2828 ntype = "master candidate"
2829 elif node_i.drained:
2830 ntype = "drained"
2831 n_drained += 1
2832 else:
2833 ntype = "regular"
2834 if verbose:
2835 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2837 msg = all_nvinfo[node].fail_msg
2838 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2839 if msg:
2840 nimg.rpc_fail = True
2841 continue
2843 nresult = all_nvinfo[node].payload
2845 nimg.call_ok = self._VerifyNode(node_i, nresult)
2846 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2847 self._VerifyNodeNetwork(node_i, nresult)
2848 self._VerifyOob(node_i, nresult)
2850 if nimg.vm_capable:
2851 self._VerifyNodeLVM(node_i, nresult, vg_name)
2852 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2853 all_drbd_map)
2855 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2856 self._UpdateNodeInstances(node_i, nresult, nimg)
2857 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2858 self._UpdateNodeOS(node_i, nresult, nimg)
2860 if not nimg.os_fail:
2861 if refos_img is None:
2862 refos_img = nimg
2863 self._VerifyNodeOS(node_i, nimg, refos_img)
2864 self._VerifyNodeBridges(node_i, nresult, bridges)
2866 # Check whether all running instances are primary for the node. (This
2867 # can no longer be done from _VerifyInstance below, since some of the
2868 # wrong instances could be from other node groups.)
2869 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2871 for inst in non_primary_inst:
2872 test = inst in self.all_inst_info
2873 _ErrorIf(test, self.EINSTANCEWRONGNODE, inst,
2874 "instance should not run on node %s", node_i.name)
2875 _ErrorIf(not test, self.ENODEORPHANINSTANCE, node_i.name,
2876 "node is running unknown instance %s", inst)
2878 for node, result in extra_lv_nvinfo.items():
2879 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
2880 node_image[node], vg_name)
2882 feedback_fn("* Verifying instance status")
2883 for instance in self.my_inst_names:
2884 if verbose:
2885 feedback_fn("* Verifying instance %s" % instance)
2886 inst_config = self.my_inst_info[instance]
2887 self._VerifyInstance(instance, inst_config, node_image,
2888 instdisk[instance])
2889 inst_nodes_offline = []
2891 pnode = inst_config.primary_node
2892 pnode_img = node_image[pnode]
2893 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2894 self.ENODERPC, pnode, "instance %s, connection to"
2895 " primary node failed", instance)
2897 _ErrorIf(inst_config.admin_up and pnode_img.offline,
2898 self.EINSTANCEBADNODE, instance,
2899 "instance is marked as running and lives on offline node %s",
2900 inst_config.primary_node)
2902 # If the instance is non-redundant we cannot survive losing its primary
2903 # node, so we are not N+1 compliant. On the other hand we have no disk
2904 # templates with more than one secondary so that situation is not well
2905 # supported either.
2906 # FIXME: does not support file-backed instances
2907 if not inst_config.secondary_nodes:
2908 i_non_redundant.append(instance)
2910 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2911 instance, "instance has multiple secondary nodes: %s",
2912 utils.CommaJoin(inst_config.secondary_nodes),
2913 code=self.ETYPE_WARNING)
2915 if inst_config.disk_template in constants.DTS_INT_MIRROR:
2916 pnode = inst_config.primary_node
2917 instance_nodes = utils.NiceSort(inst_config.all_nodes)
2918 instance_groups = {}
2920 for node in instance_nodes:
2921 instance_groups.setdefault(self.all_node_info[node].group,
2922 []).append(node)
2924 pretty_list = [
2925 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2926 # Sort so that we always list the primary node first.
2927 for group, nodes in sorted(instance_groups.items(),
2928 key=lambda (_, nodes): pnode in nodes,
2929 reverse=True)]
2931 self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2932 instance, "instance has primary and secondary nodes in"
2933 " different groups: %s", utils.CommaJoin(pretty_list),
2934 code=self.ETYPE_WARNING)
2936 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2937 i_non_a_balanced.append(instance)
2939 for snode in inst_config.secondary_nodes:
2940 s_img = node_image[snode]
2941 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2942 "instance %s, connection to secondary node failed", instance)
2945 inst_nodes_offline.append(snode)
2947 # warn that the instance lives on offline nodes
2948 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2949 "instance has offline secondary node(s) %s",
2950 utils.CommaJoin(inst_nodes_offline))
2951 # ... or ghost/non-vm_capable nodes
2952 for node in inst_config.all_nodes:
2953 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2954 "instance lives on ghost node %s", node)
2955 _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2956 instance, "instance lives on non-vm_capable node %s", node)
2958 feedback_fn("* Verifying orphan volumes")
2959 reserved = utils.FieldSet(*cluster.reserved_lvs)
2961 # We will get spurious "unknown volume" warnings if any node of this group
2962 # is secondary for an instance whose primary is in another group. To avoid
2963 # them, we find these instances and add their volumes to node_vol_should.
2964 for inst in self.all_inst_info.values():
2965 for secondary in inst.secondary_nodes:
2966 if (secondary in self.my_node_info
2967 and inst.name not in self.my_inst_info):
2968 inst.MapLVsByNode(node_vol_should)
2971 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2973 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2974 feedback_fn("* Verifying N+1 Memory redundancy")
2975 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
2977 feedback_fn("* Other Notes")
2979 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
2980 % len(i_non_redundant))
2982 if i_non_a_balanced:
2983 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
2984 % len(i_non_a_balanced))
2986 if n_offline:
2987 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
2989 if n_drained:
2990 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
2992 return not self.bad
2994 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2995 """Analyze the post-hooks' result
2997 This method analyses the hook result, handles it, and sends some
2998 nicely-formatted feedback back to the user.
3000 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3001 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3002 @param hooks_results: the results of the multi-node hooks rpc call
3003 @param feedback_fn: function used to send feedback back to the caller
3004 @param lu_result: previous Exec result
3005 @return: the new Exec result, based on the previous result
3006 and hook results
3008 """
3009 # We only really run POST phase hooks, only for non-empty groups,
3010 # and are only interested in their results
3011 if not self.my_node_names:
3012 # empty node group
3013 pass
3014 elif phase == constants.HOOKS_PHASE_POST:
3015 # Used to change hooks' output to proper indentation
3016 feedback_fn("* Hooks Results")
3017 assert hooks_results, "invalid result from hooks"
3019 for node_name in hooks_results:
3020 res = hooks_results[node_name]
3021 msg = res.fail_msg
3022 test = msg and not res.offline
3023 self._ErrorIf(test, self.ENODEHOOKS, node_name,
3024 "Communication failure in hooks execution: %s", msg)
3025 if res.offline or msg:
3026 # No need to investigate payload if node is offline or gave
3027 # an error.
3028 continue
3029 for script, hkr, output in res.payload:
3030 test = hkr == constants.HKR_FAIL
3031 self._ErrorIf(test, self.ENODEHOOKS, node_name,
3032 "Script %s failed, output:", script)
3034 output = self._HOOKS_INDENT_RE.sub(" ", output)
3035 feedback_fn("%s" % output)
3041 class LUClusterVerifyDisks(NoHooksLU):
3042 """Verifies the cluster disks status.
3047 def ExpandNames(self):
3048 self.share_locks = _ShareAll()
3049 self.needed_locks = {
3050 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3051 }
3053 def Exec(self, feedback_fn):
3054 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3056 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3057 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3058 for group in group_names])
3061 class LUGroupVerifyDisks(NoHooksLU):
3062 """Verifies the status of all disks in a node group.
3067 def ExpandNames(self):
3068 # Raises errors.OpPrereqError on its own if group can't be found
3069 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3071 self.share_locks = _ShareAll()
3072 self.needed_locks = {
3073 locking.LEVEL_INSTANCE: [],
3074 locking.LEVEL_NODEGROUP: [],
3075 locking.LEVEL_NODE: [],
3076 }
3078 def DeclareLocks(self, level):
3079 if level == locking.LEVEL_INSTANCE:
3080 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3082 # Lock instances optimistically, needs verification once node and group
3083 # locks have been acquired
3084 self.needed_locks[locking.LEVEL_INSTANCE] = \
3085 self.cfg.GetNodeGroupInstances(self.group_uuid)
3087 elif level == locking.LEVEL_NODEGROUP:
3088 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3090 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3091 set([self.group_uuid] +
3092 # Lock all groups used by instances optimistically; this requires
3093 # going via the node before it's locked, requiring verification
3094 # later on
3095 [group_uuid
3096 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3097 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3099 elif level == locking.LEVEL_NODE:
3100 # This will only lock the nodes in the group to be verified which contain
3101 # actual instances
3102 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3103 self._LockInstancesNodes()
3105 # Lock all nodes in group to be verified
3106 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3107 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3108 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3110 def CheckPrereq(self):
3111 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3112 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3113 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3115 assert self.group_uuid in owned_groups
3117 # Check if locked instances are still correct
3118 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3120 # Get instance information
3121 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3123 # Check if node groups for locked instances are still correct
3124 for (instance_name, inst) in self.instances.items():
3125 assert owned_nodes.issuperset(inst.all_nodes), \
3126 "Instance %s's nodes changed while we kept the lock" % instance_name
3128 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3129 owned_groups)
3131 assert self.group_uuid in inst_groups, \
3132 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3134 def Exec(self, feedback_fn):
3135 """Verify integrity of cluster disks.
3137 @rtype: tuple of three items
3138 @return: a tuple of (dict of node-to-node_error, list of instances
3139 which need activate-disks, dict of instance: (node, volume) for
3140 missing volumes
3142 """
3143 res_nodes = {}
3144 res_instances = set()
3145 res_missing = {}
3147 nv_dict = _MapInstanceDisksToNodes([inst
3148 for inst in self.instances.values()
3149 if inst.admin_up])
3151 if nv_dict:
3152 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3153 set(self.cfg.GetVmCapableNodeList()))
3155 node_lvs = self.rpc.call_lv_list(nodes, [])
3157 for (node, node_res) in node_lvs.items():
3158 if node_res.offline:
3159 continue
3161 msg = node_res.fail_msg
3162 if msg:
3163 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3164 res_nodes[node] = msg
3165 continue
3167 for lv_name, (_, _, lv_online) in node_res.payload.items():
3168 inst = nv_dict.pop((node, lv_name), None)
3169 if not (lv_online or inst is None):
3170 res_instances.add(inst)
3172 # any leftover items in nv_dict are missing LVs, let's arrange the data
3173 # better
3174 for key, inst in nv_dict.iteritems():
3175 res_missing.setdefault(inst, []).append(list(key))
3177 return (res_nodes, list(res_instances), res_missing)
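# Illustrative return value (hypothetical data):
#   ({"node3": "Error enumerating LVs"},        # nodes with errors
#    ["inst1"],                                 # need activate-disks
#    {"inst2": [["node1", "xenvg/disk0"]]})     # missing (node, LV) pairs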
3180 class LUClusterRepairDiskSizes(NoHooksLU):
3181 """Verifies the cluster disks sizes.
3186 def ExpandNames(self):
3187 if self.op.instances:
3188 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3189 self.needed_locks = {
3190 locking.LEVEL_NODE: [],
3191 locking.LEVEL_INSTANCE: self.wanted_names,
3192 }
3193 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3194 else:
3195 self.wanted_names = None
3196 self.needed_locks = {
3197 locking.LEVEL_NODE: locking.ALL_SET,
3198 locking.LEVEL_INSTANCE: locking.ALL_SET,
3199 }
3200 self.share_locks = {
3201 locking.LEVEL_NODE: 1,
3202 locking.LEVEL_INSTANCE: 0,
3203 }
3205 def DeclareLocks(self, level):
3206 if level == locking.LEVEL_NODE and self.wanted_names is not None:
3207 self._LockInstancesNodes(primary_only=True)
3209 def CheckPrereq(self):
3210 """Check prerequisites.
3212 This only checks the optional instance list against the existing names.
3214 """
3215 if self.wanted_names is None:
3216 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3218 self.wanted_instances = \
3219 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
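# Worked example for _EnsureChildSizes below (hypothetical sizes): for a
# DRBD8 disk of size 10240 whose data child reports 10112, the child is
# grown to 10240, the check recurses into it, and True is returned so
# the caller knows the configuration needs to be written out.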
3221 def _EnsureChildSizes(self, disk):
3222 """Ensure children of the disk have the needed disk size.
3224 This is valid mainly for DRBD8 and fixes an issue where the
3225 children have a smaller disk size than the parent.
3227 @param disk: an L{ganeti.objects.Disk} object
3229 """
3230 if disk.dev_type == constants.LD_DRBD8:
3231 assert disk.children, "Empty children for DRBD8?"
3232 fchild = disk.children[0]
3233 mismatch = fchild.size < disk.size
3234 if mismatch:
3235 self.LogInfo("Child disk has size %d, parent %d, fixing",
3236 fchild.size, disk.size)
3237 fchild.size = disk.size
3239 # and we recurse on this child only, not on the metadev
3240 return self._EnsureChildSizes(fchild) or mismatch
3241 else:
3242 return False
3244 def Exec(self, feedback_fn):
3245 """Verify the size of cluster disks.
3248 # TODO: check child disks too
3249 # TODO: check differences in size between primary/secondary nodes
3250 per_node_disks = {}
3251 for instance in self.wanted_instances:
3252 pnode = instance.primary_node
3253 if pnode not in per_node_disks:
3254 per_node_disks[pnode] = []
3255 for idx, disk in enumerate(instance.disks):
3256 per_node_disks[pnode].append((instance, idx, disk))
3258 changed = []
3259 for node, dskl in per_node_disks.items():
3260 newl = [v[2].Copy() for v in dskl]
3261 for dsk in newl:
3262 self.cfg.SetDiskID(dsk, node)
3263 result = self.rpc.call_blockdev_getsize(node, newl)
3264 if result.fail_msg:
3265 self.LogWarning("Failure in blockdev_getsize call to node"
3266 " %s, ignoring", node)
3268 if len(result.payload) != len(dskl):
3269 logging.warning("Invalid result from node %s: len(dksl)=%d,"
3270 " result.payload=%s", node, len(dskl), result.payload)
3271 self.LogWarning("Invalid result from node %s, ignoring node results",
3274 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3275 if size is None:
3276 self.LogWarning("Disk %d of instance %s did not return size"
3277 " information, ignoring", idx, instance.name)
3279 if not isinstance(size, (int, long)):
3280 self.LogWarning("Disk %d of instance %s did not return valid"
3281 " size information, ignoring", idx, instance.name)
3284 if size != disk.size:
3285 self.LogInfo("Disk %d of instance %s has mismatched size,"
3286 " correcting: recorded %d, actual %d", idx,
3287 instance.name, disk.size, size)
3288 disk.size = size
3289 self.cfg.Update(instance, feedback_fn)
3290 changed.append((instance.name, idx, size))
3291 if self._EnsureChildSizes(disk):
3292 self.cfg.Update(instance, feedback_fn)
3293 changed.append((instance.name, idx, disk.size))
3295 return changed
3297 class LUClusterRename(LogicalUnit):
3298 """Rename the cluster.
3301 HPATH = "cluster-rename"
3302 HTYPE = constants.HTYPE_CLUSTER
3304 def BuildHooksEnv(self):
3305 """Build hooks env.
3307 """
3308 return {
3309 "OP_TARGET": self.cfg.GetClusterName(),
3310 "NEW_NAME": self.op.name,
3313 def BuildHooksNodes(self):
3314 """Build hooks nodes.
3317 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3319 def CheckPrereq(self):
3320 """Verify that the passed name is a valid one.
3323 hostname = netutils.GetHostname(name=self.op.name,
3324 family=self.cfg.GetPrimaryIPFamily())
3326 new_name = hostname.name
3327 self.ip = new_ip = hostname.ip
3328 old_name = self.cfg.GetClusterName()
3329 old_ip = self.cfg.GetMasterIP()
3330 if new_name == old_name and new_ip == old_ip:
3331 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3332 " cluster has changed",
3334 if new_ip != old_ip:
3335 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3336 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3337 " reachable on the network" %
3338 new_ip, errors.ECODE_NOTUNIQUE)
3340 self.op.name = new_name
3342 def Exec(self, feedback_fn):
3343 """Rename the cluster.
3346 clustername = self.op.name
3347 ip = self.ip
3349 # shutdown the master IP
3350 master = self.cfg.GetMasterNode()
3351 result = self.rpc.call_node_stop_master(master, False)
3352 result.Raise("Could not disable the master role")
3354 try:
3355 cluster = self.cfg.GetClusterInfo()
3356 cluster.cluster_name = clustername
3357 cluster.master_ip = ip
3358 self.cfg.Update(cluster, feedback_fn)
3360 # update the known hosts file
3361 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3362 node_list = self.cfg.GetOnlineNodeList()
3363 try:
3364 node_list.remove(master)
3365 except ValueError:
3366 pass
3367 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3368 finally:
3369 result = self.rpc.call_node_start_master(master, False, False)
3370 msg = result.fail_msg
3372 self.LogWarning("Could not re-enable the master role on"
3373 " the master, please restart manually: %s", msg)
3378 class LUClusterSetParams(LogicalUnit):
3379 """Change the parameters of the cluster.
3382 HPATH = "cluster-modify"
3383 HTYPE = constants.HTYPE_CLUSTER
3384 REQ_BGL = False
3386 def CheckArguments(self):
3387 """Check parameters
3389 """
3390 if self.op.uid_pool:
3391 uidpool.CheckUidPool(self.op.uid_pool)
3393 if self.op.add_uids:
3394 uidpool.CheckUidPool(self.op.add_uids)
3396 if self.op.remove_uids:
3397 uidpool.CheckUidPool(self.op.remove_uids)
3399 def ExpandNames(self):
3400 # FIXME: in the future maybe other cluster params won't require checking on
3401 # all nodes to be modified.
3402 self.needed_locks = {
3403 locking.LEVEL_NODE: locking.ALL_SET,
3404 }
3405 self.share_locks[locking.LEVEL_NODE] = 1
3407 def BuildHooksEnv(self):
3408 """Build hooks env.
3410 """
3411 return {
3412 "OP_TARGET": self.cfg.GetClusterName(),
3413 "NEW_VG_NAME": self.op.vg_name,
3416 def BuildHooksNodes(self):
3417 """Build hooks nodes.
3420 mn = self.cfg.GetMasterNode()
3421 return ([mn], [mn])
3423 def CheckPrereq(self):
3424 """Check prerequisites.
3426 This checks that the given params don't conflict and
3427 that the given volume group is valid.
3430 if self.op.vg_name is not None and not self.op.vg_name:
3431 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3432 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3433 " instances exist", errors.ECODE_INVAL)
3435 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3436 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3437 raise errors.OpPrereqError("Cannot disable drbd helper while"
3438 " drbd-based instances exist",
3441 node_list = self.owned_locks(locking.LEVEL_NODE)
3443 # if vg_name not None, checks given volume group on all nodes
3444 if self.op.vg_name:
3445 vglist = self.rpc.call_vg_list(node_list)
3446 for node in node_list:
3447 msg = vglist[node].fail_msg
3448 if msg:
3449 # ignoring down node
3450 self.LogWarning("Error while gathering data on node %s"
3451 " (ignoring node): %s", node, msg)
3453 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3454 self.op.vg_name,
3455 constants.MIN_VG_SIZE)
3456 if vgstatus:
3457 raise errors.OpPrereqError("Error on node '%s': %s" %
3458 (node, vgstatus), errors.ECODE_ENVIRON)
3460 if self.op.drbd_helper:
3461 # checks given drbd helper on all nodes
3462 helpers = self.rpc.call_drbd_helper(node_list)
3463 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3464 if ninfo.offline:
3465 self.LogInfo("Not checking drbd helper on offline node %s", node)
3466 continue
3467 msg = helpers[node].fail_msg
3468 if msg:
3469 raise errors.OpPrereqError("Error checking drbd helper on node"
3470 " '%s': %s" % (node, msg),
3471 errors.ECODE_ENVIRON)
3472 node_helper = helpers[node].payload
3473 if node_helper != self.op.drbd_helper:
3474 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3475 (node, node_helper), errors.ECODE_ENVIRON)
3477 self.cluster = cluster = self.cfg.GetClusterInfo()
3478 # validate params changes
3479 if self.op.beparams:
3480 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3481 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3483 if self.op.ndparams:
3484 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3485 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3487 # TODO: we need a more general way to handle resetting
3488 # cluster-level parameters to default values
3489 if self.new_ndparams["oob_program"] == "":
3490 self.new_ndparams["oob_program"] = \
3491 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3493 if self.op.nicparams:
3494 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3495 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3496 objects.NIC.CheckParameterSyntax(self.new_nicparams)
3497 nic_errors = []
3499 # check all instances for consistency
3500 for instance in self.cfg.GetAllInstancesInfo().values():
3501 for nic_idx, nic in enumerate(instance.nics):
3502 params_copy = copy.deepcopy(nic.nicparams)
3503 params_filled = objects.FillDict(self.new_nicparams, params_copy)
3505 # check parameter syntax
3506 try:
3507 objects.NIC.CheckParameterSyntax(params_filled)
3508 except errors.ConfigurationError, err:
3509 nic_errors.append("Instance %s, nic/%d: %s" %
3510 (instance.name, nic_idx, err))
3512 # if we're moving instances to routed, check that they have an ip
3513 target_mode = params_filled[constants.NIC_MODE]
3514 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3515 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3516 " address" % (instance.name, nic_idx))
3518 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3519 "\n".join(nic_errors))
3521 # hypervisor list/parameters
3522 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3523 if self.op.hvparams:
3524 for hv_name, hv_dict in self.op.hvparams.items():
3525 if hv_name not in self.new_hvparams:
3526 self.new_hvparams[hv_name] = hv_dict
3527 else:
3528 self.new_hvparams[hv_name].update(hv_dict)
3530 # os hypervisor parameters
3531 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3532 if self.op.os_hvp:
3533 for os_name, hvs in self.op.os_hvp.items():
3534 if os_name not in self.new_os_hvp:
3535 self.new_os_hvp[os_name] = hvs
3536 else:
3537 for hv_name, hv_dict in hvs.items():
3538 if hv_name not in self.new_os_hvp[os_name]:
3539 self.new_os_hvp[os_name][hv_name] = hv_dict
3540 else:
3541 self.new_os_hvp[os_name][hv_name].update(hv_dict)
3543 # os parameters
3544 self.new_osp = objects.FillDict(cluster.osparams, {})
3545 if self.op.osparams:
3546 for os_name, osp in self.op.osparams.items():
3547 if os_name not in self.new_osp:
3548 self.new_osp[os_name] = {}
3550 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3551 use_none=True)
3553 if not self.new_osp[os_name]:
3554 # we removed all parameters
3555 del self.new_osp[os_name]
3556 else:
3557 # check the parameter validity (remote check)
3558 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3559 os_name, self.new_osp[os_name])
3561 # changes to the hypervisor list
3562 if self.op.enabled_hypervisors is not None:
3563 self.hv_list = self.op.enabled_hypervisors
3564 for hv in self.hv_list:
3565 # if the hypervisor doesn't already exist in the cluster
3566 # hvparams, we initialize it to empty, and then (in both
3567 # cases) we make sure to fill the defaults, as we might not
3568 # have a complete defaults list if the hypervisor wasn't
3569 # enabled before
3570 if hv not in new_hvp:
3571 new_hvp[hv] = {}
3572 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3573 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3574 else:
3575 self.hv_list = cluster.enabled_hypervisors
3577 if self.op.hvparams or self.op.enabled_hypervisors is not None:
3578 # either the enabled list has changed, or the parameters have, validate
3579 for hv_name, hv_params in self.new_hvparams.items():
3580 if ((self.op.hvparams and hv_name in self.op.hvparams) or
3581 (self.op.enabled_hypervisors and
3582 hv_name in self.op.enabled_hypervisors)):
3583 # either this is a new hypervisor, or its parameters have changed
3584 hv_class = hypervisor.GetHypervisor(hv_name)
3585 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3586 hv_class.CheckParameterSyntax(hv_params)
3587 _CheckHVParams(self, node_list, hv_name, hv_params)
3589 if self.op.os_hvp:
3590 # no need to check any newly-enabled hypervisors, since the
3591 # defaults have already been checked in the above code-block
3592 for os_name, os_hvp in self.new_os_hvp.items():
3593 for hv_name, hv_params in os_hvp.items():
3594 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3595 # we need to fill in the new os_hvp on top of the actual hv_p
3596 cluster_defaults = self.new_hvparams.get(hv_name, {})
3597 new_osp = objects.FillDict(cluster_defaults, hv_params)
3598 hv_class = hypervisor.GetHypervisor(hv_name)
3599 hv_class.CheckParameterSyntax(new_osp)
3600 _CheckHVParams(self, node_list, hv_name, new_osp)
3602 if self.op.default_iallocator:
3603 alloc_script = utils.FindFile(self.op.default_iallocator,
3604 constants.IALLOCATOR_SEARCH_PATH,
3605 os.X_OK)
3606 if alloc_script is None:
3607 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3608 " specified" % self.op.default_iallocator,
3611 def Exec(self, feedback_fn):
3612 """Change the parameters of the cluster.
3615 if self.op.vg_name is not None:
3616 new_volume = self.op.vg_name
3617 if not new_volume:
3618 new_volume = None
3619 if new_volume != self.cfg.GetVGName():
3620 self.cfg.SetVGName(new_volume)
3621 else:
3622 feedback_fn("Cluster LVM configuration already in desired"
3623 " state, not changing")
3624 if self.op.drbd_helper is not None:
3625 new_helper = self.op.drbd_helper
3626 if not new_helper:
3627 new_helper = None
3628 if new_helper != self.cfg.GetDRBDHelper():
3629 self.cfg.SetDRBDHelper(new_helper)
3630 else:
3631 feedback_fn("Cluster DRBD helper already in desired state,"
3633 if self.op.hvparams:
3634 self.cluster.hvparams = self.new_hvparams
3635 if self.op.os_hvp:
3636 self.cluster.os_hvp = self.new_os_hvp
3637 if self.op.enabled_hypervisors is not None:
3638 self.cluster.hvparams = self.new_hvparams
3639 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3640 if self.op.beparams:
3641 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3642 if self.op.nicparams:
3643 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3644 if self.op.osparams:
3645 self.cluster.osparams = self.new_osp
3646 if self.op.ndparams:
3647 self.cluster.ndparams = self.new_ndparams
3649 if self.op.candidate_pool_size is not None:
3650 self.cluster.candidate_pool_size = self.op.candidate_pool_size
3651 # we need to update the pool size here, otherwise the save will fail
3652 _AdjustCandidatePool(self, [])
3654 if self.op.maintain_node_health is not None:
3655 self.cluster.maintain_node_health = self.op.maintain_node_health
3657 if self.op.prealloc_wipe_disks is not None:
3658 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3660 if self.op.add_uids is not None:
3661 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3663 if self.op.remove_uids is not None:
3664 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3666 if self.op.uid_pool is not None:
3667 self.cluster.uid_pool = self.op.uid_pool
3669 if self.op.default_iallocator is not None:
3670 self.cluster.default_iallocator = self.op.default_iallocator
3672 if self.op.reserved_lvs is not None:
3673 self.cluster.reserved_lvs = self.op.reserved_lvs
3675 def helper_os(aname, mods, desc):
3676 desc += " OS list"
3677 lst = getattr(self.cluster, aname)
3678 for key, val in mods:
3679 if key == constants.DDM_ADD:
3680 if val in lst:
3681 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3684 elif key == constants.DDM_REMOVE:
3685 if val in lst:
3686 lst.remove(val)
3687 else:
3688 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3690 raise errors.ProgrammerError("Invalid modification '%s'" % key)
3692 if self.op.hidden_os:
3693 helper_os("hidden_os", self.op.hidden_os, "hidden")
3695 if self.op.blacklisted_os:
3696 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3698 if self.op.master_netdev:
3699 master = self.cfg.GetMasterNode()
3700 feedback_fn("Shutting down master ip on the current netdev (%s)" %
3701 self.cluster.master_netdev)
3702 result = self.rpc.call_node_stop_master(master, False)
3703 result.Raise("Could not disable the master ip")
3704 feedback_fn("Changing master_netdev from %s to %s" %
3705 (self.cluster.master_netdev, self.op.master_netdev))
3706 self.cluster.master_netdev = self.op.master_netdev
3708 self.cfg.Update(self.cluster, feedback_fn)
3710 if self.op.master_netdev:
3711 feedback_fn("Starting the master ip on the new master netdev (%s)" %
3712 self.op.master_netdev)
3713 result = self.rpc.call_node_start_master(master, False, False)
3714 if result.fail_msg:
3715 self.LogWarning("Could not re-enable the master ip on"
3716 " the master, please restart manually: %s",
3720 def _UploadHelper(lu, nodes, fname):
3721 """Helper for uploading a file and showing warnings.
3724 if os.path.exists(fname):
3725 result = lu.rpc.call_upload_file(nodes, fname)
3726 for to_node, to_result in result.items():
3727 msg = to_result.fail_msg
3729 msg = ("Copy of file %s to node %s failed: %s" %
3730 (fname, to_node, msg))
3731 lu.proc.LogWarning(msg)


def _ComputeAncillaryFiles(cluster, redist):
  """Compute files external to Ganeti which need to be consistent.

  @type redist: boolean
  @param redist: Whether to include files which need to be redistributed

  """
  # Compute files for all nodes
  files_all = set([
    constants.SSH_KNOWN_HOSTS_FILE,
    constants.CONFD_HMAC_KEY,
    constants.CLUSTER_DOMAIN_SECRET_FILE,
    ])

  if not redist:
    files_all.update(constants.ALL_CERT_FILES)
    files_all.update(ssconf.SimpleStore().GetFileList())
  else:
    # we need to ship at least the RAPI certificate
    files_all.add(constants.RAPI_CERT_FILE)

  if cluster.modify_etc_hosts:
    files_all.add(constants.ETC_HOSTS)

  # Files which must either exist on all nodes or on none
  files_all_opt = set([
    constants.RAPI_USERS_FILE,
    ])

  # Files which should only be on master candidates
  files_mc = set()
  if not redist:
    files_mc.add(constants.CLUSTER_CONF_FILE)

  # Files which should only be on VM-capable nodes
  files_vm = set(filename
    for hv_name in cluster.enabled_hypervisors
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())

  # Filenames must be unique
  assert (len(files_all | files_all_opt | files_mc | files_vm) ==
          sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
    "Found file listed in more than one file list"

  return (files_all, files_all_opt, files_mc, files_vm)
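

# Illustrative note (assumption, not part of the original source): the value
# returned by _ComputeAncillaryFiles() is a 4-tuple of sets of file names,
#   (files_all, files_all_opt, files_mc, files_vm)
# e.g. files_mc stays empty for redist=True, because the cluster config file
# is distributed by ConfigWriter itself rather than by this helper.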


def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to
  @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
  # Gather target nodes
  cluster = lu.cfg.GetClusterInfo()
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())

  online_nodes = lu.cfg.GetOnlineNodeList()
  vm_nodes = lu.cfg.GetVmCapableNodeList()

  if additional_nodes is not None:
    online_nodes.extend(additional_nodes)
    if additional_vm:
      vm_nodes.extend(additional_nodes)

  # Never distribute to master node
  for nodelist in [online_nodes, vm_nodes]:
    if master_info.name in nodelist:
      nodelist.remove(master_info.name)

  # Gather file lists
  (files_all, files_all_opt, files_mc, files_vm) = \
    _ComputeAncillaryFiles(cluster, True)

  # Never re-distribute configuration file from here
  assert not (constants.CLUSTER_CONF_FILE in files_all or
              constants.CLUSTER_CONF_FILE in files_vm)
  assert not files_mc, "Master candidates not handled in this function"

  filemap = [
    (online_nodes, files_all),
    (online_nodes, files_all_opt),
    (vm_nodes, files_vm),
    ]

  # Upload the files
  for (node_list, files) in filemap:
    for fname in files:
      _UploadHelper(lu, node_list, fname)


class LUClusterRedistConf(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      }
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)


def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disks to sync.

  """
  if not instance.disks or disks is not None and not disks:
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                      node, disks[i].iv_name)
        continue

      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (disks[i].iv_name, mstat.sync_percent, rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded
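
# A minimal caller sketch (illustrative only, not from the original source):
# LUs typically invoke _WaitForSync right after creating or attaching disks
# and abort on a degraded result, along the lines of:
#
#   if not _WaitForSync(self, instance):
#     raise errors.OpExecError("Disks of instance %s are degraded" %
#                              instance.name)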


def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
  lu.cfg.SetDiskID(dev, node)
  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)

  return result


class LUOobCommand(NoHooksLU):
  """Logical unit for OOB handling.

  """
  REQ_BGL = False
  _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)

  def ExpandNames(self):
    """Gather locks we need.

    """
    if self.op.node_names:
      self.op.node_names = _GetWantedNodes(self, self.op.node_names)
      lock_names = self.op.node_names
    else:
      lock_names = locking.ALL_SET

    self.needed_locks = {
      locking.LEVEL_NODE: lock_names,
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - OOB is supported

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.nodes = []
    self.master_node = self.cfg.GetMasterNode()

    assert self.op.power_delay >= 0.0

    if self.op.node_names:
      if (self.op.command in self._SKIP_MASTER and
          self.master_node in self.op.node_names):
        master_node_obj = self.cfg.GetNodeInfo(self.master_node)
        master_oob_handler = _SupportsOob(self.cfg, master_node_obj)

        if master_oob_handler:
          additional_text = ("run '%s %s %s' if you want to operate on the"
                             " master regardless") % (master_oob_handler,
                                                      self.op.command,
                                                      self.master_node)
        else:
          additional_text = "it does not support out-of-band operations"

        raise errors.OpPrereqError(("Operating on the master node %s is not"
                                    " allowed for %s; %s") %
                                   (self.master_node, self.op.command,
                                    additional_text), errors.ECODE_INVAL)
    else:
      self.op.node_names = self.cfg.GetNodeList()
      if self.op.command in self._SKIP_MASTER:
        self.op.node_names.remove(self.master_node)

    if self.op.command in self._SKIP_MASTER:
      assert self.master_node not in self.op.node_names

    for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
      if node is None:
        raise errors.OpPrereqError("Node %s not found" % node_name,
                                   errors.ECODE_NOENT)
      else:
        self.nodes.append(node)

      if (not self.op.ignore_status and
          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
                                    " not marked offline") % node_name,
                                   errors.ECODE_STATE)

  def Exec(self, feedback_fn):
    """Execute OOB and return result if we expect any.

    """
    master_node = self.master_node
    ret = []

    for idx, node in enumerate(utils.NiceSort(self.nodes,
                                              key=lambda node: node.name)):
      node_entry = [(constants.RS_NORMAL, node.name)]
      ret.append(node_entry)

      oob_program = _SupportsOob(self.cfg, node)

      if not oob_program:
        node_entry.append((constants.RS_UNAVAIL, None))
        continue

      logging.info("Executing out-of-band command '%s' using '%s' on %s",
                   self.op.command, oob_program, node.name)
      result = self.rpc.call_run_oob(master_node, oob_program,
                                     self.op.command, node.name,
                                     self.op.timeout)

      if result.fail_msg:
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
                        node.name, result.fail_msg)
        node_entry.append((constants.RS_NODATA, None))
      else:
        try:
          self._CheckPayload(result)
        except errors.OpExecError, err:
          self.LogWarning("Payload returned by node '%s' is not valid: %s",
                          node.name, err)
          node_entry.append((constants.RS_NODATA, None))
        else:
          if self.op.command == constants.OOB_HEALTH:
            # For health we should log important events
            for item, status in result.payload:
              if status in [constants.OOB_STATUS_WARNING,
                            constants.OOB_STATUS_CRITICAL]:
                self.LogWarning("Item '%s' on node '%s' has status '%s'",
                                item, node.name, status)

          if self.op.command == constants.OOB_POWER_ON:
            node.powered = True
          elif self.op.command == constants.OOB_POWER_OFF:
            node.powered = False
          elif self.op.command == constants.OOB_POWER_STATUS:
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
            if powered != node.powered:
              logging.warning(("Recorded power state (%s) of node '%s' does not"
                               " match actual power state (%s)"), node.powered,
                              node.name, powered)

          # For configuration changing commands we should update the node
          if self.op.command in (constants.OOB_POWER_ON,
                                 constants.OOB_POWER_OFF):
            self.cfg.Update(node, feedback_fn)

          node_entry.append((constants.RS_NORMAL, result.payload))

          if (self.op.command == constants.OOB_POWER_ON and
              idx < len(self.nodes) - 1):
            time.sleep(self.op.power_delay)

    return ret

  def _CheckPayload(self, result):
    """Checks if the payload is valid.

    @param result: RPC result
    @raises errors.OpExecError: If payload is not valid

    """
    errs = []
    if self.op.command == constants.OOB_HEALTH:
      if not isinstance(result.payload, list):
        errs.append("command 'health' is expected to return a list but got %s" %
                    type(result.payload))
      else:
        for item, status in result.payload:
          if status not in constants.OOB_STATUSES:
            errs.append("health item '%s' has invalid status '%s'" %
                        (item, status))

    if self.op.command == constants.OOB_POWER_STATUS:
      if not isinstance(result.payload, dict):
        errs.append("power-status is expected to return a dict but got %s" %
                    type(result.payload))

    if self.op.command in [
        constants.OOB_POWER_ON,
        constants.OOB_POWER_OFF,
        constants.OOB_POWER_CYCLE,
        ]:
      if result.payload is not None:
        errs.append("%s is expected to not return payload but got '%s'" %
                    (self.op.command, result.payload))

    if errs:
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
                               utils.CommaJoin(errs))
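
# Illustrative note (assumption, not part of the original source): each entry
# of the list returned by LUOobCommand.Exec is itself a list of (status, data)
# tuples, e.g. for a successful "power-status" query on one node:
#   [(constants.RS_NORMAL, "node1.example.com"),
#    (constants.RS_NORMAL, {constants.OOB_POWER_STATUS_POWERED: True})]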


class _OsQuery(_QueryBase):
  FIELDS = query.OS_FIELDS

  def ExpandNames(self, lu):
    # Lock all nodes in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    lu.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    # The following variables interact with _QueryBase._GetNames
    if self.names:
      self.wanted = self.names
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = self.use_locking

  def DeclareLocks(self, lu, level):
    pass

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and tuples of (path, status, diagnose,
        variants, parameters, api_versions) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "", [], [])]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for (name, path, status, diagnose, variants,
           params, api_versions) in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        # convert params from [name, help] to (name, help)
        params = [tuple(v) for v in params]
        all_os[name][node_name].append((path, status, diagnose,
                                        variants, params, api_versions))
    return all_os

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    # Locking is not used
    assert not (compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) or
                self.do_locking or self.use_locking)

    valid_nodes = [node.name
                   for node in lu.cfg.GetAllNodesInfo().values()
                   if not node.offline and node.vm_capable]
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
    cluster = lu.cfg.GetClusterInfo()

    data = {}

    for (os_name, os_data) in pol.items():
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
                          hidden=(os_name in cluster.hidden_os),
                          blacklisted=(os_name in cluster.blacklisted_os))

      variants = set()
      parameters = set()
      api_versions = set()

      for idx, osl in enumerate(os_data.values()):
        info.valid = bool(info.valid and osl and osl[0][1])
        if not info.valid:
          break

        (node_variants, node_params, node_api) = osl[0][3:6]
        if idx == 0:
          # First entry
          variants.update(node_variants)
          parameters.update(node_params)
          api_versions.update(node_api)
        else:
          # Filter out inconsistent values
          variants.intersection_update(node_variants)
          parameters.intersection_update(node_params)
          api_versions.intersection_update(node_api)

      info.variants = list(variants)
      info.parameters = list(parameters)
      info.api_versions = list(api_versions)

      data[os_name] = info

    # Prepare data in requested order
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
            if name in data]


class LUOsDiagnose(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  REQ_BGL = False

  @staticmethod
  def _BuildFilter(fields, names):
    """Builds a filter for querying OSes.

    """
    name_filter = qlang.MakeSimpleFilter("name", names)

    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
    # respective field is not requested
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
                     for fname in ["hidden", "blacklisted"]
                     if fname not in fields]
    if "valid" not in fields:
      status_filter.append([qlang.OP_TRUE, "valid"])

    if status_filter:
      status_filter.insert(0, qlang.OP_AND)
    else:
      status_filter = None

    if name_filter and status_filter:
      return [qlang.OP_AND, name_filter, status_filter]
    elif name_filter:
      return name_filter
    else:
      return status_filter
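
  # Illustrative example (assumption, not part of the original source): for
  # fields=["name"] and names=["debian-image"], the legacy behaviour hides
  # hidden, blacklisted and invalid OSes, so the generated filter is roughly
  #   [OP_AND,
  #    [OP_OR, [OP_EQUAL, "name", "debian-image"]],
  #    [OP_AND,
  #     [OP_NOT, [OP_TRUE, "hidden"]],
  #     [OP_NOT, [OP_TRUE, "blacklisted"]],
  #     [OP_TRUE, "valid"]]]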

  def CheckArguments(self):
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
                       self.op.output_fields, False)

  def ExpandNames(self):
    self.oq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.oq.OldStyleQuery(self)


class LUNodeRemove(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      logging.warning("Node '%s', which is about to be removed, was not found"
                      " in the list of all nodes", self.op.node_name)
    return (all_nodes, all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node, failover to another"
                                 " node is required", errors.ECODE_INVAL)

    for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first" % instance_name,
                                   errors.ECODE_INVAL)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    _RunPostHook(self, node.name)

    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)

    # Remove node from our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_REMOVE,
                                              node.name, None)
      result.Raise("Can't update hosts file with new host data")
      _RedistributeAncillaryFiles(self)


class _NodeQuery(_QueryBase):
  FIELDS = query.NODE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedNodes(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.NQ_LIVE in self.requested_data)
    if self.do_locking:
      # If any non-static field is requested we need to lock the nodes
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    all_info = lu.cfg.GetAllNodesInfo()

    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)

    # Gather data as requested
    if query.NQ_LIVE in self.requested_data:
      # filter out non-vm_capable nodes
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]

      node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
                                        lu.cfg.GetHypervisorType())
      live_data = dict((name, nresult.payload)
                       for (name, nresult) in node_data.items()
                       if not nresult.fail_msg and nresult.payload)
    else:
      live_data = None

    if query.NQ_INST in self.requested_data:
      node_to_primary = dict([(name, set()) for name in nodenames])
      node_to_secondary = dict([(name, set()) for name in nodenames])

      inst_data = lu.cfg.GetAllInstancesInfo()

      for inst in inst_data.values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)
    else:
      node_to_primary = None
      node_to_secondary = None

    if query.NQ_OOB in self.requested_data:
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
                         for name, node in all_info.iteritems())
    else:
      oob_support = None

    if query.NQ_GROUP in self.requested_data:
      groups = lu.cfg.GetAllNodeGroupsInfo()
    else:
      groups = {}

    return query.NodeQueryData([all_info[name] for name in nodenames],
                               live_data, lu.cfg.GetMasterNode(),
                               node_to_primary, node_to_secondary, groups,
                               oob_support, lu.cfg.GetClusterInfo())


class LUNodeQuery(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
                         self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.nq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.nq.OldStyleQuery(self)


class LUNodeQueryvols(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    nodenames = self.owned_locks(locking.LEVEL_NODE)
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = self.cfg.GetAllInstancesInfo()
    vol2inst = _MapInstanceDisksToNodes(ilist.values())

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = sorted(nresult.payload,
                         key=operator.itemgetter("dev"))

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol["dev"]
          elif field == "vg":
            val = vol["vg"]
          elif field == "name":
            val = vol["name"]
          elif field == "size":
            val = int(float(vol["size"]))
          elif field == "instance":
            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output
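
  # Illustrative note (assumption, not part of the original source): with
  # output_fields=["node", "phys", "size"], each entry of the returned list
  # is a list of strings in the order of the requested fields, e.g.
  #   ["node1.example.com", "/dev/xenvg/lv0", "1024"]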


class LUNodeQueryStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  REQ_BGL = False

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    self.nodes = self.owned_locks(locking.LEVEL_NODE)

    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields

    # Never ask for node or type as it's only known to the LU
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
      while extra in fields:
        fields.remove(extra)

    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]
        out = []
        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)
          out.append(val)

        result.append(out)

    return result


class _InstanceQuery(_QueryBase):
  FIELDS = query.INSTANCE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedInstances(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.IQ_LIVE in self.requested_data)
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      lu.needed_locks[locking.LEVEL_NODEGROUP] = []
      lu.needed_locks[locking.LEVEL_NODE] = []
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.do_grouplocks = (self.do_locking and
                          query.IQ_NODES in self.requested_data)

  def DeclareLocks(self, lu, level):
    if self.do_locking:
      if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
        assert not lu.needed_locks[locking.LEVEL_NODEGROUP]

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lu.needed_locks[locking.LEVEL_NODEGROUP] = \
          set(group_uuid
              for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
              for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
      elif level == locking.LEVEL_NODE:
        lu._LockInstancesNodes() # pylint: disable=W0212

  @staticmethod
  def _CheckGroupLocks(lu):
    owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))

    # Check if node groups for locked instances are still correct
    for instance_name in owned_instances:
      _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)

  def _GetQueryData(self, lu):
    """Computes the list of instances and their attributes.

    """
    if self.do_grouplocks:
      self._CheckGroupLocks(lu)

    cluster = lu.cfg.GetClusterInfo()
    all_info = lu.cfg.GetAllInstancesInfo()

    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)

    instance_list = [all_info[name] for name in instance_names]
    nodes = frozenset(itertools.chain(*(inst.all_nodes
                                        for inst in instance_list)))
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
    bad_nodes = []
    offline_nodes = []
    wrongnode_inst = set()

    # Gather data as requested
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
      live_data = {}
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          assert result.fail_msg
          offline_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        elif result.payload:
          for inst in result.payload:
            if inst in all_info:
              if all_info[inst].primary_node == name:
                live_data.update(result.payload)
              else:
                wrongnode_inst.add(inst)
            else:
              # orphan instance; we don't list it here as we don't
              # handle this case yet in the output of instance listing
              logging.warning("Orphan instance '%s' found on node %s",
                              inst, name)
        # else no instance is alive
    else:
      live_data = {}

    if query.IQ_DISKUSAGE in self.requested_data:
      disk_usage = dict((inst.name,
                         _ComputeDiskSize(inst.disk_template,
                                          [{constants.IDISK_SIZE: disk.size}
                                           for disk in inst.disks]))
                        for inst in instance_list)
    else:
      disk_usage = None

    if query.IQ_CONSOLE in self.requested_data:
      consinfo = {}
      for inst in instance_list:
        if inst.name in live_data:
          # Instance is running
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
        else:
          consinfo[inst.name] = None
      assert set(consinfo.keys()) == set(instance_names)
    else:
      consinfo = None

    if query.IQ_NODES in self.requested_data:
      node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
                                            instance_list)))
      nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
      groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
                    for uuid in set(map(operator.attrgetter("group"),
                                        nodes.values())))
    else:
      nodes = None
      groups = None

    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
                                   disk_usage, offline_nodes, bad_nodes,
                                   live_data, wrongnode_inst, consinfo,
                                   nodes, groups)


class LUQuery(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    qcls = _GetQueryImplementation(self.op.what)

    self.impl = qcls(self.op.filter, self.op.fields, self.op.use_locking)

  def ExpandNames(self):
    self.impl.ExpandNames(self)

  def DeclareLocks(self, level):
    self.impl.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.impl.NewStyleQuery(self)


class LUQueryFields(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.qcls = _GetQueryImplementation(self.op.what)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)


class LUNodeModifyStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(self.op.node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))
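
# Illustrative note (assumption, not part of the original source): the
# "changes" argument is a dict restricted to the fields listed in
# constants.MODIFIABLE_STORAGE_FIELDS for the given storage type, e.g.
#   {constants.SF_ALLOCATABLE: False}
# to mark an LVM physical volume as non-allocatable.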


class LUNodeAdd(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _NFLAGS = ["master_capable", "vm_capable"]

  def CheckArguments(self):
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
    # validate/normalize the node name
    self.hostname = netutils.GetHostname(name=self.op.node_name,
                                         family=self.primary_ip_family)
    self.op.node_name = self.hostname.name

    if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
      raise errors.OpPrereqError("Cannot readd the master node",
                                 errors.ECODE_STATE)

    if self.op.readd and self.op.group:
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
                                 " being readded", errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # Exclude added node
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
    post_nodes = pre_nodes + [self.op.node_name, ]

    return (pre_nodes, post_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) matches the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    cfg = self.cfg
    hostname = self.hostname
    node = hostname.name
    primary_ip = self.op.primary_ip = hostname.ip
    if self.op.secondary_ip is None:
      if self.primary_ip_family == netutils.IP6Address.family:
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
                                   " IPv4 address must be given as secondary",
                                   errors.ECODE_INVAL)
      self.op.secondary_ip = primary_ip

    secondary_ip = self.op.secondary_ip
    if not netutils.IP4Address.IsValid(secondary_ip):
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                 " address" % secondary_ip, errors.ECODE_INVAL)

    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)

    self.changed_primary_ip = False

    for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
      if self.op.readd and node == existing_node_name:
        if existing_node.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # After this 'if' block, None is no longer a valid value for the
    # _capable op attributes
    if self.op.readd:
      old_node = self.cfg.GetNodeInfo(node)
      assert old_node is not None, "Can't retrieve locked node %s" % node
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, getattr(old_node, attr))
    else:
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, True)

    if self.op.readd and not self.op.vm_capable:
      pri, sec = cfg.GetNodeInstances(node)
      if pri or sec:
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
                                   " flag set to false, but it already holds"
                                   " instances" % node,
                                   errors.ECODE_STATE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no secondary ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a secondary ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)

    # checks reachability
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                              source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to node daemon port",
                                   errors.ECODE_ENVIRON)

    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    if self.op.master_capable:
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
    else:
      self.master_candidate = False

    if self.op.readd:
      self.new_node = old_node
    else:
      node_group = cfg.LookupNodeGroup(self.op.group)
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False,
                                   group=node_group)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    new_node = self.new_node
    node = new_node.name

    # We are adding a new node, so we assume it's powered
    new_node.powered = True

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # copy the master/vm_capable flags
    for attr in self._NFLAGS:
      setattr(new_node, attr, getattr(self.op, attr))

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    if self.op.ndparams:
      new_node.ndparams = self.op.ndparams
    else:
      new_node.ndparams = {}

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result.payload))

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_ADD,
                                              self.hostname.name,
                                              self.hostname.ip)
      result.Raise("Can't update hosts file with new host data")

    if new_node.secondary_ip != new_node.primary_ip:
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
                               False)

    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: ([node], {}),
      # TODO: do a node-net-test as well?
      }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
                                  additional_vm=self.op.vm_capable)
      self.context.AddNode(new_node, self.proc.GetECId())


class LUNodeSetParams(LogicalUnit):
  """Modifies the parameters of a node.

  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
      to the node role (as _ROLE_*)
  @cvar _R2F: a dictionary from node role to tuples of flags
  @cvar _FLAGS: a list of attribute names corresponding to the flags

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
  _F2R = {
    (True, False, False): _ROLE_CANDIDATE,
    (False, True, False): _ROLE_DRAINED,
    (False, False, True): _ROLE_OFFLINE,
    (False, False, False): _ROLE_REGULAR,
    }
  _R2F = dict((v, k) for k, v in _F2R.items())
  _FLAGS = ["master_candidate", "drained", "offline"]
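
  # Illustrative example (not part of the original source): the flag tuples
  # map to roles positionally as (master_candidate, drained, offline), so
  #   _F2R[(True, False, False)] == _ROLE_CANDIDATE
  #   _R2F[_ROLE_OFFLINE] == (False, False, True)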

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
                self.op.master_capable, self.op.vm_capable,
                self.op.secondary_ip, self.op.ndparams]
    if all_mods.count(None) == len(all_mods):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we might be demoting from MC
    self.might_demote = (self.op.master_candidate == False or
                         self.op.offline == True or
                         self.op.drained == True or
                         self.op.master_capable == False)

    if self.op.secondary_ip:
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                   " address" % self.op.secondary_ip,
                                   errors.ECODE_INVAL)

    self.lock_all = self.op.auto_promote and self.might_demote
    self.lock_instances = self.op.secondary_ip is not None

  def ExpandNames(self):
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

    if self.lock_instances:
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET

  def DeclareLocks(self, level):
    # If we have locked all instances, before waiting to lock nodes, release
    # all the ones living on nodes unrelated to the current operation.
    if level == locking.LEVEL_NODE and self.lock_instances:
      self.affected_instances = []
      if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
        instances_keep = []

        # Build list of instances to release
        locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
        for instance_name, instance in self.cfg.GetMultiInstanceInfo(locked_i):
          if (instance.disk_template in constants.DTS_INT_MIRROR and
              self.op.node_name in instance.all_nodes):
            instances_keep.append(instance_name)
            self.affected_instances.append(instance)

        _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)

        assert (set(self.owned_locks(locking.LEVEL_INSTANCE)) ==
                set(instances_keep))

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via master-failover",
                                   errors.ECODE_INVAL)

    if self.op.master_candidate and not node.master_capable:
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
                                 " it a master candidate" % node.name,
                                 errors.ECODE_STATE)

    if self.op.vm_capable == False:
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
      if ipri or isec:
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
                                   " the vm_capable flag" % node.name,
                                   errors.ECODE_STATE)

    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto promote option to allow"
                                   " promotion", errors.ECODE_STATE)

    self.old_flags = old_flags = (node.master_candidate,
                                  node.drained, node.offline)
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
    self.old_role = old_role = self._F2R[old_flags]

    # Check for ineffective changes
    for attr in self._FLAGS:
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
        setattr(self.op, attr, None)

    # Past this point, any flag change to False means a transition
    # away from the respective state, as only real changes are kept

    # TODO: We might query the real power state if it supports OOB
    if _SupportsOob(self.cfg, node):
      if self.op.offline is False and not (node.powered or
                                           self.op.powered == True):
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
                                    " offline status can be reset") %
                                   self.op.node_name)
    elif self.op.powered is not None:
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
                                  " as it does not support out-of-band"
                                  " handling") % self.op.node_name)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.op.drained == False or self.op.offline == False or
        (self.op.master_capable and not node.master_capable)):
      if _DecideSelfPromotion(self):
        self.op.master_candidate = True
        self.LogInfo("Auto-promoting node to master candidate")

    # If we're no longer master capable, we'll demote ourselves from MC
    if self.op.master_capable == False and node.master_candidate:
      self.LogInfo("Demoting from master candidate")
      self.op.master_candidate = False

    # Compute new role
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
    if self.op.master_candidate:
      new_role = self._ROLE_CANDIDATE
    elif self.op.drained:
      new_role = self._ROLE_DRAINED
    elif self.op.offline:
      new_role = self._ROLE_OFFLINE
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
      # False is still in new flags, which means we're un-setting (the
      # only) True flag
      new_role = self._ROLE_REGULAR
    else: # no new flags, nothing, keep old role
      new_role = old_role

    self.new_role = new_role

    if old_role == self._ROLE_OFFLINE and new_role != old_role:
      # Trying to transition out of offline status
      result = self.rpc.call_version([node.name])[node.name]
      if result.fail_msg:
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
                                   " to report its version: %s" %
                                   (node.name, result.fail_msg),
                                   errors.ECODE_STATE)
      else:
        self.LogWarning("Transitioning node from offline to online state"
                        " without using re-add. Please make sure the node"
                        " is healthy!")

    if self.op.secondary_ip:
      # Ok even without locking, because this can't be changed by any LU
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
      master_singlehomed = master.secondary_ip == master.primary_ip
      if master_singlehomed and self.op.secondary_ip:
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
                                   " homed cluster", errors.ECODE_INVAL)

      if node.offline:
        if self.affected_instances:
          raise errors.OpPrereqError("Cannot change secondary ip: offline"
                                     " node has instances (%s) configured"
                                     " to use it" % self.affected_instances)
      else:
        # On online nodes, check that no instances are running, and that
        # the node has the new ip and we can reach it.
        for instance in self.affected_instances:
          _CheckInstanceDown(self, instance, "cannot change secondary ip")

        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
        if master.name != node.name:
          # check reachability from master secondary ip to new secondary ip
          if not netutils.TcpPing(self.op.secondary_ip,
                                  constants.DEFAULT_NODED_PORT,
                                  source=master.secondary_ip):
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                       " based ping to node daemon port",
                                       errors.ECODE_ENVIRON)

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

  def Exec(self, feedback_fn):
    """Modifies a node.

    """
    node = self.node
    old_role = self.old_role
    new_role = self.new_role

    result = []

    if self.op.ndparams:
      node.ndparams = self.new_ndparams

    if self.op.powered is not None:
      node.powered = self.op.powered

    for attr in ["master_capable", "vm_capable"]:
      val = getattr(self.op, attr)
      if val is not None:
        setattr(node, attr, val)
        result.append((attr, str(val)))

    if new_role != old_role:
      # Tell the node to demote itself, if no longer MC and not offline
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s", msg)

      new_flags = self._R2F[new_role]
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
        if of != nf:
          result.append((desc, str(nf)))
      (node.master_candidate, node.drained, node.offline) = new_flags

      # we locked all nodes, we adjust the CP before updating this node
      if self.lock_all:
        _AdjustCandidatePool(self, [node.name])

    if self.op.secondary_ip:
      node.secondary_ip = self.op.secondary_ip
      result.append(("secondary_ip", self.op.secondary_ip))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup if the mc
    # flag changed
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
      self.context.ReaddNode(node)

    return result


class LUNodePowercycle(NoHooksLU):
  """Powercycles a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Reboots a node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload


class LUClusterQuery(NoHooksLU):
  """Query cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    os_hvp = {}

    # Filter just for enabled hypervisors
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = {}
      for hv_name, hv_params in hv_dict.items():
        if hv_name in cluster.enabled_hypervisors:
          os_hvp[os_name][hv_name] = hv_params

    # Convert ip_family to ip_version
    primary_ip_version = constants.IP4_VERSION
    if cluster.primary_ip_family == netutils.IP6Address.family:
      primary_ip_version = constants.IP6_VERSION

    result = {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.enabled_hypervisors[0],
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "nicparams": cluster.nicparams,
      "ndparams": cluster.ndparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      "reserved_lvs": cluster.reserved_lvs,
      "primary_ip_version": primary_ip_version,
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
      "hidden_os": cluster.hidden_os,
      "blacklisted_os": cluster.blacklisted_os,
      }

    return result


class LUClusterConfigQuery(NoHooksLU):
  """Return configuration values.

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause", "volume_group_name")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Dump a representation of the cluster config to the standard output.

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      elif field == "watcher_pause":
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
      elif field == "volume_group_name":
        entry = self.cfg.GetVGName()
      else:
        raise errors.ParameterError(field)
      values.append(entry)
    return values


class LUInstanceActivateDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
      _AssembleInstanceDisks(self, self.instance,
                             ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info
5604 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5606 """Prepare the block devices for an instance.
5608 This sets up the block devices on all nodes.
5610 @type lu: L{LogicalUnit}
5611 @param lu: the logical unit on whose behalf we execute
5612 @type instance: L{objects.Instance}
5613 @param instance: the instance for whose disks we assemble
5614 @type disks: list of L{objects.Disk} or None
5615 @param disks: which disks to assemble (or all, if None)
5616 @type ignore_secondaries: boolean
5617 @param ignore_secondaries: if true, errors on secondary nodes
5618 won't result in an error return from the function
5619 @type ignore_size: boolean
5620 @param ignore_size: if true, the current known size of the disk
5621 will not be used during the disk activation, useful for cases
5622 when the size is wrong
5623 @return: False if the operation failed, otherwise a list of
5624 (host, instance_visible_name, node_visible_name)
5625 with the mapping from node devices to instance devices
5630 iname = instance.name
5631 disks = _ExpandCheckDisks(instance, disks)
5633 # With the two passes mechanism we try to reduce the window of
5634 # opportunity for the race condition of switching DRBD to primary
5635 # before handshaking occured, but we do not eliminate it
5637 # The proper fix would be to wait (with some limits) until the
5638 # connection has been made and drbd transitions from WFConnection
5639 # into any other network-connected state (Connected, SyncTarget,
5642 # 1st pass, assemble on all nodes in secondary mode
5643 for idx, inst_disk in enumerate(disks):
5644 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5646 node_disk = node_disk.Copy()
5647 node_disk.UnsetSize()
5648 lu.cfg.SetDiskID(node_disk, node)
5649 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5650 msg = result.fail_msg
5652 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5653 " (is_primary=False, pass=1): %s",
5654 inst_disk.iv_name, node, msg)
5655 if not ignore_secondaries:
5656 disks_ok = False
5658 # FIXME: race condition on drbd migration to primary
5660 # 2nd pass, do only the primary node
5661 for idx, inst_disk in enumerate(disks):
5662 dev_path = None
5664 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5665 if node != instance.primary_node:
5666 continue
5667 if ignore_size:
5668 node_disk = node_disk.Copy()
5669 node_disk.UnsetSize()
5670 lu.cfg.SetDiskID(node_disk, node)
5671 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5672 msg = result.fail_msg
5673 if msg:
5674 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5675 " (is_primary=True, pass=2): %s",
5676 inst_disk.iv_name, node, msg)
5677 disks_ok = False
5678 else:
5679 dev_path = result.payload
5681 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5683 # leave the disks configured for the primary node
5684 # this is a workaround that would be fixed better by
5685 # improving the logical/physical id handling
5686 for disk in disks:
5687 lu.cfg.SetDiskID(disk, instance.primary_node)
5689 return disks_ok, device_info
5692 def _StartInstanceDisks(lu, instance, force):
5693 """Start the disks of an instance.
5696 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
5697 ignore_secondaries=force)
5698 if not disks_ok:
5699 _ShutdownInstanceDisks(lu, instance)
5700 if force is not None and not force:
5701 lu.proc.LogWarning("", hint="If the message above refers to a"
5703 " you can retry the operation using '--force'.")
5704 raise errors.OpExecError("Disk consistency error")
5707 class LUInstanceDeactivateDisks(NoHooksLU):
5708 """Shutdown an instance's disks.
5713 def ExpandNames(self):
5714 self._ExpandAndLockInstance()
5715 self.needed_locks[locking.LEVEL_NODE] = []
5716 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5718 def DeclareLocks(self, level):
5719 if level == locking.LEVEL_NODE:
5720 self._LockInstancesNodes()
5722 def CheckPrereq(self):
5723 """Check prerequisites.
5725 This checks that the instance is in the cluster.
5728 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5729 assert self.instance is not None, \
5730 "Cannot retrieve locked instance %s" % self.op.instance_name
5732 def Exec(self, feedback_fn):
5733 """Deactivate the disks
5736 instance = self.instance
5738 _ShutdownInstanceDisks(self, instance)
5740 _SafeShutdownInstanceDisks(self, instance)
5743 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
5744 """Shutdown block devices of an instance.
5746 This function checks that the instance is down before calling
5747 _ShutdownInstanceDisks.
5750 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
5751 _ShutdownInstanceDisks(lu, instance, disks=disks)
5754 def _ExpandCheckDisks(instance, disks):
5755 """Return the instance disks selected by the disks list
5757 @type disks: list of L{objects.Disk} or None
5758 @param disks: selected disks
5759 @rtype: list of L{objects.Disk}
5760 @return: selected instance disks to act on
5763 if disks is None:
5764 return instance.disks
5765 else:
5766 if not set(disks).issubset(instance.disks):
5767 raise errors.ProgrammerError("Can only act on disks belonging to the"
5768 " target instance")
5769 return disks
5772 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
5773 """Shutdown block devices of an instance.
5775 This does the shutdown on all nodes of the instance.
5777 If ignore_primary is false, errors on the primary node are
5778 ignored.
5781 all_result = True
5782 disks = _ExpandCheckDisks(instance, disks)
5784 for disk in disks:
5785 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
5786 lu.cfg.SetDiskID(top_disk, node)
5787 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
5788 msg = result.fail_msg
5790 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
5791 disk.iv_name, node, msg)
5792 if ((node == instance.primary_node and not ignore_primary) or
5793 (node != instance.primary_node and not result.offline)):
5794 all_result = False
5795 return all_result
5798 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5799 """Checks if a node has enough free memory.
5801 This function checks if a given node has the needed amount of free
5802 memory. In case the node has less memory or we cannot get the
5803 information from the node, this function raises an OpPrereqError
5804 exception.
5806 @type lu: C{LogicalUnit}
5807 @param lu: a logical unit from which we get configuration data
5808 @type node: C{str}
5809 @param node: the node to check
5810 @type reason: C{str}
5811 @param reason: string to use in the error message
5812 @type requested: C{int}
5813 @param requested: the amount of memory in MiB to check for
5814 @type hypervisor_name: C{str}
5815 @param hypervisor_name: the hypervisor to ask for memory stats
5816 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
5817 we cannot check the node
5820 nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
5821 nodeinfo[node].Raise("Can't get data from node %s" % node,
5822 prereq=True, ecode=errors.ECODE_ENVIRON)
5823 free_mem = nodeinfo[node].payload.get("memory_free", None)
5824 if not isinstance(free_mem, int):
5825 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
5826 " was '%s'" % (node, free_mem),
5827 errors.ECODE_ENVIRON)
5828 if requested > free_mem:
5829 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
5830 " needed %s MiB, available %s MiB" %
5831 (node, reason, requested, free_mem),
5832 errors.ECODE_NORES)
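# Typical use, mirroring LUInstanceStartup.CheckPrereq below (the exact
# values come from the instance's backend parameters):
# _CheckNodeFreeMemory(self, instance.primary_node,
# "starting instance %s" % instance.name,
# bep[constants.BE_MEMORY], instance.hypervisor)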
5835 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
5836 """Checks if nodes have enough free disk space in the all VGs.
5838 This function check if all given nodes have the needed amount of
5839 free disk. In case any node has less disk or we cannot get the
5840 information from the node, this function raise an OpPrereqError
5843 @type lu: C{LogicalUnit}
5844 @param lu: a logical unit from which we get configuration data
5845 @type nodenames: C{list}
5846 @param nodenames: the list of node names to check
5847 @type req_sizes: C{dict}
5848 @param req_sizes: the hash of vg and corresponding amount of disk in
5849 MiB to check for
5850 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5851 or we cannot check the node
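For example, req_sizes could be {"xenvg": 10240} to request 10 GiB from
volume group "xenvg" (VG name illustrative).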
5854 for vg, req_size in req_sizes.items():
5855 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
5858 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5859 """Checks if nodes have enough free disk space in the specified VG.
5861 This function checks if all given nodes have the needed amount of
5862 free disk. In case any node has less disk or we cannot get the
5863 information from the node, this function raises an OpPrereqError
5864 exception.
5866 @type lu: C{LogicalUnit}
5867 @param lu: a logical unit from which we get configuration data
5868 @type nodenames: C{list}
5869 @param nodenames: the list of node names to check
5870 @type vg: C{str}
5871 @param vg: the volume group to check
5872 @type requested: C{int}
5873 @param requested: the amount of disk in MiB to check for
5874 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5875 or we cannot check the node
5878 nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5879 for node in nodenames:
5880 info = nodeinfo[node]
5881 info.Raise("Cannot get current information from node %s" % node,
5882 prereq=True, ecode=errors.ECODE_ENVIRON)
5883 vg_free = info.payload.get("vg_free", None)
5884 if not isinstance(vg_free, int):
5885 raise errors.OpPrereqError("Can't compute free disk space on node"
5886 " %s for vg %s, result was '%s'" %
5887 (node, vg, vg_free), errors.ECODE_ENVIRON)
5888 if requested > vg_free:
5889 raise errors.OpPrereqError("Not enough disk space on target node %s"
5890 " vg %s: required %d MiB, available %d MiB" %
5891 (node, vg, requested, vg_free),
5892 errors.ECODE_NORES)
5895 class LUInstanceStartup(LogicalUnit):
5896 """Starts an instance.
5899 HPATH = "instance-start"
5900 HTYPE = constants.HTYPE_INSTANCE
5903 def CheckArguments(self):
5905 if self.op.beparams:
5906 # fill the beparams dict
5907 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5909 def ExpandNames(self):
5910 self._ExpandAndLockInstance()
5912 def BuildHooksEnv(self):
5915 This runs on master, primary and secondary nodes of the instance.
5919 "FORCE": self.op.force,
5922 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5926 def BuildHooksNodes(self):
5927 """Build hooks nodes.
5930 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5933 def CheckPrereq(self):
5934 """Check prerequisites.
5936 This checks that the instance is in the cluster.
5939 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5940 assert self.instance is not None, \
5941 "Cannot retrieve locked instance %s" % self.op.instance_name
5944 if self.op.hvparams:
5945 # check hypervisor parameter syntax (locally)
5946 cluster = self.cfg.GetClusterInfo()
5947 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5948 filled_hvp = cluster.FillHV(instance)
5949 filled_hvp.update(self.op.hvparams)
5950 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5951 hv_type.CheckParameterSyntax(filled_hvp)
5952 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
5954 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5956 if self.primary_offline and self.op.ignore_offline_nodes:
5957 self.proc.LogWarning("Ignoring offline primary node")
5959 if self.op.hvparams or self.op.beparams:
5960 self.proc.LogWarning("Overridden parameters are ignored")
5961 else:
5962 _CheckNodeOnline(self, instance.primary_node)
5964 bep = self.cfg.GetClusterInfo().FillBE(instance)
5966 # check bridges existence
5967 _CheckInstanceBridgesExist(self, instance)
5969 remote_info = self.rpc.call_instance_info(instance.primary_node,
5971 instance.hypervisor)
5972 remote_info.Raise("Error checking node %s" % instance.primary_node,
5973 prereq=True, ecode=errors.ECODE_ENVIRON)
5974 if not remote_info.payload: # not running already
5975 _CheckNodeFreeMemory(self, instance.primary_node,
5976 "starting instance %s" % instance.name,
5977 bep[constants.BE_MEMORY], instance.hypervisor)
5979 def Exec(self, feedback_fn):
5980 """Start the instance.
5983 instance = self.instance
5984 force = self.op.force
5986 if not self.op.no_remember:
5987 self.cfg.MarkInstanceUp(instance.name)
5989 if self.primary_offline:
5990 assert self.op.ignore_offline_nodes
5991 self.proc.LogInfo("Primary node offline, marked instance as started")
5992 else:
5993 node_current = instance.primary_node
5995 _StartInstanceDisks(self, instance, force)
5997 result = self.rpc.call_instance_start(node_current, instance,
5998 self.op.hvparams, self.op.beparams,
5999 self.op.startup_paused)
6000 msg = result.fail_msg
6001 if msg:
6002 _ShutdownInstanceDisks(self, instance)
6003 raise errors.OpExecError("Could not start instance: %s" % msg)
6006 class LUInstanceReboot(LogicalUnit):
6007 """Reboot an instance.
6010 HPATH = "instance-reboot"
6011 HTYPE = constants.HTYPE_INSTANCE
6014 def ExpandNames(self):
6015 self._ExpandAndLockInstance()
6017 def BuildHooksEnv(self):
6020 This runs on master, primary and secondary nodes of the instance.
6024 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6025 "REBOOT_TYPE": self.op.reboot_type,
6026 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6029 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6033 def BuildHooksNodes(self):
6034 """Build hooks nodes.
6037 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6040 def CheckPrereq(self):
6041 """Check prerequisites.
6043 This checks that the instance is in the cluster.
6046 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6047 assert self.instance is not None, \
6048 "Cannot retrieve locked instance %s" % self.op.instance_name
6050 _CheckNodeOnline(self, instance.primary_node)
6052 # check bridges existence
6053 _CheckInstanceBridgesExist(self, instance)
6055 def Exec(self, feedback_fn):
6056 """Reboot the instance.
6059 instance = self.instance
6060 ignore_secondaries = self.op.ignore_secondaries
6061 reboot_type = self.op.reboot_type
6063 remote_info = self.rpc.call_instance_info(instance.primary_node,
6065 instance.hypervisor)
6066 remote_info.Raise("Error checking node %s" % instance.primary_node)
6067 instance_running = bool(remote_info.payload)
6069 node_current = instance.primary_node
6071 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6072 constants.INSTANCE_REBOOT_HARD]:
6073 for disk in instance.disks:
6074 self.cfg.SetDiskID(disk, node_current)
6075 result = self.rpc.call_instance_reboot(node_current, instance,
6076 reboot_type,
6077 self.op.shutdown_timeout)
6078 result.Raise("Could not reboot instance")
6080 if instance_running:
6081 result = self.rpc.call_instance_shutdown(node_current, instance,
6082 self.op.shutdown_timeout)
6083 result.Raise("Could not shutdown instance for full reboot")
6084 _ShutdownInstanceDisks(self, instance)
6086 self.LogInfo("Instance %s was already stopped, starting now",
6088 _StartInstanceDisks(self, instance, ignore_secondaries)
6089 result = self.rpc.call_instance_start(node_current, instance,
6090 None, None, False)
6091 msg = result.fail_msg
6092 if msg:
6093 _ShutdownInstanceDisks(self, instance)
6094 raise errors.OpExecError("Could not start instance for"
6095 " full reboot: %s" % msg)
6097 self.cfg.MarkInstanceUp(instance.name)
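# Whichever path was taken, the instance is now running, so make sure
# the configuration reflects that.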
6100 class LUInstanceShutdown(LogicalUnit):
6101 """Shutdown an instance.
6104 HPATH = "instance-stop"
6105 HTYPE = constants.HTYPE_INSTANCE
6108 def ExpandNames(self):
6109 self._ExpandAndLockInstance()
6111 def BuildHooksEnv(self):
6114 This runs on master, primary and secondary nodes of the instance.
6117 env = _BuildInstanceHookEnvByObject(self, self.instance)
6118 env["TIMEOUT"] = self.op.timeout
6121 def BuildHooksNodes(self):
6122 """Build hooks nodes.
6125 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6128 def CheckPrereq(self):
6129 """Check prerequisites.
6131 This checks that the instance is in the cluster.
6134 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6135 assert self.instance is not None, \
6136 "Cannot retrieve locked instance %s" % self.op.instance_name
6138 self.primary_offline = \
6139 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6141 if self.primary_offline and self.op.ignore_offline_nodes:
6142 self.proc.LogWarning("Ignoring offline primary node")
6144 _CheckNodeOnline(self, self.instance.primary_node)
6146 def Exec(self, feedback_fn):
6147 """Shutdown the instance.
6150 instance = self.instance
6151 node_current = instance.primary_node
6152 timeout = self.op.timeout
6154 if not self.op.no_remember:
6155 self.cfg.MarkInstanceDown(instance.name)
6157 if self.primary_offline:
6158 assert self.op.ignore_offline_nodes
6159 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6160 else:
6161 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6162 msg = result.fail_msg
6164 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6166 _ShutdownInstanceDisks(self, instance)
6169 class LUInstanceReinstall(LogicalUnit):
6170 """Reinstall an instance.
6173 HPATH = "instance-reinstall"
6174 HTYPE = constants.HTYPE_INSTANCE
6177 def ExpandNames(self):
6178 self._ExpandAndLockInstance()
6180 def BuildHooksEnv(self):
6183 This runs on master, primary and secondary nodes of the instance.
6186 return _BuildInstanceHookEnvByObject(self, self.instance)
6188 def BuildHooksNodes(self):
6189 """Build hooks nodes.
6192 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6195 def CheckPrereq(self):
6196 """Check prerequisites.
6198 This checks that the instance is in the cluster and is not running.
6201 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6202 assert instance is not None, \
6203 "Cannot retrieve locked instance %s" % self.op.instance_name
6204 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6205 " offline, cannot reinstall")
6206 for node in instance.secondary_nodes:
6207 _CheckNodeOnline(self, node, "Instance secondary node offline,"
6208 " cannot reinstall")
6210 if instance.disk_template == constants.DT_DISKLESS:
6211 raise errors.OpPrereqError("Instance '%s' has no disks" %
6212 self.op.instance_name,
6213 errors.ECODE_INVAL)
6214 _CheckInstanceDown(self, instance, "cannot reinstall")
6216 if self.op.os_type is not None:
6218 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6219 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6220 instance_os = self.op.os_type
6221 else:
6222 instance_os = instance.os
6224 nodelist = list(instance.all_nodes)
6226 if self.op.osparams:
6227 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6228 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6229 self.os_inst = i_osdict # the new dict (without defaults)
6230 else:
6231 self.os_inst = {}
6233 self.instance = instance
6235 def Exec(self, feedback_fn):
6236 """Reinstall the instance.
6239 inst = self.instance
6241 if self.op.os_type is not None:
6242 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6243 inst.os = self.op.os_type
6244 # Write to configuration
6245 self.cfg.Update(inst, feedback_fn)
6247 _StartInstanceDisks(self, inst, None)
6249 feedback_fn("Running the instance OS create scripts...")
6250 # FIXME: pass debug option from opcode to backend
6251 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
6252 self.op.debug_level,
6253 osparams=self.os_inst)
6254 result.Raise("Could not install OS for instance %s on node %s" %
6255 (inst.name, inst.primary_node))
6256 finally:
6257 _ShutdownInstanceDisks(self, inst)
6260 class LUInstanceRecreateDisks(LogicalUnit):
6261 """Recreate an instance's missing disks.
6264 HPATH = "instance-recreate-disks"
6265 HTYPE = constants.HTYPE_INSTANCE
6268 def CheckArguments(self):
6269 # normalise the disk list
6270 self.op.disks = sorted(frozenset(self.op.disks))
6272 def ExpandNames(self):
6273 self._ExpandAndLockInstance()
6274 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6275 if self.op.nodes:
6276 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6277 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6278 else:
6279 self.needed_locks[locking.LEVEL_NODE] = []
6281 def DeclareLocks(self, level):
6282 if level == locking.LEVEL_NODE:
6283 # if we replace the nodes, we only need to lock the old primary,
6284 # otherwise we need to lock all nodes for disk re-creation
6285 primary_only = bool(self.op.nodes)
6286 self._LockInstancesNodes(primary_only=primary_only)
6288 def BuildHooksEnv(self):
6291 This runs on master, primary and secondary nodes of the instance.
6294 return _BuildInstanceHookEnvByObject(self, self.instance)
6296 def BuildHooksNodes(self):
6297 """Build hooks nodes.
6300 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6303 def CheckPrereq(self):
6304 """Check prerequisites.
6306 This checks that the instance is in the cluster and is not running.
6309 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6310 assert instance is not None, \
6311 "Cannot retrieve locked instance %s" % self.op.instance_name
6312 if self.op.nodes:
6313 if len(self.op.nodes) != len(instance.all_nodes):
6314 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6315 " %d replacement nodes were specified" %
6316 (instance.name, len(instance.all_nodes),
6317 len(self.op.nodes)),
6319 assert instance.disk_template != constants.DT_DRBD8 or \
6320 len(self.op.nodes) == 2
6321 assert instance.disk_template != constants.DT_PLAIN or \
6322 len(self.op.nodes) == 1
6323 primary_node = self.op.nodes[0]
6324 else:
6325 primary_node = instance.primary_node
6326 _CheckNodeOnline(self, primary_node)
6328 if instance.disk_template == constants.DT_DISKLESS:
6329 raise errors.OpPrereqError("Instance '%s' has no disks" %
6330 self.op.instance_name, errors.ECODE_INVAL)
6331 # if we replace nodes *and* the old primary is offline, we don't
6332 # check that the instance is down
6333 assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
6334 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6335 if not (self.op.nodes and old_pnode.offline):
6336 _CheckInstanceDown(self, instance, "cannot recreate disks")
6338 if not self.op.disks:
6339 self.op.disks = range(len(instance.disks))
6341 for idx in self.op.disks:
6342 if idx >= len(instance.disks):
6343 raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6345 if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6346 raise errors.OpPrereqError("Can't recreate disks partially and"
6347 " change the nodes at the same time",
6349 self.instance = instance
6351 def Exec(self, feedback_fn):
6352 """Recreate the disks.
6355 instance = self.instance
6358 mods = [] # keeps track of needed logical_id changes
6360 for idx, disk in enumerate(instance.disks):
6361 if idx not in self.op.disks: # disk idx has not been passed in
6362 continue
6364 # update secondaries for disks, if needed
6365 if self.op.nodes:
6366 if disk.dev_type == constants.LD_DRBD8:
6367 # need to update the nodes and minors
6368 assert len(self.op.nodes) == 2
6369 assert len(disk.logical_id) == 6 # otherwise disk internals
6370 # have changed
6371 (_, _, old_port, _, _, old_secret) = disk.logical_id
6372 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6373 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6374 new_minors[0], new_minors[1], old_secret)
6375 assert len(disk.logical_id) == len(new_id)
6376 mods.append((idx, new_id))
6378 # now that we have passed all asserts above, we can apply the mods
6379 # in a single run (to avoid partial changes)
6380 for idx, new_id in mods:
6381 instance.disks[idx].logical_id = new_id
6383 # change primary node, if needed
6384 if self.op.nodes:
6385 instance.primary_node = self.op.nodes[0]
6386 self.LogWarning("Changing the instance's nodes, you will have to"
6387 " remove any disks left on the older nodes manually")
6390 self.cfg.Update(instance, feedback_fn)
6392 _CreateDisks(self, instance, to_skip=to_skip)
6395 class LUInstanceRename(LogicalUnit):
6396 """Rename an instance.
6399 HPATH = "instance-rename"
6400 HTYPE = constants.HTYPE_INSTANCE
6402 def CheckArguments(self):
6406 if self.op.ip_check and not self.op.name_check:
6407 # TODO: make the ip check more flexible and not depend on the name check
6408 raise errors.OpPrereqError("IP address check requires a name check",
6411 def BuildHooksEnv(self):
6414 This runs on master, primary and secondary nodes of the instance.
6417 env = _BuildInstanceHookEnvByObject(self, self.instance)
6418 env["INSTANCE_NEW_NAME"] = self.op.new_name
6421 def BuildHooksNodes(self):
6422 """Build hooks nodes.
6425 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6428 def CheckPrereq(self):
6429 """Check prerequisites.
6431 This checks that the instance is in the cluster and is not running.
6434 self.op.instance_name = _ExpandInstanceName(self.cfg,
6435 self.op.instance_name)
6436 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6437 assert instance is not None
6438 _CheckNodeOnline(self, instance.primary_node)
6439 _CheckInstanceDown(self, instance, "cannot rename")
6440 self.instance = instance
6442 new_name = self.op.new_name
6443 if self.op.name_check:
6444 hostname = netutils.GetHostname(name=new_name)
6445 if hostname.name != new_name:
6446 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6448 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6449 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6450 " same as given hostname '%s'") %
6451 (hostname.name, self.op.new_name),
6453 new_name = self.op.new_name = hostname.name
6454 if (self.op.ip_check and
6455 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6456 raise errors.OpPrereqError("IP %s of instance %s already in use" %
6457 (hostname.ip, new_name),
6458 errors.ECODE_NOTUNIQUE)
6460 instance_list = self.cfg.GetInstanceList()
6461 if new_name in instance_list and new_name != instance.name:
6462 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6463 new_name, errors.ECODE_EXISTS)
6465 def Exec(self, feedback_fn):
6466 """Rename the instance.
6469 inst = self.instance
6470 old_name = inst.name
6472 rename_file_storage = False
6473 if (inst.disk_template in constants.DTS_FILEBASED and
6474 self.op.new_name != inst.name):
6475 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6476 rename_file_storage = True
6478 self.cfg.RenameInstance(inst.name, self.op.new_name)
6479 # Change the instance lock. This is definitely safe while we hold the BGL.
6480 # Otherwise the new lock would have to be added in acquired mode.
6482 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6483 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6485 # re-read the instance from the configuration after rename
6486 inst = self.cfg.GetInstanceInfo(self.op.new_name)
6488 if rename_file_storage:
6489 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6490 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6491 old_file_storage_dir,
6492 new_file_storage_dir)
6493 result.Raise("Could not rename on node %s directory '%s' to '%s'"
6494 " (but the instance has been renamed in Ganeti)" %
6495 (inst.primary_node, old_file_storage_dir,
6496 new_file_storage_dir))
6498 _StartInstanceDisks(self, inst, None)
6500 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6501 old_name, self.op.debug_level)
6502 msg = result.fail_msg
6504 msg = ("Could not run OS rename script for instance %s on node %s"
6505 " (but the instance has been renamed in Ganeti): %s" %
6506 (inst.name, inst.primary_node, msg))
6507 self.proc.LogWarning(msg)
6509 _ShutdownInstanceDisks(self, inst)
6511 return inst.name
6514 class LUInstanceRemove(LogicalUnit):
6515 """Remove an instance.
6518 HPATH = "instance-remove"
6519 HTYPE = constants.HTYPE_INSTANCE
6522 def ExpandNames(self):
6523 self._ExpandAndLockInstance()
6524 self.needed_locks[locking.LEVEL_NODE] = []
6525 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6527 def DeclareLocks(self, level):
6528 if level == locking.LEVEL_NODE:
6529 self._LockInstancesNodes()
6531 def BuildHooksEnv(self):
6534 This runs on master, primary and secondary nodes of the instance.
6537 env = _BuildInstanceHookEnvByObject(self, self.instance)
6538 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6541 def BuildHooksNodes(self):
6542 """Build hooks nodes.
6545 nl = [self.cfg.GetMasterNode()]
6546 nl_post = list(self.instance.all_nodes) + nl
6547 return (nl, nl_post)
6549 def CheckPrereq(self):
6550 """Check prerequisites.
6552 This checks that the instance is in the cluster.
6555 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6556 assert self.instance is not None, \
6557 "Cannot retrieve locked instance %s" % self.op.instance_name
6559 def Exec(self, feedback_fn):
6560 """Remove the instance.
6563 instance = self.instance
6564 logging.info("Shutting down instance %s on node %s",
6565 instance.name, instance.primary_node)
6567 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6568 self.op.shutdown_timeout)
6569 msg = result.fail_msg
6570 if msg:
6571 if self.op.ignore_failures:
6572 feedback_fn("Warning: can't shutdown instance: %s" % msg)
6574 raise errors.OpExecError("Could not shutdown instance %s on"
6576 (instance.name, instance.primary_node, msg))
6578 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6581 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6582 """Utility function to remove an instance.
6585 logging.info("Removing block devices for instance %s", instance.name)
6587 if not _RemoveDisks(lu, instance):
6588 if not ignore_failures:
6589 raise errors.OpExecError("Can't remove instance's disks")
6590 feedback_fn("Warning: can't remove instance's disks")
6592 logging.info("Removing instance %s out of cluster config", instance.name)
6594 lu.cfg.RemoveInstance(instance.name)
6596 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6597 "Instance lock removal conflict"
6599 # Remove lock for the instance
6600 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
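# Scheduling the lock for removal ensures the now-stale instance lock
# is dropped once this LU completes.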
6603 class LUInstanceQuery(NoHooksLU):
6604 """Logical unit for querying instances.
6607 # pylint: disable=W0142
6610 def CheckArguments(self):
6611 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6612 self.op.output_fields, self.op.use_locking)
6614 def ExpandNames(self):
6615 self.iq.ExpandNames(self)
6617 def DeclareLocks(self, level):
6618 self.iq.DeclareLocks(self, level)
6620 def Exec(self, feedback_fn):
6621 return self.iq.OldStyleQuery(self)
6624 class LUInstanceFailover(LogicalUnit):
6625 """Failover an instance.
6628 HPATH = "instance-failover"
6629 HTYPE = constants.HTYPE_INSTANCE
6632 def CheckArguments(self):
6633 """Check the arguments.
6636 self.iallocator = getattr(self.op, "iallocator", None)
6637 self.target_node = getattr(self.op, "target_node", None)
6639 def ExpandNames(self):
6640 self._ExpandAndLockInstance()
6642 if self.op.target_node is not None:
6643 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6645 self.needed_locks[locking.LEVEL_NODE] = []
6646 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6648 ignore_consistency = self.op.ignore_consistency
6649 shutdown_timeout = self.op.shutdown_timeout
6650 self._migrater = TLMigrateInstance(self, self.op.instance_name,
6651 cleanup=False,
6652 failover=True,
6653 ignore_consistency=ignore_consistency,
6654 shutdown_timeout=shutdown_timeout)
6655 self.tasklets = [self._migrater]
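# The heavy lifting is delegated to the TLMigrateInstance tasklet
# (defined below), here running in failover mode.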
6657 def DeclareLocks(self, level):
6658 if level == locking.LEVEL_NODE:
6659 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6660 if instance.disk_template in constants.DTS_EXT_MIRROR:
6661 if self.op.target_node is None:
6662 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6663 else:
6664 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6665 self.op.target_node]
6666 del self.recalculate_locks[locking.LEVEL_NODE]
6667 else:
6668 self._LockInstancesNodes()
6670 def BuildHooksEnv(self):
6673 This runs on master, primary and secondary nodes of the instance.
6676 instance = self._migrater.instance
6677 source_node = instance.primary_node
6678 target_node = self.op.target_node
6680 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6681 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6682 "OLD_PRIMARY": source_node,
6683 "NEW_PRIMARY": target_node,
6686 if instance.disk_template in constants.DTS_INT_MIRROR:
6687 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6688 env["NEW_SECONDARY"] = source_node
6690 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6692 env.update(_BuildInstanceHookEnvByObject(self, instance))
6696 def BuildHooksNodes(self):
6697 """Build hooks nodes.
6700 instance = self._migrater.instance
6701 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6702 return (nl, nl + [instance.primary_node])
6705 class LUInstanceMigrate(LogicalUnit):
6706 """Migrate an instance.
6708 This is migration without shutting down, compared to the failover,
6709 which is done with shutdown.
6712 HPATH = "instance-migrate"
6713 HTYPE = constants.HTYPE_INSTANCE
6716 def ExpandNames(self):
6717 self._ExpandAndLockInstance()
6719 if self.op.target_node is not None:
6720 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6722 self.needed_locks[locking.LEVEL_NODE] = []
6723 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6725 self._migrater = TLMigrateInstance(self, self.op.instance_name,
6726 cleanup=self.op.cleanup,
6727 failover=False,
6728 fallback=self.op.allow_failover)
6729 self.tasklets = [self._migrater]
6731 def DeclareLocks(self, level):
6732 if level == locking.LEVEL_NODE:
6733 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6734 if instance.disk_template in constants.DTS_EXT_MIRROR:
6735 if self.op.target_node is None:
6736 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6737 else:
6738 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6739 self.op.target_node]
6740 del self.recalculate_locks[locking.LEVEL_NODE]
6741 else:
6742 self._LockInstancesNodes()
6744 def BuildHooksEnv(self):
6747 This runs on master, primary and secondary nodes of the instance.
6750 instance = self._migrater.instance
6751 source_node = instance.primary_node
6752 target_node = self.op.target_node
6753 env = _BuildInstanceHookEnvByObject(self, instance)
6755 "MIGRATE_LIVE": self._migrater.live,
6756 "MIGRATE_CLEANUP": self.op.cleanup,
6757 "OLD_PRIMARY": source_node,
6758 "NEW_PRIMARY": target_node,
6761 if instance.disk_template in constants.DTS_INT_MIRROR:
6762 env["OLD_SECONDARY"] = target_node
6763 env["NEW_SECONDARY"] = source_node
6765 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6769 def BuildHooksNodes(self):
6770 """Build hooks nodes.
6773 instance = self._migrater.instance
6774 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6775 return (nl, nl + [instance.primary_node])
6778 class LUInstanceMove(LogicalUnit):
6779 """Move an instance by data-copying.
6782 HPATH = "instance-move"
6783 HTYPE = constants.HTYPE_INSTANCE
6786 def ExpandNames(self):
6787 self._ExpandAndLockInstance()
6788 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6789 self.op.target_node = target_node
6790 self.needed_locks[locking.LEVEL_NODE] = [target_node]
6791 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6793 def DeclareLocks(self, level):
6794 if level == locking.LEVEL_NODE:
6795 self._LockInstancesNodes(primary_only=True)
6797 def BuildHooksEnv(self):
6800 This runs on master, primary and secondary nodes of the instance.
6804 "TARGET_NODE": self.op.target_node,
6805 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6807 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6810 def BuildHooksNodes(self):
6811 """Build hooks nodes.
6815 self.cfg.GetMasterNode(),
6816 self.instance.primary_node,
6817 self.op.target_node,
6821 def CheckPrereq(self):
6822 """Check prerequisites.
6824 This checks that the instance is in the cluster.
6827 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6828 assert self.instance is not None, \
6829 "Cannot retrieve locked instance %s" % self.op.instance_name
6831 node = self.cfg.GetNodeInfo(self.op.target_node)
6832 assert node is not None, \
6833 "Cannot retrieve locked node %s" % self.op.target_node
6835 self.target_node = target_node = node.name
6837 if target_node == instance.primary_node:
6838 raise errors.OpPrereqError("Instance %s is already on the node %s" %
6839 (instance.name, target_node),
6842 bep = self.cfg.GetClusterInfo().FillBE(instance)
6844 for idx, dsk in enumerate(instance.disks):
6845 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6846 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6847 " cannot copy" % idx, errors.ECODE_STATE)
6849 _CheckNodeOnline(self, target_node)
6850 _CheckNodeNotDrained(self, target_node)
6851 _CheckNodeVmCapable(self, target_node)
6853 if instance.admin_up:
6854 # check memory requirements on the secondary node
6855 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6856 instance.name, bep[constants.BE_MEMORY],
6857 instance.hypervisor)
6859 self.LogInfo("Not checking memory on the secondary node as"
6860 " instance will not be started")
6862 # check bridge existence
6863 _CheckInstanceBridgesExist(self, instance, node=target_node)
6865 def Exec(self, feedback_fn):
6866 """Move an instance.
6868 The move is done by shutting it down on its present node, copying
6869 the data over (slow) and starting it on the new node.
6872 instance = self.instance
6874 source_node = instance.primary_node
6875 target_node = self.target_node
6877 self.LogInfo("Shutting down instance %s on source node %s",
6878 instance.name, source_node)
6880 result = self.rpc.call_instance_shutdown(source_node, instance,
6881 self.op.shutdown_timeout)
6882 msg = result.fail_msg
6884 if self.op.ignore_consistency:
6885 self.proc.LogWarning("Could not shutdown instance %s on node %s."
6886 " Proceeding anyway. Please make sure node"
6887 " %s is down. Error details: %s",
6888 instance.name, source_node, source_node, msg)
6890 raise errors.OpExecError("Could not shutdown instance %s on"
6892 (instance.name, source_node, msg))
6894 # create the target disks
6895 try:
6896 _CreateDisks(self, instance, target_node=target_node)
6897 except errors.OpExecError:
6898 self.LogWarning("Device creation failed, reverting...")
6899 try:
6900 _RemoveDisks(self, instance, target_node=target_node)
6901 finally:
6902 self.cfg.ReleaseDRBDMinors(instance.name)
6903 raise
6905 cluster_name = self.cfg.GetClusterInfo().cluster_name
6907 errs = []
6908 # activate, get path, copy the data over
6909 for idx, disk in enumerate(instance.disks):
6910 self.LogInfo("Copying data for disk %d", idx)
6911 result = self.rpc.call_blockdev_assemble(target_node, disk,
6912 instance.name, True, idx)
6914 self.LogWarning("Can't assemble newly created disk %d: %s",
6915 idx, result.fail_msg)
6916 errs.append(result.fail_msg)
6917 else:
6918 dev_path = result.payload
6919 result = self.rpc.call_blockdev_export(source_node, disk,
6920 target_node, dev_path,
6923 self.LogWarning("Can't copy data over for disk %d: %s",
6924 idx, result.fail_msg)
6925 errs.append(result.fail_msg)
6929 self.LogWarning("Some disks failed to copy, aborting")
6931 _RemoveDisks(self, instance, target_node=target_node)
6933 self.cfg.ReleaseDRBDMinors(instance.name)
6934 raise errors.OpExecError("Errors during disk copy: %s" %
6937 instance.primary_node = target_node
6938 self.cfg.Update(instance, feedback_fn)
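# From this point on the configuration says the instance lives on
# target_node; what remains is cleaning up the source node and, if the
# instance was up, starting it on the new primary.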
6940 self.LogInfo("Removing the disks on the original node")
6941 _RemoveDisks(self, instance, target_node=source_node)
6943 # Only start the instance if it's marked as up
6944 if instance.admin_up:
6945 self.LogInfo("Starting instance %s on node %s",
6946 instance.name, target_node)
6948 disks_ok, _ = _AssembleInstanceDisks(self, instance,
6949 ignore_secondaries=True)
6950 if not disks_ok:
6951 _ShutdownInstanceDisks(self, instance)
6952 raise errors.OpExecError("Can't activate the instance's disks")
6954 result = self.rpc.call_instance_start(target_node, instance,
6955 None, None, False)
6956 msg = result.fail_msg
6957 if msg:
6958 _ShutdownInstanceDisks(self, instance)
6959 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6960 (instance.name, target_node, msg))
6963 class LUNodeMigrate(LogicalUnit):
6964 """Migrate all instances from a node.
6967 HPATH = "node-migrate"
6968 HTYPE = constants.HTYPE_NODE
6971 def CheckArguments(self):
6972 _CheckIAllocatorOrNode(self, "iallocator", "target_node")
6974 def ExpandNames(self):
6975 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6977 self.share_locks = _ShareAll()
6978 self.needed_locks = {
6979 locking.LEVEL_NODE: [self.op.node_name],
6982 def BuildHooksEnv(self):
6985 This runs on the master, the primary and all the secondaries.
6989 "NODE_NAME": self.op.node_name,
6992 def BuildHooksNodes(self):
6993 """Build hooks nodes.
6996 nl = [self.cfg.GetMasterNode()]
6999 def CheckPrereq(self):
7000 pass
7002 def Exec(self, feedback_fn):
7003 # Prepare jobs for migration instances
7004 jobs = [
7005 [opcodes.OpInstanceMigrate(instance_name=inst.name,
7008 iallocator=self.op.iallocator,
7009 target_node=self.op.target_node)]
7010 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7013 # TODO: Run iallocator in this opcode and pass correct placement options to
7014 # OpInstanceMigrate. Since other jobs can modify the cluster between
7015 # running the iallocator and the actual migration, a good consistency model
7016 # will have to be found.
7018 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7019 frozenset([self.op.node_name]))
7021 return ResultWithJobs(jobs)
7024 class TLMigrateInstance(Tasklet):
7025 """Tasklet class for instance migration.
7028 @ivar live: whether the migration will be done live or non-live;
7029 this variable is initialized only after CheckPrereq has run
7030 @type cleanup: boolean
7031 @ivar cleanup: Whether we clean up after a failed migration
7032 @type iallocator: string
7033 @ivar iallocator: The iallocator used to determine target_node
7034 @type target_node: string
7035 @ivar target_node: If given, the target_node to reallocate the instance to
7036 @type failover: boolean
7037 @ivar failover: Whether operation results in failover or migration
7038 @type fallback: boolean
7039 @ivar fallback: Whether fallback to failover is allowed if migration is not
7040 possible
7041 @type ignore_consistency: boolean
7042 @ivar ignore_consistency: Whether we should ignore consistency between source
7043 and target node
7044 @type shutdown_timeout: int
7045 @ivar shutdown_timeout: In case of failover, the timeout for the instance shutdown
7048 def __init__(self, lu, instance_name, cleanup=False,
7049 failover=False, fallback=False,
7050 ignore_consistency=False,
7051 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
7052 """Initializes this class.
7055 Tasklet.__init__(self, lu)
7058 self.instance_name = instance_name
7059 self.cleanup = cleanup
7060 self.live = False # will be overridden later
7061 self.failover = failover
7062 self.fallback = fallback
7063 self.ignore_consistency = ignore_consistency
7064 self.shutdown_timeout = shutdown_timeout
7066 def CheckPrereq(self):
7067 """Check prerequisites.
7069 This checks that the instance is in the cluster.
7072 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7073 instance = self.cfg.GetInstanceInfo(instance_name)
7074 assert instance is not None
7075 self.instance = instance
7077 if (not self.cleanup and not instance.admin_up and not self.failover and
7078 self.fallback):
7079 self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
7080 " to failover")
7081 self.failover = True
7083 if instance.disk_template not in constants.DTS_MIRRORED:
7084 if self.failover:
7085 text = "failover"
7086 else:
7087 text = "migration"
7088 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7089 " %s" % (instance.disk_template, text),
7092 if instance.disk_template in constants.DTS_EXT_MIRROR:
7093 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7095 if self.lu.op.iallocator:
7096 self._RunAllocator()
7097 else:
7098 # We set self.target_node as it is required by
7099 # BuildHooksEnv
7100 self.target_node = self.lu.op.target_node
7102 # self.target_node is already populated, either directly or by the
7103 # iallocator run
7104 target_node = self.target_node
7105 if self.target_node == instance.primary_node:
7106 raise errors.OpPrereqError("Cannot migrate instance %s"
7107 " to its primary (%s)" %
7108 (instance.name, instance.primary_node))
7110 if len(self.lu.tasklets) == 1:
7111 # It is safe to release locks only when we're the only tasklet
7112 # in the workflow
7113 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7114 keep=[instance.primary_node, self.target_node])
7116 else:
7117 secondary_nodes = instance.secondary_nodes
7118 if not secondary_nodes:
7119 raise errors.ConfigurationError("No secondary node but using"
7120 " %s disk template" %
7121 instance.disk_template)
7122 target_node = secondary_nodes[0]
7123 if self.lu.op.iallocator or (self.lu.op.target_node and
7124 self.lu.op.target_node != target_node):
7125 if self.failover:
7126 text = "failed over"
7127 else:
7128 text = "migrated"
7129 raise errors.OpPrereqError("Instances with disk template %s cannot"
7130 " be %s to arbitrary nodes"
7131 " (neither an iallocator nor a target"
7132 " node can be passed)" %
7133 (instance.disk_template, text),
7136 i_be = self.cfg.GetClusterInfo().FillBE(instance)
7138 # check memory requirements on the secondary node
7139 if not self.cleanup and (not self.failover or instance.admin_up):
7140 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7141 instance.name, i_be[constants.BE_MEMORY],
7142 instance.hypervisor)
7144 self.lu.LogInfo("Not checking memory on the secondary node as"
7145 " instance will not be started")
7147 # check bridge existence
7148 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7150 if not self.cleanup:
7151 _CheckNodeNotDrained(self.lu, target_node)
7152 if not self.failover:
7153 result = self.rpc.call_instance_migratable(instance.primary_node,
7154 instance)
7155 if result.fail_msg and self.fallback:
7156 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7157 " failover")
7158 self.failover = True
7159 else:
7160 result.Raise("Can't migrate, please use failover",
7161 prereq=True, ecode=errors.ECODE_STATE)
7163 assert not (self.failover and self.cleanup)
7165 if not self.failover:
7166 if self.lu.op.live is not None and self.lu.op.mode is not None:
7167 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7168 " parameters are accepted",
7170 if self.lu.op.live is not None:
7171 if self.lu.op.live:
7172 self.lu.op.mode = constants.HT_MIGRATION_LIVE
7173 else:
7174 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7175 # reset the 'live' parameter to None so that repeated
7176 # invocations of CheckPrereq do not raise an exception
7177 self.lu.op.live = None
7178 elif self.lu.op.mode is None:
7179 # read the default value from the hypervisor
7180 i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7181 skip_globals=False)
7182 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7184 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7185 else:
7186 # Failover is never live
7187 self.live = False
7189 def _RunAllocator(self):
7190 """Run the allocator based on input opcode.
7193 ial = IAllocator(self.cfg, self.rpc,
7194 mode=constants.IALLOCATOR_MODE_RELOC,
7195 name=self.instance_name,
7196 # TODO See why hail breaks with a single node below
7197 relocate_from=[self.instance.primary_node,
7198 self.instance.primary_node],
7201 ial.Run(self.lu.op.iallocator)
7204 raise errors.OpPrereqError("Can't compute nodes using"
7205 " iallocator '%s': %s" %
7206 (self.lu.op.iallocator, ial.info),
7208 if len(ial.result) != ial.required_nodes:
7209 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7210 " of nodes (%s), required %s" %
7211 (self.lu.op.iallocator, len(ial.result),
7212 ial.required_nodes), errors.ECODE_FAULT)
7213 self.target_node = ial.result[0]
7214 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7215 self.instance_name, self.lu.op.iallocator,
7216 utils.CommaJoin(ial.result))
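# In relocation mode the allocator returns the new node name(s), e.g.
# ["node2.example.com"] (name illustrative).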
7218 def _WaitUntilSync(self):
7219 """Poll with custom rpc for disk sync.
7221 This uses our own step-based rpc call.
7224 self.feedback_fn("* wait until resync is done")
7225 all_done = False
7226 while not all_done:
7227 all_done = True
7228 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7229 self.nodes_ip,
7230 self.instance.disks)
7231 min_percent = 100
7232 for node, nres in result.items():
7233 nres.Raise("Cannot resync disks on node %s" % node)
7234 node_done, node_percent = nres.payload
7235 all_done = all_done and node_done
7236 if node_percent is not None:
7237 min_percent = min(min_percent, node_percent)
7239 if min_percent < 100:
7240 self.feedback_fn(" - progress: %.1f%%" % min_percent)
7243 def _EnsureSecondary(self, node):
7244 """Demote a node to secondary.
7247 self.feedback_fn("* switching node %s to secondary mode" % node)
7249 for dev in self.instance.disks:
7250 self.cfg.SetDiskID(dev, node)
7252 result = self.rpc.call_blockdev_close(node, self.instance.name,
7253 self.instance.disks)
7254 result.Raise("Cannot change disk to secondary on node %s" % node)
7256 def _GoStandalone(self):
7257 """Disconnect from the network.
7260 self.feedback_fn("* changing into standalone mode")
7261 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7262 self.instance.disks)
7263 for node, nres in result.items():
7264 nres.Raise("Cannot disconnect disks node %s" % node)
7266 def _GoReconnect(self, multimaster):
7267 """Reconnect to the network.
7273 msg = "single-master"
7274 self.feedback_fn("* changing disks into %s mode" % msg)
7275 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7276 self.instance.disks,
7277 self.instance.name, multimaster)
7278 for node, nres in result.items():
7279 nres.Raise("Cannot change disks config on node %s" % node)
7281 def _ExecCleanup(self):
7282 """Try to cleanup after a failed migration.
7284 The cleanup is done by:
7285 - check that the instance is running only on one node
7286 (and update the config if needed)
7287 - change disks on its secondary node to secondary
7288 - wait until disks are fully synchronized
7289 - disconnect from the network
7290 - change disks into single-master mode
7291 - wait again until disks are fully synchronized
7294 instance = self.instance
7295 target_node = self.target_node
7296 source_node = self.source_node
7298 # check running on only one node
7299 self.feedback_fn("* checking where the instance actually runs"
7300 " (if this hangs, the hypervisor might be in"
7301 " a bad state)")
7302 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7303 for node, result in ins_l.items():
7304 result.Raise("Can't contact node %s" % node)
7306 runningon_source = instance.name in ins_l[source_node].payload
7307 runningon_target = instance.name in ins_l[target_node].payload
7309 if runningon_source and runningon_target:
7310 raise errors.OpExecError("Instance seems to be running on two nodes,"
7311 " or the hypervisor is confused; you will have"
7312 " to ensure manually that it runs only on one"
7313 " and restart this operation")
7315 if not (runningon_source or runningon_target):
7316 raise errors.OpExecError("Instance does not seem to be running at all;"
7317 " in this case it's safer to repair by"
7318 " running 'gnt-instance stop' to ensure disk"
7319 " shutdown, and then restarting it")
7321 if runningon_target:
7322 # the migration has actually succeeded, we need to update the config
7323 self.feedback_fn("* instance running on secondary node (%s),"
7324 " updating config" % target_node)
7325 instance.primary_node = target_node
7326 self.cfg.Update(instance, self.feedback_fn)
7327 demoted_node = source_node
7329 self.feedback_fn("* instance confirmed to be running on its"
7330 " primary node (%s)" % source_node)
7331 demoted_node = target_node
7333 if instance.disk_template in constants.DTS_INT_MIRROR:
7334 self._EnsureSecondary(demoted_node)
7335 try:
7336 self._WaitUntilSync()
7337 except errors.OpExecError:
7338 # we ignore here errors, since if the device is standalone, it
7339 # won't be able to sync
7340 pass
7341 self._GoStandalone()
7342 self._GoReconnect(False)
7343 self._WaitUntilSync()
7345 self.feedback_fn("* done")
7347 def _RevertDiskStatus(self):
7348 """Try to revert the disk status after a failed migration.
7351 target_node = self.target_node
7352 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7353 return
7355 try:
7356 self._EnsureSecondary(target_node)
7357 self._GoStandalone()
7358 self._GoReconnect(False)
7359 self._WaitUntilSync()
7360 except errors.OpExecError, err:
7361 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7362 " please try to recover the instance manually;"
7363 " error '%s'" % str(err))
7365 def _AbortMigration(self):
7366 """Call the hypervisor code to abort a started migration.
7369 instance = self.instance
7370 target_node = self.target_node
7371 migration_info = self.migration_info
7373 abort_result = self.rpc.call_finalize_migration(target_node,
7374 instance,
7375 migration_info,
7376 False)
7377 abort_msg = abort_result.fail_msg
7378 if abort_msg:
7379 logging.error("Aborting migration failed on target node %s: %s",
7380 target_node, abort_msg)
7381 # Don't raise an exception here, as we still have to try to revert the
7382 # disk status, even if this step failed.
7384 def _ExecMigration(self):
7385 """Migrate an instance.
7387 The migration is done by:
7388 - change the disks into dual-master mode
7389 - wait until disks are fully synchronized again
7390 - migrate the instance
7391 - change disks on the new secondary node (the old primary) to secondary
7392 - wait until disks are fully synchronized
7393 - change disks into single-master mode
7396 instance = self.instance
7397 target_node = self.target_node
7398 source_node = self.source_node
7400 self.feedback_fn("* checking disk consistency between source and target")
7401 for dev in instance.disks:
7402 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7403 raise errors.OpExecError("Disk %s is degraded or not fully"
7404 " synchronized on target node,"
7405 " aborting migration" % dev.iv_name)
7407 # First get the migration information from the remote node
7408 result = self.rpc.call_migration_info(source_node, instance)
7409 msg = result.fail_msg
7411 log_err = ("Failed fetching source migration information from %s: %s" %
7412 (source_node, msg))
7413 logging.error(log_err)
7414 raise errors.OpExecError(log_err)
7416 self.migration_info = migration_info = result.payload
7418 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7419 # Then switch the disks to master/master mode
7420 self._EnsureSecondary(target_node)
7421 self._GoStandalone()
7422 self._GoReconnect(True)
7423 self._WaitUntilSync()
7425 self.feedback_fn("* preparing %s to accept the instance" % target_node)
7426 result = self.rpc.call_accept_instance(target_node,
7427 instance,
7428 migration_info,
7429 self.nodes_ip[target_node])
7431 msg = result.fail_msg
7433 logging.error("Instance pre-migration failed, trying to revert"
7434 " disk status: %s", msg)
7435 self.feedback_fn("Pre-migration failed, aborting")
7436 self._AbortMigration()
7437 self._RevertDiskStatus()
7438 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7439 (instance.name, msg))
7441 self.feedback_fn("* migrating instance to %s" % target_node)
7442 result = self.rpc.call_instance_migrate(source_node, instance,
7443 self.nodes_ip[target_node],
7444 self.live)
7445 msg = result.fail_msg
7446 if msg:
7447 logging.error("Instance migration failed, trying to revert"
7448 " disk status: %s", msg)
7449 self.feedback_fn("Migration failed, aborting")
7450 self._AbortMigration()
7451 self._RevertDiskStatus()
7452 raise errors.OpExecError("Could not migrate instance %s: %s" %
7453 (instance.name, msg))
7455 instance.primary_node = target_node
7456 # distribute new instance config to the other nodes
7457 self.cfg.Update(instance, self.feedback_fn)
7459 result = self.rpc.call_finalize_migration(target_node,
7460 instance,
7461 migration_info,
7462 True)
7463 msg = result.fail_msg
7464 if msg:
7465 logging.error("Instance migration succeeded, but finalization failed:"
7466 " %s", msg)
7467 raise errors.OpExecError("Could not finalize instance migration: %s" %
7468 msg)
7470 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7471 self._EnsureSecondary(source_node)
7472 self._WaitUntilSync()
7473 self._GoStandalone()
7474 self._GoReconnect(False)
7475 self._WaitUntilSync()
7477 self.feedback_fn("* done")
7479 def _ExecFailover(self):
7480 """Failover an instance.
7482 The failover is done by shutting it down on its present node and
7483 starting it on the secondary.
7486 instance = self.instance
7487 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7489 source_node = instance.primary_node
7490 target_node = self.target_node
7492 if instance.admin_up:
7493 self.feedback_fn("* checking disk consistency between source and target")
7494 for dev in instance.disks:
7495 # for drbd, these are drbd over lvm
7496 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7497 if primary_node.offline:
7498 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7499 " target node %s" %
7500 (primary_node.name, dev.iv_name, target_node))
7501 elif not self.ignore_consistency:
7502 raise errors.OpExecError("Disk %s is degraded on target node,"
7503 " aborting failover" % dev.iv_name)
7505 self.feedback_fn("* not checking disk consistency as instance is not"
7508 self.feedback_fn("* shutting down instance on source node")
7509 logging.info("Shutting down instance %s on node %s",
7510 instance.name, source_node)
7512 result = self.rpc.call_instance_shutdown(source_node, instance,
7513 self.shutdown_timeout)
7514 msg = result.fail_msg
7515 if msg:
7516 if self.ignore_consistency or primary_node.offline:
7517 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7518 " proceeding anyway; please make sure node"
7519 " %s is down; error details: %s",
7520 instance.name, source_node, source_node, msg)
7522 raise errors.OpExecError("Could not shutdown instance %s on"
7524 (instance.name, source_node, msg))
7526 self.feedback_fn("* deactivating the instance's disks on source node")
7527 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
7528 raise errors.OpExecError("Can't shut down the instance's disks")
7530 instance.primary_node = target_node
7531 # distribute new instance config to the other nodes
7532 self.cfg.Update(instance, self.feedback_fn)
7534 # Only start the instance if it's marked as up
7535 if instance.admin_up:
7536 self.feedback_fn("* activating the instance's disks on target node %s" %
7538 logging.info("Starting instance %s on node %s",
7539 instance.name, target_node)
7541 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
7542 ignore_secondaries=True)
7543 if not disks_ok:
7544 _ShutdownInstanceDisks(self.lu, instance)
7545 raise errors.OpExecError("Can't activate the instance's disks")
7547 self.feedback_fn("* starting the instance on the target node %s" %
7548 target_node)
7549 result = self.rpc.call_instance_start(target_node, instance, None, None,
7550 False)
7551 msg = result.fail_msg
7552 if msg:
7553 _ShutdownInstanceDisks(self.lu, instance)
7554 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7555 (instance.name, target_node, msg))

  def Exec(self, feedback_fn):
    """Perform the migration.

    """
    self.feedback_fn = feedback_fn
    self.source_node = self.instance.primary_node

    # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
    if self.instance.disk_template in constants.DTS_INT_MIRROR:
      self.target_node = self.instance.secondary_nodes[0]
      # Otherwise self.target_node has been populated either
      # directly, or through an iallocator.

    self.all_nodes = [self.source_node, self.target_node]
    self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
                         in self.cfg.GetMultiNodeInfo(self.all_nodes))

    if self.failover:
      feedback_fn("Failover instance %s" % self.instance.name)
      self._ExecFailover()
    else:
      feedback_fn("Migrating instance %s" % self.instance.name)

      if self.cleanup:
        return self._ExecCleanup()
      else:
        return self._ExecMigration()


def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Create a tree of block devices on a given node.

  If this device type has to be created on secondaries, create it and
  all its children.

  If not, just recurse to children keeping the same 'force' value.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device which has
      CreateOnSecondary() attribute
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  if device.CreateOnSecondary():
    force_create = True

  if device.children:
    for child in device.children:
      _CreateBlockDev(lu, node, instance, child, force_create,
                      info, force_open)

  if not force_create:
    return

  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)


def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create a single block device on a given node.

  This will not recurse over children of the device, so they must be
  created in advance.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  lu.cfg.SetDiskID(device, node)
  result = lu.rpc.call_blockdev_create(node, device, device.size,
                                       instance.name, force_open, info)
  result.Raise("Can't create block device %s on"
               " node %s for instance %s" % (device, node, instance.name))
  if device.physical_id is None:
    device.physical_id = result.payload


def _GenerateUniqueNames(lu, exts):
  """Generate a suitable LV name.

  This will generate a logical volume name for the given instance.

  """
  results = []
  for val in exts:
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
    results.append("%s%s" % (new_id, val))
  return results
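
# Editor's note (illustrative, not from the original source): given
# exts=[".disk0", ".disk1"], each entry of the result is a freshly
# generated unique ID with the extension appended, roughly of the form
# "98d4c6f3-...-beef.disk0"; the exact shape of the ID is determined by
# the config writer's GenerateUniqueID.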


def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
                         iv_name, p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  """
  assert len(vgnames) == len(names) == 2
  port = lu.cfg.AllocatePort()
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgnames[0], names[0]))
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                          logical_id=(vgnames[1], names[1]))
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                                      p_minor, s_minor,
                                      shared_secret),
                          children=[dev_data, dev_meta],
                          iv_name=iv_name)
  return drbd_dev
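
# Editor's sketch of the tree built above (illustrative):
#
#   DRBD8 disk (logical_id: primary, secondary, port, p_minor, s_minor,
#               shared_secret)
#     +- LV data volume:     (vgnames[0], names[0]), <size> MiB
#     +- LV metadata volume: (vgnames[1], names[1]), 128 MiB
#
# _CreateBlockDev() recurses over exactly this children list when the
# devices are materialized on the nodes.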


def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index, feedback_fn):
  """Generate the entire disk layout for a given template type.

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  disks = []
  if template_name == constants.DT_DISKLESS:
    pass
  elif template_name == constants.DT_PLAIN:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                      for i in range(disk_count)])
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      vg = disk.get(constants.IDISK_VG, vgname)
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
      disk_dev = objects.Disk(dev_type=constants.LD_LV,
                              size=disk[constants.IDISK_SIZE],
                              logical_id=(vg, names[idx]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk[constants.IDISK_MODE])
      disks.append(disk_dev)
  elif template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    names = []
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      data_vg = disk.get(constants.IDISK_VG, vgname)
      meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk[constants.IDISK_SIZE],
                                      [data_vg, meta_vg],
                                      names[idx * 2:idx * 2 + 2],
                                      "disk/%d" % disk_index,
                                      minors[idx * 2], minors[idx * 2 + 1])
      disk_dev.mode = disk[constants.IDISK_MODE]
      disks.append(disk_dev)
  elif template_name == constants.DT_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    opcodes.RequireFileStorage()

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
                              size=disk[constants.IDISK_SIZE],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
                              mode=disk[constants.IDISK_MODE])
      disks.append(disk_dev)
  elif template_name == constants.DT_SHARED_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    opcodes.RequireSharedFileStorage()

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
                              size=disk[constants.IDISK_SIZE],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
                              mode=disk[constants.IDISK_MODE])
      disks.append(disk_dev)
  elif template_name == constants.DT_BLOCK:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
                              size=disk[constants.IDISK_SIZE],
                              logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
                                          disk[constants.IDISK_ADOPT]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk[constants.IDISK_MODE])
      disks.append(disk_dev)
  else:
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)

  return disks
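
# Editor's note (illustrative, hypothetical values): for DT_PLAIN with two
# 1024 MiB disks this returns two LD_LV Disk objects with iv_names "disk/0"
# and "disk/1" and logical ids of the form ("xenvg", "<unique-id>.disk0");
# for DT_DRBD8 each entry is instead a full _GenerateDRBD8Branch() tree
# with the data/meta LVs as children.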


def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name


def _CalcEta(time_taken, written, total_size):
  """Calculates the ETA based on size written and total size.

  @param time_taken: The time taken so far
  @param written: amount written so far
  @param total_size: The total size of data to be written
  @return: The remaining time in seconds

  """
  avg_time = time_taken / float(written)
  return (total_size - written) * avg_time
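
# Worked example (editor's note): _CalcEta assumes a constant average
# write rate.  After 30 seconds spent writing 1024 MiB of 4096 MiB total:
#   avg_time = 30.0 / 1024           # seconds per MiB so far
#   eta = (4096 - 1024) * avg_time   # -> 90.0 seconds remaining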


def _WipeDisks(lu, instance):
  """Wipes instance disks.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should wipe
  @return: the success of the wipe

  """
  node = instance.primary_node

  for device in instance.disks:
    lu.cfg.SetDiskID(device, node)

  logging.info("Pause sync of instance %s disks", instance.name)
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)

  for idx, success in enumerate(result.payload):
    if not success:
      logging.warn("pause-sync of instance %s for disk %d failed",
                   instance.name, idx)

  try:
    for idx, device in enumerate(instance.disks):
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
      # MAX_WIPE_CHUNK at max
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
                            constants.MIN_WIPE_CHUNK_PERCENT)
      # we _must_ make this an int, otherwise rounding errors will
      # occur
      wipe_chunk_size = int(wipe_chunk_size)

      lu.LogInfo("* Wiping disk %d", idx)
      logging.info("Wiping disk %d for instance %s, node %s using"
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)

      offset = 0
      size = device.size
      last_output = 0
      start_time = time.time()

      while offset < size:
        wipe_size = min(wipe_chunk_size, size - offset)
        logging.debug("Wiping disk %d, offset %s, chunk %s",
                      idx, offset, wipe_size)
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
                     (idx, offset, wipe_size))
        now = time.time()
        offset += wipe_size
        if now - last_output >= 60:
          eta = _CalcEta(now - start_time, offset, size)
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
          last_output = now
  finally:
    logging.info("Resume sync of instance %s disks", instance.name)

    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)

    for idx, success in enumerate(result.payload):
      if not success:
        lu.LogWarning("Resume sync of disk %d failed, please have a"
                      " look at the status and troubleshoot the issue", idx)
        logging.warn("resume-sync of instance %s for disk %d failed",
                     instance.name, idx)
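
# Editor's note on the chunking in _WipeDisks (hypothetical numbers): for a
# 10240 MiB disk and MIN_WIPE_CHUNK_PERCENT = 10, the candidate chunk is
# 1024 MiB, capped at MAX_WIPE_CHUNK; the explicit int() conversion keeps
# the offset arithmetic exact, so the loop terminates with offset == size.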


def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation
  @rtype: boolean
  @return: the success of the creation

  """
  info = _GetInstanceInfoText(instance)
  if target_node is None:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes
  else:
    pnode = target_node
    all_nodes = [pnode]

  if instance.disk_template in constants.DTS_FILEBASED:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    result.Raise("Failed to create directory '%s' on"
                 " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUInstanceSetParams
  for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
      continue
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    for node in all_nodes:
      f_create = node == pnode
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)


def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

    # if this is a DRBD disk, return its port to the pool
    if device.dev_type in constants.LDS_DRBD:
      tcp_port = device.logical_id[2]
      lu.cfg.AddTcpUdpPort(tcp_port)

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, instance.primary_node, result.fail_msg)
      all_result = False

  return all_result


def _ComputeDiskSizePerVG(disk_template, disks):
  """Compute disk size requirements in the volume group.

  """
  def _compute(disks, payload):
    """Universal algorithm.

    """
    vgs = {}
    for disk in disks:
      # accumulate, per volume group, the disk size plus the per-disk
      # payload (e.g. DRBD metadata)
      vgs[disk[constants.IDISK_VG]] = \
        vgs.get(disk[constants.IDISK_VG], 0) + \
        disk[constants.IDISK_SIZE] + payload

    return vgs

  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: {},
    constants.DT_PLAIN: _compute(disks, 0),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: _compute(disks, 128),
    constants.DT_FILE: {},
    constants.DT_SHARED_FILE: {},
    }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
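
# Editor's note (illustrative): for two DRBD8 disks of 1024 MiB and
# 2048 MiB, both in volume group "xenvg", _compute(disks, 128) returns
# {"xenvg": 1024 + 128 + 2048 + 128} == {"xenvg": 3328}.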


def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group.

  """
  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
    constants.DT_FILE: None,
    constants.DT_SHARED_FILE: 0,
    constants.DT_BLOCK: 0,
    }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
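
# Editor's note (illustrative): for the same two DRBD8 disks of 1024 and
# 2048 MiB this flat variant returns 1024 + 128 + 2048 + 128 == 3328;
# file-based and block templates report None or 0 because they consume no
# volume group space.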


def _FilterVmNodes(lu, nodenames):
  """Filters out non-vm_capable nodes from a list.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @rtype: list
  @return: the list of vm-capable nodes

  """
  non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
  return [name for name in nodenames if name not in non_vm_nodes]
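
# Usage sketch (editor's note): validation helpers such as _CheckHVParams()
# and _CheckOSParams() below run their node lists through this filter
# first, so validation RPCs are never sent to nodes that cannot host
# instances.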


def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
                                                  hvname,
                                                  hvparams)
  for node in nodenames:
    info = hvinfo[node]
    if info.offline:
      continue
    info.Raise("Hypervisor parameter validation failed on node %s" % node)


def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """OS parameters validation.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the OS we should use
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)
  result = lu.rpc.call_os_validate(required, nodenames, osname,
                                   [constants.OS_VALIDATE_PARAMETERS],
                                   osparams)
  for node, nres in result.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    nres.Raise("OS Parameters validation failed on node %s" % node)
    if not nres.payload:
      lu.LogInfo("OS %s not found on node %s, validation skipped",
                 osname, node)


class LUInstanceCreate(LogicalUnit):
  """Create an instance.

  """
  HPATH = "instance-add"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check arguments.

    """
    # do not require name_check to ease forward/backward compatibility
    # for tools
    if self.op.no_install and self.op.start:
      self.LogInfo("No-installation mode selected, disabling startup")
      self.op.start = False
    # validate/normalize the instance name
    self.op.instance_name = \
      netutils.Hostname.GetNormalizedName(self.op.instance_name)

    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do IP address check without a name"
                                 " check", errors.ECODE_INVAL)

    # check nics' parameter names
    for nic in self.op.nics:
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)

    # check disks. parameter names and consistent adopt/no-adopt strategy
    has_adopt = has_no_adopt = False
    for disk in self.op.disks:
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
      if constants.IDISK_ADOPT in disk:
        has_adopt = True
      else:
        has_no_adopt = True
    if has_adopt and has_no_adopt:
      raise errors.OpPrereqError("Either all disks are adopted or none is",
                                 errors.ECODE_INVAL)
    if has_adopt:
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
        raise errors.OpPrereqError("Disk adoption is not supported for the"
                                   " '%s' disk template" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)
      if self.op.iallocator is not None:
        raise errors.OpPrereqError("Disk adoption not allowed with an"
                                   " iallocator script", errors.ECODE_INVAL)
      if self.op.mode == constants.INSTANCE_IMPORT:
        raise errors.OpPrereqError("Disk adoption not allowed for"
                                   " instance import", errors.ECODE_INVAL)
    else:
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
                                   " but no 'adopt' parameter given" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)

    self.adopt_disks = has_adopt

    # instance name verification
    if self.op.name_check:
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
      self.op.instance_name = self.hostname1.name
      # used in CheckPrereq for ip ping check
      self.check_ip = self.hostname1.ip
    else:
      self.check_ip = None

    # file storage checks
    if (self.op.file_driver and
        self.op.file_driver not in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver, errors.ECODE_INVAL)

    if self.op.disk_template == constants.DT_FILE:
      opcodes.RequireFileStorage()
    elif self.op.disk_template == constants.DT_SHARED_FILE:
      opcodes.RequireSharedFileStorage()

    ### Node/iallocator related checks
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")

    if self.op.pnode is not None:
      if self.op.disk_template in constants.DTS_INT_MIRROR:
        if self.op.snode is None:
          raise errors.OpPrereqError("The networked disk templates need"
                                     " a mirror node", errors.ECODE_INVAL)
      elif self.op.snode:
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
                        " template")
        self.op.snode = None

    self._cds = _GetClusterDomainSecret()

    if self.op.mode == constants.INSTANCE_IMPORT:
      # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      # works again!
      self.op.force_variant = True

      if self.op.no_install:
        self.LogInfo("No-installation mode has no effect during import")

    elif self.op.mode == constants.INSTANCE_CREATE:
      if self.op.os_type is None:
        raise errors.OpPrereqError("No guest OS specified",
                                   errors.ECODE_INVAL)
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
                                   " installation" % self.op.os_type,
                                   errors.ECODE_STATE)
      if self.op.disk_template is None:
        raise errors.OpPrereqError("No disk template specified",
                                   errors.ECODE_INVAL)

    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
      # Check handshake to ensure both clusters have the same domain secret
      src_handshake = self.op.source_handshake
      if not src_handshake:
        raise errors.OpPrereqError("Missing source handshake",
                                   errors.ECODE_INVAL)

      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
                                                           src_handshake)
      if errmsg:
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
                                   errors.ECODE_INVAL)

      # Load and check source CA
      self.source_x509_ca_pem = self.op.source_x509_ca
      if not self.source_x509_ca_pem:
        raise errors.OpPrereqError("Missing source X509 CA",
                                   errors.ECODE_INVAL)

      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
                                                    self._cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
                                   errors.ECODE_INVAL)

      self.source_x509_ca = cert

      src_instance_name = self.op.source_instance_name
      if not src_instance_name:
        raise errors.OpPrereqError("Missing source instance name",
                                   errors.ECODE_INVAL)

      self.source_instance_name = \
        netutils.GetHostname(name=src_instance_name).name

    else:
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
                                 self.op.mode, errors.ECODE_INVAL)

  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation.

    """
    self.needed_locks = {}

    instance_name = self.op.instance_name
    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name, errors.ECODE_EXISTS)

    self.add_locks[locking.LEVEL_INSTANCE] = instance_name

    if self.op.iallocator:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist

    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = self.op.src_node
      src_path = self.op.src_path

      if src_path is None:
        self.op.src_path = src_path = self.op.instance_name

      if src_node is None:
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from a path"
                                     " requires a source node option",
                                     errors.ECODE_INVAL)
      else:
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        if not os.path.isabs(src_path):
          self.op.src_path = src_path = \
            utils.PathJoin(constants.EXPORT_DIR, src_path)

  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    nics = [n.ToDict() for n in self.nics]
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_ALLOC,
                     name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     tags=self.op.tags,
                     os=self.op.os_type,
                     vcpus=self.be_full[constants.BE_VCPUS],
                     memory=self.be_full[constants.BE_MEMORY],
                     disks=self.disks,
                     nics=nics,
                     hypervisor=self.op.hypervisor,
                     )

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)
    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.result),
                                  ial.required_nodes), errors.ECODE_FAULT)
    self.op.pnode = ial.result[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(ial.result))
    if ial.required_nodes == 2:
      self.op.snode = ial.result[1]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "ADD_MODE": self.op.mode,
      }
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      memory=self.be_full[constants.BE_MEMORY],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
             for d in self.disks],
      bep=self.be_full,
      hvp=self.hv_full,
      hypervisor_name=self.op.hypervisor,
      tags=self.op.tags,
      ))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
    return nl, nl

  def _ReadExportInfo(self):
    """Reads the export information from disk.

    It will override the opcode source node and path with the actual
    information, if these two were not specified before.

    @return: the export information

    """
    assert self.op.mode == constants.INSTANCE_IMPORT

    src_node = self.op.src_node
    src_path = self.op.src_path

    if src_node is None:
      locked_nodes = self.owned_locks(locking.LEVEL_NODE)
      exp_list = self.rpc.call_export_list(locked_nodes)
      found = False
      for node in exp_list:
        if exp_list[node].fail_msg:
          continue
        if src_path in exp_list[node].payload:
          found = True
          self.op.src_node = src_node = node
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
                                                       src_path)
          break
      if not found:
        raise errors.OpPrereqError("No export found for relative path %s" %
                                   src_path, errors.ECODE_INVAL)

    _CheckNodeOnline(self, src_node)
    result = self.rpc.call_export_info(src_node, src_path)
    result.Raise("No export or invalid export found in dir %s" % src_path)

    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
    if not export_info.has_section(constants.INISECT_EXP):
      raise errors.ProgrammerError("Corrupted export config",
                                   errors.ECODE_ENVIRON)

    ei_version = export_info.get(constants.INISECT_EXP, "version")
    if int(ei_version) != constants.EXPORT_VERSION:
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                 (ei_version, constants.EXPORT_VERSION),
                                 errors.ECODE_ENVIRON)

    return export_info

  def _ReadExportParams(self, einfo):
    """Use export parameters as defaults.

    In case the opcode doesn't specify (as in override) some instance
    parameters, then try to use them from the export information, if
    that declares them.

    """
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")

    if self.op.disk_template is None:
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
        self.op.disk_template = einfo.get(constants.INISECT_INS,
                                          "disk_template")
      else:
        raise errors.OpPrereqError("No disk template specified and the export"
                                   " is missing the disk_template information",
                                   errors.ECODE_INVAL)

    if not self.op.disks:
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
        disks = []
        # TODO: import the disk iv_name too
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
          disks.append({constants.IDISK_SIZE: disk_sz})
        self.op.disks = disks
      else:
        raise errors.OpPrereqError("No disk info specified and the export"
                                   " is missing the disk information",
                                   errors.ECODE_INVAL)

    if (not self.op.nics and
        einfo.has_option(constants.INISECT_INS, "nic_count")):
      nics = []
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
        ndict = {}
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
          ndict[name] = v
        nics.append(ndict)
      self.op.nics = nics

    if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
      self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()

    if (self.op.hypervisor is None and
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")

    if einfo.has_section(constants.INISECT_HYP):
      # use the export parameters but do not override the ones
      # specified by the user
      for name, value in einfo.items(constants.INISECT_HYP):
        if name not in self.op.hvparams:
          self.op.hvparams[name] = value

    if einfo.has_section(constants.INISECT_BEP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_BEP):
        if name not in self.op.beparams:
          self.op.beparams[name] = value
    else:
      # try to read the parameters old style, from the main section
      for name in constants.BES_PARAMETERS:
        if (name not in self.op.beparams and
            einfo.has_option(constants.INISECT_INS, name)):
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)

    if einfo.has_section(constants.INISECT_OSP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_OSP):
        if name not in self.op.osparams:
          self.op.osparams[name] = value

  def _RevertToDefaults(self, cluster):
    """Revert the instance parameters to the default values.

    """
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
    for name in self.op.hvparams.keys():
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
        del self.op.hvparams[name]

    be_defs = cluster.SimpleFillBE({})
    for name in self.op.beparams.keys():
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
        del self.op.beparams[name]

    nic_defs = cluster.SimpleFillNIC({})
    for nic in self.op.nics:
      for name in constants.NICS_PARAMETERS:
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
          del nic[name]

    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
    for name in self.op.osparams.keys():
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
        del self.op.osparams[name]

  def _CalculateFileStorageDir(self):
    """Calculate final instance file storage dir.

    """
    # file storage dir calculation/check
    self.instance_file_storage_dir = None
    if self.op.disk_template in constants.DTS_FILEBASED:
      # build the full file storage dir path
      joinargs = []

      if self.op.disk_template == constants.DT_SHARED_FILE:
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
      else:
        get_fsd_fn = self.cfg.GetFileStorageDir

      cfg_storagedir = get_fsd_fn()
      if not cfg_storagedir:
        raise errors.OpPrereqError("Cluster file storage dir not defined")
      joinargs.append(cfg_storagedir)

      if self.op.file_storage_dir is not None:
        joinargs.append(self.op.file_storage_dir)

      joinargs.append(self.op.instance_name)

      # pylint: disable=W0142
      self.instance_file_storage_dir = utils.PathJoin(*joinargs)
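
  # Editor's note (illustrative, hypothetical paths): with a cluster file
  # storage dir of "/srv/ganeti/file-storage", an opcode file_storage_dir
  # of "mysubdir" and instance name "web1", the method above yields
  # "/srv/ganeti/file-storage/mysubdir/web1".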

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self._CalculateFileStorageDir()

    if self.op.mode == constants.INSTANCE_IMPORT:
      export_info = self._ReadExportInfo()
      self._ReadExportParams(export_info)

    if (not self.cfg.GetVGName() and
        self.op.disk_template not in constants.DTS_NOT_LVM):
      raise errors.OpPrereqError("Cluster does not support lvm-based"
                                 " instances", errors.ECODE_STATE)

    if self.op.hypervisor is None:
      self.op.hypervisor = self.cfg.GetHypervisorType()

    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors
    if self.op.hypervisor not in enabled_hvs:
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
                                 " cluster (%s)" % (self.op.hypervisor,
                                                    ",".join(enabled_hvs)),
                                 errors.ECODE_STATE)

    # Check tag validity
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

    # check hypervisor parameter syntax (locally)
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
                                      self.op.hvparams)
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
    hv_type.CheckParameterSyntax(filled_hvp)
    self.hv_full = filled_hvp
    # check that we don't specify global parameters on an instance
    _CheckGlobalHvParams(self.op.hvparams)

    # fill and remember the beparams dict
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
    self.be_full = cluster.SimpleFillBE(self.op.beparams)

    # build os parameters
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)

    # now that hvp/bep are in final format, let's reset to defaults,
    # if told to do so
    if self.op.identify_defaults:
      self._RevertToDefaults(cluster)

    # NIC buildup
    self.nics = []
    for idx, nic in enumerate(self.op.nics):
      nic_mode_req = nic.get(constants.INIC_MODE, None)
      nic_mode = nic_mode_req
      if nic_mode is None:
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]

      # in routed mode, for the first nic, the default ip is 'auto'
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
        default_ip_mode = constants.VALUE_AUTO
      else:
        default_ip_mode = constants.VALUE_NONE

      # ip validity checks
      ip = nic.get(constants.INIC_IP, default_ip_mode)
      if ip is None or ip.lower() == constants.VALUE_NONE:
        nic_ip = None
      elif ip.lower() == constants.VALUE_AUTO:
        if not self.op.name_check:
          raise errors.OpPrereqError("IP address set to auto but name checks"
                                     " have been skipped",
                                     errors.ECODE_INVAL)
        nic_ip = self.hostname1.ip
      else:
        if not netutils.IPAddress.IsValid(ip):
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
                                     errors.ECODE_INVAL)
        nic_ip = ip

      # TODO: check the ip address for uniqueness
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
                                   errors.ECODE_INVAL)

      # MAC address verification
      mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        mac = utils.NormalizeAndValidateMac(mac)

        try:
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("MAC address %s already in use"
                                     " in cluster" % mac,
                                     errors.ECODE_NOTUNIQUE)

      # Build nic parameters
      link = nic.get(constants.INIC_LINK, None)
      nicparams = {}
      if nic_mode_req:
        nicparams[constants.NIC_MODE] = nic_mode_req
      if link:
        nicparams[constants.NIC_LINK] = link

      check_params = cluster.SimpleFillNIC(nicparams)
      objects.NIC.CheckParameterSyntax(check_params)
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))

    # disk checks/pre-build
    default_vg = self.cfg.GetVGName()
    self.disks = []
    for disk in self.op.disks:
      mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                   mode, errors.ECODE_INVAL)
      size = disk.get(constants.IDISK_SIZE, None)
      if size is None:
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
      try:
        size = int(size)
      except (TypeError, ValueError):
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
                                   errors.ECODE_INVAL)

      data_vg = disk.get(constants.IDISK_VG, default_vg)
      new_disk = {
        constants.IDISK_SIZE: size,
        constants.IDISK_MODE: mode,
        constants.IDISK_VG: data_vg,
        constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
        }
      if constants.IDISK_ADOPT in disk:
        new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
      self.disks.append(new_disk)

    if self.op.mode == constants.INSTANCE_IMPORT:

      # Check that the new instance doesn't have less disks than the export
      instance_disks = len(self.disks)
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
      if instance_disks < export_disks:
        raise errors.OpPrereqError("Not enough disks to import."
                                   " (instance: %d, export: %d)" %
                                   (instance_disks, export_disks),
                                   errors.ECODE_INVAL)

      disk_images = []
      for idx in range(export_disks):
        option = "disk%d_dump" % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = utils.PathJoin(self.op.src_path, export_name)
          disk_images.append(image)
        else:
          disk_images.append(False)

      self.src_images = disk_images

      old_name = export_info.get(constants.INISECT_INS, "name")
      try:
        exp_nic_count = export_info.getint(constants.INISECT_INS, "nic_count")
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
                                   " an integer: %s" % str(err),
                                   errors.ECODE_INVAL)
      if self.op.instance_name == old_name:
        for idx, nic in enumerate(self.nics):
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
            nic_mac_ini = "nic%d_mac" % idx
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)

    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT

    # ip ping checks (we use the same ip that was resolved in ExpandNames)
    if self.op.ip_check:
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (self.check_ip, self.op.instance_name),
                                   errors.ECODE_NOTUNIQUE)

    #### mac address generation
    # By generating here the mac address both the allocator and the hooks get
    # the real final mac address rather than the 'auto' or 'generate' value.
    # There is a race condition between the generation and the instance object
    # creation, which means that we know the mac is valid now, but we're not
    # sure it will be when we actually add the instance. If things go bad
    # adding the instance will abort because of a duplicate mac, and the
    # creation job will fail.
    for nic in self.nics:
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())

    #### allocator run

    if self.op.iallocator is not None:
      self._RunAllocator()

    #### node related checks

    # check primary node
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
    assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode
    if pnode.offline:
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if pnode.drained:
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if not pnode.vm_capable:
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
                                 " '%s'" % pnode.name, errors.ECODE_STATE)

    self.secondaries = []

    # mirror node verification
    if self.op.disk_template in constants.DTS_INT_MIRROR:
      if self.op.snode == pnode.name:
        raise errors.OpPrereqError("The secondary node cannot be the"
                                   " primary node", errors.ECODE_INVAL)
      _CheckNodeOnline(self, self.op.snode)
      _CheckNodeNotDrained(self, self.op.snode)
      _CheckNodeVmCapable(self, self.op.snode)
      self.secondaries.append(self.op.snode)

    nodenames = [pnode.name] + self.secondaries

    if not self.adopt_disks:
      # Check lv size requirements, if not adopting
      req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
      _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)

    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
                                disk[constants.IDISK_ADOPT])
                     for disk in self.disks])
      if len(all_lvs) != len(self.disks):
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
                                   errors.ECODE_INVAL)
      for lv_name in all_lvs:
        try:
          # FIXME: lv_name here is "vg/lv" need to ensure that other calls
          # to ReserveLV uses the same syntax
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("LV named %s used by another instance" %
                                     lv_name, errors.ECODE_NOTUNIQUE)

      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)

      node_lvs = self.rpc.call_lv_list([pnode.name],
                                       vg_names.payload.keys())[pnode.name]
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
      node_lvs = node_lvs.payload

      delta = all_lvs.difference(node_lvs.keys())
      if delta:
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
      if online_lvs:
        raise errors.OpPrereqError("Online logical volumes found, cannot"
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
                                   errors.ECODE_STATE)
      # update the size of disk based on what is found
      for dsk in self.disks:
        dsk[constants.IDISK_SIZE] = \
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
                                        dsk[constants.IDISK_ADOPT])][0]))

    elif self.op.disk_template == constants.DT_BLOCK:
      # Normalize and de-duplicate device paths
      all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
                       for disk in self.disks])
      if len(all_disks) != len(self.disks):
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
                                   errors.ECODE_INVAL)
      baddisks = [d for d in all_disks
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
      if baddisks:
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
                                   " cannot be adopted" %
                                   (", ".join(baddisks),
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
                                   errors.ECODE_INVAL)

      node_disks = self.rpc.call_bdev_sizes([pnode.name],
                                            list(all_disks))[pnode.name]
      node_disks.Raise("Cannot get block device information from node %s" %
                       pnode.name)
      node_disks = node_disks.payload
      delta = all_disks.difference(node_disks.keys())
      if delta:
        raise errors.OpPrereqError("Missing block device(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      for dsk in self.disks:
        dsk[constants.IDISK_SIZE] = \
          int(float(node_disks[dsk[constants.IDISK_ADOPT]]))

    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)

    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
    # check OS parameters (remotely)
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)

    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)

    # memory check on primary node
    if self.op.start:
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MEMORY],
                           self.op.hypervisor)

    self.dry_run_result = list(nodenames)

  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None

    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  self.instance_file_storage_dir,
                                  self.op.file_driver,
                                  0,
                                  feedback_fn)

    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_up=False,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            osparams=self.op.osparams,
                            )

    if self.op.tags:
      for tag in self.op.tags:
        iobj.AddTag(tag)

    if self.adopt_disks:
      if self.op.disk_template == constants.DT_PLAIN:
        # rename LVs to the newly-generated names; we need to construct
        # 'fake' LV disks with the old data, plus the new unique_id
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
        rename_to = []
        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
          rename_to.append(t_dsk.logical_id)
          t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
          self.cfg.SetDiskID(t_dsk, pnode_name)
        result = self.rpc.call_blockdev_rename(pnode_name,
                                               zip(tmp_disks, rename_to))
        result.Raise("Failed to rename adopted LVs")
    else:
      feedback_fn("* creating instance disks...")
      try:
        _CreateDisks(self, iobj)
      except errors.OpExecError:
        self.LogWarning("Device creation failed, reverting...")
        try:
          _RemoveDisks(self, iobj)
        finally:
          self.cfg.ReleaseDRBDMinors(instance)
          raise

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj, self.proc.GetECId())

    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]

    if self.op.mode == constants.INSTANCE_IMPORT:
      # Release unused nodes
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
    else:
      # Release all nodes
      _ReleaseLocks(self, locking.LEVEL_NODE)

    disk_abort = False
    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
      feedback_fn("* wiping instance disks...")
      try:
        _WipeDisks(self, iobj)
      except errors.OpExecError, err:
        logging.exception("Wiping disks failed")
        self.LogWarning("Wiping instance disks failed (%s)", err)
        disk_abort = True

    if disk_abort:
      # Something is already wrong with the disks, don't do anything else
      pass
    elif self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
      if self.op.mode == constants.INSTANCE_CREATE:
        if not self.op.no_install:
          pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
                        not self.op.wait_for_sync)
          if pause_sync:
            feedback_fn("* pausing disk sync to install instance OS")
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
                                                              iobj.disks, True)
            for idx, success in enumerate(result.payload):
              if not success:
                logging.warn("pause-sync of instance %s for disk %d failed",
                             instance, idx)

          feedback_fn("* running the instance OS create scripts...")
          # FIXME: pass debug option from opcode to backend
          os_add_result = \
            self.rpc.call_instance_os_add(pnode_name, iobj, False,
                                          self.op.debug_level)
          if pause_sync:
            feedback_fn("* resuming disk sync")
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
                                                              iobj.disks,
                                                              False)
            for idx, success in enumerate(result.payload):
              if not success:
                logging.warn("resume-sync of instance %s for disk %d failed",
                             instance, idx)

          os_add_result.Raise("Could not add os for instance %s"
                              " on node %s" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")

        transfers = []

        for idx, image in enumerate(self.src_images):
          if not image:
            continue

          # FIXME: pass debug option from opcode to backend
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
                                             constants.IEIO_FILE, (image, ),
                                             constants.IEIO_SCRIPT,
                                             (iobj.disks[idx], idx),
                                             None)
          transfers.append(dt)

        import_result = \
          masterd.instance.TransferInstanceData(self, feedback_fn,
                                                self.op.src_node, pnode_name,
                                                self.pnode.secondary_ip,
                                                iobj, transfers)
        if not compat.all(import_result):
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
        feedback_fn("* preparing remote import...")
        # The source cluster will stop the instance before attempting to make a
        # connection. In some cases stopping an instance can take a long time,
        # hence the shutdown timeout is added to the connection timeout.
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
                           self.op.source_shutdown_timeout)
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

        assert iobj.primary_node == self.pnode.name
        disk_results = \
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
                                        self.source_x509_ca,
                                        self._cds, timeouts)
        if not compat.all(disk_results):
          # TODO: Should the instance still be started, even if some disks
          # failed to import (valid for local imports, too)?
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

        # Run rename script on newly imported instance
        assert iobj.name == instance
        feedback_fn("Running rename script for %s" % instance)
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
                                                   self.source_instance_name,
                                                   self.op.debug_level)
        if result.fail_msg:
          self.LogWarning("Failed to run rename script for %s on node"
                          " %s: %s" % (instance, pnode_name, result.fail_msg))

      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

    if self.op.start:
      iobj.admin_up = True
      self.cfg.Update(iobj, feedback_fn)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, iobj,
                                            None, None, False)
      result.Raise("Could not start instance")

    return list(iobj.all_nodes)


class LUInstanceConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance.

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)

    if instance.name not in node_insts.payload:
      if instance.admin_up:
        state = constants.INSTST_ERRORDOWN
      else:
        state = constants.INSTST_ADMINDOWN
      raise errors.OpExecError("Instance %s is not running (state %s)" %
                               (instance.name, state))

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)


def _GetInstanceConsole(cluster, instance):
  """Returns console information for an instance.

  @type cluster: L{objects.Cluster}
  @type instance: L{objects.Instance}
  @rtype: dict

  """
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
  # beparams and hvparams are passed separately, to avoid editing the
  # instance and then saving the defaults in the instance itself.
  hvparams = cluster.FillHV(instance)
  beparams = cluster.FillBE(instance)
  console = hyper.GetInstanceConsole(instance, hvparams, beparams)

  assert console.instance == instance.name
  assert console.Validate()

  return console.ToDict()
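
# Usage sketch (editor's note, assuming the console-object API in
# objects.py): LUInstanceConsole.Exec() returns this dictionary to the
# client, which rebuilds the console object from it and runs the described
# command (for example an SSH or hypervisor-specific invocation) on the
# master node.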


class LUInstanceReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    assert locking.LEVEL_NODE not in self.needed_locks
    assert locking.LEVEL_NODEGROUP not in self.needed_locks

    assert self.op.iallocator is None or self.op.remote_node is None, \
      "Conflicting options"

    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    else:
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

      if self.op.iallocator is not None:
        # iallocator will select a new node in the same group
        self.needed_locks[locking.LEVEL_NODEGROUP] = []

    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks, False, self.op.early_release)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert self.op.remote_node is None
      assert self.op.iallocator is not None
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      self.share_locks[locking.LEVEL_NODEGROUP] = 1
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)

    elif level == locking.LEVEL_NODE:
      if self.op.iallocator is not None:
        assert self.op.remote_node is None
        assert not self.needed_locks[locking.LEVEL_NODE]

        # Lock member nodes of all locked groups
        self.needed_locks[locking.LEVEL_NODE] = [node_name
          for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
          for node_name in self.cfg.GetNodeGroup(group_uuid).members]
      else:
        self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    instance = self.replacer.instance
    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": instance.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self.replacer.instance
    nl = [
      self.cfg.GetMasterNode(),
      instance.primary_node,
      ]
    if self.op.remote_node is not None:
      nl.append(self.op.remote_node)
    return nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    """
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
            self.op.iallocator is None)

    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
    if owned_groups:
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)

    return LogicalUnit.CheckPrereq(self)
9293 class TLReplaceDisks(Tasklet):
9294 """Replaces disks for an instance.
9296 Note: Locking is not within the scope of this class.
9299 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9300 disks, delay_iallocator, early_release):
9301 """Initializes this class.
9304 Tasklet.__init__(self, lu)
9307 self.instance_name = instance_name
9309 self.iallocator_name = iallocator_name
9310 self.remote_node = remote_node
9312 self.delay_iallocator = delay_iallocator
9313 self.early_release = early_release
9316 self.instance = None
9317 self.new_node = None
9318 self.target_node = None
9319 self.other_node = None
9320 self.remote_node_info = None
9321 self.node_secondary_ip = None
9324 def CheckArguments(mode, remote_node, iallocator):
9325 """Helper function for users of this class.
9328 # check for valid parameter combination
9329 if mode == constants.REPLACE_DISK_CHG:
9330 if remote_node is None and iallocator is None:
9331 raise errors.OpPrereqError("When changing the secondary either an"
9332 " iallocator script must be used or the"
9333 " new node given", errors.ECODE_INVAL)
9335 if remote_node is not None and iallocator is not None:
9336 raise errors.OpPrereqError("Give either the iallocator or the new"
9337 " secondary, not both", errors.ECODE_INVAL)
9339 elif remote_node is not None or iallocator is not None:
9340 # Not replacing the secondary
9341 raise errors.OpPrereqError("The iallocator and new node options can"
9342 " only be used when changing the"
9343 " secondary node", errors.ECODE_INVAL)
9345 @staticmethod
9346 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9347 """Compute a new secondary node using an IAllocator.
9349 """
9350 ial = IAllocator(lu.cfg, lu.rpc,
9351 mode=constants.IALLOCATOR_MODE_RELOC,
9352 name=instance_name,
9353 relocate_from=list(relocate_from))
9355 ial.Run(iallocator_name)
9357 if not ial.success:
9358 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9359 " %s" % (iallocator_name, ial.info),
9360 errors.ECODE_NORES)
9362 if len(ial.result) != ial.required_nodes:
9363 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9364 " of nodes (%s), required %s" %
9365 (iallocator_name,
9366 len(ial.result), ial.required_nodes),
9367 errors.ECODE_FAULT)
9369 remote_node_name = ial.result[0]
9371 lu.LogInfo("Selected new secondary for instance '%s': %s",
9372 instance_name, remote_node_name)
9374 return remote_node_name
9376 def _FindFaultyDisks(self, node_name):
9377 """Wrapper for L{_FindFaultyInstanceDisks}.
9380 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
9383 def _CheckDisksActivated(self, instance):
9384 """Checks if the instance disks are activated.
9386 @param instance: The instance to check disks
9387 @return: True if they are activated, False otherwise
9389 """
9390 nodes = instance.all_nodes
9392 for idx, dev in enumerate(instance.disks):
9393 for node in nodes:
9394 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
9395 self.cfg.SetDiskID(dev, node)
9397 result = self.rpc.call_blockdev_find(node, dev)
9399 if result.offline:
9400 continue
9401 elif result.fail_msg or not result.payload:
9402 return False
9404 return True
9406 def CheckPrereq(self):
9407 """Check prerequisites.
9409 This checks that the instance is in the cluster.
9412 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9413 assert instance is not None, \
9414 "Cannot retrieve locked instance %s" % self.instance_name
9416 if instance.disk_template != constants.DT_DRBD8:
9417 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9418 " instances", errors.ECODE_INVAL)
9420 if len(instance.secondary_nodes) != 1:
9421 raise errors.OpPrereqError("The instance has a strange layout,"
9422 " expected one secondary but found %d" %
9423 len(instance.secondary_nodes),
9424 errors.ECODE_FAULT)
9426 if not self.delay_iallocator:
9427 self._CheckPrereq2()
9429 def _CheckPrereq2(self):
9430 """Check prerequisites, second part.
9432 This function should always be part of CheckPrereq. It was separated and is
9433 now called from Exec because during node evacuation iallocator was only
9434 called with an unmodified cluster model, not taking planned changes into
9435 account.
9437 """
9438 instance = self.instance
9439 secondary_node = instance.secondary_nodes[0]
9441 if self.iallocator_name is None:
9442 remote_node = self.remote_node
9443 else:
9444 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
9445 instance.name, instance.secondary_nodes)
9447 if remote_node is None:
9448 self.remote_node_info = None
9449 else:
9450 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
9451 "Remote node '%s' is not locked" % remote_node
9453 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
9454 assert self.remote_node_info is not None, \
9455 "Cannot retrieve locked node %s" % remote_node
9457 if remote_node == self.instance.primary_node:
9458 raise errors.OpPrereqError("The specified node is the primary node of"
9459 " the instance", errors.ECODE_INVAL)
9461 if remote_node == secondary_node:
9462 raise errors.OpPrereqError("The specified node is already the"
9463 " secondary node of the instance",
9466 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
9467 constants.REPLACE_DISK_CHG):
9468 raise errors.OpPrereqError("Cannot specify disks to be replaced",
9469 errors.ECODE_INVAL)
9471 if self.mode == constants.REPLACE_DISK_AUTO:
9472 if not self._CheckDisksActivated(instance):
9473 raise errors.OpPrereqError("Please run activate-disks on instance %s"
9474 " first" % self.instance_name,
9476 faulty_primary = self._FindFaultyDisks(instance.primary_node)
9477 faulty_secondary = self._FindFaultyDisks(secondary_node)
9479 if faulty_primary and faulty_secondary:
9480 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
9481 " one node and can not be repaired"
9482 " automatically" % self.instance_name,
9486 self.disks = faulty_primary
9487 self.target_node = instance.primary_node
9488 self.other_node = secondary_node
9489 check_nodes = [self.target_node, self.other_node]
9490 elif faulty_secondary:
9491 self.disks = faulty_secondary
9492 self.target_node = secondary_node
9493 self.other_node = instance.primary_node
9494 check_nodes = [self.target_node, self.other_node]
9495 else:
9496 self.disks = []
9497 check_nodes = []
9499 else:
9500 # Non-automatic modes
9501 if self.mode == constants.REPLACE_DISK_PRI:
9502 self.target_node = instance.primary_node
9503 self.other_node = secondary_node
9504 check_nodes = [self.target_node, self.other_node]
9506 elif self.mode == constants.REPLACE_DISK_SEC:
9507 self.target_node = secondary_node
9508 self.other_node = instance.primary_node
9509 check_nodes = [self.target_node, self.other_node]
9511 elif self.mode == constants.REPLACE_DISK_CHG:
9512 self.new_node = remote_node
9513 self.other_node = instance.primary_node
9514 self.target_node = secondary_node
9515 check_nodes = [self.new_node, self.other_node]
9517 _CheckNodeNotDrained(self.lu, remote_node)
9518 _CheckNodeVmCapable(self.lu, remote_node)
9520 old_node_info = self.cfg.GetNodeInfo(secondary_node)
9521 assert old_node_info is not None
9522 if old_node_info.offline and not self.early_release:
9523 # doesn't make sense to delay the release
9524 self.early_release = True
9525 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
9526 " early-release mode", secondary_node)
9528 else:
9529 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
9530 self.mode)
9532 # If not specified all disks should be replaced
9533 if not self.disks:
9534 self.disks = range(len(self.instance.disks))
9536 for node in check_nodes:
9537 _CheckNodeOnline(self.lu, node)
9539 touched_nodes = frozenset(node_name for node_name in [self.new_node,
9540 self.other_node,
9541 self.target_node]
9542 if node_name is not None)
9544 # Release unneeded node locks
9545 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
9547 # Release any owned node group
9548 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
9549 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
9551 # Check whether disks are valid
9552 for disk_idx in self.disks:
9553 instance.FindDisk(disk_idx)
9555 # Get secondary node IP addresses
9556 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
9557 in self.cfg.GetMultiNodeInfo(touched_nodes))
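# node_secondary_ip maps each relevant node name to its secondary
# (replication network) IP address; the DRBD disconnect/attach RPCs in
# the Exec path use these addresses instead of the primary interface.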
9559 def Exec(self, feedback_fn):
9560 """Execute disk replacement.
9562 This dispatches the disk replacement to the appropriate handler.
9564 """
9565 if self.delay_iallocator:
9566 self._CheckPrereq2()
9569 # Verify owned locks before starting operation
9570 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9571 assert set(owned_nodes) == set(self.node_secondary_ip), \
9572 ("Incorrect node locks, owning %s, expected %s" %
9573 (owned_nodes, self.node_secondary_ip.keys()))
9575 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
9576 assert list(owned_instances) == [self.instance_name], \
9577 "Instance '%s' not locked" % self.instance_name
9579 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
9580 "Should not own any node group lock at this point"
9582 if not self.disks:
9583 feedback_fn("No disks need replacement")
9584 return
9586 feedback_fn("Replacing disk(s) %s for %s" %
9587 (utils.CommaJoin(self.disks), self.instance.name))
9589 activate_disks = (not self.instance.admin_up)
9591 # Activate the instance disks if we're replacing them on a down instance
9592 if activate_disks:
9593 _StartInstanceDisks(self.lu, self.instance, True)
9595 try:
9596 # Should we replace the secondary node?
9597 if self.new_node is not None:
9598 fn = self._ExecDrbd8Secondary
9599 else:
9600 fn = self._ExecDrbd8DiskOnly
9602 result = fn(feedback_fn)
9603 finally:
9604 # Deactivate the instance disks if we're replacing them on a
9605 # down instance
9606 if activate_disks:
9607 _SafeShutdownInstanceDisks(self.lu, self.instance)
9610 # Verify owned locks
9611 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9612 nodes = frozenset(self.node_secondary_ip)
9613 assert ((self.early_release and not owned_nodes) or
9614 (not self.early_release and not (set(owned_nodes) - nodes))), \
9615 ("Not owning the correct locks, early_release=%s, owned=%r,"
9616 " nodes=%r" % (self.early_release, owned_nodes, nodes))
9620 def _CheckVolumeGroup(self, nodes):
9621 self.lu.LogInfo("Checking volume groups")
9623 vgname = self.cfg.GetVGName()
9625 # Make sure volume group exists on all involved nodes
9626 results = self.rpc.call_vg_list(nodes)
9627 if not results:
9628 raise errors.OpExecError("Can't list volume groups on the nodes")
9630 for node in nodes:
9631 res = results[node]
9632 res.Raise("Error checking node %s" % node)
9633 if vgname not in res.payload:
9634 raise errors.OpExecError("Volume group '%s' not found on node %s" %
9635 (vgname, node))
9637 def _CheckDisksExistence(self, nodes):
9638 # Check disk existence
9639 for idx, dev in enumerate(self.instance.disks):
9640 if idx not in self.disks:
9641 continue
9643 for node in nodes:
9644 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
9645 self.cfg.SetDiskID(dev, node)
9647 result = self.rpc.call_blockdev_find(node, dev)
9649 msg = result.fail_msg
9650 if msg or not result.payload:
9651 if not msg:
9652 msg = "disk not found"
9653 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
9654 (idx, node, msg))
9656 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
9657 for idx, dev in enumerate(self.instance.disks):
9658 if idx not in self.disks:
9659 continue
9661 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
9662 (idx, node_name))
9664 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
9665 ldisk=ldisk):
9666 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
9667 " replace disks for instance %s" %
9668 (node_name, self.instance.name))
9670 def _CreateNewStorage(self, node_name):
9671 """Create new storage on the primary or secondary node.
9673 This is only used for same-node replaces, not for changing the
9674 secondary node, hence we don't want to modify the existing disk.
9676 """
9677 iv_names = {}
9679 for idx, dev in enumerate(self.instance.disks):
9680 if idx not in self.disks:
9681 continue
9683 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
9685 self.cfg.SetDiskID(dev, node_name)
9687 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
9688 names = _GenerateUniqueNames(self.lu, lv_names)
9690 vg_data = dev.children[0].logical_id[0]
9691 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
9692 logical_id=(vg_data, names[0]))
9693 vg_meta = dev.children[1].logical_id[0]
9694 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
9695 logical_id=(vg_meta, names[1]))
9697 new_lvs = [lv_data, lv_meta]
9698 old_lvs = [child.Copy() for child in dev.children]
9699 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
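# iv_names maps a device's iv_name to (drbd_dev, old_lvs, new_lvs); the
# detach/rename/attach loop below and the final _RemoveOldStorage both
# work off this mapping.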
9701 # we pass force_create=True to force the LVM creation
9702 for new_lv in new_lvs:
9703 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
9704 _GetInstanceInfoText(self.instance), False)
9706 return iv_names
9708 def _CheckDevices(self, node_name, iv_names):
9709 for name, (dev, _, _) in iv_names.iteritems():
9710 self.cfg.SetDiskID(dev, node_name)
9712 result = self.rpc.call_blockdev_find(node_name, dev)
9714 msg = result.fail_msg
9715 if msg or not result.payload:
9716 if not msg:
9717 msg = "disk not found"
9718 raise errors.OpExecError("Can't find DRBD device %s: %s" %
9719 (name, msg))
9721 if result.payload.is_degraded:
9722 raise errors.OpExecError("DRBD device %s is degraded!" % name)
9724 def _RemoveOldStorage(self, node_name, iv_names):
9725 for name, (_, old_lvs, _) in iv_names.iteritems():
9726 self.lu.LogInfo("Remove logical volumes for %s" % name)
9728 for lv in old_lvs:
9729 self.cfg.SetDiskID(lv, node_name)
9731 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
9732 if msg:
9733 self.lu.LogWarning("Can't remove old LV: %s" % msg,
9734 hint="remove unused LVs manually")
9736 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
9737 """Replace a disk on the primary or secondary for DRBD 8.
9739 The algorithm for replace is quite complicated:
9741 1. for each disk to be replaced:
9743 1. create new LVs on the target node with unique names
9744 1. detach old LVs from the drbd device
9745 1. rename old LVs to name_replaced.<time_t>
9746 1. rename new LVs to old LVs
9747 1. attach the new LVs (with the old names now) to the drbd device
9749 1. wait for sync across all devices
9751 1. for each modified disk:
9753 1. remove old LVs (which have the name name_replaced.<time_t>)
9755 Failures are not very well handled.
9757 """
9758 steps_total = 6
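# Illustrative sketch of step 4 for a single disk (names abbreviated):
#   detach old LVs from the drbd device
#   rename old LVs:  <name>  -> <name>_replaced-<time_t>
#   rename new LVs:  <uniq>  -> <name>
#   re-attach the LVs now carrying the old names
# i.e. exactly the per-disk dance described in the docstring above.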
9760 # Step: check device activation
9761 self.lu.LogStep(1, steps_total, "Check device existence")
9762 self._CheckDisksExistence([self.other_node, self.target_node])
9763 self._CheckVolumeGroup([self.target_node, self.other_node])
9765 # Step: check other node consistency
9766 self.lu.LogStep(2, steps_total, "Check peer consistency")
9767 self._CheckDisksConsistency(self.other_node,
9768 self.other_node == self.instance.primary_node,
9769 False)
9771 # Step: create new storage
9772 self.lu.LogStep(3, steps_total, "Allocate new storage")
9773 iv_names = self._CreateNewStorage(self.target_node)
9775 # Step: for each lv, detach+rename*2+attach
9776 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9777 for dev, old_lvs, new_lvs in iv_names.itervalues():
9778 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
9780 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
9781 old_lvs)
9782 result.Raise("Can't detach drbd from local storage on node"
9783 " %s for device %s" % (self.target_node, dev.iv_name))
9785 #cfg.Update(instance)
9787 # ok, we created the new LVs, so now we know we have the needed
9788 # storage; as such, we proceed on the target node to rename
9789 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
9790 # using the assumption that logical_id == physical_id (which in
9791 # turn is the unique_id on that node)
9793 # FIXME(iustin): use a better name for the replaced LVs
9794 temp_suffix = int(time.time())
9795 ren_fn = lambda d, suff: (d.physical_id[0],
9796 d.physical_id[1] + "_replaced-%s" % suff)
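# For an LV the physical_id is the pair (vg_name, lv_name), so ren_fn
# keeps the volume group and merely suffixes the LV name.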
9798 # Build the rename list based on what LVs exist on the node
9799 rename_old_to_new = []
9800 for to_ren in old_lvs:
9801 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
9802 if not result.fail_msg and result.payload:
9803 # device exists
9804 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
9806 self.lu.LogInfo("Renaming the old LVs on the target node")
9807 result = self.rpc.call_blockdev_rename(self.target_node,
9808 rename_old_to_new)
9809 result.Raise("Can't rename old LVs on node %s" % self.target_node)
9811 # Now we rename the new LVs to the old LVs
9812 self.lu.LogInfo("Renaming the new LVs on the target node")
9813 rename_new_to_old = [(new, old.physical_id)
9814 for old, new in zip(old_lvs, new_lvs)]
9815 result = self.rpc.call_blockdev_rename(self.target_node,
9816 rename_new_to_old)
9817 result.Raise("Can't rename new LVs on node %s" % self.target_node)
9819 # Intermediate steps of in memory modifications
9820 for old, new in zip(old_lvs, new_lvs):
9821 new.logical_id = old.logical_id
9822 self.cfg.SetDiskID(new, self.target_node)
9824 # We need to modify old_lvs so that removal later removes the
9825 # right LVs, not the newly added ones; note that old_lvs is a
9826 # copy here
9827 for disk in old_lvs:
9828 disk.logical_id = ren_fn(disk, temp_suffix)
9829 self.cfg.SetDiskID(disk, self.target_node)
9831 # Now that the new lvs have the old name, we can add them to the device
9832 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
9833 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
9834 new_lvs)
9835 msg = result.fail_msg
9836 if msg:
9837 for new_lv in new_lvs:
9838 msg2 = self.rpc.call_blockdev_remove(self.target_node,
9839 new_lv).fail_msg
9840 if msg2:
9841 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
9842 hint=("cleanup manually the unused logical"
9843 " volumes"))
9844 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
9846 cstep = 5
9847 if self.early_release:
9848 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9849 cstep += 1
9850 self._RemoveOldStorage(self.target_node, iv_names)
9851 # WARNING: we release both node locks here, do not do other RPCs
9852 # than WaitForSync to the primary node
9853 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9854 names=[self.target_node, self.other_node])
9856 # Wait for sync
9857 # This can fail as the old devices are degraded and _WaitForSync
9858 # does a combined result over all disks, so we don't check its return value
9859 self.lu.LogStep(cstep, steps_total, "Sync devices")
9860 cstep += 1
9861 _WaitForSync(self.lu, self.instance)
9863 # Check all devices manually
9864 self._CheckDevices(self.instance.primary_node, iv_names)
9866 # Step: remove old storage
9867 if not self.early_release:
9868 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9869 cstep += 1
9870 self._RemoveOldStorage(self.target_node, iv_names)
9872 def _ExecDrbd8Secondary(self, feedback_fn):
9873 """Replace the secondary node for DRBD 8.
9875 The algorithm for replace is quite complicated:
9876 - for all disks of the instance:
9877 - create new LVs on the new node with same names
9878 - shutdown the drbd device on the old secondary
9879 - disconnect the drbd network on the primary
9880 - create the drbd device on the new secondary
9881 - network attach the drbd on the primary, using an artifice:
9882 the drbd code for Attach() will connect to the network if it
9883 finds a device which is connected to the good local disks but
9884 not network enabled
9885 - wait for sync across all devices
9886 - remove all disks from the old secondary
9888 Failures are not very well handled.
9890 """
9891 steps_total = 6
9893 pnode = self.instance.primary_node
9895 # Step: check device activation
9896 self.lu.LogStep(1, steps_total, "Check device existence")
9897 self._CheckDisksExistence([self.instance.primary_node])
9898 self._CheckVolumeGroup([self.instance.primary_node])
9900 # Step: check other node consistency
9901 self.lu.LogStep(2, steps_total, "Check peer consistency")
9902 self._CheckDisksConsistency(self.instance.primary_node, True, True)
9904 # Step: create new storage
9905 self.lu.LogStep(3, steps_total, "Allocate new storage")
9906 for idx, dev in enumerate(self.instance.disks):
9907 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
9908 (self.new_node, idx))
9909 # we pass force_create=True to force LVM creation
9910 for new_lv in dev.children:
9911 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
9912 _GetInstanceInfoText(self.instance), False)
9914 # Step 4: drbd minors and drbd setup changes
9915 # after this, we must manually remove the drbd minors on both the
9916 # error and the success paths
9917 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9918 minors = self.cfg.AllocateDRBDMinor([self.new_node
9919 for dev in self.instance.disks],
9920 self.instance.name)
9921 logging.debug("Allocated minors %r", minors)
9923 iv_names = {}
9924 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
9925 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
9926 (self.new_node, idx))
9927 # create new devices on new_node; note that we create two IDs:
9928 # one without port, so the drbd will be activated without
9929 # networking information on the new node at this stage, and one
9930 # with network, for the latter activation in step 4
9931 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
9932 if self.instance.primary_node == o_node1:
9933 p_minor = o_minor1
9934 else:
9935 assert self.instance.primary_node == o_node2, "Three-node instance?"
9936 p_minor = o_minor2
9938 new_alone_id = (self.instance.primary_node, self.new_node, None,
9939 p_minor, new_minor, o_secret)
9940 new_net_id = (self.instance.primary_node, self.new_node, o_port,
9941 p_minor, new_minor, o_secret)
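# A DRBD8 logical_id is (node_a, node_b, port, minor_a, minor_b, secret);
# new_alone_id deliberately carries port=None so the device is brought up
# on the new node without networking first (see step 4 comment above).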
9943 iv_names[idx] = (dev, dev.children, new_net_id)
9944 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
9945 new_net_id)
9946 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
9947 logical_id=new_alone_id,
9948 children=dev.children,
9949 size=dev.size)
9950 try:
9951 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
9952 _GetInstanceInfoText(self.instance), False)
9953 except errors.GenericError:
9954 self.cfg.ReleaseDRBDMinors(self.instance.name)
9955 raise
9957 # We have new devices, shutdown the drbd on the old secondary
9958 for idx, dev in enumerate(self.instance.disks):
9959 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
9960 self.cfg.SetDiskID(dev, self.target_node)
9961 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
9962 if msg:
9963 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
9964 " node: %s" % (idx, msg),
9965 hint=("Please cleanup this device manually as"
9966 " soon as possible"))
9968 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
9969 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
9970 self.instance.disks)[pnode]
9972 msg = result.fail_msg
9973 if msg:
9974 # detaches didn't succeed (unlikely)
9975 self.cfg.ReleaseDRBDMinors(self.instance.name)
9976 raise errors.OpExecError("Can't detach the disks from the network on"
9977 " old node: %s" % (msg,))
9979 # if we managed to detach at least one, we update all the disks of
9980 # the instance to point to the new secondary
9981 self.lu.LogInfo("Updating instance configuration")
9982 for dev, _, new_logical_id in iv_names.itervalues():
9983 dev.logical_id = new_logical_id
9984 self.cfg.SetDiskID(dev, self.instance.primary_node)
9986 self.cfg.Update(self.instance, feedback_fn)
9988 # and now perform the drbd attach
9989 self.lu.LogInfo("Attaching primary drbds to new secondary"
9990 " (standalone => connected)")
9991 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
9992 self.new_node],
9993 self.node_secondary_ip,
9994 self.instance.disks,
9995 self.instance.name,
9996 False)
9997 for to_node, to_result in result.items():
9998 msg = to_result.fail_msg
9999 if msg:
10000 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
10001 to_node, msg,
10002 hint=("please do a gnt-instance info to see the"
10003 " status of disks"))
10004 cstep = 5
10005 if self.early_release:
10006 self.lu.LogStep(cstep, steps_total, "Removing old storage")
10007 cstep += 1
10008 self._RemoveOldStorage(self.target_node, iv_names)
10009 # WARNING: we release all node locks here, do not do other RPCs
10010 # than WaitForSync to the primary node
10011 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
10012 names=[self.instance.primary_node,
10013 self.target_node,
10014 self.new_node])
10016 # Wait for sync
10017 # This can fail as the old devices are degraded and _WaitForSync
10018 # does a combined result over all disks, so we don't check its return value
10019 self.lu.LogStep(cstep, steps_total, "Sync devices")
10020 cstep += 1
10021 _WaitForSync(self.lu, self.instance)
10023 # Check all devices manually
10024 self._CheckDevices(self.instance.primary_node, iv_names)
10026 # Step: remove old storage
10027 if not self.early_release:
10028 self.lu.LogStep(cstep, steps_total, "Removing old storage")
10029 self._RemoveOldStorage(self.target_node, iv_names)
10032 class LURepairNodeStorage(NoHooksLU):
10033 """Repairs the volume group on a node.
10038 def CheckArguments(self):
10039 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10041 storage_type = self.op.storage_type
10043 if (constants.SO_FIX_CONSISTENCY not in
10044 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
10045 raise errors.OpPrereqError("Storage units of type '%s' can not be"
10046 " repaired" % storage_type,
10047 errors.ECODE_INVAL)
10049 def ExpandNames(self):
10050 self.needed_locks = {
10051 locking.LEVEL_NODE: [self.op.node_name],
10052 }
10054 def _CheckFaultyDisks(self, instance, node_name):
10055 """Ensure faulty disks abort the opcode or at least warn."""
10056 try:
10057 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
10058 node_name, True):
10060 " node '%s'" % (instance.name, node_name),
10061 errors.ECODE_STATE)
10062 except errors.OpPrereqError, err:
10063 if self.op.ignore_consistency:
10064 self.proc.LogWarning(str(err.args[0]))
10065 else:
10066 raise
10068 def CheckPrereq(self):
10069 """Check prerequisites.
10072 # Check whether any instance on this node has faulty disks
10073 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
10074 if not inst.admin_up:
10075 continue
10076 check_nodes = set(inst.all_nodes)
10077 check_nodes.discard(self.op.node_name)
10078 for inst_node_name in check_nodes:
10079 self._CheckFaultyDisks(inst, inst_node_name)
10081 def Exec(self, feedback_fn):
10082 feedback_fn("Repairing storage unit '%s' on %s ..." %
10083 (self.op.name, self.op.node_name))
10085 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
10086 result = self.rpc.call_storage_execute(self.op.node_name,
10087 self.op.storage_type, st_args,
10089 constants.SO_FIX_CONSISTENCY)
10090 result.Raise("Failed to repair storage unit '%s' on %s" %
10091 (self.op.name, self.op.node_name))
10094 class LUNodeEvacuate(NoHooksLU):
10095 """Evacuates instances off a list of nodes.
10100 def CheckArguments(self):
10101 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10103 def ExpandNames(self):
10104 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10106 if self.op.remote_node is not None:
10107 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10108 assert self.op.remote_node
10110 if self.op.remote_node == self.op.node_name:
10111 raise errors.OpPrereqError("Can not use evacuated node as a new"
10112 " secondary node", errors.ECODE_INVAL)
10114 if self.op.mode != constants.IALLOCATOR_NEVAC_SEC:
10115 raise errors.OpPrereqError("Without the use of an iallocator only"
10116 " secondary instances can be evacuated",
10117 errors.ECODE_INVAL)
10120 self.share_locks = _ShareAll()
10121 self.needed_locks = {
10122 locking.LEVEL_INSTANCE: [],
10123 locking.LEVEL_NODEGROUP: [],
10124 locking.LEVEL_NODE: [],
10125 }
10127 # Determine nodes (via group) optimistically, needs verification once locks
10128 # have been acquired
10129 self.lock_nodes = self._DetermineNodes()
10131 def _DetermineNodes(self):
10132 """Gets the list of nodes to operate on.
10135 if self.op.remote_node is None:
10136 # Iallocator will choose any node(s) in the same group
10137 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
10138 else:
10139 group_nodes = frozenset([self.op.remote_node])
10141 # Determine nodes to be locked
10142 return set([self.op.node_name]) | group_nodes
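# The node set computed here is only optimistic: CheckPrereq recomputes
# it after the locks are held and aborts with "retry the operation" if
# group membership changed in the meantime.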
10144 def _DetermineInstances(self):
10145 """Builds list of instances to operate on.
10148 assert self.op.mode in constants.IALLOCATOR_NEVAC_MODES
10150 if self.op.mode == constants.IALLOCATOR_NEVAC_PRI:
10151 # Primary instances only
10152 inst_fn = _GetNodePrimaryInstances
10153 assert self.op.remote_node is None, \
10154 "Evacuating primary instances requires iallocator"
10155 elif self.op.mode == constants.IALLOCATOR_NEVAC_SEC:
10156 # Secondary instances only
10157 inst_fn = _GetNodeSecondaryInstances
10158 else:
10159 # All instances
10160 assert self.op.mode == constants.IALLOCATOR_NEVAC_ALL
10161 inst_fn = _GetNodeInstances
10162 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
10164 raise errors.OpPrereqError("Due to an issue with the iallocator"
10165 " interface it is not possible to evacuate"
10166 " all instances at once; specify explicitly"
10167 " whether to evacuate primary or secondary"
10169 errors.ECODE_INVAL)
10171 return inst_fn(self.cfg, self.op.node_name)
10173 def DeclareLocks(self, level):
10174 if level == locking.LEVEL_INSTANCE:
10175 # Lock instances optimistically, needs verification once node and group
10176 # locks have been acquired
10177 self.needed_locks[locking.LEVEL_INSTANCE] = \
10178 set(i.name for i in self._DetermineInstances())
10180 elif level == locking.LEVEL_NODEGROUP:
10181 # Lock node groups for all potential target nodes optimistically, needs
10182 # verification once nodes have been acquired
10183 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10184 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
10186 elif level == locking.LEVEL_NODE:
10187 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
10189 def CheckPrereq(self):
10190 # Verify locks
10191 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
10192 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
10193 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10195 need_nodes = self._DetermineNodes()
10197 if not owned_nodes.issuperset(need_nodes):
10198 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
10199 " locks were acquired, current nodes"
10200 " are '%s', used to be '%s'; retry the"
10201 " operation" %
10202 (self.op.node_name,
10203 utils.CommaJoin(need_nodes),
10204 utils.CommaJoin(owned_nodes)),
10205 errors.ECODE_STATE)
10207 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
10208 if owned_groups != wanted_groups:
10209 raise errors.OpExecError("Node groups changed since locks were acquired,"
10210 " current groups are '%s', used to be '%s';"
10211 " retry the operation" %
10212 (utils.CommaJoin(wanted_groups),
10213 utils.CommaJoin(owned_groups)))
10215 # Determine affected instances
10216 self.instances = self._DetermineInstances()
10217 self.instance_names = [i.name for i in self.instances]
10219 if set(self.instance_names) != owned_instances:
10220 raise errors.OpExecError("Instances on node '%s' changed since locks"
10221 " were acquired, current instances are '%s',"
10222 " used to be '%s'; retry the operation" %
10223 (self.op.node_name,
10224 utils.CommaJoin(self.instance_names),
10225 utils.CommaJoin(owned_instances)))
10227 if self.instance_names:
10228 self.LogInfo("Evacuating instances from node '%s': %s",
10229 self.op.node_name,
10230 utils.CommaJoin(utils.NiceSort(self.instance_names)))
10231 else:
10232 self.LogInfo("No instances to evacuate from node '%s'",
10233 self.op.node_name)
10235 if self.op.remote_node is not None:
10236 for i in self.instances:
10237 if i.primary_node == self.op.remote_node:
10238 raise errors.OpPrereqError("Node %s is the primary node of"
10239 " instance %s, cannot use it as"
10241 (self.op.remote_node, i.name),
10242 errors.ECODE_INVAL)
10244 def Exec(self, feedback_fn):
10245 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10247 if not self.instance_names:
10248 # No instances to evacuate
10249 jobs = []
10251 elif self.op.iallocator is not None:
10252 # TODO: Implement relocation to other group
10253 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10254 evac_mode=self.op.mode,
10255 instances=list(self.instance_names))
10257 ial.Run(self.op.iallocator)
10259 if not ial.success:
10260 raise errors.OpPrereqError("Can't compute node evacuation using"
10261 " iallocator '%s': %s" %
10262 (self.op.iallocator, ial.info),
10263 errors.ECODE_NORES)
10265 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10267 elif self.op.remote_node is not None:
10268 assert self.op.mode == constants.IALLOCATOR_NEVAC_SEC
10269 jobs = [
10270 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
10271 remote_node=self.op.remote_node,
10272 disks=[],
10273 mode=constants.REPLACE_DISK_CHG,
10274 early_release=self.op.early_release)]
10275 for instance_name in self.instance_names
10276 ]
10278 else:
10279 raise errors.ProgrammerError("No iallocator or remote node")
10281 return ResultWithJobs(jobs)
10284 def _SetOpEarlyRelease(early_release, op):
10285 """Sets C{early_release} flag on opcodes if available.
10289 op.early_release = early_release
10290 except AttributeError:
10291 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
10296 def _NodeEvacDest(use_nodes, group, nodes):
10297 """Returns group or nodes depending on caller's choice.
10301 return utils.CommaJoin(nodes)
10306 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
10307 """Unpacks the result of change-group and node-evacuate iallocator requests.
10309 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
10310 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
10312 @type lu: L{LogicalUnit}
10313 @param lu: Logical unit instance
10314 @type alloc_result: tuple/list
10315 @param alloc_result: Result from iallocator
10316 @type early_release: bool
10317 @param early_release: Whether to release locks early if possible
10318 @type use_nodes: bool
10319 @param use_nodes: Whether to display node names instead of groups
10321 """
10322 (moved, failed, jobs) = alloc_result
10324 if failed:
10325 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
10326 for (name, reason) in failed)
10327 lu.LogWarning("Unable to evacuate instances %s", failreason)
10328 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
10330 if moved:
10331 lu.LogInfo("Instances to be moved: %s",
10332 utils.CommaJoin("%s (to %s)" %
10333 (name, _NodeEvacDest(use_nodes, group, nodes))
10334 for (name, group, nodes) in moved))
10336 return [map(compat.partial(_SetOpEarlyRelease, early_release),
10337 map(opcodes.OpCode.LoadOpCode, ops))
10338 for ops in jobs]
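# Expected shape of alloc_result (a sketch, per the unpacking above):
#   (moved, failed, jobs) where
#     moved:  [(instance_name, group, [node_name, ...]), ...]
#     failed: [(instance_name, reason), ...]
#     jobs:   [[serialized_opcode, ...], ...]  # one inner list per job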
10341 class LUInstanceGrowDisk(LogicalUnit):
10342 """Grow a disk of an instance.
10345 HPATH = "disk-grow"
10346 HTYPE = constants.HTYPE_INSTANCE
10349 def ExpandNames(self):
10350 self._ExpandAndLockInstance()
10351 self.needed_locks[locking.LEVEL_NODE] = []
10352 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10354 def DeclareLocks(self, level):
10355 if level == locking.LEVEL_NODE:
10356 self._LockInstancesNodes()
10358 def BuildHooksEnv(self):
10359 """Build hooks env.
10361 This runs on the master, the primary and all the secondaries.
10363 """
10364 env = {
10365 "DISK": self.op.disk,
10366 "AMOUNT": self.op.amount,
10368 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10371 def BuildHooksNodes(self):
10372 """Build hooks nodes.
10375 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10378 def CheckPrereq(self):
10379 """Check prerequisites.
10381 This checks that the instance is in the cluster.
10383 """
10384 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10385 assert instance is not None, \
10386 "Cannot retrieve locked instance %s" % self.op.instance_name
10387 nodenames = list(instance.all_nodes)
10388 for node in nodenames:
10389 _CheckNodeOnline(self, node)
10391 self.instance = instance
10393 if instance.disk_template not in constants.DTS_GROWABLE:
10394 raise errors.OpPrereqError("Instance's disk layout does not support"
10395 " growing", errors.ECODE_INVAL)
10397 self.disk = instance.FindDisk(self.op.disk)
10399 if instance.disk_template not in (constants.DT_FILE,
10400 constants.DT_SHARED_FILE):
10401 # TODO: check the free disk space for file, when that feature will be
10402 # supported
10403 _CheckNodesFreeDiskPerVG(self, nodenames,
10404 self.disk.ComputeGrowth(self.op.amount))
10406 def Exec(self, feedback_fn):
10407 """Execute disk grow.
10410 instance = self.instance
10413 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
10415 raise errors.OpExecError("Cannot activate block device to grow")
10417 # First run all grow ops in dry-run mode
10418 for node in instance.all_nodes:
10419 self.cfg.SetDiskID(disk, node)
10420 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
10421 result.Raise("Grow request failed to node %s" % node)
10423 # We know that (as far as we can test) operations across different
10424 # nodes will succeed, time to run it for real
10425 for node in instance.all_nodes:
10426 self.cfg.SetDiskID(disk, node)
10427 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
10428 result.Raise("Grow request failed to node %s" % node)
10430 # TODO: Rewrite code to work properly
10431 # DRBD goes into sync mode for a short amount of time after executing the
10432 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
10433 # calling "resize" in sync mode fails. Sleeping for a short amount of
10434 # time is a work-around.
10436 time.sleep(5)
10437 disk.RecordGrow(self.op.amount)
10438 self.cfg.Update(instance, feedback_fn)
10439 if self.op.wait_for_sync:
10440 disk_abort = not _WaitForSync(self, instance, disks=[disk])
10441 if disk_abort:
10442 self.proc.LogWarning("Disk sync-ing has not returned a good"
10443 " status; please check the instance")
10444 if not instance.admin_up:
10445 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
10446 elif not instance.admin_up:
10447 self.proc.LogWarning("Not shutting down the disk even if the instance is"
10448 " not supposed to be running because no wait for"
10449 " sync mode was requested")
10452 class LUInstanceQueryData(NoHooksLU):
10453 """Query runtime instance data.
10458 def ExpandNames(self):
10459 self.needed_locks = {}
10461 # Use locking if requested or when non-static information is wanted
10462 if not (self.op.static or self.op.use_locking):
10463 self.LogWarning("Non-static data requested, locks need to be acquired")
10464 self.op.use_locking = True
10466 if self.op.instances or not self.op.use_locking:
10467 # Expand instance names right here
10468 self.wanted_names = _GetWantedInstances(self, self.op.instances)
10470 # Will use acquired locks
10471 self.wanted_names = None
10473 if self.op.use_locking:
10474 self.share_locks = _ShareAll()
10476 if self.wanted_names is None:
10477 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
10479 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
10481 self.needed_locks[locking.LEVEL_NODE] = []
10482 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10484 def DeclareLocks(self, level):
10485 if self.op.use_locking and level == locking.LEVEL_NODE:
10486 self._LockInstancesNodes()
10488 def CheckPrereq(self):
10489 """Check prerequisites.
10491 This only checks the optional instance list against the existing names.
10493 """
10494 if self.wanted_names is None:
10495 assert self.op.use_locking, "Locking was not used"
10496 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
10498 self.wanted_instances = \
10499 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
10501 def _ComputeBlockdevStatus(self, node, instance_name, dev):
10502 """Returns the status of a block device
10505 if self.op.static or not node:
10508 self.cfg.SetDiskID(dev, node)
10510 result = self.rpc.call_blockdev_find(node, dev)
10514 result.Raise("Can't compute disk status for %s" % instance_name)
10516 status = result.payload
10520 return (status.dev_path, status.major, status.minor,
10521 status.sync_percent, status.estimated_time,
10522 status.is_degraded, status.ldisk_status)
10524 def _ComputeDiskStatus(self, instance, snode, dev):
10525 """Compute block device status.
10528 if dev.dev_type in constants.LDS_DRBD:
10529 # we change the snode then (otherwise we use the one passed in)
10530 if dev.logical_id[0] == instance.primary_node:
10531 snode = dev.logical_id[1]
10531 snode = dev.logical_id[1]
10532 else:
10535 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
10536 instance.name, dev)
10537 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
10539 if dev.children:
10540 dev_children = map(compat.partial(self._ComputeDiskStatus,
10541 instance, snode),
10542 dev.children)
10543 else:
10544 dev_children = []
10546 return {
10547 "iv_name": dev.iv_name,
10548 "dev_type": dev.dev_type,
10549 "logical_id": dev.logical_id,
10550 "physical_id": dev.physical_id,
10551 "pstatus": dev_pstatus,
10552 "sstatus": dev_sstatus,
10553 "children": dev_children,
10558 def Exec(self, feedback_fn):
10559 """Gather and return data"""
10562 cluster = self.cfg.GetClusterInfo()
10564 pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
10565 for i in self.wanted_instances)
10566 for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
10567 if self.op.static or pnode.offline:
10568 remote_state = None
10569 if pnode.offline:
10570 self.LogWarning("Primary node %s is marked offline, returning static"
10571 " information only for instance %s" %
10572 (pnode.name, instance.name))
10573 else:
10574 remote_info = self.rpc.call_instance_info(instance.primary_node,
10575 instance.name,
10576 instance.hypervisor)
10577 remote_info.Raise("Error checking node %s" % instance.primary_node)
10578 remote_info = remote_info.payload
10579 if remote_info and "state" in remote_info:
10580 remote_state = "up"
10581 else:
10582 remote_state = "down"
10584 if instance.admin_up:
10585 config_state = "up"
10586 else:
10587 config_state = "down"
10589 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
10590 instance.disks)
10592 result[instance.name] = {
10593 "name": instance.name,
10594 "config_state": config_state,
10595 "run_state": remote_state,
10596 "pnode": instance.primary_node,
10597 "snodes": instance.secondary_nodes,
10599 # this happens to be the same format used for hooks
10600 "nics": _NICListToTuple(self, instance.nics),
10601 "disk_template": instance.disk_template,
10603 "hypervisor": instance.hypervisor,
10604 "network_port": instance.network_port,
10605 "hv_instance": instance.hvparams,
10606 "hv_actual": cluster.FillHV(instance, skip_globals=True),
10607 "be_instance": instance.beparams,
10608 "be_actual": cluster.FillBE(instance),
10609 "os_instance": instance.osparams,
10610 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
10611 "serial_no": instance.serial_no,
10612 "mtime": instance.mtime,
10613 "ctime": instance.ctime,
10614 "uuid": instance.uuid,
10620 class LUInstanceSetParams(LogicalUnit):
10621 """Modifies an instances's parameters.
10624 HPATH = "instance-modify"
10625 HTYPE = constants.HTYPE_INSTANCE
10628 def CheckArguments(self):
10629 if not (self.op.nics or self.op.disks or self.op.disk_template or
10630 self.op.hvparams or self.op.beparams or self.op.os_name):
10631 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
10633 if self.op.hvparams:
10634 _CheckGlobalHvParams(self.op.hvparams)
10637 disk_addremove = 0
10638 for disk_op, disk_dict in self.op.disks:
10639 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
10640 if disk_op == constants.DDM_REMOVE:
10641 disk_addremove += 1
10642 continue
10643 elif disk_op == constants.DDM_ADD:
10644 disk_addremove += 1
10645 else:
10646 if not isinstance(disk_op, int):
10647 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
10648 if not isinstance(disk_dict, dict):
10649 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
10650 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10652 if disk_op == constants.DDM_ADD:
10653 mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
10654 if mode not in constants.DISK_ACCESS_SET:
10655 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
10656 errors.ECODE_INVAL)
10657 size = disk_dict.get(constants.IDISK_SIZE, None)
10658 if size is None:
10659 raise errors.OpPrereqError("Required disk parameter size missing",
10660 errors.ECODE_INVAL)
10661 try:
10662 size = int(size)
10663 except (TypeError, ValueError), err:
10664 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
10665 str(err), errors.ECODE_INVAL)
10666 disk_dict[constants.IDISK_SIZE] = size
10667 else:
10668 # modification of disk
10669 if constants.IDISK_SIZE in disk_dict:
10670 raise errors.OpPrereqError("Disk size change not possible, use"
10671 " grow-disk", errors.ECODE_INVAL)
10673 if disk_addremove > 1:
10674 raise errors.OpPrereqError("Only one disk add or remove operation"
10675 " supported at a time", errors.ECODE_INVAL)
10677 if self.op.disks and self.op.disk_template is not None:
10678 raise errors.OpPrereqError("Disk template conversion and other disk"
10679 " changes not supported at the same time",
10680 errors.ECODE_INVAL)
10682 if (self.op.disk_template and
10683 self.op.disk_template in constants.DTS_INT_MIRROR and
10684 self.op.remote_node is None):
10685 raise errors.OpPrereqError("Changing the disk template to a mirrored"
10686 " one requires specifying a secondary node",
10687 errors.ECODE_INVAL)
10690 nic_addremove = 0
10691 for nic_op, nic_dict in self.op.nics:
10692 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
10693 if nic_op == constants.DDM_REMOVE:
10694 nic_addremove += 1
10695 continue
10696 elif nic_op == constants.DDM_ADD:
10697 nic_addremove += 1
10698 else:
10699 if not isinstance(nic_op, int):
10700 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
10701 if not isinstance(nic_dict, dict):
10702 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
10703 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10705 # nic_dict should be a dict
10706 nic_ip = nic_dict.get(constants.INIC_IP, None)
10707 if nic_ip is not None:
10708 if nic_ip.lower() == constants.VALUE_NONE:
10709 nic_dict[constants.INIC_IP] = None
10711 if not netutils.IPAddress.IsValid(nic_ip):
10712 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
10713 errors.ECODE_INVAL)
10715 nic_bridge = nic_dict.get("bridge", None)
10716 nic_link = nic_dict.get(constants.INIC_LINK, None)
10717 if nic_bridge and nic_link:
10718 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
10719 " at the same time", errors.ECODE_INVAL)
10720 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
10721 nic_dict["bridge"] = None
10722 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
10723 nic_dict[constants.INIC_LINK] = None
10725 if nic_op == constants.DDM_ADD:
10726 nic_mac = nic_dict.get(constants.INIC_MAC, None)
10727 if nic_mac is None:
10728 nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
10730 if constants.INIC_MAC in nic_dict:
10731 nic_mac = nic_dict[constants.INIC_MAC]
10732 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10733 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
10735 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
10736 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
10737 " modifying an existing nic",
10738 errors.ECODE_INVAL)
10740 if nic_addremove > 1:
10741 raise errors.OpPrereqError("Only one NIC add or remove operation"
10742 " supported at a time", errors.ECODE_INVAL)
10744 def ExpandNames(self):
10745 self._ExpandAndLockInstance()
10746 self.needed_locks[locking.LEVEL_NODE] = []
10747 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10749 def DeclareLocks(self, level):
10750 if level == locking.LEVEL_NODE:
10751 self._LockInstancesNodes()
10752 if self.op.disk_template and self.op.remote_node:
10753 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10754 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
10756 def BuildHooksEnv(self):
10757 """Build hooks env.
10759 This runs on the master, primary and secondaries.
10763 if constants.BE_MEMORY in self.be_new:
10764 args["memory"] = self.be_new[constants.BE_MEMORY]
10765 if constants.BE_VCPUS in self.be_new:
10766 args["vcpus"] = self.be_new[constants.BE_VCPUS]
10767 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
10768 # information at all.
10770 args["nics"] = []
10771 nic_override = dict(self.op.nics)
10772 for idx, nic in enumerate(self.instance.nics):
10773 if idx in nic_override:
10774 this_nic_override = nic_override[idx]
10776 this_nic_override = {}
10777 if constants.INIC_IP in this_nic_override:
10778 ip = this_nic_override[constants.INIC_IP]
10779 else:
10780 ip = nic.ip
10781 if constants.INIC_MAC in this_nic_override:
10782 mac = this_nic_override[constants.INIC_MAC]
10783 else:
10784 mac = nic.mac
10785 if idx in self.nic_pnew:
10786 nicparams = self.nic_pnew[idx]
10788 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
10789 mode = nicparams[constants.NIC_MODE]
10790 link = nicparams[constants.NIC_LINK]
10791 args["nics"].append((ip, mac, mode, link))
10792 if constants.DDM_ADD in nic_override:
10793 ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
10794 mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
10795 nicparams = self.nic_pnew[constants.DDM_ADD]
10796 mode = nicparams[constants.NIC_MODE]
10797 link = nicparams[constants.NIC_LINK]
10798 args["nics"].append((ip, mac, mode, link))
10799 elif constants.DDM_REMOVE in nic_override:
10800 del args["nics"][-1]
10802 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
10803 if self.op.disk_template:
10804 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
10808 def BuildHooksNodes(self):
10809 """Build hooks nodes.
10812 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10815 def CheckPrereq(self):
10816 """Check prerequisites.
10818 This only checks the instance list against the existing names.
10820 """
10821 # checking the new params on the primary/secondary nodes
10823 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10824 cluster = self.cluster = self.cfg.GetClusterInfo()
10825 assert self.instance is not None, \
10826 "Cannot retrieve locked instance %s" % self.op.instance_name
10827 pnode = instance.primary_node
10828 nodelist = list(instance.all_nodes)
10831 if self.op.os_name and not self.op.force:
10832 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
10833 self.op.force_variant)
10834 instance_os = self.op.os_name
10835 else:
10836 instance_os = instance.os
10838 if self.op.disk_template:
10839 if instance.disk_template == self.op.disk_template:
10840 raise errors.OpPrereqError("Instance already has disk template %s" %
10841 instance.disk_template, errors.ECODE_INVAL)
10843 if (instance.disk_template,
10844 self.op.disk_template) not in self._DISK_CONVERSIONS:
10845 raise errors.OpPrereqError("Unsupported disk template conversion from"
10846 " %s to %s" % (instance.disk_template,
10847 self.op.disk_template),
10848 errors.ECODE_INVAL)
10849 _CheckInstanceDown(self, instance, "cannot change disk template")
10850 if self.op.disk_template in constants.DTS_INT_MIRROR:
10851 if self.op.remote_node == pnode:
10852 raise errors.OpPrereqError("Given new secondary node %s is the same"
10853 " as the primary node of the instance" %
10854 self.op.remote_node, errors.ECODE_STATE)
10855 _CheckNodeOnline(self, self.op.remote_node)
10856 _CheckNodeNotDrained(self, self.op.remote_node)
10857 # FIXME: here we assume that the old instance type is DT_PLAIN
10858 assert instance.disk_template == constants.DT_PLAIN
10859 disks = [{constants.IDISK_SIZE: d.size,
10860 constants.IDISK_VG: d.logical_id[0]}
10861 for d in instance.disks]
10862 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
10863 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
10865 # hvparams processing
10866 if self.op.hvparams:
10867 hv_type = instance.hypervisor
10868 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
10869 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
10870 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
10872 # local check
10873 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
10874 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
10875 self.hv_new = hv_new # the new actual values
10876 self.hv_inst = i_hvdict # the new dict (without defaults)
10877 else:
10878 self.hv_new = self.hv_inst = {}
10880 # beparams processing
10881 if self.op.beparams:
10882 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
10883 use_none=True)
10884 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
10885 be_new = cluster.SimpleFillBE(i_bedict)
10886 self.be_new = be_new # the new actual values
10887 self.be_inst = i_bedict # the new dict (without defaults)
10888 else:
10889 self.be_new = self.be_inst = {}
10890 be_old = cluster.FillBE(instance)
10892 # osparams processing
10893 if self.op.osparams:
10894 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
10895 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
10896 self.os_inst = i_osdict # the new dict (without defaults)
10897 else:
10898 self.os_inst = {}
10900 self.warn = []
10902 if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
10903 be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
10904 mem_check_list = [pnode]
10905 if be_new[constants.BE_AUTO_BALANCE]:
10906 # either we changed auto_balance to yes or it was from before
10907 mem_check_list.extend(instance.secondary_nodes)
10908 instance_info = self.rpc.call_instance_info(pnode, instance.name,
10909 instance.hypervisor)
10910 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
10911 instance.hypervisor)
10912 pninfo = nodeinfo[pnode]
10913 msg = pninfo.fail_msg
10914 if msg:
10915 # Assume the primary node is unreachable and go ahead
10916 self.warn.append("Can't get info from primary node %s: %s" %
10917 (pnode, msg))
10918 elif not isinstance(pninfo.payload.get("memory_free", None), int):
10919 self.warn.append("Node data from primary node %s doesn't contain"
10920 " free memory information" % pnode)
10921 elif instance_info.fail_msg:
10922 self.warn.append("Can't get instance runtime information: %s" %
10923 instance_info.fail_msg)
10924 else:
10925 if instance_info.payload:
10926 current_mem = int(instance_info.payload["memory"])
10927 else:
10928 # Assume instance not running
10929 # (there is a slight race condition here, but it's not very probable,
10930 # and we have no other way to check)
10931 current_mem = 0
10932 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
10933 pninfo.payload["memory_free"])
10934 if miss_mem > 0:
10935 raise errors.OpPrereqError("This change will prevent the instance"
10936 " from starting, due to %d MB of memory"
10937 " missing on its primary node" % miss_mem,
10938 errors.ECODE_NORES)
10940 if be_new[constants.BE_AUTO_BALANCE]:
10941 for node, nres in nodeinfo.items():
10942 if node not in instance.secondary_nodes:
10943 continue
10944 nres.Raise("Can't get info from secondary node %s" % node,
10945 prereq=True, ecode=errors.ECODE_STATE)
10946 if not isinstance(nres.payload.get("memory_free", None), int):
10947 raise errors.OpPrereqError("Secondary node %s didn't return free"
10948 " memory information" % node,
10949 errors.ECODE_STATE)
10950 elif be_new[constants.BE_MEMORY] > nres.payload["memory_free"]:
10951 raise errors.OpPrereqError("This change will prevent the instance"
10952 " from failover to its secondary node"
10953 " %s, due to not enough memory" % node,
10954 errors.ECODE_STATE)
10957 self.nic_pnew = {}
10958 self.nic_pinst = {}
10959 for nic_op, nic_dict in self.op.nics:
10960 if nic_op == constants.DDM_REMOVE:
10961 if not instance.nics:
10962 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
10963 errors.ECODE_INVAL)
10964 continue
10965 if nic_op != constants.DDM_ADD:
10966 # an existing nic
10967 if not instance.nics:
10968 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
10969 " no NICs" % nic_op,
10970 errors.ECODE_INVAL)
10971 if nic_op < 0 or nic_op >= len(instance.nics):
10972 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
10973 " are 0 to %s" %
10974 (nic_op, len(instance.nics) - 1),
10975 errors.ECODE_INVAL)
10976 old_nic_params = instance.nics[nic_op].nicparams
10977 old_nic_ip = instance.nics[nic_op].ip
10978 else:
10979 old_nic_params = {}
10980 old_nic_ip = None
10982 update_params_dict = dict([(key, nic_dict[key])
10983 for key in constants.NICS_PARAMETERS
10984 if key in nic_dict])
10986 if "bridge" in nic_dict:
10987 update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
10989 new_nic_params = _GetUpdatedParams(old_nic_params,
10990 update_params_dict)
10991 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
10992 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
10993 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
10994 self.nic_pinst[nic_op] = new_nic_params
10995 self.nic_pnew[nic_op] = new_filled_nic_params
10996 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
10998 if new_nic_mode == constants.NIC_MODE_BRIDGED:
10999 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
11000 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
11001 if msg:
11002 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
11003 if self.op.force:
11004 self.warn.append(msg)
11005 else:
11006 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
11007 if new_nic_mode == constants.NIC_MODE_ROUTED:
11008 if constants.INIC_IP in nic_dict:
11009 nic_ip = nic_dict[constants.INIC_IP]
11010 else:
11011 nic_ip = old_nic_ip
11012 if nic_ip is None:
11013 raise errors.OpPrereqError("Cannot set the nic ip to None"
11014 " on a routed nic", errors.ECODE_INVAL)
11015 if constants.INIC_MAC in nic_dict:
11016 nic_mac = nic_dict[constants.INIC_MAC]
11017 if nic_mac is None:
11018 raise errors.OpPrereqError("Cannot set the nic mac to None",
11019 errors.ECODE_INVAL)
11020 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11021 # otherwise generate the mac
11022 nic_dict[constants.INIC_MAC] = \
11023 self.cfg.GenerateMAC(self.proc.GetECId())
11024 else:
11025 # or validate/reserve the current one
11026 try:
11027 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
11028 except errors.ReservationError:
11029 raise errors.OpPrereqError("MAC address %s already in use"
11030 " in cluster" % nic_mac,
11031 errors.ECODE_NOTUNIQUE)
11034 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
11035 raise errors.OpPrereqError("Disk operations not supported for"
11036 " diskless instances",
11037 errors.ECODE_INVAL)
11038 for disk_op, _ in self.op.disks:
11039 if disk_op == constants.DDM_REMOVE:
11040 if len(instance.disks) == 1:
11041 raise errors.OpPrereqError("Cannot remove the last disk of"
11042 " an instance", errors.ECODE_INVAL)
11043 _CheckInstanceDown(self, instance, "cannot remove disks")
11045 if (disk_op == constants.DDM_ADD and
11046 len(instance.disks) >= constants.MAX_DISKS):
11047 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
11048 " add more" % constants.MAX_DISKS,
11049 errors.ECODE_STATE)
11050 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
11051 # an existing disk
11052 if disk_op < 0 or disk_op >= len(instance.disks):
11053 raise errors.OpPrereqError("Invalid disk index %s, valid values"
11054 " are 0 to %s" %
11055 (disk_op, len(instance.disks)),
11056 errors.ECODE_INVAL)
11060 def _ConvertPlainToDrbd(self, feedback_fn):
11061 """Converts an instance from plain to drbd.
11064 feedback_fn("Converting template to drbd")
11065 instance = self.instance
11066 pnode = instance.primary_node
11067 snode = self.op.remote_node
11069 # create a fake disk info for _GenerateDiskTemplate
11070 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
11071 constants.IDISK_VG: d.logical_id[0]}
11072 for d in instance.disks]
11073 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
11074 instance.name, pnode, [snode],
11075 disk_info, None, None, 0, feedback_fn)
info = _GetInstanceInfoText(instance)
feedback_fn("Creating additional volumes...")
# first, create the missing data and meta devices
for disk in new_disks:
  # unfortunately this is... not too nice
  _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
                        info, True)
  for child in disk.children:
    _CreateSingleBlockDev(self, snode, instance, child, info, True)

# at this stage, all new LVs have been created, we can rename the
# old ones
feedback_fn("Renaming original volumes...")
11088 rename_list = [(o, n.children[0].logical_id)
11089 for (o, n) in zip(instance.disks, new_disks)]
11090 result = self.rpc.call_blockdev_rename(pnode, rename_list)
11091 result.Raise("Failed to rename original LVs")
11093 feedback_fn("Initializing DRBD devices...")
11094 # all child devices are in place, we can now create the DRBD devices
11095 for disk in new_disks:
11096 for node in [pnode, snode]:
11097 f_create = node == pnode
11098 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
11100 # at this point, the instance has been modified
11101 instance.disk_template = constants.DT_DRBD8
11102 instance.disks = new_disks
11103 self.cfg.Update(instance, feedback_fn)
11105 # disks are created, waiting for sync
disk_abort = not _WaitForSync(self, instance,
                              oneshot=not self.op.wait_for_sync)
if disk_abort:
  raise errors.OpExecError("There are some degraded disks for"
                           " this instance, please cleanup manually")
11112 def _ConvertDrbdToPlain(self, feedback_fn):
11113 """Converts an instance from drbd to plain.
11116 instance = self.instance
11117 assert len(instance.secondary_nodes) == 1
11118 pnode = instance.primary_node
11119 snode = instance.secondary_nodes[0]
11120 feedback_fn("Converting template to plain")
11122 old_disks = instance.disks
11123 new_disks = [d.children[0] for d in old_disks]
11125 # copy over size and mode
11126 for parent, child in zip(old_disks, new_disks):
11127 child.size = parent.size
11128 child.mode = parent.mode
11130 # update instance structure
11131 instance.disks = new_disks
11132 instance.disk_template = constants.DT_PLAIN
11133 self.cfg.Update(instance, feedback_fn)
11135 feedback_fn("Removing volumes on the secondary node...")
11136 for disk in old_disks:
11137 self.cfg.SetDiskID(disk, snode)
11138 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
11140 self.LogWarning("Could not remove block device %s on node %s,"
11141 " continuing anyway: %s", disk.iv_name, snode, msg)
11143 feedback_fn("Removing unneeded volumes on the primary node...")
11144 for idx, disk in enumerate(old_disks):
11145 meta = disk.children[1]
11146 self.cfg.SetDiskID(meta, pnode)
11147 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
11149 self.LogWarning("Could not remove metadata for disk %d on node %s,"
11150 " continuing anyway: %s", idx, pnode, msg)
11152 # this is a DRBD disk, return its port to the pool
11153 for disk in old_disks:
11154 tcp_port = disk.logical_id[2]
11155 self.cfg.AddTcpUdpPort(tcp_port)
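# [Editor's note] For DRBD8-based disks the logical_id tuple is
# (nodeA, nodeB, port, minorA, minorB, secret), so logical_id[2] above is
# the TCP port reserved for the DRBD connection; handing it back via
# cfg.AddTcpUdpPort() makes it reusable for future disks.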
11157 def Exec(self, feedback_fn):
11158 """Modifies an instance.
11160 All parameters take effect only at the next restart of the instance.
11163 # Process here the warnings from CheckPrereq, as we don't have a
11164 # feedback_fn there.
11165 for warn in self.warn:
11166 feedback_fn("WARNING: %s" % warn)
11169 instance = self.instance
for disk_op, disk_dict in self.op.disks:
  if disk_op == constants.DDM_REMOVE:
    # remove the last disk
    device = instance.disks.pop()
    device_idx = len(instance.disks)
    for node, disk in device.ComputeNodeTree(instance.primary_node):
      self.cfg.SetDiskID(disk, node)
      msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        self.LogWarning("Could not remove disk/%d on node %s: %s,"
                        " continuing anyway", device_idx, node, msg)
    result.append(("disk/%d" % device_idx, "remove"))

    # if this is a DRBD disk, return its port to the pool
    if device.dev_type in constants.LDS_DRBD:
      tcp_port = device.logical_id[2]
      self.cfg.AddTcpUdpPort(tcp_port)
elif disk_op == constants.DDM_ADD:
  # add a new disk
  if instance.disk_template in (constants.DT_FILE,
                                constants.DT_SHARED_FILE):
    file_driver, file_path = instance.disks[0].logical_id
    file_path = os.path.dirname(file_path)
  else:
    file_driver = file_path = None
  disk_idx_base = len(instance.disks)
  new_disk = _GenerateDiskTemplate(self,
                                   instance.disk_template,
                                   instance.name, instance.primary_node,
                                   instance.secondary_nodes,
                                   [disk_dict],
                                   file_path,
                                   file_driver,
                                   disk_idx_base, feedback_fn)[0]
  instance.disks.append(new_disk)
  info = _GetInstanceInfoText(instance)

  logging.info("Creating volume %s for instance %s",
               new_disk.iv_name, instance.name)
  # Note: this needs to be kept in sync with _CreateDisks
  for node in instance.all_nodes:
    f_create = node == instance.primary_node
    try:
      _CreateBlockDev(self, node, instance, new_disk,
                      f_create, info, f_create)
    except errors.OpExecError, err:
      self.LogWarning("Failed to create volume %s (%s) on"
                      " node %s: %s",
                      new_disk.iv_name, new_disk, node, err)
  result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
                 (new_disk.size, new_disk.mode)))
else:
  # change a given disk
  instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
  result.append(("disk.mode/%d" % disk_op,
                 disk_dict[constants.IDISK_MODE]))
if self.op.disk_template:
  r_shut = _ShutdownInstanceDisks(self, instance)
  if not r_shut:
    raise errors.OpExecError("Cannot shutdown instance disks, unable to"
                             " proceed with disk template conversion")
  mode = (instance.disk_template, self.op.disk_template)
  try:
    self._DISK_CONVERSIONS[mode](self, feedback_fn)
  except:
    self.cfg.ReleaseDRBDMinors(instance.name)
    raise
  result.append(("disk_template", self.op.disk_template))
11243 for nic_op, nic_dict in self.op.nics:
11244 if nic_op == constants.DDM_REMOVE:
11245 # remove the last nic
11246 del instance.nics[-1]
11247 result.append(("nic.%d" % len(instance.nics), "remove"))
11248 elif nic_op == constants.DDM_ADD:
11249 # mac and bridge should be set, by now
11250 mac = nic_dict[constants.INIC_MAC]
11251 ip = nic_dict.get(constants.INIC_IP, None)
11252 nicparams = self.nic_pinst[constants.DDM_ADD]
11253 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
11254 instance.nics.append(new_nic)
11255 result.append(("nic.%d" % (len(instance.nics) - 1),
11256 "add:mac=%s,ip=%s,mode=%s,link=%s" %
11257 (new_nic.mac, new_nic.ip,
11258 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
   self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
  )))
else:
  for key in (constants.INIC_MAC, constants.INIC_IP):
11263 if key in nic_dict:
11264 setattr(instance.nics[nic_op], key, nic_dict[key])
11265 if nic_op in self.nic_pinst:
11266 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
11267 for key, val in nic_dict.iteritems():
11268 result.append(("nic.%s/%d" % (key, nic_op), val))
11271 if self.op.hvparams:
11272 instance.hvparams = self.hv_inst
11273 for key, val in self.op.hvparams.iteritems():
11274 result.append(("hv/%s" % key, val))
11277 if self.op.beparams:
11278 instance.beparams = self.be_inst
11279 for key, val in self.op.beparams.iteritems():
11280 result.append(("be/%s" % key, val))
11283 if self.op.os_name:
11284 instance.os = self.op.os_name
11287 if self.op.osparams:
11288 instance.osparams = self.os_inst
11289 for key, val in self.op.osparams.iteritems():
11290 result.append(("os/%s" % key, val))
self.cfg.Update(instance, feedback_fn)

return result
11296 _DISK_CONVERSIONS = {
11297 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
(constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
}
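# [Editor's note] The conversion registry is keyed by (old_template,
# new_template) tuples, so Exec() dispatches with a plain dict lookup, e.g.:
#   mode = (constants.DT_PLAIN, constants.DT_DRBD8)
#   self._DISK_CONVERSIONS[mode](self, feedback_fn)   # _ConvertPlainToDrbd
# Template pairs not present here are expected to have been rejected
# earlier, during prerequisite checking.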
11302 class LUInstanceChangeGroup(LogicalUnit):
11303 HPATH = "instance-change-group"
HTYPE = constants.HTYPE_INSTANCE
REQ_BGL = False
11307 def ExpandNames(self):
11308 self.share_locks = _ShareAll()
11309 self.needed_locks = {
11310 locking.LEVEL_NODEGROUP: [],
locking.LEVEL_NODE: [],
}
11314 self._ExpandAndLockInstance()
11316 if self.op.target_groups:
11317 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                              self.op.target_groups)
else:
  self.req_target_uuids = None
11322 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
11324 def DeclareLocks(self, level):
11325 if level == locking.LEVEL_NODEGROUP:
11326 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11328 if self.req_target_uuids:
11329 lock_groups = set(self.req_target_uuids)
11331 # Lock all groups used by instance optimistically; this requires going
11332 # via the node before it's locked, requiring verification later on
11333 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
  lock_groups.update(instance_groups)
else:
  # No target groups, need to lock all of them
  lock_groups = locking.ALL_SET
11339 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
11341 elif level == locking.LEVEL_NODE:
11342 if self.req_target_uuids:
11343 # Lock all nodes used by instances
11344 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11345 self._LockInstancesNodes()
11347 # Lock all nodes in all potential target groups
11348 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
11349 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
11350 member_nodes = [node_name
11351 for group in lock_groups
11352 for node_name in self.cfg.GetNodeGroup(group).members]
  self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
else:
  # Lock all nodes as all groups are potential targets
  self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
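# [Editor's note] This is the usual optimistic-locking pattern in this
# module: the affected node groups are computed from configuration data
# that is not locked yet, the group and node locks are then acquired, and
# CheckPrereq below re-verifies that the instance's nodes and groups did
# not change in the meantime.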
11358 def CheckPrereq(self):
11359 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11360 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11361 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11363 assert (self.req_target_uuids is None or
11364 owned_groups.issuperset(self.req_target_uuids))
11365 assert owned_instances == set([self.op.instance_name])
11367 # Get instance information
11368 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11370 # Check if node groups for locked instance are still correct
11371 assert owned_nodes.issuperset(self.instance.all_nodes), \
11372 ("Instance %s's nodes changed while we kept the lock" %
11373 self.op.instance_name)
inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
                                       owned_groups)

if self.req_target_uuids:
  # User requested specific target groups
  self.target_uuids = frozenset(self.req_target_uuids)
else:
  # All groups except those used by the instance are potential targets
  self.target_uuids = owned_groups - inst_groups
11385 conflicting_groups = self.target_uuids & inst_groups
11386 if conflicting_groups:
11387 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
11388 " used by the instance '%s'" %
11389 (utils.CommaJoin(conflicting_groups),
11390 self.op.instance_name),
11391 errors.ECODE_INVAL)
11393 if not self.target_uuids:
11394 raise errors.OpPrereqError("There are no possible target groups",
11395 errors.ECODE_INVAL)
def BuildHooksEnv(self):
  """Build hooks env.

  """
  assert self.target_uuids

  env = {
    "TARGET_GROUPS": " ".join(self.target_uuids),
    }

  env.update(_BuildInstanceHookEnvByObject(self, self.instance))

  return env
11411 def BuildHooksNodes(self):
11412 """Build hooks nodes.
11415 mn = self.cfg.GetMasterNode()
11416 return ([mn], [mn])
11418 def Exec(self, feedback_fn):
11419 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
11421 assert instances == [self.op.instance_name], "Instance not locked"
11423 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
11424 instances=instances, target_groups=list(self.target_uuids))
11426 ial.Run(self.op.iallocator)
11428 if not ial.success:
11429 raise errors.OpPrereqError("Can't compute solution for changing group of"
11430 " instance '%s' using iallocator '%s': %s" %
(self.op.instance_name, self.op.iallocator,
 ial.info),
errors.ECODE_NORES)
11435 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
11437 self.LogInfo("Iallocator returned %s job(s) for changing group of"
11438 " instance '%s'", len(jobs), self.op.instance_name)
11440 return ResultWithJobs(jobs)
11443 class LUBackupQuery(NoHooksLU):
11444 """Query the exports list
11449 def ExpandNames(self):
11450 self.needed_locks = {}
11451 self.share_locks[locking.LEVEL_NODE] = 1
11452 if not self.op.nodes:
  self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
else:
  self.needed_locks[locking.LEVEL_NODE] = \
    _GetWantedNodes(self, self.op.nodes)
def Exec(self, feedback_fn):
  """Compute the list of all the exported system images.

  @rtype: dict
  @return: a dictionary with the structure node->(export-list)
      where export-list is a list of the instances exported on
      that node.

  """
  self.nodes = self.owned_locks(locking.LEVEL_NODE)
  rpcresult = self.rpc.call_export_list(self.nodes)
  result = {}
  for node in rpcresult:
    if rpcresult[node].fail_msg:
      result[node] = False
    else:
      result[node] = rpcresult[node].payload

  return result
11479 class LUBackupPrepare(NoHooksLU):
11480 """Prepares an instance for an export and returns useful information.
11485 def ExpandNames(self):
11486 self._ExpandAndLockInstance()
11488 def CheckPrereq(self):
11489 """Check prerequisites.
11492 instance_name = self.op.instance_name
11494 self.instance = self.cfg.GetInstanceInfo(instance_name)
11495 assert self.instance is not None, \
11496 "Cannot retrieve locked instance %s" % self.op.instance_name
11497 _CheckNodeOnline(self, self.instance.primary_node)
11499 self._cds = _GetClusterDomainSecret()
11501 def Exec(self, feedback_fn):
11502 """Prepares an instance for an export.
11505 instance = self.instance
11507 if self.op.mode == constants.EXPORT_MODE_REMOTE:
11508 salt = utils.GenerateSecret(8)
11510 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
11511 result = self.rpc.call_x509_cert_create(instance.primary_node,
11512 constants.RIE_CERT_VALIDITY)
11513 result.Raise("Can't create X509 key and certificate on %s" % result.node)
11515 (name, cert_pem) = result.payload
cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                       cert_pem)

return {
  "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
  "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
                    salt),
  "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
  }

return None
11530 class LUBackupExport(LogicalUnit):
11531 """Export an instance to an image in the cluster.
11534 HPATH = "instance-export"
11535 HTYPE = constants.HTYPE_INSTANCE
11538 def CheckArguments(self):
11539 """Check the arguments.
11542 self.x509_key_name = self.op.x509_key_name
11543 self.dest_x509_ca_pem = self.op.destination_x509_ca
11545 if self.op.mode == constants.EXPORT_MODE_REMOTE:
11546 if not self.x509_key_name:
11547 raise errors.OpPrereqError("Missing X509 key name for encryption",
11548 errors.ECODE_INVAL)
11550 if not self.dest_x509_ca_pem:
11551 raise errors.OpPrereqError("Missing destination X509 CA",
11552 errors.ECODE_INVAL)
11554 def ExpandNames(self):
11555 self._ExpandAndLockInstance()
11557 # Lock all nodes for local exports
11558 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11559 # FIXME: lock only instance primary and destination node
# Sad but true, for now we have to lock all nodes, as we don't know where
11562 # the previous export might be, and in this LU we search for it and
11563 # remove it from its current node. In the future we could fix this by:
11564 # - making a tasklet to search (share-lock all), then create the
11565 # new one, then one to remove, after
11566 # - removing the removal operation altogether
11567 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11569 def DeclareLocks(self, level):
11570 """Last minute lock declaration."""
11571 # All nodes are locked anyway, so nothing to do here.
11573 def BuildHooksEnv(self):
11574 """Build hooks env.
This will run on the master, primary node and target node.

"""
env = {
  "EXPORT_MODE": self.op.mode,
11581 "EXPORT_NODE": self.op.target_node,
11582 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
11583 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
11584 # TODO: Generic function for boolean env variables
11585 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
11588 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11592 def BuildHooksNodes(self):
11593 """Build hooks nodes.
11596 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
11598 if self.op.mode == constants.EXPORT_MODE_LOCAL:
  nl.append(self.op.target_node)

return nl
11603 def CheckPrereq(self):
11604 """Check prerequisites.
This checks that the instance and node names are valid.

"""
11609 instance_name = self.op.instance_name
11611 self.instance = self.cfg.GetInstanceInfo(instance_name)
11612 assert self.instance is not None, \
11613 "Cannot retrieve locked instance %s" % self.op.instance_name
11614 _CheckNodeOnline(self, self.instance.primary_node)
11616 if (self.op.remove_instance and self.instance.admin_up and
11617 not self.op.shutdown):
11618 raise errors.OpPrereqError("Can not remove instance without shutting it"
11621 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11622 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
11623 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
11624 assert self.dst_node is not None
11626 _CheckNodeOnline(self, self.dst_node.name)
11627 _CheckNodeNotDrained(self, self.dst_node.name)
11630 self.dest_disk_info = None
11631 self.dest_x509_ca = None
11633 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11634 self.dst_node = None
11636 if len(self.op.target_node) != len(self.instance.disks):
11637 raise errors.OpPrereqError(("Received destination information for %s"
11638 " disks, but instance %s has %s disks") %
11639 (len(self.op.target_node), instance_name,
11640 len(self.instance.disks)),
11641 errors.ECODE_INVAL)
11643 cds = _GetClusterDomainSecret()
# Check X509 key name
try:
  (key_name, hmac_digest, hmac_salt) = self.x509_key_name
11648 except (TypeError, ValueError), err:
11649 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
11651 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
11652 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
11653 errors.ECODE_INVAL)
# Load and verify CA
try:
  (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
11658 except OpenSSL.crypto.Error, err:
11659 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
11660 (err, ), errors.ECODE_INVAL)
11662 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
11663 if errcode is not None:
11664 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
11665 (msg, ), errors.ECODE_INVAL)
11667 self.dest_x509_ca = cert
# Verify target information
disk_info = []
for idx, disk_data in enumerate(self.op.target_node):
  try:
    (host, port, magic) = \
      masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
11675 except errors.GenericError, err:
11676 raise errors.OpPrereqError("Target info for disk %s: %s" %
11677 (idx, err), errors.ECODE_INVAL)
11679 disk_info.append((host, port, magic))
11681 assert len(disk_info) == len(self.op.target_node)
  self.dest_disk_info = disk_info

else:
  raise errors.ProgrammerError("Unhandled export mode %r" %
                               self.op.mode)
11688 # instance disk type verification
11689 # TODO: Implement export support for file-based disks
11690 for disk in self.instance.disks:
11691 if disk.dev_type == constants.LD_FILE:
11692 raise errors.OpPrereqError("Export not supported for instances with"
11693 " file-based disks", errors.ECODE_INVAL)
11695 def _CleanupExports(self, feedback_fn):
11696 """Removes exports of current instance from all other nodes.
11698 If an instance in a cluster with nodes A..D was exported to node C, its
exports will be removed from the nodes A, B and D.

"""
11702 assert self.op.mode != constants.EXPORT_MODE_REMOTE
11704 nodelist = self.cfg.GetNodeList()
11705 nodelist.remove(self.dst_node.name)
11707 # on one-node clusters nodelist will be empty after the removal
11708 # if we proceed the backup would be removed because OpBackupQuery
11709 # substitutes an empty list with the full cluster node list.
11710 iname = self.instance.name
11712 feedback_fn("Removing old exports for instance %s" % iname)
11713 exportlist = self.rpc.call_export_list(nodelist)
11714 for node in exportlist:
if exportlist[node].fail_msg:
  continue
if iname in exportlist[node].payload:
  msg = self.rpc.call_export_remove(node, iname).fail_msg
  if msg:
    self.LogWarning("Could not remove older export for instance %s"
                    " on node %s: %s", iname, node, msg)
11723 def Exec(self, feedback_fn):
11724 """Export an instance to an image in the cluster.
11727 assert self.op.mode in constants.EXPORT_MODES
11729 instance = self.instance
11730 src_node = instance.primary_node
11732 if self.op.shutdown:
11733 # shutdown the instance, but not the disks
11734 feedback_fn("Shutting down instance %s" % instance.name)
11735 result = self.rpc.call_instance_shutdown(src_node, instance,
11736 self.op.shutdown_timeout)
11737 # TODO: Maybe ignore failures if ignore_remove_failures is set
11738 result.Raise("Could not shutdown instance %s on"
11739 " node %s" % (instance.name, src_node))
11741 # set the disks ID correctly since call_instance_start needs the
11742 # correct drbd minor to create the symlinks
11743 for disk in instance.disks:
11744 self.cfg.SetDiskID(disk, src_node)
activate_disks = (not instance.admin_up)

if activate_disks:
  # Activate the instance disks if we're exporting a stopped instance
  feedback_fn("Activating disks for %s" % instance.name)
  _StartInstanceDisks(self, instance, None)
try:
  helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
                                                 instance)

  helper.CreateSnapshots()
  try:
11759 if (self.op.shutdown and instance.admin_up and
11760 not self.op.remove_instance):
11761 assert not activate_disks
11762 feedback_fn("Starting instance %s" % instance.name)
result = self.rpc.call_instance_start(src_node, instance,
                                      None, None, False)
msg = result.fail_msg
if msg:
  feedback_fn("Failed to start instance: %s" % msg)
11768 _ShutdownInstanceDisks(self, instance)
11769 raise errors.OpExecError("Could not start instance: %s" % msg)
11771 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11772 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
11773 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11774 connect_timeout = constants.RIE_CONNECT_TIMEOUT
11775 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11777 (key_name, _, _) = self.x509_key_name
dest_ca_pem = \
  OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                  self.dest_x509_ca)
  (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
                                             key_name, dest_ca_pem,
                                             timeouts)
finally:
  helper.Cleanup()
11789 # Check for backwards compatibility
11790 assert len(dresults) == len(instance.disks)
11791 assert compat.all(isinstance(i, bool) for i in dresults), \
11792 "Not all results are boolean: %r" % dresults
11796 feedback_fn("Deactivating disks for %s" % instance.name)
11797 _ShutdownInstanceDisks(self, instance)
if not (compat.all(dresults) and fin_resu):
  failures = []
  if not fin_resu:
    failures.append("export finalization")
  if not compat.all(dresults):
    fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
                           if not dsk)
    failures.append("disk export: disk(s) %s" % fdsk)
11808 raise errors.OpExecError("Export failed, errors in %s" %
11809 utils.CommaJoin(failures))
11811 # At this point, the export was successful, we can cleanup/finish
11813 # Remove instance if requested
11814 if self.op.remove_instance:
11815 feedback_fn("Removing instance %s" % instance.name)
11816 _RemoveInstance(self, feedback_fn, instance,
11817 self.op.ignore_remove_failures)
11819 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11820 self._CleanupExports(feedback_fn)
11822 return fin_resu, dresults
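# [Editor's note] Per the asserts above, the return value is a pair
# (fin_resu, dresults): one boolean for the export finalization plus one
# boolean per instance disk, the same shape the failure check above
# inspects before declaring the export successful.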
11825 class LUBackupRemove(NoHooksLU):
11826 """Remove exports related to the named instance.
11831 def ExpandNames(self):
11832 self.needed_locks = {}
11833 # We need all nodes to be locked in order for RemoveExport to work, but we
11834 # don't need to lock the instance itself, as nothing will happen to it (and
11835 # we can remove exports also for a removed instance)
11836 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11838 def Exec(self, feedback_fn):
11839 """Remove any export.
11842 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
11843 # If the instance was not found we'll try with the name that was passed in.
# This will only work if it was an FQDN, though.
fqdn_warn = False
if not instance_name:
  fqdn_warn = True
  instance_name = self.op.instance_name
11850 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
exportlist = self.rpc.call_export_list(locked_nodes)
found = False
for node in exportlist:
  msg = exportlist[node].fail_msg
  if msg:
    self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
    continue
  if instance_name in exportlist[node].payload:
    found = True
    result = self.rpc.call_export_remove(node, instance_name)
    msg = result.fail_msg
    if msg:
      logging.error("Could not remove export for instance %s"
                    " on node %s: %s", instance_name, node, msg)
11866 if fqdn_warn and not found:
11867 feedback_fn("Export not found. If trying to remove an export belonging"
11868 " to a deleted instance please use its Fully Qualified"
11872 class LUGroupAdd(LogicalUnit):
11873 """Logical unit for creating node groups.
11876 HPATH = "group-add"
11877 HTYPE = constants.HTYPE_GROUP
11880 def ExpandNames(self):
11881 # We need the new group's UUID here so that we can create and acquire the
11882 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
11883 # that it should not check whether the UUID exists in the configuration.
11884 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
11885 self.needed_locks = {}
11886 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11888 def CheckPrereq(self):
11889 """Check prerequisites.
This checks that the given group name is not an existing node group
already.

"""
try:
  existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
except errors.OpPrereqError:
  pass
else:
  raise errors.OpPrereqError("Desired group name '%s' already exists as a"
11901 " node group (UUID: %s)" %
11902 (self.op.group_name, existing_uuid),
11903 errors.ECODE_EXISTS)
11905 if self.op.ndparams:
11906 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
11908 def BuildHooksEnv(self):
11909 """Build hooks env.
11913 "GROUP_NAME": self.op.group_name,
11916 def BuildHooksNodes(self):
11917 """Build hooks nodes.
11920 mn = self.cfg.GetMasterNode()
11921 return ([mn], [mn])
11923 def Exec(self, feedback_fn):
11924 """Add the node group to the cluster.
11927 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
11928 uuid=self.group_uuid,
11929 alloc_policy=self.op.alloc_policy,
11930 ndparams=self.op.ndparams)
11932 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
11933 del self.remove_locks[locking.LEVEL_NODEGROUP]
11936 class LUGroupAssignNodes(NoHooksLU):
11937 """Logical unit for assigning nodes to groups.
11942 def ExpandNames(self):
11943 # These raise errors.OpPrereqError on their own:
11944 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11945 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
11947 # We want to lock all the affected nodes and groups. We have readily
11948 # available the list of nodes, and the *destination* group. To gather the
11949 # list of "source" groups, we need to fetch node information later on.
11950 self.needed_locks = {
11951 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
locking.LEVEL_NODE: self.op.nodes,
}
11955 def DeclareLocks(self, level):
11956 if level == locking.LEVEL_NODEGROUP:
11957 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
11959 # Try to get all affected nodes' groups without having the group or node
11960 # lock yet. Needs verification later in the code flow.
11961 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
11963 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
11965 def CheckPrereq(self):
11966 """Check prerequisites.
11969 assert self.needed_locks[locking.LEVEL_NODEGROUP]
11970 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
11971 frozenset(self.op.nodes))
11973 expected_locks = (set([self.group_uuid]) |
11974 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
11975 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
11976 if actual_locks != expected_locks:
11977 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
11978 " current groups are '%s', used to be '%s'" %
11979 (utils.CommaJoin(expected_locks),
11980 utils.CommaJoin(actual_locks)))
11982 self.node_data = self.cfg.GetAllNodesInfo()
11983 self.group = self.cfg.GetNodeGroup(self.group_uuid)
11984 instance_data = self.cfg.GetAllInstancesInfo()
11986 if self.group is None:
11987 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11988 (self.op.group_name, self.group_uuid))
11990 (new_splits, previous_splits) = \
11991 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
11992 for node in self.op.nodes],
11993 self.node_data, instance_data)
if new_splits:
  fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))

  if not self.op.force:
    raise errors.OpExecError("The following instances get split by this"
                             " change and --force was not given: %s" %
                             fmt_new_splits)
  else:
    self.LogWarning("This operation will split the following instances: %s",
                    fmt_new_splits)

    if previous_splits:
12007 self.LogWarning("In addition, these already-split instances continue"
12008 " to be split across groups: %s",
12009 utils.CommaJoin(utils.NiceSort(previous_splits)))
12011 def Exec(self, feedback_fn):
12012 """Assign nodes to a new group.
12015 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
self.cfg.AssignGroupNodes(mods)

@staticmethod
def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
12021 """Check for split instances after a node assignment.
12023 This method considers a series of node assignments as an atomic operation,
and returns information about split instances after applying the set of
changes.
12027 In particular, it returns information about newly split instances, and
12028 instances that were already split, and remain so after the change.
Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
considered.

@type changes: list of (node_name, new_group_uuid) pairs.
12034 @param changes: list of node assignments to consider.
12035 @param node_data: a dict with data for all nodes
12036 @param instance_data: a dict with all instances to consider
12037 @rtype: a two-tuple
12038 @return: a list of instances that were previously okay and result split as a
12039 consequence of this change, and a list of instances that were previously
split and this change does not fix.

"""
12043 changed_nodes = dict((node, group) for node, group in changes
12044 if node_data[node].group != group)
12046 all_split_instances = set()
12047 previously_split_instances = set()
12049 def InstanceNodes(instance):
12050 return [instance.primary_node] + list(instance.secondary_nodes)
12052 for inst in instance_data.values():
if inst.disk_template not in constants.DTS_INT_MIRROR:
  continue

instance_nodes = InstanceNodes(inst)
12058 if len(set(node_data[node].group for node in instance_nodes)) > 1:
12059 previously_split_instances.add(inst.name)
12061 if len(set(changed_nodes.get(node, node_data[node].group)
12062 for node in instance_nodes)) > 1:
12063 all_split_instances.add(inst.name)
12065 return (list(all_split_instances - previously_split_instances),
12066 list(previously_split_instances & all_split_instances))
12069 class _GroupQuery(_QueryBase):
12070 FIELDS = query.GROUP_FIELDS
12072 def ExpandNames(self, lu):
12073 lu.needed_locks = {}
12075 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
12076 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
if not self.names:
  self.wanted = [name_to_uuid[name]
                 for name in utils.NiceSort(name_to_uuid.keys())]
else:
  # Accept names to be either names or UUIDs.
  missing = []
  self.wanted = []
  all_uuid = frozenset(self._all_groups.keys())

  for name in self.names:
    if name in all_uuid:
      self.wanted.append(name)
    elif name in name_to_uuid:
      self.wanted.append(name_to_uuid[name])
    else:
      missing.append(name)

  if missing:
    raise errors.OpPrereqError("Some groups do not exist: %s" %
12097 utils.CommaJoin(missing),
12098 errors.ECODE_NOENT)
def DeclareLocks(self, lu, level):
  pass
12103 def _GetQueryData(self, lu):
12104 """Computes the list of node groups and their attributes.
12107 do_nodes = query.GQ_NODE in self.requested_data
12108 do_instances = query.GQ_INST in self.requested_data
12110 group_to_nodes = None
12111 group_to_instances = None
12113 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
12114 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
12115 # latter GetAllInstancesInfo() is not enough, for we have to go through
12116 # instance->node. Hence, we will need to process nodes even if we only need
12117 # instance information.
12118 if do_nodes or do_instances:
12119 all_nodes = lu.cfg.GetAllNodesInfo()
group_to_nodes = dict((uuid, []) for uuid in self.wanted)
node_to_group = {}

for node in all_nodes.values():
12124 if node.group in group_to_nodes:
12125 group_to_nodes[node.group].append(node.name)
  node_to_group[node.name] = node.group

if do_instances:
  all_instances = lu.cfg.GetAllInstancesInfo()
12130 group_to_instances = dict((uuid, []) for uuid in self.wanted)
12132 for instance in all_instances.values():
12133 node = instance.primary_node
12134 if node in node_to_group:
      group_to_instances[node_to_group[node]].append(instance.name)

  if not do_nodes:
    # Do not pass on node information if it was not requested.
    group_to_nodes = None
12141 return query.GroupQueryData([self._all_groups[uuid]
12142 for uuid in self.wanted],
12143 group_to_nodes, group_to_instances)
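# [Editor's sketch, hypothetical names] For a group UUID "uuid1" with
# nodes "node1"/"node2" and one instance whose primary node is "node1",
# the mappings computed above would look like:
#   group_to_nodes = {"uuid1": ["node1", "node2"]}
#   group_to_instances = {"uuid1": ["inst1.example.com"]}
# Either mapping is None when the corresponding data was not requested.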
12146 class LUGroupQuery(NoHooksLU):
12147 """Logical unit for querying node groups.
12152 def CheckArguments(self):
12153 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
12154 self.op.output_fields, False)
12156 def ExpandNames(self):
12157 self.gq.ExpandNames(self)
12159 def DeclareLocks(self, level):
12160 self.gq.DeclareLocks(self, level)
12162 def Exec(self, feedback_fn):
12163 return self.gq.OldStyleQuery(self)
12166 class LUGroupSetParams(LogicalUnit):
12167 """Modifies the parameters of a node group.
12170 HPATH = "group-modify"
12171 HTYPE = constants.HTYPE_GROUP
def CheckArguments(self):
  all_changes = [
    self.op.ndparams,
    self.op.alloc_policy,
    ]

  if all_changes.count(None) == len(all_changes):
12181 raise errors.OpPrereqError("Please pass at least one modification",
12182 errors.ECODE_INVAL)
12184 def ExpandNames(self):
12185 # This raises errors.OpPrereqError on its own:
12186 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12188 self.needed_locks = {
locking.LEVEL_NODEGROUP: [self.group_uuid],
}
12192 def CheckPrereq(self):
12193 """Check prerequisites.
12196 self.group = self.cfg.GetNodeGroup(self.group_uuid)
12198 if self.group is None:
12199 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12200 (self.op.group_name, self.group_uuid))
12202 if self.op.ndparams:
12203 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
12204 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12205 self.new_ndparams = new_ndparams
12207 def BuildHooksEnv(self):
12208 """Build hooks env.
12212 "GROUP_NAME": self.op.group_name,
12213 "NEW_ALLOC_POLICY": self.op.alloc_policy,
12216 def BuildHooksNodes(self):
12217 """Build hooks nodes.
12220 mn = self.cfg.GetMasterNode()
12221 return ([mn], [mn])
12223 def Exec(self, feedback_fn):
12224 """Modifies the node group.
12229 if self.op.ndparams:
12230 self.group.ndparams = self.new_ndparams
12231 result.append(("ndparams", str(self.group.ndparams)))
12233 if self.op.alloc_policy:
12234 self.group.alloc_policy = self.op.alloc_policy
self.cfg.Update(self.group, feedback_fn)

return result
12240 class LUGroupRemove(LogicalUnit):
12241 HPATH = "group-remove"
HTYPE = constants.HTYPE_GROUP
REQ_BGL = False
12245 def ExpandNames(self):
# This raises errors.OpPrereqError on its own:
12247 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12248 self.needed_locks = {
locking.LEVEL_NODEGROUP: [self.group_uuid],
}
12252 def CheckPrereq(self):
12253 """Check prerequisites.
This checks that the given group name exists as a node group, that it is
empty (i.e., contains no nodes), and that it is not the last group of the
cluster.

"""
12260 # Verify that the group is empty.
12261 group_nodes = [node.name
12262 for node in self.cfg.GetAllNodesInfo().values()
12263 if node.group == self.group_uuid]
12266 raise errors.OpPrereqError("Group '%s' not empty, has the following"
12268 (self.op.group_name,
12269 utils.CommaJoin(utils.NiceSort(group_nodes))),
12270 errors.ECODE_STATE)
12272 # Verify the cluster would not be left group-less.
12273 if len(self.cfg.GetNodeGroupList()) == 1:
12274 raise errors.OpPrereqError("Group '%s' is the only group,"
12275 " cannot be removed" %
12276 self.op.group_name,
12277 errors.ECODE_STATE)
12279 def BuildHooksEnv(self):
12280 """Build hooks env.
12284 "GROUP_NAME": self.op.group_name,
12287 def BuildHooksNodes(self):
12288 """Build hooks nodes.
12291 mn = self.cfg.GetMasterNode()
12292 return ([mn], [mn])
12294 def Exec(self, feedback_fn):
12295 """Remove the node group.
12299 self.cfg.RemoveNodeGroup(self.group_uuid)
12300 except errors.ConfigurationError:
12301 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
12302 (self.op.group_name, self.group_uuid))
12304 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12307 class LUGroupRename(LogicalUnit):
12308 HPATH = "group-rename"
HTYPE = constants.HTYPE_GROUP
REQ_BGL = False
12312 def ExpandNames(self):
12313 # This raises errors.OpPrereqError on its own:
12314 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12316 self.needed_locks = {
locking.LEVEL_NODEGROUP: [self.group_uuid],
}
12320 def CheckPrereq(self):
12321 """Check prerequisites.
Ensures requested new name is not yet used.

"""
try:
  new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
except errors.OpPrereqError:
  pass
else:
  raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
12332 " node group (UUID: %s)" %
12333 (self.op.new_name, new_name_uuid),
12334 errors.ECODE_EXISTS)
12336 def BuildHooksEnv(self):
12337 """Build hooks env.
12341 "OLD_NAME": self.op.group_name,
12342 "NEW_NAME": self.op.new_name,
12345 def BuildHooksNodes(self):
12346 """Build hooks nodes.
12349 mn = self.cfg.GetMasterNode()
12351 all_nodes = self.cfg.GetAllNodesInfo()
all_nodes.pop(mn, None)

run_nodes = [mn]
run_nodes.extend(node.name for node in all_nodes.values()
12356 if node.group == self.group_uuid)
12358 return (run_nodes, run_nodes)
12360 def Exec(self, feedback_fn):
12361 """Rename the node group.
12364 group = self.cfg.GetNodeGroup(self.group_uuid)
12367 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12368 (self.op.group_name, self.group_uuid))
12370 group.name = self.op.new_name
12371 self.cfg.Update(group, feedback_fn)
12373 return self.op.new_name
12376 class LUGroupEvacuate(LogicalUnit):
12377 HPATH = "group-evacuate"
HTYPE = constants.HTYPE_GROUP
REQ_BGL = False
12381 def ExpandNames(self):
12382 # This raises errors.OpPrereqError on its own:
12383 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12385 if self.op.target_groups:
12386 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                              self.op.target_groups)
else:
  self.req_target_uuids = []
12391 if self.group_uuid in self.req_target_uuids:
12392 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
12393 " as a target group (targets are %s)" %
12395 utils.CommaJoin(self.req_target_uuids)),
12396 errors.ECODE_INVAL)
12398 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12400 self.share_locks = _ShareAll()
12401 self.needed_locks = {
12402 locking.LEVEL_INSTANCE: [],
12403 locking.LEVEL_NODEGROUP: [],
locking.LEVEL_NODE: [],
}
12407 def DeclareLocks(self, level):
12408 if level == locking.LEVEL_INSTANCE:
12409 assert not self.needed_locks[locking.LEVEL_INSTANCE]
12411 # Lock instances optimistically, needs verification once node and group
12412 # locks have been acquired
12413 self.needed_locks[locking.LEVEL_INSTANCE] = \
12414 self.cfg.GetNodeGroupInstances(self.group_uuid)
12416 elif level == locking.LEVEL_NODEGROUP:
12417 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12419 if self.req_target_uuids:
12420 lock_groups = set([self.group_uuid] + self.req_target_uuids)
12422 # Lock all groups used by instances optimistically; this requires going
12423 # via the node before it's locked, requiring verification later on
12424 lock_groups.update(group_uuid
12425 for instance_name in
                       self.owned_locks(locking.LEVEL_INSTANCE)
                     for group_uuid in
                       self.cfg.GetInstanceNodeGroups(instance_name))
else:
  # No target groups, need to lock all of them
  lock_groups = locking.ALL_SET
12433 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12435 elif level == locking.LEVEL_NODE:
12436 # This will only lock the nodes in the group to be evacuated which
12437 # contain actual instances
12438 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12439 self._LockInstancesNodes()
12441 # Lock all nodes in group to be evacuated and target groups
12442 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12443 assert self.group_uuid in owned_groups
12444 member_nodes = [node_name
12445 for group in owned_groups
12446 for node_name in self.cfg.GetNodeGroup(group).members]
12447 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12449 def CheckPrereq(self):
12450 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12451 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12452 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12454 assert owned_groups.issuperset(self.req_target_uuids)
12455 assert self.group_uuid in owned_groups
12457 # Check if locked instances are still correct
12458 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
12460 # Get instance information
12461 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
12463 # Check if node groups for locked instances are still correct
12464 for instance_name in owned_instances:
12465 inst = self.instances[instance_name]
12466 assert owned_nodes.issuperset(inst.all_nodes), \
12467 "Instance %s's nodes changed while we kept the lock" % instance_name
inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
                                       owned_groups)

assert self.group_uuid in inst_groups, \
12473 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
12475 if self.req_target_uuids:
12476 # User requested specific target groups
  self.target_uuids = self.req_target_uuids
else:
  # All groups except the one to be evacuated are potential targets
12480 self.target_uuids = [group_uuid for group_uuid in owned_groups
12481 if group_uuid != self.group_uuid]
12483 if not self.target_uuids:
12484 raise errors.OpPrereqError("There are no possible target groups",
12485 errors.ECODE_INVAL)
12487 def BuildHooksEnv(self):
12488 """Build hooks env.
12492 "GROUP_NAME": self.op.group_name,
12493 "TARGET_GROUPS": " ".join(self.target_uuids),
12496 def BuildHooksNodes(self):
12497 """Build hooks nodes.
12500 mn = self.cfg.GetMasterNode()
12502 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
12504 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
12506 return (run_nodes, run_nodes)
12508 def Exec(self, feedback_fn):
12509 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12511 assert self.group_uuid not in self.target_uuids
12513 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12514 instances=instances, target_groups=self.target_uuids)
12516 ial.Run(self.op.iallocator)
12518 if not ial.success:
12519 raise errors.OpPrereqError("Can't compute group evacuation using"
12520 " iallocator '%s': %s" %
12521 (self.op.iallocator, ial.info),
12522 errors.ECODE_NORES)
12524 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12526 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
12527 len(jobs), self.op.group_name)
12529 return ResultWithJobs(jobs)
12532 class TagsLU(NoHooksLU): # pylint: disable=W0223
12533 """Generic tags LU.
This is an abstract class which is the parent of all the other tags LUs.

"""
12538 def ExpandNames(self):
12539 self.group_uuid = None
12540 self.needed_locks = {}
12541 if self.op.kind == constants.TAG_NODE:
12542 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
12543 self.needed_locks[locking.LEVEL_NODE] = self.op.name
12544 elif self.op.kind == constants.TAG_INSTANCE:
12545 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
12546 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
12547 elif self.op.kind == constants.TAG_NODEGROUP:
12548 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
12550 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
12551 # not possible to acquire the BGL based on opcode parameters)
12553 def CheckPrereq(self):
12554 """Check prerequisites.
12557 if self.op.kind == constants.TAG_CLUSTER:
12558 self.target = self.cfg.GetClusterInfo()
12559 elif self.op.kind == constants.TAG_NODE:
12560 self.target = self.cfg.GetNodeInfo(self.op.name)
12561 elif self.op.kind == constants.TAG_INSTANCE:
12562 self.target = self.cfg.GetInstanceInfo(self.op.name)
12563 elif self.op.kind == constants.TAG_NODEGROUP:
  self.target = self.cfg.GetNodeGroup(self.group_uuid)
else:
  raise errors.OpPrereqError("Wrong tag type requested (%s)" %
12567 str(self.op.kind), errors.ECODE_INVAL)
12570 class LUTagsGet(TagsLU):
12571 """Returns the tags of a given object.
12576 def ExpandNames(self):
12577 TagsLU.ExpandNames(self)
12579 # Share locks as this is only a read operation
12580 self.share_locks = _ShareAll()
12582 def Exec(self, feedback_fn):
12583 """Returns the tag list.
12586 return list(self.target.GetTags())
12589 class LUTagsSearch(NoHooksLU):
12590 """Searches the tags for a given pattern.
12595 def ExpandNames(self):
12596 self.needed_locks = {}
12598 def CheckPrereq(self):
12599 """Check prerequisites.
This checks the pattern passed for validity by compiling it.

"""
try:
  self.re = re.compile(self.op.pattern)
12606 except re.error, err:
12607 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
12608 (self.op.pattern, err), errors.ECODE_INVAL)
12610 def Exec(self, feedback_fn):
12611 """Returns the tag list.
12615 tgts = [("/cluster", cfg.GetClusterInfo())]
12616 ilist = cfg.GetAllInstancesInfo().values()
12617 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
12618 nlist = cfg.GetAllNodesInfo().values()
12619 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
12620 tgts.extend(("/nodegroup/%s" % n.name, n)
12621 for n in cfg.GetAllNodeGroupsInfo().values())
results = []
for path, target in tgts:
12624 for tag in target.GetTags():
12625 if self.re.search(tag):
      results.append((path, tag))

return results
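# [Editor's sketch] The search runs over the whole configuration, so a
# hypothetical pattern like r"^staging" would return entries such as
# ("/cluster", "staging") or ("/instances/inst1.example.com",
# "staging-web"), and likewise for nodes and node groups, since
# self.re.search() matches anywhere inside each tag.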
12630 class LUTagsSet(TagsLU):
12631 """Sets a tag on a given object.
12636 def CheckPrereq(self):
12637 """Check prerequisites.
This checks the type and length of the tag name and value.

"""
12642 TagsLU.CheckPrereq(self)
12643 for tag in self.op.tags:
12644 objects.TaggableObject.ValidateTag(tag)
def Exec(self, feedback_fn):
  """Sets the tag.

  """
  try:
    for tag in self.op.tags:
12652 self.target.AddTag(tag)
12653 except errors.TagError, err:
12654 raise errors.OpExecError("Error while setting tag: %s" % str(err))
12655 self.cfg.Update(self.target, feedback_fn)
12658 class LUTagsDel(TagsLU):
12659 """Delete a list of tags from a given object.
12664 def CheckPrereq(self):
12665 """Check prerequisites.
This checks that we have the given tag.

"""
12670 TagsLU.CheckPrereq(self)
12671 for tag in self.op.tags:
12672 objects.TaggableObject.ValidateTag(tag)
12673 del_tags = frozenset(self.op.tags)
12674 cur_tags = self.target.GetTags()
diff_tags = del_tags - cur_tags
if diff_tags:
  diff_names = ("'%s'" % i for i in sorted(diff_tags))
12679 raise errors.OpPrereqError("Tag(s) %s not found" %
12680 (utils.CommaJoin(diff_names), ),
12681 errors.ECODE_NOENT)
12683 def Exec(self, feedback_fn):
12684 """Remove the tag from the object.
12687 for tag in self.op.tags:
12688 self.target.RemoveTag(tag)
12689 self.cfg.Update(self.target, feedback_fn)
12692 class LUTestDelay(NoHooksLU):
12693 """Sleep for a specified amount of time.
This LU sleeps on the master and/or nodes for a specified amount of
time.

"""
REQ_BGL = False
12701 def ExpandNames(self):
12702 """Expand names and set required locks.
This expands the node list, if any.

"""
12707 self.needed_locks = {}
12708 if self.op.on_nodes:
12709 # _GetWantedNodes can be used here, but is not always appropriate to use
12710 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
12711 # more information.
12712 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
12713 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
12715 def _TestDelay(self):
12716 """Do the actual sleep.
12719 if self.op.on_master:
12720 if not utils.TestDelay(self.op.duration):
12721 raise errors.OpExecError("Error during master delay test")
12722 if self.op.on_nodes:
12723 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
12724 for node, node_result in result.items():
12725 node_result.Raise("Failure during rpc call to node %s" % node)
12727 def Exec(self, feedback_fn):
12728 """Execute the test delay opcode, with the wanted repetitions.
12731 if self.op.repeat == 0:
12734 top_value = self.op.repeat - 1
12735 for i in range(self.op.repeat):
12736 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
12740 class LUTestJqueue(NoHooksLU):
12741 """Utility LU to test some aspects of the job queue.
12746 # Must be lower than default timeout for WaitForJobChange to see whether it
12747 # notices changed jobs
12748 _CLIENT_CONNECT_TIMEOUT = 20.0
_CLIENT_CONFIRM_TIMEOUT = 60.0

@classmethod
def _NotifyUsingSocket(cls, cb, errcls):
12753 """Opens a Unix socket and waits for another program to connect.
12756 @param cb: Callback to send socket name to client
12757 @type errcls: class
12758 @param errcls: Exception class to use for errors
# Using a temporary directory as there's no easy way to create temporary
# sockets without writing a custom loop around tempfile.mktemp and
# socket.bind
tmpdir = tempfile.mkdtemp()
try:
  tmpsock = utils.PathJoin(tmpdir, "sock")

  logging.debug("Creating temporary socket at %s", tmpsock)
  sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
  try:
    sock.bind(tmpsock)
    sock.listen(1)

    # Send details to client
    cb(tmpsock)

    # Wait for client to connect before continuing
    sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
    try:
      (conn, _) = sock.accept()
    except socket.error, err:
      raise errcls("Client didn't connect in time (%s)" % err)
  finally:
    sock.close()
finally:
  # Remove as soon as client is connected
  shutil.rmtree(tmpdir)

# Wait for client to close
try:
  try:
    # pylint: disable=E1101
    # Instance of '_socketobject' has no ... member
    conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
    conn.recv(1)
  except socket.error, err:
    raise errcls("Client failed to confirm notification (%s)" % err)
finally:
  conn.close()
12801 def _SendNotification(self, test, arg, sockname):
12802 """Sends a notification to the client.
12805 @param test: Test name
12806 @param arg: Test argument (depends on test)
12807 @type sockname: string
12808 @param sockname: Socket path
12811 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
12813 def _Notify(self, prereq, test, arg):
12814 """Notifies the client of a test.
12817 @param prereq: Whether this is a prereq-phase test
12819 @param test: Test name
12820 @param arg: Test argument (depends on test)
12824 errcls = errors.OpPrereqError
12826 errcls = errors.OpExecError
12828 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
12832 def CheckArguments(self):
12833 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
12834 self.expandnames_calls = 0
12836 def ExpandNames(self):
12837 checkargs_calls = getattr(self, "checkargs_calls", 0)
12838 if checkargs_calls < 1:
12839 raise errors.ProgrammerError("CheckArguments was not called")
12841 self.expandnames_calls += 1
12843 if self.op.notify_waitlock:
12844 self._Notify(True, constants.JQT_EXPANDNAMES, None)
12846 self.LogInfo("Expanding names")
12848 # Get lock on master node (just to get a lock, not for a particular reason)
12849 self.needed_locks = {
locking.LEVEL_NODE: self.cfg.GetMasterNode(),
}
12853 def Exec(self, feedback_fn):
12854 if self.expandnames_calls < 1:
12855 raise errors.ProgrammerError("ExpandNames was not called")
12857 if self.op.notify_exec:
12858 self._Notify(False, constants.JQT_EXEC, None)
12860 self.LogInfo("Executing")
12862 if self.op.log_messages:
12863 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
12864 for idx, msg in enumerate(self.op.log_messages):
12865 self.LogInfo("Sending log message %s", idx + 1)
12866 feedback_fn(constants.JQT_MSGPREFIX + msg)
12867 # Report how many test messages have been sent
12868 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
12871 raise errors.OpExecError("Opcode failure was requested")
12876 class IAllocator(object):
12877 """IAllocator framework.
An IAllocator instance has several sets of attributes:
12880 - cfg that is needed to query the cluster
12881 - input data (all members of the _KEYS class attribute are required)
12882 - four buffer attributes (in|out_data|text), that represent the
12883 input (to the external script) in text and data structure format,
12884 and the output from it, again in two formats
- the result variables from the script (success, info, nodes) for
  easy usage

"""
12889 # pylint: disable=R0902
12890 # lots of instance attributes
def __init__(self, cfg, rpc, mode, **kwargs):
  self.cfg = cfg
  self.rpc = rpc
  # init buffer variables
  self.in_text = self.out_text = self.in_data = self.out_data = None
  # init all input fields so that pylint is happy
  self.mode = mode
  self.memory = self.disks = self.disk_template = None
12900 self.os = self.tags = self.nics = self.vcpus = None
12901 self.hypervisor = None
self.relocate_from = None
self.name = None
self.instances = None
12905 self.evac_mode = None
self.target_groups = []
# computed fields
self.required_nodes = None
12909 # init result fields
self.success = self.info = self.result = None

try:
  (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
except KeyError:
  raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                               " IAllocator" % self.mode)
keyset = [n for (n, _) in keydata]

for key in kwargs:
  if key not in keyset:
    raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                 " IAllocator" % key)
  setattr(self, key, kwargs[key])

for key in keyset:
  if key not in kwargs:
    raise errors.ProgrammerError("Missing input parameter '%s' to"
                                 " IAllocator" % key)
self._BuildInputData(compat.partial(fn, self), keydata)
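# [Editor's note] _MODE_DATA (defined elsewhere in this class) maps each
# allocator mode to its input-building function, key specification and
# result-check function, so a construction such as the one used by the
# group-change LUs above:
#   IAllocator(cfg, rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
#              instances=[...], target_groups=[...])
# is validated both ways here: unknown keyword arguments and missing mode
# keys each raise ProgrammerError before any RPC is made.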
12932 def _ComputeClusterData(self):
12933 """Compute the generic allocator input data.
This is the data that is independent of the actual operation.

"""
cfg = self.cfg
cluster_info = cfg.GetClusterInfo()
# cluster data
data = {
  "version": constants.IALLOCATOR_VERSION,
12943 "cluster_name": cfg.GetClusterName(),
12944 "cluster_tags": list(cluster_info.GetTags()),
12945 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
  # we don't have job IDs
  }
ninfo = cfg.GetAllNodesInfo()
12949 iinfo = cfg.GetAllInstancesInfo().values()
12950 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
12953 node_list = [n.name for n in ninfo.values() if n.vm_capable]
12955 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
12956 hypervisor_name = self.hypervisor
12957 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
  hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
else:
  hypervisor_name = cluster_info.enabled_hypervisors[0]

node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                    hypervisor_name)
node_iinfo = \
  self.rpc.call_all_instances_info(node_list,
                                   cluster_info.enabled_hypervisors)
12968 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
12970 config_ndata = self._ComputeBasicNodeData(ninfo)
12971 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
12972 i_list, config_ndata)
12973 assert len(data["nodes"]) == len(ninfo), \
12974 "Incomplete node data computed"
12976 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
12978 self.in_data = data
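
  # For orientation, self.in_data as built above has roughly this shape
  # (values are hypothetical):
  #
  #   {
  #     "version": constants.IALLOCATOR_VERSION,
  #     "cluster_name": "cluster.example.com",
  #     "cluster_tags": [],
  #     "enabled_hypervisors": ["xen-pvm"],
  #     "nodegroups": {...},  # per-group name/alloc_policy
  #     "nodes": {...},       # static config data merged with live data
  #     "instances": {...},   # per-instance parameters
  #   }
  #
  # _BuildInputData later adds the mode-specific "request" key.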

  @staticmethod
  def _ComputeNodeGroupData(cfg):
    """Compute node groups data.

    """
    ng = dict((guuid, {
      "name": gdata.name,
      "alloc_policy": gdata.alloc_policy,
      })
      for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())

    return ng

  @staticmethod
  def _ComputeBasicNodeData(node_cfg):
    """Compute global node data.

    @rtype: dict
    @returns: a dict mapping node names to their static attribute dicts

    """
    # fill in static (config-based) values
    node_results = dict((ninfo.name, {
      "tags": list(ninfo.GetTags()),
      "primary_ip": ninfo.primary_ip,
      "secondary_ip": ninfo.secondary_ip,
      "offline": ninfo.offline,
      "drained": ninfo.drained,
      "master_candidate": ninfo.master_candidate,
      "group": ninfo.group,
      "master_capable": ninfo.master_capable,
      "vm_capable": ninfo.vm_capable,
      })
      for ninfo in node_cfg.values())

    return node_results
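
  # One entry of the dict returned above looks roughly like this (values
  # hypothetical):
  #
  #   "node1.example.com": {
  #     "tags": [],
  #     "primary_ip": "192.0.2.10",
  #     "secondary_ip": "192.0.2.110",
  #     "offline": False,
  #     "drained": False,
  #     "master_candidate": True,
  #     "group": "<group uuid>",
  #     "master_capable": True,
  #     "vm_capable": True,
  #     }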

  @staticmethod
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
                              node_results):
    """Compute global node data.

    @param node_results: the basic node structures as filled from the config

    """
    # make a copy of the current dict
    node_results = dict(node_results)
    for nname, nresult in node_data.items():
      assert nname in node_results, "Missing basic data for node %s" % nname
      ninfo = node_cfg[nname]

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ["memory_total", "memory_free", "memory_dom0",
                     "vg_size", "vg_free", "cpu_total"]:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info["memory_free"] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]
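
        # Worked example for the correction above (hypothetical numbers): an
        # instance with BE_MEMORY = 1024 MiB that the hypervisor reports as
        # using only 512 MiB gives i_mem_diff = 1024 - 512 = 512, so
        # memory_free is lowered by 512 and the node looks as if the
        # instance used its full configured memory.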
        # compose the dynamic node entry
        pnr_dyn = {
          "total_memory": remote_info["memory_total"],
          "reserved_memory": remote_info["memory_dom0"],
          "free_memory": remote_info["memory_free"],
          "total_disk": remote_info["vg_size"],
          "free_disk": remote_info["vg_free"],
          "total_cpus": remote_info["cpu_total"],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr_dyn.update(node_results[nname])
        node_results[nname] = pnr_dyn

    return node_results

  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.

    """
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {
          "mac": nic.mac,
          "ip": nic.ip,
          "mode": filled_params[constants.NIC_MODE],
          "link": filled_params[constants.NIC_LINK],
          }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{constants.IDISK_SIZE: dsk.size,
                   constants.IDISK_MODE: dsk.mode}
                  for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data
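
  # One entry of the instance dict built above, roughly (values hypothetical;
  # disk_space_total for DRBD includes the metadata overhead added by
  # _ComputeDiskSize):
  #
  #   "instance1.example.com": {
  #     "tags": [],
  #     "admin_up": True,
  #     "vcpus": 2,
  #     "memory": 1024,
  #     "os": "debootstrap+default",
  #     "nodes": ["node1.example.com", "node2.example.com"],
  #     "nics": [{"mac": "...", "ip": None, "mode": "bridged",
  #               "link": "xen-br0", "bridge": "xen-br0"}],
  #     "disks": [{"size": 10240, "mode": "rw"}],
  #     "disk_template": "drbd",
  #     "hypervisor": "xen-pvm",
  #     "disk_space_total": 10368,
  #     }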

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_INT_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1

    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.memory,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      "hypervisor": self.hypervisor,
      }
    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_MIRRORED:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if instance.disk_template in constants.DTS_INT_MIRROR and \
        len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddNodeEvacuate(self):
    """Get data for node-evacuate requests.

    """
    return {
      "instances": self.instances,
      "evac_mode": self.evac_mode,
      }

  def _AddChangeGroup(self):
    """Get data for group-change requests.

    """
    return {
      "instances": self.instances,
      "target_groups": self.target_groups,
      }

  def _BuildInputData(self, fn, keydata):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    for keyname, keytype in keydata:
      if keyname not in request:
        raise errors.ProgrammerError("Request parameter %s is missing" %
                                     keyname)
      val = request[keyname]
      if not keytype(val):
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
                                     " validation, value %s, expected"
                                     " type %s" % (keyname, val, keytype))
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)
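
  # After this, self.in_text is the serialized document handed to the
  # allocator script; for an allocation request it contains, next to the
  # cluster data, roughly (values hypothetical):
  #
  #   "request": {
  #     "type": "allocate",
  #     "name": "instance1.example.com",
  #     "memory": 1024,
  #     "disks": [...],
  #     "disk_space_total": 10368,
  #     "disk_template": "drbd",
  #     "os": "debootstrap+default",
  #     "tags": [],
  #     "nics": [...],
  #     "vcpus": 2,
  #     "required_nodes": 2,
  #     "hypervisor": "xen-pvm",
  #     }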

  _STRING_LIST = ht.TListOf(ht.TString)
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
     # pylint: disable=E1101
     # Class '...' has no 'OP_ID' member
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
                          opcodes.OpInstanceMigrate.OP_ID,
                          opcodes.OpInstanceReplaceDisks.OP_ID])
     })))

  _NEVAC_MOVED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TNonEmptyString,
                                  ht.TListOf(ht.TNonEmptyString),
                                  ])))
  _NEVAC_FAILED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TMaybeString,
                                  ])))
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
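
  # A value accepted by _NEVAC_RESULT is a (moved, failed, jobs) triple, for
  # example (names hypothetical):
  #
  #   [[["instance1", "target-group-uuid", ["node3"]]],  # moved
  #    [["instance2", "failure reason"]],                # failed
  #    [[{"OP_ID": "OP_INSTANCE_MIGRATE", ...}]]]        # opcode jobs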

  _MODE_DATA = {
    constants.IALLOCATOR_MODE_ALLOC:
      (_AddNewInstance,
       [
        ("name", ht.TString),
        ("memory", ht.TInt),
        ("disks", ht.TListOf(ht.TDict)),
        ("disk_template", ht.TString),
        ("os", ht.TString),
        ("tags", _STRING_LIST),
        ("nics", ht.TListOf(ht.TDict)),
        ("vcpus", ht.TInt),
        ("hypervisor", ht.TString),
        ], ht.TList),
    constants.IALLOCATOR_MODE_RELOC:
      (_AddRelocateInstance,
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
       ht.TList),
    constants.IALLOCATOR_MODE_NODE_EVAC:
      (_AddNodeEvacuate, [
        ("instances", _STRING_LIST),
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
        ], _NEVAC_RESULT),
    constants.IALLOCATOR_MODE_CHG_GROUP:
      (_AddChangeGroup, [
        ("instances", _STRING_LIST),
        ("target_groups", _STRING_LIST),
        ], _NEVAC_RESULT),
    }

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()
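
  # After Run() the caller typically checks the outcome roughly like this
  # (sketch only; ECODE_NORES is one plausible error code choice):
  #
  #   if not ial.success:
  #     raise errors.OpPrereqError("Can't compute nodes using iallocator"
  #                                " '%s': %s" % (name, ial.info),
  #                                errors.ECODE_NORES)
  #   new_nodes = ial.result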

  def _ValidateResult(self):
    """Process the allocator results.

    This will parse the allocator output and, if successful, save the
    result in self.out_data and the other result attributes.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not self._result_check(self.result):
      raise errors.OpExecError("Iallocator returned invalid result,"
                               " expected %s, got %s" %
                               (self._result_check, self.result),
                               errors.ECODE_INVAL)

    if self.mode == constants.IALLOCATOR_MODE_RELOC:
      assert self.relocate_from is not None
      assert self.required_nodes == 1

      node2group = dict((name, ndata["group"])
                        for (name, ndata) in self.in_data["nodes"].items())

      fn = compat.partial(self._NodesToGroups, node2group,
                          self.in_data["nodegroups"])

      instance = self.cfg.GetInstanceInfo(self.name)
      request_groups = fn(self.relocate_from + [instance.primary_node])
      result_groups = fn(rdict["result"] + [instance.primary_node])

      if self.success and not set(result_groups).issubset(request_groups):
        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
                                 " differ from original groups (%s)" %
                                 (utils.CommaJoin(result_groups),
                                  utils.CommaJoin(request_groups)))

    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES

    self.out_data = rdict

  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
    """Returns a list of unique group names for a list of nodes.

    @type node2group: dict
    @param node2group: Map from node name to group UUID
    @type groups: dict
    @param groups: Group information
    @type nodes: list
    @param nodes: Node names

    """
    result = set()

    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        pass
      else:
        try:
          group = groups[group_uuid]
        except KeyError:
          # Can't find group, let's use UUID
          group_name = group_uuid
        else:
          group_name = group["name"]

        result.add(group_name)

    return sorted(result)
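
  # Worked example (hypothetical data):
  #
  #   node2group = {"node1": "uuid-a", "node2": "uuid-b"}
  #   groups = {"uuid-a": {"name": "default"}}
  #   _NodesToGroups(node2group, groups, ["node1", "node2", "node9"])
  #   => ["default", "uuid-b"]
  #
  # "node9" is unknown and silently ignored, and "uuid-b" has no entry in
  # groups, so the UUID itself is used as the group name.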


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode of
    the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
        list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
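
# Example (sketch): the query LUs dispatch on the opcode's resource type,
# along the lines of
#
#   impl_cls = _GetQueryImplementation(constants.QR_NODE)  # -> _NodeQuery
#
# and then instantiate impl_cls with the requested filter and field list;
# the exact constructor arguments follow the _QueryBase convention used by
# the classes registered above and are not shown here.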