4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
44 from ganeti import ssh
45 from ganeti import utils
46 from ganeti import errors
47 from ganeti import hypervisor
48 from ganeti import locking
49 from ganeti import constants
50 from ganeti import objects
51 from ganeti import serializer
52 from ganeti import ssconf
53 from ganeti import uidpool
54 from ganeti import compat
55 from ganeti import masterd
56 from ganeti import netutils
57 from ganeti import query
58 from ganeti import qlang
59 from ganeti import opcodes
61 from ganeti import runtime
63 import ganeti.masterd.instance # pylint: disable=W0611
67 """Data container for LU results with jobs.
69 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
70 by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
71 contained in the C{jobs} attribute and include the job IDs in the opcode
75 def __init__(self, jobs, **kwargs):
76 """Initializes this class.
78 Additional return values can be specified as keyword arguments.
80 @type jobs: list of lists of L{opcode.OpCode}
81 @param jobs: A list of lists of opcode objects
88 class LogicalUnit(object):
89 """Logical Unit base class.
91 Subclasses must follow these rules:
92 - implement ExpandNames
93 - implement CheckPrereq (except when tasklets are used)
94 - implement Exec (except when tasklets are used)
95 - implement BuildHooksEnv
96 - implement BuildHooksNodes
97 - redefine HPATH and HTYPE
98 - optionally redefine their run requirements:
99 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
101 Note that all commands require root permissions.
103 @ivar dry_run_result: the value (if any) that will be returned to the caller
104 in dry-run mode (signalled by opcode dry_run parameter)
111 def __init__(self, processor, op, context, rpc):
112 """Constructor for LogicalUnit.
114 This needs to be overridden in derived classes in order to check op
118 self.proc = processor
120 self.cfg = context.cfg
121 self.glm = context.glm
123 self.owned_locks = context.glm.list_owned
124 self.context = context
126 # Dicts used to declare locking needs to mcpu
127 self.needed_locks = None
128 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
130 self.remove_locks = {}
131 # Used to force good behavior when calling helper functions
132 self.recalculate_locks = {}
134 self.Log = processor.Log # pylint: disable=C0103
135 self.LogWarning = processor.LogWarning # pylint: disable=C0103
136 self.LogInfo = processor.LogInfo # pylint: disable=C0103
137 self.LogStep = processor.LogStep # pylint: disable=C0103
138 # support for dry-run
139 self.dry_run_result = None
140 # support for generic debug attribute
141 if (not hasattr(self.op, "debug_level") or
142 not isinstance(self.op.debug_level, int)):
143 self.op.debug_level = 0
148 # Validate opcode parameters and set defaults
149 self.op.Validate(True)
151 self.CheckArguments()
153 def CheckArguments(self):
154 """Check syntactic validity for the opcode arguments.
156 This method is for doing a simple syntactic check and ensure
157 validity of opcode parameters, without any cluster-related
158 checks. While the same can be accomplished in ExpandNames and/or
159 CheckPrereq, doing these separate is better because:
161 - ExpandNames is left as as purely a lock-related function
162 - CheckPrereq is run after we have acquired locks (and possible
165 The function is allowed to change the self.op attribute so that
166 later methods can no longer worry about missing parameters.
171 def ExpandNames(self):
172 """Expand names for this LU.
174 This method is called before starting to execute the opcode, and it should
175 update all the parameters of the opcode to their canonical form (e.g. a
176 short node name must be fully expanded after this method has successfully
177 completed). This way locking, hooks, logging, etc. can work correctly.
179 LUs which implement this method must also populate the self.needed_locks
180 member, as a dict with lock levels as keys, and a list of needed lock names
183 - use an empty dict if you don't need any lock
184 - if you don't need any lock at a particular level omit that level
185 - don't put anything for the BGL level
186 - if you want all locks at a level use locking.ALL_SET as a value
188 If you need to share locks (rather than acquire them exclusively) at one
189 level you can modify self.share_locks, setting a true value (usually 1) for
190 that level. By default locks are not shared.
192 This function can also define a list of tasklets, which then will be
193 executed in order instead of the usual LU-level CheckPrereq and Exec
194 functions, if those are not defined by the LU.
198 # Acquire all nodes and one instance
199 self.needed_locks = {
200 locking.LEVEL_NODE: locking.ALL_SET,
201 locking.LEVEL_INSTANCE: ['instance1.example.com'],
203 # Acquire just two nodes
204 self.needed_locks = {
205 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
208 self.needed_locks = {} # No, you can't leave it to the default value None
211 # The implementation of this method is mandatory only if the new LU is
212 # concurrent, so that old LUs don't need to be changed all at the same
215 self.needed_locks = {} # Exclusive LUs don't need locks.
217 raise NotImplementedError
219 def DeclareLocks(self, level):
220 """Declare LU locking needs for a level
222 While most LUs can just declare their locking needs at ExpandNames time,
223 sometimes there's the need to calculate some locks after having acquired
224 the ones before. This function is called just before acquiring locks at a
225 particular level, but after acquiring the ones at lower levels, and permits
226 such calculations. It can be used to modify self.needed_locks, and by
227 default it does nothing.
229 This function is only called if you have something already set in
230 self.needed_locks for the level.
232 @param level: Locking level which is going to be locked
233 @type level: member of ganeti.locking.LEVELS
237 def CheckPrereq(self):
238 """Check prerequisites for this LU.
240 This method should check that the prerequisites for the execution
241 of this LU are fulfilled. It can do internode communication, but
242 it should be idempotent - no cluster or system changes are
245 The method should raise errors.OpPrereqError in case something is
246 not fulfilled. Its return value is ignored.
248 This method should also update all the parameters of the opcode to
249 their canonical form if it hasn't been done by ExpandNames before.
252 if self.tasklets is not None:
253 for (idx, tl) in enumerate(self.tasklets):
254 logging.debug("Checking prerequisites for tasklet %s/%s",
255 idx + 1, len(self.tasklets))
260 def Exec(self, feedback_fn):
263 This method should implement the actual work. It should raise
264 errors.OpExecError for failures that are somewhat dealt with in
268 if self.tasklets is not None:
269 for (idx, tl) in enumerate(self.tasklets):
270 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
273 raise NotImplementedError
275 def BuildHooksEnv(self):
276 """Build hooks environment for this LU.
279 @return: Dictionary containing the environment that will be used for
280 running the hooks for this LU. The keys of the dict must not be prefixed
281 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
282 will extend the environment with additional variables. If no environment
283 should be defined, an empty dictionary should be returned (not C{None}).
284 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
288 raise NotImplementedError
290 def BuildHooksNodes(self):
291 """Build list of nodes to run LU's hooks.
293 @rtype: tuple; (list, list)
294 @return: Tuple containing a list of node names on which the hook
295 should run before the execution and a list of node names on which the
296 hook should run after the execution. No nodes should be returned as an
297 empty list (and not None).
298 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
302 raise NotImplementedError
304 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
305 """Notify the LU about the results of its hooks.
307 This method is called every time a hooks phase is executed, and notifies
308 the Logical Unit about the hooks' result. The LU can then use it to alter
309 its result based on the hooks. By default the method does nothing and the
310 previous result is passed back unchanged but any LU can define it if it
311 wants to use the local cluster hook-scripts somehow.
313 @param phase: one of L{constants.HOOKS_PHASE_POST} or
314 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
315 @param hook_results: the results of the multi-node hooks rpc call
316 @param feedback_fn: function used send feedback back to the caller
317 @param lu_result: the previous Exec result this LU had, or None
319 @return: the new Exec result, based on the previous result
323 # API must be kept, thus we ignore the unused argument and could
324 # be a function warnings
325 # pylint: disable=W0613,R0201
328 def _ExpandAndLockInstance(self):
329 """Helper function to expand and lock an instance.
331 Many LUs that work on an instance take its name in self.op.instance_name
332 and need to expand it and then declare the expanded name for locking. This
333 function does it, and then updates self.op.instance_name to the expanded
334 name. It also initializes needed_locks as a dict, if this hasn't been done
338 if self.needed_locks is None:
339 self.needed_locks = {}
341 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
342 "_ExpandAndLockInstance called with instance-level locks set"
343 self.op.instance_name = _ExpandInstanceName(self.cfg,
344 self.op.instance_name)
345 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
347 def _LockInstancesNodes(self, primary_only=False):
348 """Helper function to declare instances' nodes for locking.
350 This function should be called after locking one or more instances to lock
351 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
352 with all primary or secondary nodes for instances already locked and
353 present in self.needed_locks[locking.LEVEL_INSTANCE].
355 It should be called from DeclareLocks, and for safety only works if
356 self.recalculate_locks[locking.LEVEL_NODE] is set.
358 In the future it may grow parameters to just lock some instance's nodes, or
359 to just lock primaries or secondary nodes, if needed.
361 If should be called in DeclareLocks in a way similar to::
363 if level == locking.LEVEL_NODE:
364 self._LockInstancesNodes()
366 @type primary_only: boolean
367 @param primary_only: only lock primary nodes of locked instances
370 assert locking.LEVEL_NODE in self.recalculate_locks, \
371 "_LockInstancesNodes helper function called with no nodes to recalculate"
373 # TODO: check if we're really been called with the instance locks held
375 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
376 # future we might want to have different behaviors depending on the value
377 # of self.recalculate_locks[locking.LEVEL_NODE]
379 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
380 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
381 wanted_nodes.append(instance.primary_node)
383 wanted_nodes.extend(instance.secondary_nodes)
385 if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
386 self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
387 elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
388 self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
390 del self.recalculate_locks[locking.LEVEL_NODE]
# NOTE(review): corrupted dump -- original line numbers are fused into the
# text and some source lines are missing; restore from VCS before editing.
# The HPATH/HTYPE class attributes that should disable hooks here (between
# the class docstring and BuildHooksEnv) appear to be missing -- TODO confirm.
393 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
394 """Simple LU which runs no hooks.
396 This LU is intended as a parent for other LogicalUnits which will
397 run no hooks, in order to reduce duplicate code.
# Both hook builders raise: subclasses of NoHooksLU must never run hooks.
403 def BuildHooksEnv(self):
404 """Empty BuildHooksEnv for NoHooksLu.
406 This just raises an error.
409 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
411 def BuildHooksNodes(self):
412 """Empty BuildHooksNodes for NoHooksLU.
415 raise AssertionError("BuildHooksNodes called for NoHooksLU")
419 """Tasklet base class.
421 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
422 they can mix legacy code with tasklets. Locking needs to be done in the LU,
423 tasklets know nothing about locks.
425 Subclasses must follow these rules:
426 - Implement CheckPrereq
430 def __init__(self, lu):
437 def CheckPrereq(self):
438 """Check prerequisites for this tasklets.
440 This method should check whether the prerequisites for the execution of
441 this tasklet are fulfilled. It can do internode communication, but it
442 should be idempotent - no cluster or system changes are allowed.
444 The method should raise errors.OpPrereqError in case something is not
445 fulfilled. Its return value is ignored.
447 This method should also update all parameters to their canonical form if it
448 hasn't been done before.
453 def Exec(self, feedback_fn):
454 """Execute the tasklet.
456 This method should implement the actual work. It should raise
457 errors.OpExecError for failures that are somewhat dealt with in code, or
461 raise NotImplementedError
465 """Base for query utility classes.
468 #: Attribute holding field definitions
471 def __init__(self, filter_, fields, use_locking):
472 """Initializes this class.
475 self.use_locking = use_locking
477 self.query = query.Query(self.FIELDS, fields, filter_=filter_,
479 self.requested_data = self.query.RequestedData()
480 self.names = self.query.RequestedNames()
482 # Sort only if no names were requested
483 self.sort_by_name = not self.names
485 self.do_locking = None
488 def _GetNames(self, lu, all_names, lock_level):
489 """Helper function to determine names asked for in the query.
493 names = lu.owned_locks(lock_level)
497 if self.wanted == locking.ALL_SET:
498 assert not self.names
499 # caller didn't specify names, so ordering is not important
500 return utils.NiceSort(names)
502 # caller specified names and we must keep the same order
504 assert not self.do_locking or lu.glm.is_owned(lock_level)
506 missing = set(self.wanted).difference(names)
508 raise errors.OpExecError("Some items were removed before retrieving"
509 " their data: %s" % missing)
511 # Return expanded names
514 def ExpandNames(self, lu):
515 """Expand names for this query.
517 See L{LogicalUnit.ExpandNames}.
520 raise NotImplementedError()
522 def DeclareLocks(self, lu, level):
523 """Declare locks for this query.
525 See L{LogicalUnit.DeclareLocks}.
528 raise NotImplementedError()
530 def _GetQueryData(self, lu):
531 """Collects all data for this query.
533 @return: Query data object
536 raise NotImplementedError()
def NewStyleQuery(self, lu):
  """Collect the query data and build a new-style query response.

  @param lu: the calling logical unit

  """
  data = self._GetQueryData(lu)
  return query.GetQueryResponse(self.query, data,
                                sort_by_name=self.sort_by_name)
def OldStyleQuery(self, lu):
  """Collect the query data and build an old-style query response.

  @param lu: the calling logical unit

  """
  data = self._GetQueryData(lu)
  return self.query.OldStyleQuery(data, sort_by_name=self.sort_by_name)
554 """Returns a dict declaring all lock levels shared.
557 return dict.fromkeys(locking.LEVELS, 1)
# NOTE(review): corrupted dump -- original line numbers are fused into the
# text and some source lines are missing (the def's continuation line with
# the cur_group_uuid parameter is absent); restore from VCS before editing.
560 def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
562 """Checks if node groups for locked instances are still correct.
564 @type cfg: L{config.ConfigWriter}
565 @param cfg: Cluster configuration
566 @type instances: dict; string as key, L{objects.Instance} as value
567 @param instances: Dictionary, instance name as key, instance object as value
568 @type owned_groups: iterable of string
569 @param owned_groups: List of owned groups
570 @type owned_nodes: iterable of string
571 @param owned_nodes: List of owned nodes
572 @type cur_group_uuid: string or None
573 @param cur_group_uuid: Optional group UUID to check against instance's groups
# Verify each locked instance still lives on owned nodes/groups only.
576 for (name, inst) in instances.items():
577 assert owned_nodes.issuperset(inst.all_nodes), \
578 "Instance %s's nodes changed while we kept the lock" % name
580 inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)
582 assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
583 "Instance %s has no node in group %s" % (name, cur_group_uuid)
586 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
587 """Checks if the owned node groups are still correct for an instance.
589 @type cfg: L{config.ConfigWriter}
590 @param cfg: The cluster configuration
591 @type instance_name: string
592 @param instance_name: Instance name
593 @type owned_groups: set or frozenset
594 @param owned_groups: List of currently owned node groups
597 inst_groups = cfg.GetInstanceNodeGroups(instance_name)
599 if not owned_groups.issuperset(inst_groups):
600 raise errors.OpPrereqError("Instance %s's node groups changed since"
601 " locks were acquired, current groups are"
602 " are '%s', owning groups '%s'; retry the"
605 utils.CommaJoin(inst_groups),
606 utils.CommaJoin(owned_groups)),
# NOTE(review): corrupted dump -- original line numbers are fused into the
# text and some source lines are missing (the `% (group_uuid,` continuation
# and the trailing error-code argument are absent); restore from VCS first.
612 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
613 """Checks if the instances in a node group are still correct.
615 @type cfg: L{config.ConfigWriter}
616 @param cfg: The cluster configuration
617 @type group_uuid: string
618 @param group_uuid: Node group UUID
619 @type owned_instances: set or frozenset
620 @param owned_instances: List of currently owned instances
# Re-read the group's instances and fail if the set changed under our locks.
623 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
624 if owned_instances != wanted_instances:
625 raise errors.OpPrereqError("Instances in node group '%s' changed since"
626 " locks were acquired, wanted '%s', have '%s';"
627 " retry the operation" %
629 utils.CommaJoin(wanted_instances),
630 utils.CommaJoin(owned_instances)),
633 return wanted_instances
def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise

  """
  nd_params = cfg.GetNdParams(node)
  return nd_params[constants.ND_OOB_PROGRAM]
# NOTE(review): corrupted dump -- original line numbers are fused into the
# text and some source lines are missing; the guard selecting between the
# two unconditional-looking return statements below (presumably `if nodes:`)
# is absent -- restore from VCS before editing.
649 def _GetWantedNodes(lu, nodes):
650 """Returns list of checked and expanded node names.
652 @type lu: L{LogicalUnit}
653 @param lu: the logical unit on whose behalf we execute
655 @param nodes: list of node names or None for all nodes
657 @return: the list of nodes, sorted
658 @raise errors.ProgrammerError: if the nodes parameter is wrong type
662 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
664 return utils.NiceSort(lu.cfg.GetNodeList())
# NOTE(review): corrupted dump -- original line numbers are fused into the
# text and some source lines are missing; the branch structure around the
# two `wanted = ...` assignments (presumably `if instances:`/`else:`) and
# the final return are absent -- restore from VCS before editing.
667 def _GetWantedInstances(lu, instances):
668 """Returns list of checked and expanded instance names.
670 @type lu: L{LogicalUnit}
671 @param lu: the logical unit on whose behalf we execute
672 @type instances: list
673 @param instances: list of instance names or None for all instances
675 @return: the list of instances, sorted
676 @raise errors.OpPrereqError: if the instances parameter is wrong type
677 @raise errors.OpPrereqError: if any of the passed instances is not found
681 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
683 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
# NOTE(review): corrupted dump -- original line numbers are fused into the
# text and some source lines are missing (the branch that deletes keys for
# default/None values, and the final return, are absent); restore from VCS.
# The @param/@type tags for use_default and use_none were swapped in the
# original docstring; corrected below.
687 def _GetUpdatedParams(old_params, update_dict,
688 use_default=True, use_none=False):
689 """Return the new version of a parameter dictionary.
691 @type old_params: dict
692 @param old_params: old parameters
693 @type update_dict: dict
694 @param update_dict: dict containing new parameter values, or
695 constants.VALUE_DEFAULT to reset the parameter to its default
697 @type use_default: boolean
698 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
699 values as 'to be deleted' values
700 @type use_none: boolean
701 @param use_none: whether to recognise C{None} values as 'to be
704 @return: the new parameter dictionary
# deepcopy so nested parameter dicts of the caller are never mutated
707 params_copy = copy.deepcopy(old_params)
# iteritems() is the Python 2 idiom used throughout this file
708 for key, val in update_dict.iteritems():
709 if ((use_default and val == constants.VALUE_DEFAULT) or
710 (use_none and val is None)):
716 params_copy[key] = val
# NOTE(review): corrupted dump -- original line numbers are fused into the
# text and some source lines are missing (the elif/else branch keywords, the
# retain/release list initialisation and appends, and the `@param lu`/`@type
# level` docstring lines are absent); restore from VCS before editing.
720 def _ReleaseLocks(lu, level, names=None, keep=None):
721 """Releases locks owned by an LU.
723 @type lu: L{LogicalUnit}
724 @param level: Lock level
725 @type names: list or None
726 @param names: Names of locks to release
727 @type keep: list or None
728 @param keep: Names of locks to retain
# 'names' and 'keep' are mutually exclusive ways to select the locks
731 assert not (keep is not None and names is not None), \
732 "Only one of the 'names' and the 'keep' parameters can be given"
734 if names is not None:
735 should_release = names.__contains__
737 should_release = lambda name: name not in keep
739 should_release = None
745 # Determine which locks to release
746 for name in lu.owned_locks(level):
747 if should_release(name):
752 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
754 # Release just some locks
755 lu.glm.release(level, names=release)
757 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
760 lu.glm.release(level)
762 assert not lu.glm.is_owned(level), "No locks should be owned"
# NOTE(review): corrupted dump -- original line numbers are fused into the
# text and the final generator clause (iterating the volumes of each node,
# presumably `for vol in vols)`) is absent; restore from VCS before editing.
765 def _MapInstanceDisksToNodes(instances):
766 """Creates a map from (node, volume) to instance name.
768 @type instances: list of L{objects.Instance}
769 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
772 return dict(((node, vol), inst.name)
773 for inst in instances
774 for (node, vols) in inst.MapLVsByNode().items()
# NOTE(review): corrupted dump -- original line numbers are fused into the
# text and the try/except wrapping around RunPhase (which the W0702 pylint
# marker implies was a bare except) is absent; restore from VCS first.
778 def _RunPostHook(lu, node_name):
779 """Runs the post-hook for an opcode on a single node.
782 hm = lu.proc.BuildHooksManager(lu)
784 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
786 # pylint: disable=W0702
# best-effort: hook failures are reported as a warning, not raised
787 lu.LogWarning("Errors occurred running hooks on %s" % node_name)
# NOTE(review): corrupted dump -- original line numbers are fused into the
# text and the lines constructing `f` (presumably combining the static and
# dynamic field sets) and the `if delta:` guard are absent; restore from VCS.
790 def _CheckOutputFields(static, dynamic, selected):
791 """Checks whether all selected fields are valid.
793 @type static: L{utils.FieldSet}
794 @param static: static fields set
795 @type dynamic: L{utils.FieldSet}
796 @param dynamic: dynamic fields set
803 delta = f.NonMatching(selected)
805 raise errors.OpPrereqError("Unknown output fields selected: %s"
806 % ",".join(delta), errors.ECODE_INVAL)
# NOTE(review): corrupted dump -- original line numbers are fused into the
# text and the guard between computing used_globals and raising (presumably
# `if used_globals:`) is absent; restore from VCS before editing.
809 def _CheckGlobalHvParams(params):
810 """Validates that given hypervisor params are not global ones.
812 This will ensure that instances don't get customised versions of
816 used_globals = constants.HVC_GLOBALS.intersection(params)
818 msg = ("The following hypervisor parameters are global and cannot"
819 " be customized at instance level, please modify them at"
820 " cluster level: %s" % utils.CommaJoin(used_globals))
821 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
# NOTE(review): corrupted dump -- original line numbers are fused into the
# text; the `if msg is None:` guard that should precede the default-message
# assignment below appears to be missing -- restore from VCS before editing.
824 def _CheckNodeOnline(lu, node, msg=None):
825 """Ensure that a given node is online.
827 @param lu: the LU on behalf of which we make the check
828 @param node: the node to check
829 @param msg: if passed, should be a message to replace the default one
830 @raise errors.OpPrereqError: if the node is offline
834 msg = "Can't use offline node"
835 if lu.cfg.GetNodeInfo(node).offline:
836 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
# NOTE(review): corrupted dump -- original line numbers are fused into the
# text and the raise's continuation line (the error-code argument) is
# absent; restore from VCS before editing.
839 def _CheckNodeNotDrained(lu, node):
840 """Ensure that a given node is not drained.
842 @param lu: the LU on behalf of which we make the check
843 @param node: the node to check
844 @raise errors.OpPrereqError: if the node is drained
847 if lu.cfg.GetNodeInfo(node).drained:
848 raise errors.OpPrereqError("Can't use drained node %s" % node,
# NOTE(review): corrupted dump -- original line numbers are fused into the
# text and the raise's continuation line (the error-code argument) is
# absent; restore from VCS before editing.
852 def _CheckNodeVmCapable(lu, node):
853 """Ensure that a given node is vm capable.
855 @param lu: the LU on behalf of which we make the check
856 @param node: the node to check
857 @raise errors.OpPrereqError: if the node is not vm capable
860 if not lu.cfg.GetNodeInfo(node).vm_capable:
861 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
# NOTE(review): corrupted dump -- original line numbers are fused into the
# text and the format-argument continuation of result.Raise (presumably
# `(os_name, node),`) is absent; restore from VCS before editing.
865 def _CheckNodeHasOS(lu, node, os_name, force_variant):
866 """Ensure that a node supports a given OS.
868 @param lu: the LU on behalf of which we make the check
869 @param node: the node to check
870 @param os_name: the OS to query about
871 @param force_variant: whether to ignore variant errors
872 @raise errors.OpPrereqError: if the node is not supporting the OS
875 result = lu.rpc.call_os_get(node, os_name)
876 result.Raise("OS '%s' not in supported OS list for node %s" %
878 prereq=True, ecode=errors.ECODE_INVAL)
879 if not force_variant:
880 _CheckOSVariant(result.payload, os_name)
# NOTE(review): corrupted dump -- original line numbers are fused into the
# text and the `if prereq:`/`else:` branch lines selecting between the two
# raises at the bottom are absent; restore from VCS before editing.
883 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
884 """Ensure that a node has the given secondary ip.
886 @type lu: L{LogicalUnit}
887 @param lu: the LU on behalf of which we make the check
889 @param node: the node to check
890 @type secondary_ip: string
891 @param secondary_ip: the ip to check
892 @type prereq: boolean
893 @param prereq: whether to throw a prerequisite or an execute error
894 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
895 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
898 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
899 result.Raise("Failure checking secondary ip on node %s" % node,
900 prereq=prereq, ecode=errors.ECODE_ENVIRON)
901 if not result.payload:
902 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
903 " please fix and re-run this command" % secondary_ip)
905 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
907 raise errors.OpExecError(msg)
# NOTE(review): corrupted dump -- original line numbers are fused into the
# text and the return statement is truncated mid-argument-list (the second
# argument to utils.ReadOneLineFile is absent); restore from VCS first.
910 def _GetClusterDomainSecret():
911 """Reads the cluster domain secret.
914 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
def _CheckInstanceDown(lu, instance, reason):
  """Ensure that an instance is not running.

  Raises OpPrereqError both when the configuration marks the instance as up
  and when its primary node reports it among the running instances.

  """
  if instance.admin_up:
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
                               (instance.name, reason), errors.ECODE_STATE)

  primary = instance.primary_node
  running = lu.rpc.call_instance_list([primary], [instance.hypervisor])[primary]
  running.Raise("Can't contact node %s for instance information" % primary,
                prereq=True, ecode=errors.ECODE_ENVIRON)

  if instance.name in running.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, reason), errors.ECODE_STATE)
# NOTE(review): corrupted dump -- original line numbers are fused into the
# text and both the assignment producing `full_name` (presumably
# `full_name = fn(name)`) and the final return are absent; restore from VCS.
934 def _ExpandItemName(fn, name, kind):
935 """Expand an item name.
937 @param fn: the function to use for expansion
938 @param name: requested item name
939 @param kind: text description ('Node' or 'Instance')
940 @return: the resolved (full) name
941 @raise errors.OpPrereqError: if the item is not found
945 if full_name is None:
946 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
def _ExpandNodeName(cfg, name):
  """Resolve a possibly-shortened node name; see L{_ExpandItemName}.

  """
  expander = cfg.ExpandNodeName
  return _ExpandItemName(expander, name, "Node")
def _ExpandInstanceName(cfg, name):
  """Resolve a possibly-shortened instance name; see L{_ExpandItemName}.

  """
  expander = cfg.ExpandInstanceName
  return _ExpandItemName(expander, name, "Instance")
961 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
962 memory, vcpus, nics, disk_template, disks,
963 bep, hvp, hypervisor_name, tags):
964 """Builds instance related env variables for hooks
966 This builds the hook environment from individual variables.
969 @param name: the name of the instance
970 @type primary_node: string
971 @param primary_node: the name of the instance's primary node
972 @type secondary_nodes: list
973 @param secondary_nodes: list of secondary nodes as strings
974 @type os_type: string
975 @param os_type: the name of the instance's OS
976 @type status: boolean
977 @param status: the should_run status of the instance
979 @param memory: the memory size of the instance
981 @param vcpus: the count of VCPUs the instance has
983 @param nics: list of tuples (ip, mac, mode, link) representing
984 the NICs the instance has
985 @type disk_template: string
986 @param disk_template: the disk template of the instance
988 @param disks: the list of (size, mode) pairs
990 @param bep: the backend parameters for the instance
992 @param hvp: the hypervisor parameters for the instance
993 @type hypervisor_name: string
994 @param hypervisor_name: the hypervisor for the instance
996 @param tags: list of instance tags as strings
998 @return: the hook environment for this instance
1007 "INSTANCE_NAME": name,
1008 "INSTANCE_PRIMARY": primary_node,
1009 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1010 "INSTANCE_OS_TYPE": os_type,
1011 "INSTANCE_STATUS": str_status,
1012 "INSTANCE_MEMORY": memory,
1013 "INSTANCE_VCPUS": vcpus,
1014 "INSTANCE_DISK_TEMPLATE": disk_template,
1015 "INSTANCE_HYPERVISOR": hypervisor_name,
1019 nic_count = len(nics)
1020 for idx, (ip, mac, mode, link) in enumerate(nics):
1023 env["INSTANCE_NIC%d_IP" % idx] = ip
1024 env["INSTANCE_NIC%d_MAC" % idx] = mac
1025 env["INSTANCE_NIC%d_MODE" % idx] = mode
1026 env["INSTANCE_NIC%d_LINK" % idx] = link
1027 if mode == constants.NIC_MODE_BRIDGED:
1028 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1032 env["INSTANCE_NIC_COUNT"] = nic_count
1035 disk_count = len(disks)
1036 for idx, (size, mode) in enumerate(disks):
1037 env["INSTANCE_DISK%d_SIZE" % idx] = size
1038 env["INSTANCE_DISK%d_MODE" % idx] = mode
1042 env["INSTANCE_DISK_COUNT"] = disk_count
1047 env["INSTANCE_TAGS"] = " ".join(tags)
1049 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1050 for key, value in source.items():
1051 env["INSTANCE_%s_%s" % (kind, key)] = value
# NOTE(review): corrupted dump -- original line numbers are fused into the
# text and several code lines are missing (the accumulator initialisation,
# the `for nic in nics:` loop header, the ip/mac bindings, and the final
# return); restore from VCS before editing.
1056 def _NICListToTuple(lu, nics):
1057 """Build a list of nic information tuples.
1059 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1060 value in LUInstanceQueryData.
1062 @type lu: L{LogicalUnit}
1063 @param lu: the logical unit on whose behalf we execute
1064 @type nics: list of L{objects.NIC}
1065 @param nics: list of nics to convert to hooks tuples
1069 cluster = lu.cfg.GetClusterInfo()
# mode/link come from the cluster-filled NIC parameters
1073 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1074 mode = filled_params[constants.NIC_MODE]
1075 link = filled_params[constants.NIC_LINK]
1076 hooks_nics.append((ip, mac, mode, link))
1080 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1081 """Builds instance related env variables for hooks from an object.
1083 @type lu: L{LogicalUnit}
1084 @param lu: the logical unit on whose behalf we execute
1085 @type instance: L{objects.Instance}
1086 @param instance: the instance for which we should build the
1088 @type override: dict
1089 @param override: dictionary with key/values that will override
1092 @return: the hook environment dictionary
1095 cluster = lu.cfg.GetClusterInfo()
1096 bep = cluster.FillBE(instance)
1097 hvp = cluster.FillHV(instance)
1099 "name": instance.name,
1100 "primary_node": instance.primary_node,
1101 "secondary_nodes": instance.secondary_nodes,
1102 "os_type": instance.os,
1103 "status": instance.admin_up,
1104 "memory": bep[constants.BE_MEMORY],
1105 "vcpus": bep[constants.BE_VCPUS],
1106 "nics": _NICListToTuple(lu, instance.nics),
1107 "disk_template": instance.disk_template,
1108 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1111 "hypervisor_name": instance.hypervisor,
1112 "tags": instance.tags,
1115 args.update(override)
1116 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1119 def _AdjustCandidatePool(lu, exceptions):
1120 """Adjust the candidate pool after node operations.
1123 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1125 lu.LogInfo("Promoted nodes to master candidate role: %s",
1126 utils.CommaJoin(node.name for node in mod_list))
1127 for name in mod_list:
1128 lu.context.ReaddNode(name)
1129 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1131 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1135 def _DecideSelfPromotion(lu, exceptions=None):
1136 """Decide whether I should promote myself as a master candidate.
1139 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1140 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1141 # the new node will increase mc_max with one, so:
1142 mc_should = min(mc_should + 1, cp_size)
1143 return mc_now < mc_should
def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  @raise errors.OpPrereqError: if a required bridge is missing on the
      target node

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  # only bridged NICs actually need a bridge on the node
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  Defaults to the instance's primary node when no node is given.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)
def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  variant = objects.OS.GetVariant(name)
  if not os_obj.supported_variants:
    # the OS declares no variants, so passing one is an error
    if variant:
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                                 " passed)" % (os_obj.name, variant),
                                 errors.ECODE_INVAL)
    return
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1193 def _GetNodeInstancesInner(cfg, fn):
1194 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """
  def _UsesNode(inst):
    # matches when the node serves the instance in any role
    return node_name in inst.all_nodes

  return _GetNodeInstancesInner(cfg, _UsesNode)
def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  def _IsPrimary(inst):
    # matches when the node is the instance's primary node
    return inst.primary_node == node_name

  return _GetNodeInstancesInner(cfg, _IsPrimary)
def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  def _IsSecondary(inst):
    # matches when the node holds one of the instance's secondaries
    return node_name in inst.secondary_nodes

  return _GetNodeInstancesInner(cfg, _IsSecondary)
def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

  # NOTE(review): other storage types appear to take no extra arguments —
  # confirm against the storage backend interface
  return []
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  """Returns the indices of the instance's disks that are faulty on a node.

  @param cfg: the cluster configuration
  @param rpc: the RPC runner used to query the mirror status
  @param node_name: the node on which the disks are checked
  @param prereq: whether RPC failures are reported as prerequisite errors
  @rtype: list of int
  @return: indices of the faulty disks

  """
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty
1250 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1251 """Check the sanity of iallocator and node arguments and use the
1252 cluster-wide iallocator if appropriate.
1254 Check that at most one of (iallocator, node) is specified. If none is
1255 specified, then the LU's opcode's iallocator slot is filled with the
1256 cluster-wide default iallocator.
1258 @type iallocator_slot: string
1259 @param iallocator_slot: the name of the opcode iallocator slot
1260 @type node_slot: string
1261 @param node_slot: the name of the opcode target node slot
1264 node = getattr(lu.op, node_slot, None)
1265 iallocator = getattr(lu.op, iallocator_slot, None)
1267 if node is not None and iallocator is not None:
1268 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1270 elif node is None and iallocator is None:
1271 default_iallocator = lu.cfg.GetDefaultIAllocator()
1272 if default_iallocator:
1273 setattr(lu.op, iallocator_slot, default_iallocator)
1275 raise errors.OpPrereqError("No iallocator or node given and no"
1276 " cluster-wide default iallocator found;"
1277 " please specify either an iallocator or a"
1278 " node, or set a cluster-wide default"
1282 def _GetDefaultIAllocator(cfg, iallocator):
1283 """Decides on which iallocator to use.
1285 @type cfg: L{config.ConfigWriter}
1286 @param cfg: Cluster configuration object
1287 @type iallocator: string or None
1288 @param iallocator: Iallocator specified in opcode
1290 @return: Iallocator name
1294 # Use default iallocator
1295 iallocator = cfg.GetDefaultIAllocator()
1298 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1299 " opcode nor as a cluster-wide default",
class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # post-init hooks only run on the master node
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True
class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    # only the master node may remain in the configuration
    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master)

    result = self.rpc.call_node_deactivate_master_ip(master)
    result.Raise("Could not disable the master role")

    return master
1390 def _VerifyCertificate(filename):
1391 """Verifies a certificate for L{LUClusterVerifyConfig}.
1393 @type filename: string
1394 @param filename: Path to PEM file
1398 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1399 utils.ReadFile(filename))
1400 except Exception, err: # pylint: disable=W0703
1401 return (LUClusterVerifyConfig.ETYPE_ERROR,
1402 "Failed to load X509 certificate %s: %s" % (filename, err))
1405 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1406 constants.SSL_CERT_EXPIRATION_ERROR)
1409 fnamemsg = "While verifying %s: %s" % (filename, msg)
1414 return (None, fnamemsg)
1415 elif errcode == utils.CERT_WARNING:
1416 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1417 elif errcode == utils.CERT_ERROR:
1418 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1420 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1423 def _GetAllHypervisorParameters(cluster, instances):
1424 """Compute the set of all hypervisor parameters.
1426 @type cluster: L{objects.Cluster}
1427 @param cluster: the cluster object
1428 @param instances: list of L{objects.Instance}
1429 @param instances: additional instances from which to obtain parameters
1430 @rtype: list of (origin, hypervisor, parameters)
1431 @return: a list with all parameters found, indicating the hypervisor they
1432 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1437 for hv_name in cluster.enabled_hypervisors:
1438 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1440 for os_name, os_hvp in cluster.os_hvp.items():
1441 for hv_name, hv_params in os_hvp.items():
1443 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1444 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1446 # TODO: collapse identical parameter values in a single one
1447 for instance in instances:
1448 if instance.hvparams:
1449 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1450 cluster.FillHV(instance)))
1455 class _VerifyErrors(object):
1456 """Mix-in for cluster/group verify LUs.
1458 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1459 self.op and self._feedback_fn to be available.)
1462 TCLUSTER = "cluster"
1464 TINSTANCE = "instance"
1466 ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1467 ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1468 ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
1469 ECLUSTERDANGLINGNODES = (TNODE, "ECLUSTERDANGLINGNODES")
1470 ECLUSTERDANGLINGINST = (TNODE, "ECLUSTERDANGLINGINST")
1471 EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1472 EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1473 EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1474 EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1475 EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1476 EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1477 EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
1478 ENODEDRBD = (TNODE, "ENODEDRBD")
1479 ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1480 ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1481 ENODEHOOKS = (TNODE, "ENODEHOOKS")
1482 ENODEHV = (TNODE, "ENODEHV")
1483 ENODELVM = (TNODE, "ENODELVM")
1484 ENODEN1 = (TNODE, "ENODEN1")
1485 ENODENET = (TNODE, "ENODENET")
1486 ENODEOS = (TNODE, "ENODEOS")
1487 ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1488 ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1489 ENODERPC = (TNODE, "ENODERPC")
1490 ENODESSH = (TNODE, "ENODESSH")
1491 ENODEVERSION = (TNODE, "ENODEVERSION")
1492 ENODESETUP = (TNODE, "ENODESETUP")
1493 ENODETIME = (TNODE, "ENODETIME")
1494 ENODEOOBPATH = (TNODE, "ENODEOOBPATH")
1496 ETYPE_FIELD = "code"
1497 ETYPE_ERROR = "ERROR"
1498 ETYPE_WARNING = "WARNING"
1500 def _Error(self, ecode, item, msg, *args, **kwargs):
1501 """Format an error message.
1503 Based on the opcode's error_codes parameter, either format a
1504 parseable error code, or a simpler error string.
1506 This must be called only from Exec and functions called from Exec.
1509 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1511 # first complete the msg
1514 # then format the whole message
1515 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1516 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1522 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1523 # and finally report it via the feedback_fn
1524 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
1526 def _ErrorIf(self, cond, *args, **kwargs):
1527 """Log an error message if the passed condition is True.
1531 or self.op.debug_simulate_errors) # pylint: disable=E1101
1533 self._Error(*args, **kwargs)
1534 # do not mark the operation as failed for WARN cases only
1535 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1536 self.bad = self.bad or cond
class LUClusterVerify(NoHooksLU):
  """Submits all jobs necessary to verify the cluster.

  """
  REQ_BGL = False

  def ExpandNames(self):
    # this LU only submits other jobs, it needs no locks itself
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Submit one config-verify job plus one verify job per node group.

    """
    jobs = []

    if self.op.group_name:
      # verifying a single, explicitly requested group
      groups = [self.op.group_name]
      depends_fn = lambda: None
    else:
      groups = self.cfg.GetNodeGroupList()

      # Verify global configuration
      jobs.append([opcodes.OpClusterVerifyConfig()])

      # Always depend on global verification
      depends_fn = lambda: [(-len(jobs), [])]

    jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
                                              depends=depends_fn())]
                for group in groups)

    # Fix up all parameters
    for op in itertools.chain(*jobs): # pylint: disable=W0142
      op.debug_simulate_errors = self.op.debug_simulate_errors
      op.verbose = self.op.verbose
      op.error_codes = self.op.error_codes
      try:
        op.skip_checks = self.op.skip_checks
      except AttributeError:
        # only the group-verify opcode has a skip_checks slot
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)

    return ResultWithJobs(jobs)
1580 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1581 """Verifies the cluster config.
1586 def _VerifyHVP(self, hvp_data):
1587 """Verifies locally the syntax of the hypervisor parameters.
1590 for item, hv_name, hv_params in hvp_data:
1591 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1594 hv_class = hypervisor.GetHypervisor(hv_name)
1595 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1596 hv_class.CheckParameterSyntax(hv_params)
1597 except errors.GenericError, err:
1598 self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
1600 def ExpandNames(self):
1601 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
1602 self.share_locks = _ShareAll()
1604 def CheckPrereq(self):
1605 """Check prerequisites.
1608 # Retrieve all information
1609 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1610 self.all_node_info = self.cfg.GetAllNodesInfo()
1611 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1613 def Exec(self, feedback_fn):
1614 """Verify integrity of cluster, performing various test on nodes.
1618 self._feedback_fn = feedback_fn
1620 feedback_fn("* Verifying cluster config")
1622 for msg in self.cfg.VerifyConfig():
1623 self._ErrorIf(True, self.ECLUSTERCFG, None, msg)
1625 feedback_fn("* Verifying cluster certificate files")
1627 for cert_filename in constants.ALL_CERT_FILES:
1628 (errcode, msg) = _VerifyCertificate(cert_filename)
1629 self._ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1631 feedback_fn("* Verifying hypervisor parameters")
1633 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1634 self.all_inst_info.values()))
1636 feedback_fn("* Verifying all nodes belong to an existing group")
1638 # We do this verification here because, should this bogus circumstance
1639 # occur, it would never be caught by VerifyGroup, which only acts on
1640 # nodes/instances reachable from existing node groups.
1642 dangling_nodes = set(node.name for node in self.all_node_info.values()
1643 if node.group not in self.all_group_info)
1645 dangling_instances = {}
1646 no_node_instances = []
1648 for inst in self.all_inst_info.values():
1649 if inst.primary_node in dangling_nodes:
1650 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1651 elif inst.primary_node not in self.all_node_info:
1652 no_node_instances.append(inst.name)
1657 utils.CommaJoin(dangling_instances.get(node.name,
1659 for node in dangling_nodes]
1661 self._ErrorIf(bool(dangling_nodes), self.ECLUSTERDANGLINGNODES, None,
1662 "the following nodes (and their instances) belong to a non"
1663 " existing group: %s", utils.CommaJoin(pretty_dangling))
1665 self._ErrorIf(bool(no_node_instances), self.ECLUSTERDANGLINGINST, None,
1666 "the following instances have a non-existing primary-node:"
1667 " %s", utils.CommaJoin(no_node_instances))
1672 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1673 """Verifies the status of a node group.
1676 HPATH = "cluster-verify"
1677 HTYPE = constants.HTYPE_CLUSTER
1680 _HOOKS_INDENT_RE = re.compile("^", re.M)
1682 class NodeImage(object):
1683 """A class representing the logical and physical status of a node.
1686 @ivar name: the node name to which this object refers
1687 @ivar volumes: a structure as returned from
1688 L{ganeti.backend.GetVolumeList} (runtime)
1689 @ivar instances: a list of running instances (runtime)
1690 @ivar pinst: list of configured primary instances (config)
1691 @ivar sinst: list of configured secondary instances (config)
1692 @ivar sbp: dictionary of {primary-node: list of instances} for all
1693 instances for which this node is secondary (config)
1694 @ivar mfree: free memory, as reported by hypervisor (runtime)
1695 @ivar dfree: free disk, as reported by the node (runtime)
1696 @ivar offline: the offline status (config)
1697 @type rpc_fail: boolean
1698 @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1699 not whether the individual keys were correct) (runtime)
1700 @type lvm_fail: boolean
1701 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1702 @type hyp_fail: boolean
1703 @ivar hyp_fail: whether the RPC call didn't return the instance list
1704 @type ghost: boolean
1705 @ivar ghost: whether this is a known node or not (config)
1706 @type os_fail: boolean
1707 @ivar os_fail: whether the RPC call didn't return valid OS data
1709 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1710 @type vm_capable: boolean
1711 @ivar vm_capable: whether the node can host instances
1714 def __init__(self, offline=False, name=None, vm_capable=True):
1723 self.offline = offline
1724 self.vm_capable = vm_capable
1725 self.rpc_fail = False
1726 self.lvm_fail = False
1727 self.hyp_fail = False
1729 self.os_fail = False
1732 def ExpandNames(self):
1733 # This raises errors.OpPrereqError on its own:
1734 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1736 # Get instances in node group; this is unsafe and needs verification later
1738 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
1740 self.needed_locks = {
1741 locking.LEVEL_INSTANCE: inst_names,
1742 locking.LEVEL_NODEGROUP: [self.group_uuid],
1743 locking.LEVEL_NODE: [],
1746 self.share_locks = _ShareAll()
1748 def DeclareLocks(self, level):
1749 if level == locking.LEVEL_NODE:
1750 # Get members of node group; this is unsafe and needs verification later
1751 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1753 all_inst_info = self.cfg.GetAllInstancesInfo()
1755 # In Exec(), we warn about mirrored instances that have primary and
1756 # secondary living in separate node groups. To fully verify that
1757 # volumes for these instances are healthy, we will need to do an
1758 # extra call to their secondaries. We ensure here those nodes will
1760 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
1761 # Important: access only the instances whose lock is owned
1762 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1763 nodes.update(all_inst_info[inst].secondary_nodes)
1765 self.needed_locks[locking.LEVEL_NODE] = nodes
1767 def CheckPrereq(self):
1768 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
1769 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
1771 group_nodes = set(self.group_info.members)
1773 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
1776 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1778 unlocked_instances = \
1779 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
1782 raise errors.OpPrereqError("Missing lock for nodes: %s" %
1783 utils.CommaJoin(unlocked_nodes),
1786 if unlocked_instances:
1787 raise errors.OpPrereqError("Missing lock for instances: %s" %
1788 utils.CommaJoin(unlocked_instances),
1791 self.all_node_info = self.cfg.GetAllNodesInfo()
1792 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1794 self.my_node_names = utils.NiceSort(group_nodes)
1795 self.my_inst_names = utils.NiceSort(group_instances)
1797 self.my_node_info = dict((name, self.all_node_info[name])
1798 for name in self.my_node_names)
1800 self.my_inst_info = dict((name, self.all_inst_info[name])
1801 for name in self.my_inst_names)
1803 # We detect here the nodes that will need the extra RPC calls for verifying
1804 # split LV volumes; they should be locked.
1805 extra_lv_nodes = set()
1807 for inst in self.my_inst_info.values():
1808 if inst.disk_template in constants.DTS_INT_MIRROR:
1809 for nname in inst.all_nodes:
1810 if self.all_node_info[nname].group != self.group_uuid:
1811 extra_lv_nodes.add(nname)
1813 unlocked_lv_nodes = \
1814 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1816 if unlocked_lv_nodes:
1817 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
1818 utils.CommaJoin(unlocked_lv_nodes),
1820 self.extra_lv_nodes = list(extra_lv_nodes)
1822 def _VerifyNode(self, ninfo, nresult):
1823 """Perform some basic validation on data returned from a node.
1825 - check the result data structure is well formed and has all the
1827 - check ganeti version
1829 @type ninfo: L{objects.Node}
1830 @param ninfo: the node to check
1831 @param nresult: the results from the node
1833 @return: whether overall this call was successful (and we can expect
1834 reasonable values in the respose)
1838 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1840 # main result, nresult should be a non-empty dict
1841 test = not nresult or not isinstance(nresult, dict)
1842 _ErrorIf(test, self.ENODERPC, node,
1843 "unable to verify node: no data returned")
1847 # compares ganeti version
1848 local_version = constants.PROTOCOL_VERSION
1849 remote_version = nresult.get("version", None)
1850 test = not (remote_version and
1851 isinstance(remote_version, (list, tuple)) and
1852 len(remote_version) == 2)
1853 _ErrorIf(test, self.ENODERPC, node,
1854 "connection to node returned invalid data")
1858 test = local_version != remote_version[0]
1859 _ErrorIf(test, self.ENODEVERSION, node,
1860 "incompatible protocol versions: master %s,"
1861 " node %s", local_version, remote_version[0])
1865 # node seems compatible, we can actually try to look into its results
1867 # full package version
1868 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1869 self.ENODEVERSION, node,
1870 "software version mismatch: master %s, node %s",
1871 constants.RELEASE_VERSION, remote_version[1],
1872 code=self.ETYPE_WARNING)
1874 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1875 if ninfo.vm_capable and isinstance(hyp_result, dict):
1876 for hv_name, hv_result in hyp_result.iteritems():
1877 test = hv_result is not None
1878 _ErrorIf(test, self.ENODEHV, node,
1879 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1881 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1882 if ninfo.vm_capable and isinstance(hvp_result, list):
1883 for item, hv_name, hv_result in hvp_result:
1884 _ErrorIf(True, self.ENODEHV, node,
1885 "hypervisor %s parameter verify failure (source %s): %s",
1886 hv_name, item, hv_result)
1888 test = nresult.get(constants.NV_NODESETUP,
1889 ["Missing NODESETUP results"])
1890 _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1895 def _VerifyNodeTime(self, ninfo, nresult,
1896 nvinfo_starttime, nvinfo_endtime):
1897 """Check the node time.
1899 @type ninfo: L{objects.Node}
1900 @param ninfo: the node to check
1901 @param nresult: the remote results for the node
1902 @param nvinfo_starttime: the start time of the RPC call
1903 @param nvinfo_endtime: the end time of the RPC call
1907 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1909 ntime = nresult.get(constants.NV_TIME, None)
1911 ntime_merged = utils.MergeTime(ntime)
1912 except (ValueError, TypeError):
1913 _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1916 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1917 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1918 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1919 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1923 _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1924 "Node time diverges by at least %s from master node time",
1927 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1928 """Check the node LVM results.
1930 @type ninfo: L{objects.Node}
1931 @param ninfo: the node to check
1932 @param nresult: the remote results for the node
1933 @param vg_name: the configured VG name
1940 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1942 # checks vg existence and size > 20G
1943 vglist = nresult.get(constants.NV_VGLIST, None)
1945 _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1947 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1948 constants.MIN_VG_SIZE)
1949 _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1952 pvlist = nresult.get(constants.NV_PVLIST, None)
1953 test = pvlist is None
1954 _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1956 # check that ':' is not present in PV names, since it's a
1957 # special character for lvcreate (denotes the range of PEs to
1959 for _, pvname, owner_vg in pvlist:
1960 test = ":" in pvname
1961 _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1962 " '%s' of VG '%s'", pvname, owner_vg)
1964 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
1965 """Check the node bridges.
1967 @type ninfo: L{objects.Node}
1968 @param ninfo: the node to check
1969 @param nresult: the remote results for the node
1970 @param bridges: the expected list of bridges
1977 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1979 missing = nresult.get(constants.NV_BRIDGES, None)
1980 test = not isinstance(missing, list)
1981 _ErrorIf(test, self.ENODENET, node,
1982 "did not return valid bridge information")
1984 _ErrorIf(bool(missing), self.ENODENET, node, "missing bridges: %s" %
1985 utils.CommaJoin(sorted(missing)))
1987 def _VerifyNodeNetwork(self, ninfo, nresult):
1988 """Check the node network connectivity results.
1990 @type ninfo: L{objects.Node}
1991 @param ninfo: the node to check
1992 @param nresult: the remote results for the node
1996 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1998 test = constants.NV_NODELIST not in nresult
1999 _ErrorIf(test, self.ENODESSH, node,
2000 "node hasn't returned node ssh connectivity data")
2002 if nresult[constants.NV_NODELIST]:
2003 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2004 _ErrorIf(True, self.ENODESSH, node,
2005 "ssh communication with node '%s': %s", a_node, a_msg)
2007 test = constants.NV_NODENETTEST not in nresult
2008 _ErrorIf(test, self.ENODENET, node,
2009 "node hasn't returned node tcp connectivity data")
2011 if nresult[constants.NV_NODENETTEST]:
2012 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2014 _ErrorIf(True, self.ENODENET, node,
2015 "tcp communication with node '%s': %s",
2016 anode, nresult[constants.NV_NODENETTEST][anode])
2018 test = constants.NV_MASTERIP not in nresult
2019 _ErrorIf(test, self.ENODENET, node,
2020 "node hasn't returned node master IP reachability data")
2022 if not nresult[constants.NV_MASTERIP]:
2023 if node == self.master_node:
2024 msg = "the master node cannot reach the master IP (not configured?)"
2026 msg = "cannot reach the master IP"
2027 _ErrorIf(True, self.ENODENET, node, msg)
2029 def _VerifyInstance(self, instance, instanceconfig, node_image,
2031 """Verify an instance.
2033 This function checks to see if the required block devices are
2034 available on the instance's node.
2037 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2038 node_current = instanceconfig.primary_node
2040 node_vol_should = {}
2041 instanceconfig.MapLVsByNode(node_vol_should)
2043 for node in node_vol_should:
2044 n_img = node_image[node]
2045 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2046 # ignore missing volumes on offline or broken nodes
2048 for volume in node_vol_should[node]:
2049 test = volume not in n_img.volumes
2050 _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
2051 "volume %s missing on node %s", volume, node)
2053 if instanceconfig.admin_up:
2054 pri_img = node_image[node_current]
2055 test = instance not in pri_img.instances and not pri_img.offline
2056 _ErrorIf(test, self.EINSTANCEDOWN, instance,
2057 "instance not running on its primary node %s",
2060 diskdata = [(nname, success, status, idx)
2061 for (nname, disks) in diskstatus.items()
2062 for idx, (success, status) in enumerate(disks)]
2064 for nname, success, bdev_status, idx in diskdata:
2065 # the 'ghost node' construction in Exec() ensures that we have a
2067 snode = node_image[nname]
2068 bad_snode = snode.ghost or snode.offline
2069 _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
2070 self.EINSTANCEFAULTYDISK, instance,
2071 "couldn't retrieve status for disk/%s on %s: %s",
2072 idx, nname, bdev_status)
2073 _ErrorIf((instanceconfig.admin_up and success and
2074 bdev_status.ldisk_status == constants.LDS_FAULTY),
2075 self.EINSTANCEFAULTYDISK, instance,
2076 "disk/%s on %s is faulty", idx, nname)
2078 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2079 """Verify if there are any unknown volumes in the cluster.
2081 The .os, .swap and backup volumes are ignored. All other volumes are
2082 reported as unknown.
2084 @type reserved: L{ganeti.utils.FieldSet}
2085 @param reserved: a FieldSet of reserved volume names
2088 for node, n_img in node_image.items():
2089 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2090 self.all_node_info[node].group != self.group_uuid):
2091 # skip non-healthy nodes
2093 for volume in n_img.volumes:
2094 test = ((node not in node_vol_should or
2095 volume not in node_vol_should[node]) and
2096 not reserved.Matches(volume))
2097 self._ErrorIf(test, self.ENODEORPHANLV, node,
2098 "volume %s is unknown", volume)
2100 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2101 """Verify N+1 Memory Resilience.
2103 Check that if one single node dies we can still start all the
2104 instances it was primary for.
2107 cluster_info = self.cfg.GetClusterInfo()
2108 for node, n_img in node_image.items():
2109 # This code checks that every node which is now listed as
2110 # secondary has enough memory to host all instances it is
2111 # supposed to should a single other node in the cluster fail.
2112 # FIXME: not ready for failover to an arbitrary node
2113 # FIXME: does not support file-backed instances
2114 # WARNING: we currently take into account down instances as well
2115 # as up ones, considering that even if they're down someone
2116 # might want to start them even in the event of a node failure.
2117 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2118 # we're skipping nodes marked offline and nodes in other groups from
2119 # the N+1 warning, since most likely we don't have good memory
2120 # infromation from them; we already list instances living on such
2121 # nodes, and that's enough warning
2123 for prinode, instances in n_img.sbp.items():
2125 for instance in instances:
2126 bep = cluster_info.FillBE(instance_cfg[instance])
2127 if bep[constants.BE_AUTO_BALANCE]:
2128 needed_mem += bep[constants.BE_MEMORY]
2129 test = n_img.mfree < needed_mem
2130 self._ErrorIf(test, self.ENODEN1, node,
2131 "not enough memory to accomodate instance failovers"
2132 " should node %s fail (%dMiB needed, %dMiB available)",
2133 prinode, needed_mem, n_img.mfree)
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
                   (files_all, files_opt, files_mc, files_vm)):
    """Verifies file checksums collected from all nodes.

    Compares the per-node checksum maps returned by node-verify and reports
    missing files, unexpected files and checksum mismatches via C{errorif}.

    @param errorif: Callback for reporting errors
    @param nodeinfo: List of L{objects.Node} objects
    @param master_node: Name of master node
    @param all_nvinfo: RPC results

    """
    # Define functions determining which nodes to consider for a file;
    # files_mc must live on master candidates (and the master itself),
    # files_vm on vm_capable nodes
    (files_mc, lambda node: (node.master_candidate or
                             node.name == master_node)),
    (files_vm, lambda node: node.vm_capable),

    # Build mapping from filename to list of nodes which should have the file
    for (files, fn) in files2nodefn:
      # a None predicate means "all nodes"; otherwise filter by the predicate
      filenodes = nodeinfo
      filenodes = filter(fn, nodeinfo)
      nodefiles.update((filename,
                        frozenset(map(operator.attrgetter("name"), filenodes)))
                       for filename in files)

    # every known file must fall into exactly one of the three categories
    assert set(nodefiles) == (files_all | files_mc | files_vm)

    # fileinfo: filename -> {checksum -> set(node names having it)}
    fileinfo = dict((filename, {}) for filename in nodefiles)
    ignore_nodes = set()

    for node in nodeinfo:
      # nodes without usable data are excluded from the comparisons below
      ignore_nodes.add(node.name)

      nresult = all_nvinfo[node.name]

      if nresult.fail_msg or not nresult.payload:
        node_files = nresult.payload.get(constants.NV_FILELIST, None)

      test = not (node_files and isinstance(node_files, dict))
      errorif(test, cls.ENODEFILECHECK, node.name,
              "Node did not return file checksum data")
      ignore_nodes.add(node.name)

      # Build per-checksum mapping from filename to nodes having it
      for (filename, checksum) in node_files.items():
        assert filename in nodefiles
        fileinfo[filename].setdefault(checksum, set()).add(node.name)

    for (filename, checksums) in fileinfo.items():
      # sanity check: checksum strings are expected to be non-trivial
      assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"

      # Nodes having the file
      with_file = frozenset(node_name
                            for nodes in fileinfo[filename].values()
                            for node_name in nodes) - ignore_nodes

      expected_nodes = nodefiles[filename] - ignore_nodes

      # Nodes missing file
      missing_file = expected_nodes - with_file

      if filename in files_opt:
        # Optional files must be present on all nodes or on none of them
        errorif(missing_file and missing_file != expected_nodes,
                cls.ECLUSTERFILECHECK, None,
                "File %s is optional, but it must exist on all or no"
                " nodes (not found on %s)",
                filename, utils.CommaJoin(utils.NiceSort(missing_file)))

      # Non-optional files
      errorif(missing_file, cls.ECLUSTERFILECHECK, None,
              "File %s is missing from node(s) %s", filename,
              utils.CommaJoin(utils.NiceSort(missing_file)))

      # Warn if a node has a file it shouldn't
      unexpected = with_file - expected_nodes
      # NOTE(review): the lines below appear to be arguments of an errorif()
      # call on 'unexpected' — confirm against the full source
      cls.ECLUSTERFILECHECK, None,
      "File %s should not exist on node(s) %s",
      filename, utils.CommaJoin(utils.NiceSort(unexpected))

      # See if there are multiple versions of the file
      test = len(checksums) > 1
      variants = ["variant %s on %s" %
                  (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
                  for (idx, (checksum, nodes)) in
                  enumerate(sorted(checksums.items()))]
      errorif(test, cls.ECLUSTERFILECHECK, None,
              "File %s found with %s different checksums (%s)",
              filename, len(checksums), "; ".join(variants))
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
    """Verifies and the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_helper: the configured DRBD usermode helper
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # verify the node runs the same usermode helper the cluster expects
    helper_result = nresult.get(constants.NV_DRBDHELPER, None)
    test = (helper_result == None)
    _ErrorIf(test, self.ENODEDRBDHELPER, node,
             "no drbd usermode helper returned")
    status, payload = helper_result
    _ErrorIf(test, self.ENODEDRBDHELPER, node,
             "drbd usermode helper check unsuccessful: %s", payload)
    test = status and (payload != drbd_helper)
    _ErrorIf(test, self.ENODEDRBDHELPER, node,
             "wrong drbd usermode helper: %s", payload)

    # compute the DRBD minors
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, self.ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
      # ghost instance should not be running, but otherwise we
      # don't give double warnings (both ghost instance and
      # unallocated minor in use)
      node_drbd[minor] = (instance, False)
      instance = instanceinfo[instance]
      node_drbd[minor] = (instance.name, instance.admin_up)

    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, self.ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
    # we cannot check drbd status

    # minors expected but not active, then minors active but unallocated
    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, self.ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, self.ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)
  def _UpdateNodeOS(self, ninfo, nresult, nimg):
    """Builds the node OS structures.

    Parses the OS list returned by node-verify and stores it, keyed by OS
    name, in C{nimg.oslist}.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    remote_os = nresult.get(constants.NV_OSLIST, None)
    # each entry must be a 7-element list:
    # (name, path, status, diagnose, variants, parameters, api_versions)
    test = (not isinstance(remote_os, list) or
            not compat.all(isinstance(v, list) and len(v) == 7
                           for v in remote_os))

    _ErrorIf(test, self.ENODEOS, node,
             "node hasn't returned valid OS data")

    for (name, os_path, status, diagnose,
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:

      if name not in os_dict:

      # parameters is a list of lists instead of list of tuples due to
      # JSON lacking a real tuple type, fix it:
      parameters = [tuple(v) for v in parameters]
      os_dict[name].append((os_path, status, diagnose,
                            set(variants), set(parameters), set(api_ver)))

    nimg.oslist = os_dict
  def _VerifyNodeOS(self, ninfo, nimg, base):
    """Verifies the node OS list.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nimg: the node image object
    @param base: the 'template' node we match against (e.g. from the master)

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"

    # render "key: value" pairs for human-readable parameter diffs
    beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
    for os_name, os_data in nimg.oslist.items():
      assert os_data, "Empty OS status for OS %s?!" % os_name
      # only the first entry is authoritative; duplicates shadow it
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
      _ErrorIf(not f_status, self.ENODEOS, node,
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
               "OS '%s' has multiple entries (first one shadows the rest): %s",
               os_name, utils.CommaJoin([v[0] for v in os_data]))
      # comparisons with the 'base' image
      test = os_name not in base.oslist
      _ErrorIf(test, self.ENODEOS, node,
               "Extra OS %s not present on reference node (%s)",
      assert base.oslist[os_name], "Base node has empty OS status?"
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
      # base OS is invalid, skipping
      # compare API versions, variants and parameters against the reference
      for kind, a, b in [("API version", f_api, b_api),
                         ("variants list", f_var, b_var),
                         ("parameters", beautify_params(f_param),
                          beautify_params(b_param))]:
        _ErrorIf(a != b, self.ENODEOS, node,
                 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
                 kind, os_name, base.name,
                 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))

    # check any missing OSes
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
    _ErrorIf(missing, self.ENODEOS, node,
             "OSes present on reference node %s but missing on this node: %s",
             base.name, utils.CommaJoin(missing))
  def _VerifyOob(self, ninfo, nresult):
    """Verifies out of band functionality of a node.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    # We just have to verify the paths on master and/or master candidates
    # as the oob helper is invoked on the master
    if ((ninfo.master_candidate or ninfo.master_capable) and
        constants.NV_OOB_PATHS in nresult):
      for path_result in nresult[constants.NV_OOB_PATHS]:
        # a non-empty path_result is an error string reported by the node
        self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
    """Verifies and updates the node volume data.

    This function will update a L{NodeImage}'s internal structures
    with data from the remote call.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # assume failure until the LV data proves usable
    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    elif isinstance(lvdata, basestring):
      # a string payload is an error message from the node
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
               utils.SafeEncode(lvdata))
    elif not isinstance(lvdata, dict):
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
      # valid dict: record volumes and clear the failure flag
      nimg.volumes = lvdata
      nimg.lvm_fail = False
  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
    """Verifies and updates the node instance list.

    If the listing was successful, then updates this node's instance
    list. Otherwise, it marks the RPC call as failed for the instance
    list.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    idata = nresult.get(constants.NV_INSTANCELIST, None)
    test = not isinstance(idata, list)
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
    # mark hypervisor data as failed so later checks skip this node
    nimg.hyp_fail = True
    nimg.instances = idata
  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
    """Verifies and computes a node information map

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # try to read free memory (from the hypervisor)
    hv_info = nresult.get(constants.NV_HVINFO, None)
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
    nimg.mfree = int(hv_info["memory_free"])
    except (ValueError, TypeError):
      _ErrorIf(True, self.ENODERPC, node,
               "node returned invalid nodeinfo, check hypervisor")

    # FIXME: devise a free space model for file based instances as well
    if vg_name is not None:
      test = (constants.NV_VGLIST not in nresult or
              vg_name not in nresult[constants.NV_VGLIST])
      _ErrorIf(test, self.ENODELVM, node,
               "node didn't return data for the volume group '%s'"
               " - it is either missing or broken", vg_name)
      # free disk space, in MiB, for the configured volume group
      nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
      except (ValueError, TypeError):
        _ErrorIf(True, self.ENODERPC, node,
                 "node returned invalid LVM info, check LVM status")
  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
    """Gets per-disk status information for all instances.

    @type nodelist: list of strings
    @param nodelist: Node names
    @type node_image: dict of (name, L{objects.Node})
    @param node_image: Node objects
    @type instanceinfo: dict of (name, L{objects.Instance})
    @param instanceinfo: Instance objects
    @rtype: {instance: {node: [(succes, payload)]}}
    @return: a dictionary of per-instance dictionaries with nodes as
        keys and disk information as values; the disk information is a
        list of tuples (success, payload)

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    node_disks_devonly = {}
    diskless_instances = set()
    diskless = constants.DT_DISKLESS

    for nname in nodelist:
      # all instances touching this node, primary or secondary
      node_instances = list(itertools.chain(node_image[nname].pinst,
                                            node_image[nname].sinst))
      diskless_instances.update(inst for inst in node_instances
                                if instanceinfo[inst].disk_template == diskless)
      disks = [(inst, disk)
               for inst in node_instances
               for disk in instanceinfo[inst].disks]

      # No need to collect data
      node_disks[nname] = disks

      # Creating copies as SetDiskID below will modify the objects and that can
      # lead to incorrect data returned from nodes
      devonly = [dev.Copy() for (_, dev) in disks]
      self.cfg.SetDiskID(dev, nname)
      node_disks_devonly[nname] = devonly

    assert len(node_disks) == len(node_disks_devonly)

    # Collect data from all nodes with disks
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
    assert len(result) == len(node_disks)

    for (nname, nres) in result.items():
      disks = node_disks[nname]
      # No data from this node
      data = len(disks) * [(False, "node offline")]
      _ErrorIf(msg, self.ENODERPC, nname,
               "while getting disk information: %s", msg)
      # No data from this node
      data = len(disks) * [(False, msg)]
      # each payload entry must be a (success, payload) pair
      for idx, i in enumerate(nres.payload):
        if isinstance(i, (tuple, list)) and len(i) == 2:
          logging.warning("Invalid result from node %s, entry %d: %s",
          data.append((False, "Invalid result from the remote node"))

      # fold per-disk statuses into the per-instance/per-node map
      for ((inst, _), status) in zip(disks, data):
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)

    # Add empty entries for diskless instances.
    for inst in diskless_instances:
      assert inst not in instdisk

    # consistency check on the assembled structure
    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
                      compat.all(isinstance(s, (tuple, list)) and
                                 len(s) == 2 for s in statuses)
                      for inst, nnames in instdisk.items()
                      for nname, statuses in nnames.items())
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
  def _SshNodeSelector(group_uuid, all_nodes):
    """Create endless iterators for all potential SSH check hosts.

    Nodes outside the given group are grouped by their own group and each
    group's sorted name list is wrapped in an endless C{itertools.cycle}.

    """
    nodes = [node for node in all_nodes
             if (node.group != group_uuid and
    keyfunc = operator.attrgetter("group")

    # one cycling iterator per foreign node group
    return map(itertools.cycle,
               [sorted(map(operator.attrgetter("name"), names))
                for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
  def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
    """Choose which nodes should talk to which other nodes.

    We will make nodes contact all nodes in their group, and one node from
    every other group.

    @warning: This algorithm has a known issue if one node group is much
      smaller than others (e.g. just one node). In such a case all other
      nodes will talk to the single node.

    """
    online_nodes = sorted(node.name for node in group_nodes if not node.offline)
    sel = cls._SshNodeSelector(group_uuid, all_nodes)

    # each online node gets one target from every foreign group's cycle
    return (online_nodes,
            dict((name, sorted([i.next() for i in sel]))
                 for name in online_nodes))
  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks just ran in the post phase and their failure makes
    the output be logged in the verify output and the verification to fail.

    """
    "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())

    # expose per-node tags as NODE_TAGS_<name> variables
    env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
               for node in self.my_node_info.values())
  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # no pre-hooks; post-hooks run on the member nodes of this group
    return ([], self.my_node_names)
  def Exec(self, feedback_fn):
    """Verify integrity of the node group, performing various test on nodes.

    """
    # This method has too many local variables. pylint: disable=R0914
    feedback_fn("* Verifying group '%s'" % self.group_info.name)

    if not self.my_node_names:
      # nothing to verify in an empty group
      feedback_fn("* Empty node group, skipping verification")

    _ErrorIf = self._ErrorIf # pylint: disable=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn

    vg_name = self.cfg.GetVGName()
    drbd_helper = self.cfg.GetDRBDHelper()
    cluster = self.cfg.GetClusterInfo()
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
    hypervisors = cluster.enabled_hypervisors
    node_data_list = [self.my_node_info[name] for name in self.my_node_names]

    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = 0 # Count of offline nodes
    n_drained = 0 # Count of nodes being drained
    node_vol_should = {}

    # FIXME: verify OS list

    filemap = _ComputeAncillaryFiles(cluster, False)

    # do local checksums
    master_node = self.master_node = self.cfg.GetMasterNode()
    master_ip = self.cfg.GetMasterIP()

    feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))

    # parameters for the node-verify RPC; each key selects one check
    node_verify_param = {
      constants.NV_FILELIST:
        utils.UniqueSequence(filename
                             for files in filemap
                             for filename in files),
      constants.NV_NODELIST:
        self._SelectSshCheckNodes(node_data_list, self.group_uuid,
                                  self.all_node_info.values()),
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_HVPARAMS:
        _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
      constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
                                 for node in node_data_list
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      constants.NV_NODESETUP: None,
      constants.NV_TIME: None,
      constants.NV_MASTERIP: (master_node, master_ip),
      constants.NV_OSLIST: None,
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),

    # LVM/DRBD checks only make sense when a volume group is configured
    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_PVLIST] = [vg_name]
      node_verify_param[constants.NV_DRBDLIST] = None
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper

    # bridge checks
    # FIXME: this needs to be changed per node-group, not cluster-wide
    default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
    if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
      bridges.add(default_nicpp[constants.NIC_LINK])
    for instance in self.my_inst_info.values():
      for nic in instance.nics:
        full_nic = cluster.SimpleFillNIC(nic.nicparams)
        if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          bridges.add(full_nic[constants.NIC_LINK])

      node_verify_param[constants.NV_BRIDGES] = list(bridges)

    # Build our expected cluster state
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
                                                 vm_capable=node.vm_capable))
                      for node in node_data_list)

    # collect all distinct out-of-band helper paths
    for node in self.all_node_info.values():
      path = _SupportsOob(self.cfg, node)
      if path and path not in oob_paths:
        oob_paths.append(path)

      node_verify_param[constants.NV_OOB_PATHS] = oob_paths

    for instance in self.my_inst_names:
      inst_config = self.my_inst_info[instance]

      # create "ghost" images for instance nodes outside our node set
      for nname in inst_config.all_nodes:
        if nname not in node_image:
          gnode = self.NodeImage(name=nname)
          gnode.ghost = (nname not in self.all_node_info)
          node_image[nname] = gnode

      inst_config.MapLVsByNode(node_vol_should)

      pnode = inst_config.primary_node
      node_image[pnode].pinst.append(instance)

      # record secondary relationships (sbp: secondary-by-primary map)
      for snode in inst_config.secondary_nodes:
        nimg = node_image[snode]
        nimg.sinst.append(instance)
        if pnode not in nimg.sbp:
          nimg.sbp[pnode] = []
        nimg.sbp[pnode].append(instance)

    # At this point, we have the in-memory data structures complete,
    # except for the runtime information, which we'll gather next

    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
                                           self.cfg.GetClusterName())
    nvinfo_endtime = time.time()

    # extra LV-only verification for nodes outside the group holding our LVs
    if self.extra_lv_nodes and vg_name is not None:
      self.rpc.call_node_verify(self.extra_lv_nodes,
                                {constants.NV_LVLIST: vg_name},
                                self.cfg.GetClusterName())
      extra_lv_nvinfo = {}

    all_drbd_map = self.cfg.ComputeDRBDMap()

    feedback_fn("* Gathering disk information (%s nodes)" %
                len(self.my_node_names))
    instdisk = self._CollectDiskInfo(self.my_node_names, node_image,

    feedback_fn("* Verifying configuration file consistency")

    # If not all nodes are being checked, we need to make sure the master node
    # and a non-checked vm_capable node are in the list.
    absent_nodes = set(self.all_node_info).difference(self.my_node_info)
    vf_nvinfo = all_nvinfo.copy()
    vf_node_info = list(self.my_node_info.values())
    additional_nodes = []
    if master_node not in self.my_node_info:
      additional_nodes.append(master_node)
      vf_node_info.append(self.all_node_info[master_node])
    # Add the first vm_capable node we find which is not included
    for node in absent_nodes:
      nodeinfo = self.all_node_info[node]
      if nodeinfo.vm_capable and not nodeinfo.offline:
        additional_nodes.append(node)
        vf_node_info.append(self.all_node_info[node])

    # fetch the file checksums from the additional nodes as well
    key = constants.NV_FILELIST
    vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
                                               {key: node_verify_param[key]},
                                               self.cfg.GetClusterName()))
      vf_nvinfo = all_nvinfo
      vf_node_info = self.my_node_info.values()

    self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)

    feedback_fn("* Verifying node status")

    # per-node verification loop
    for node_i in node_data_list:
      nimg = node_image[node]
        feedback_fn("* Skipping offline node %s" % (node,))

      # classify the node for the progress message
      if node == master_node:
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:

        feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
        nimg.rpc_fail = True

      nresult = all_nvinfo[node].payload

      nimg.call_ok = self._VerifyNode(node_i, nresult)
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
      self._VerifyNodeNetwork(node_i, nresult)
      self._VerifyOob(node_i, nresult)

        self._VerifyNodeLVM(node_i, nresult, vg_name)
        self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,

        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
        self._UpdateNodeInstances(node_i, nresult, nimg)
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
      self._UpdateNodeOS(node_i, nresult, nimg)

      # the first node with valid OS data becomes the reference image
      if not nimg.os_fail:
        if refos_img is None:
        self._VerifyNodeOS(node_i, nimg, refos_img)
      self._VerifyNodeBridges(node_i, nresult, bridges)

      # Check whether all running instancies are primary for the node. (This
      # can no longer be done from _VerifyInstance below, since some of the
      # wrong instances could be from other node groups.)
      non_primary_inst = set(nimg.instances).difference(nimg.pinst)

      for inst in non_primary_inst:
        test = inst in self.all_inst_info
        _ErrorIf(test, self.EINSTANCEWRONGNODE, inst,
                 "instance should not run on node %s", node_i.name)
        _ErrorIf(not test, self.ENODEORPHANINSTANCE, node_i.name,
                 "node is running unknown instance %s", inst)

    # fold in the LV data gathered from the extra (out-of-group) nodes
    for node, result in extra_lv_nvinfo.items():
      self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
                              node_image[node], vg_name)

    feedback_fn("* Verifying instance status")
    for instance in self.my_inst_names:
        feedback_fn("* Verifying instance %s" % instance)
      inst_config = self.my_inst_info[instance]
      self._VerifyInstance(instance, inst_config, node_image,

      inst_nodes_offline = []

      pnode = inst_config.primary_node
      pnode_img = node_image[pnode]
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
               self.ENODERPC, pnode, "instance %s, connection to"
               " primary node failed", instance)

      _ErrorIf(inst_config.admin_up and pnode_img.offline,
               self.EINSTANCEBADNODE, instance,
               "instance is marked as running and lives on offline node %s",
               inst_config.primary_node)

      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # FIXME: does not support file-backed instances
      if not inst_config.secondary_nodes:
        i_non_redundant.append(instance)

      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
               instance, "instance has multiple secondary nodes: %s",
               utils.CommaJoin(inst_config.secondary_nodes),
               code=self.ETYPE_WARNING)

      # detect instances split across node groups (internally mirrored only)
      if inst_config.disk_template in constants.DTS_INT_MIRROR:
        pnode = inst_config.primary_node
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
        instance_groups = {}

        for node in instance_nodes:
          instance_groups.setdefault(self.all_node_info[node].group,

          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
          # Sort so that we always list the primary node first.
          for group, nodes in sorted(instance_groups.items(),
                                     key=lambda (_, nodes): pnode in nodes,

        self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
                      instance, "instance has primary and secondary nodes in"
                      " different groups: %s", utils.CommaJoin(pretty_list),
                      code=self.ETYPE_WARNING)

      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)

      for snode in inst_config.secondary_nodes:
        s_img = node_image[snode]
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
                 "instance %s, connection to secondary node failed", instance)

          inst_nodes_offline.append(snode)

      # warn that the instance lives on offline nodes
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
               "instance has offline secondary node(s) %s",
               utils.CommaJoin(inst_nodes_offline))
      # ... or ghost/non-vm_capable nodes
      for node in inst_config.all_nodes:
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
                 "instance lives on ghost node %s", node)
        _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
                 instance, "instance lives on non-vm_capable node %s", node)

    feedback_fn("* Verifying orphan volumes")
    reserved = utils.FieldSet(*cluster.reserved_lvs)

    # We will get spurious "unknown volume" warnings if any node of this group
    # is secondary for an instance whose primary is in another group. To avoid
    # them, we find these instances and add their volumes to node_vol_should.
    for inst in self.all_inst_info.values():
      for secondary in inst.secondary_nodes:
        if (secondary in self.my_node_info
            and inst.name not in self.my_inst_info):
          inst.MapLVsByNode(node_vol_should)

    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)

    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
      feedback_fn("* Verifying N+1 Memory redundancy")
      self._VerifyNPlusOneMemory(node_image, self.my_inst_info)

    # summary notices
    feedback_fn("* Other Notes")
      feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
                  % len(i_non_redundant))

    if i_non_a_balanced:
      feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
                  % len(i_non_a_balanced))

      feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
      feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result

    """
    # We only really run POST phase hooks, only for non-empty groups,
    # and are only interested in their results
    if not self.my_node_names:
    elif phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      feedback_fn("* Hooks Results")
      assert hooks_results, "invalid result from hooks"

      for node_name in hooks_results:
        res = hooks_results[node_name]
        # communication failure is only an error for online nodes
        test = msg and not res.offline
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
                      "Communication failure in hooks execution: %s", msg)
        if res.offline or msg:
          # No need to investigate payload if node is offline or gave
        # report every script that returned HKR_FAIL, with its output
        for script, hkr, output in res.payload:
          test = hkr == constants.HKR_FAIL
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
                        "Script %s failed, output:", script)
          output = self._HOOKS_INDENT_RE.sub(" ", output)
          feedback_fn("%s" % output)
class LUClusterVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  Fans out one L{opcodes.OpGroupVerifyDisks} job per node group.

  """
  def ExpandNames(self):
    # shared locks are enough: we only read the group list
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: locking.ALL_SET,

  def Exec(self, feedback_fn):
    group_names = self.owned_locks(locking.LEVEL_NODEGROUP)

    # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
    return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
                           for group in group_names])
class LUGroupVerifyDisks(NoHooksLU):
  """Verifies the status of all disks in a node group.

  """
  def ExpandNames(self):
    # Raises errors.OpPrereqError on its own if group can't be found
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.share_locks = _ShareAll()
    # lock levels are filled in incrementally by DeclareLocks
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        set([self.group_uuid] +
            # Lock all groups used by instances optimistically; this requires
            # going via the node before it's locked, requiring verification
            for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
            for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be verified which contain
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be verified
      assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
      member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)

  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))

    # Check if node groups for locked instances are still correct
    _CheckInstancesNodeGroups(self.cfg, self.instances,
                              owned_groups, owned_nodes, self.group_uuid)

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes

    """
    res_instances = set()

    # map every (node, LV name) to its owning instance
    nv_dict = _MapInstanceDisksToNodes([inst
                                        for inst in self.instances.values()

    # only vm_capable nodes we hold a lock on are queried
    nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
                           set(self.cfg.GetVmCapableNodeList()))

    node_lvs = self.rpc.call_lv_list(nodes, [])

    for (node, node_res) in node_lvs.items():
      if node_res.offline:

      msg = node_res.fail_msg
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        res_nodes[node] = msg

      # LVs found but not online mean the instance needs activate-disks
      for lv_name, (_, _, lv_online) in node_res.payload.items():
        inst = nv_dict.pop((node, lv_name), None)
        if not (lv_online or inst is None):
          res_instances.add(inst)

    # any leftover items in nv_dict are missing LVs, let's arrange the data
    for key, inst in nv_dict.iteritems():
      res_missing.setdefault(inst, []).append(list(key))

    return (res_nodes, list(res_instances), res_missing)
class LUClusterRepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  """
  REQ_BGL = False

  def ExpandNames(self):
    if self.op.instances:
      # Only the named instances and (later) their primary nodes
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
      self.needed_locks = {
        locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    else:
      # No instance list given: look at the whole cluster
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    self.share_locks = {
      locking.LEVEL_NODE: 1,
      locking.LEVEL_INSTANCE: 0,
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)

    self.wanted_instances = \
      map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object
    @rtype: boolean
    @return: True if a size mismatch was found and fixed

    """
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes

    # Group all disks by their primary node, so each node is queried once
    per_node_disks = {}
    for instance in self.wanted_instances:
      pnode = instance.primary_node
      if pnode not in per_node_disks:
        per_node_disks[pnode] = []
      for idx, disk in enumerate(instance.disks):
        per_node_disks[pnode].append((instance, idx, disk))

    changed = []
    for node, dskl in per_node_disks.items():
      newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsize(node, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsize call to node"
                        " %s, ignoring", node)
        continue
      if len(result.payload) != len(dskl):
        logging.warning("Invalid result from node %s: len(dksl)=%d,"
                        " result.payload=%s", node, len(dskl), result.payload)
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
        # RPC reports bytes, the configuration stores MiB
        size = size >> 20
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))
    return changed
class LUClusterRename(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    """
    hostname = netutils.GetHostname(name=self.op.name,
                                    family=self.cfg.GetPrimaryIPFamily())

    new_name = hostname.name
    self.ip = new_ip = hostname.ip
    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)
    if new_ip != old_ip:
      # the new IP must not be in use already, otherwise the rename would
      # create an address conflict on the network
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                   " reachable on the network" %
                                   new_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    """
    clustername = self.op.name
    ip = self.ip

    # shutdown the master IP
    master = self.cfg.GetMasterNode()
    result = self.rpc.call_node_deactivate_master_ip(master)
    result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = clustername
      cluster.master_ip = ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetOnlineNodeList()
      try:
        node_list.remove(master)
      except ValueError:
        pass
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
    finally:
      # always try to restore the master IP, even if the rename failed
      result = self.rpc.call_node_activate_master_ip(master)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)

    return clustername
3396 class LUClusterSetParams(LogicalUnit):
3397 """Change the parameters of the cluster.
3400 HPATH = "cluster-modify"
3401 HTYPE = constants.HTYPE_CLUSTER
3404 def CheckArguments(self):
3408 if self.op.uid_pool:
3409 uidpool.CheckUidPool(self.op.uid_pool)
3411 if self.op.add_uids:
3412 uidpool.CheckUidPool(self.op.add_uids)
3414 if self.op.remove_uids:
3415 uidpool.CheckUidPool(self.op.remove_uids)
3417 def ExpandNames(self):
3418 # FIXME: in the future maybe other cluster params won't require checking on
3419 # all nodes to be modified.
3420 self.needed_locks = {
3421 locking.LEVEL_NODE: locking.ALL_SET,
3423 self.share_locks[locking.LEVEL_NODE] = 1
3425 def BuildHooksEnv(self):
3430 "OP_TARGET": self.cfg.GetClusterName(),
3431 "NEW_VG_NAME": self.op.vg_name,
3434 def BuildHooksNodes(self):
3435 """Build hooks nodes.
3438 mn = self.cfg.GetMasterNode()
3441 def CheckPrereq(self):
3442 """Check prerequisites.
3444 This checks whether the given params don't conflict and
3445 if the given volume group is valid.
3448 if self.op.vg_name is not None and not self.op.vg_name:
3449 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3450 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3451 " instances exist", errors.ECODE_INVAL)
3453 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3454 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3455 raise errors.OpPrereqError("Cannot disable drbd helper while"
3456 " drbd-based instances exist",
3459 node_list = self.owned_locks(locking.LEVEL_NODE)
3461 # if vg_name not None, checks given volume group on all nodes
3463 vglist = self.rpc.call_vg_list(node_list)
3464 for node in node_list:
3465 msg = vglist[node].fail_msg
3467 # ignoring down node
3468 self.LogWarning("Error while gathering data on node %s"
3469 " (ignoring node): %s", node, msg)
3471 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3473 constants.MIN_VG_SIZE)
3475 raise errors.OpPrereqError("Error on node '%s': %s" %
3476 (node, vgstatus), errors.ECODE_ENVIRON)
3478 if self.op.drbd_helper:
3479 # checks given drbd helper on all nodes
3480 helpers = self.rpc.call_drbd_helper(node_list)
3481 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3483 self.LogInfo("Not checking drbd helper on offline node %s", node)
3485 msg = helpers[node].fail_msg
3487 raise errors.OpPrereqError("Error checking drbd helper on node"
3488 " '%s': %s" % (node, msg),
3489 errors.ECODE_ENVIRON)
3490 node_helper = helpers[node].payload
3491 if node_helper != self.op.drbd_helper:
3492 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3493 (node, node_helper), errors.ECODE_ENVIRON)
3495 self.cluster = cluster = self.cfg.GetClusterInfo()
3496 # validate params changes
3497 if self.op.beparams:
3498 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3499 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3501 if self.op.ndparams:
3502 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3503 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3505 # TODO: we need a more general way to handle resetting
3506 # cluster-level parameters to default values
3507 if self.new_ndparams["oob_program"] == "":
3508 self.new_ndparams["oob_program"] = \
3509 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3511 if self.op.nicparams:
3512 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3513 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3514 objects.NIC.CheckParameterSyntax(self.new_nicparams)
3517 # check all instances for consistency
3518 for instance in self.cfg.GetAllInstancesInfo().values():
3519 for nic_idx, nic in enumerate(instance.nics):
3520 params_copy = copy.deepcopy(nic.nicparams)
3521 params_filled = objects.FillDict(self.new_nicparams, params_copy)
3523 # check parameter syntax
3525 objects.NIC.CheckParameterSyntax(params_filled)
3526 except errors.ConfigurationError, err:
3527 nic_errors.append("Instance %s, nic/%d: %s" %
3528 (instance.name, nic_idx, err))
3530 # if we're moving instances to routed, check that they have an ip
3531 target_mode = params_filled[constants.NIC_MODE]
3532 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3533 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3534 " address" % (instance.name, nic_idx))
3536 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3537 "\n".join(nic_errors))
3539 # hypervisor list/parameters
3540 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3541 if self.op.hvparams:
3542 for hv_name, hv_dict in self.op.hvparams.items():
3543 if hv_name not in self.new_hvparams:
3544 self.new_hvparams[hv_name] = hv_dict
3546 self.new_hvparams[hv_name].update(hv_dict)
3548 # os hypervisor parameters
3549 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3551 for os_name, hvs in self.op.os_hvp.items():
3552 if os_name not in self.new_os_hvp:
3553 self.new_os_hvp[os_name] = hvs
3555 for hv_name, hv_dict in hvs.items():
3556 if hv_name not in self.new_os_hvp[os_name]:
3557 self.new_os_hvp[os_name][hv_name] = hv_dict
3559 self.new_os_hvp[os_name][hv_name].update(hv_dict)
3562 self.new_osp = objects.FillDict(cluster.osparams, {})
3563 if self.op.osparams:
3564 for os_name, osp in self.op.osparams.items():
3565 if os_name not in self.new_osp:
3566 self.new_osp[os_name] = {}
3568 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3571 if not self.new_osp[os_name]:
3572 # we removed all parameters
3573 del self.new_osp[os_name]
3575 # check the parameter validity (remote check)
3576 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3577 os_name, self.new_osp[os_name])
3579 # changes to the hypervisor list
3580 if self.op.enabled_hypervisors is not None:
3581 self.hv_list = self.op.enabled_hypervisors
3582 for hv in self.hv_list:
3583 # if the hypervisor doesn't already exist in the cluster
3584 # hvparams, we initialize it to empty, and then (in both
3585 # cases) we make sure to fill the defaults, as we might not
3586 # have a complete defaults list if the hypervisor wasn't
3588 if hv not in new_hvp:
3590 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3591 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3593 self.hv_list = cluster.enabled_hypervisors
3595 if self.op.hvparams or self.op.enabled_hypervisors is not None:
3596 # either the enabled list has changed, or the parameters have, validate
3597 for hv_name, hv_params in self.new_hvparams.items():
3598 if ((self.op.hvparams and hv_name in self.op.hvparams) or
3599 (self.op.enabled_hypervisors and
3600 hv_name in self.op.enabled_hypervisors)):
3601 # either this is a new hypervisor, or its parameters have changed
3602 hv_class = hypervisor.GetHypervisor(hv_name)
3603 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3604 hv_class.CheckParameterSyntax(hv_params)
3605 _CheckHVParams(self, node_list, hv_name, hv_params)
3608 # no need to check any newly-enabled hypervisors, since the
3609 # defaults have already been checked in the above code-block
3610 for os_name, os_hvp in self.new_os_hvp.items():
3611 for hv_name, hv_params in os_hvp.items():
3612 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3613 # we need to fill in the new os_hvp on top of the actual hv_p
3614 cluster_defaults = self.new_hvparams.get(hv_name, {})
3615 new_osp = objects.FillDict(cluster_defaults, hv_params)
3616 hv_class = hypervisor.GetHypervisor(hv_name)
3617 hv_class.CheckParameterSyntax(new_osp)
3618 _CheckHVParams(self, node_list, hv_name, new_osp)
3620 if self.op.default_iallocator:
3621 alloc_script = utils.FindFile(self.op.default_iallocator,
3622 constants.IALLOCATOR_SEARCH_PATH,
3624 if alloc_script is None:
3625 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3626 " specified" % self.op.default_iallocator,
3629 def Exec(self, feedback_fn):
3630 """Change the parameters of the cluster.
3633 if self.op.vg_name is not None:
3634 new_volume = self.op.vg_name
3637 if new_volume != self.cfg.GetVGName():
3638 self.cfg.SetVGName(new_volume)
3640 feedback_fn("Cluster LVM configuration already in desired"
3641 " state, not changing")
3642 if self.op.drbd_helper is not None:
3643 new_helper = self.op.drbd_helper
3646 if new_helper != self.cfg.GetDRBDHelper():
3647 self.cfg.SetDRBDHelper(new_helper)
3649 feedback_fn("Cluster DRBD helper already in desired state,"
3651 if self.op.hvparams:
3652 self.cluster.hvparams = self.new_hvparams
3654 self.cluster.os_hvp = self.new_os_hvp
3655 if self.op.enabled_hypervisors is not None:
3656 self.cluster.hvparams = self.new_hvparams
3657 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3658 if self.op.beparams:
3659 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3660 if self.op.nicparams:
3661 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3662 if self.op.osparams:
3663 self.cluster.osparams = self.new_osp
3664 if self.op.ndparams:
3665 self.cluster.ndparams = self.new_ndparams
3667 if self.op.candidate_pool_size is not None:
3668 self.cluster.candidate_pool_size = self.op.candidate_pool_size
3669 # we need to update the pool size here, otherwise the save will fail
3670 _AdjustCandidatePool(self, [])
3672 if self.op.maintain_node_health is not None:
3673 self.cluster.maintain_node_health = self.op.maintain_node_health
3675 if self.op.prealloc_wipe_disks is not None:
3676 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3678 if self.op.add_uids is not None:
3679 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3681 if self.op.remove_uids is not None:
3682 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3684 if self.op.uid_pool is not None:
3685 self.cluster.uid_pool = self.op.uid_pool
3687 if self.op.default_iallocator is not None:
3688 self.cluster.default_iallocator = self.op.default_iallocator
3690 if self.op.reserved_lvs is not None:
3691 self.cluster.reserved_lvs = self.op.reserved_lvs
3693 def helper_os(aname, mods, desc):
3695 lst = getattr(self.cluster, aname)
3696 for key, val in mods:
3697 if key == constants.DDM_ADD:
3699 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3702 elif key == constants.DDM_REMOVE:
3706 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3708 raise errors.ProgrammerError("Invalid modification '%s'" % key)
3710 if self.op.hidden_os:
3711 helper_os("hidden_os", self.op.hidden_os, "hidden")
3713 if self.op.blacklisted_os:
3714 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3716 if self.op.master_netdev:
3717 master = self.cfg.GetMasterNode()
3718 feedback_fn("Shutting down master ip on the current netdev (%s)" %
3719 self.cluster.master_netdev)
3720 result = self.rpc.call_node_deactivate_master_ip(master)
3721 result.Raise("Could not disable the master ip")
3722 feedback_fn("Changing master_netdev from %s to %s" %
3723 (self.cluster.master_netdev, self.op.master_netdev))
3724 self.cluster.master_netdev = self.op.master_netdev
3726 self.cfg.Update(self.cluster, feedback_fn)
3728 if self.op.master_netdev:
3729 feedback_fn("Starting the master ip on the new master netdev (%s)" %
3730 self.op.master_netdev)
3731 result = self.rpc.call_node_activate_master_ip(master)
3733 self.LogWarning("Could not re-enable the master ip on"
3734 " the master, please restart manually: %s",
3738 def _UploadHelper(lu, nodes, fname):
3739 """Helper for uploading a file and showing warnings.
3742 if os.path.exists(fname):
3743 result = lu.rpc.call_upload_file(nodes, fname)
3744 for to_node, to_result in result.items():
3745 msg = to_result.fail_msg
3747 msg = ("Copy of file %s to node %s failed: %s" %
3748 (fname, to_node, msg))
3749 lu.proc.LogWarning(msg)
def _ComputeAncillaryFiles(cluster, redist):
  """Compute files external to Ganeti which need to be consistent.

  @type redist: boolean
  @param redist: Whether to include files which need to be redistributed

  """
  # Compute files for all nodes
  files_all = set([
    constants.SSH_KNOWN_HOSTS_FILE,
    constants.CONFD_HMAC_KEY,
    constants.CLUSTER_DOMAIN_SECRET_FILE,
    constants.RAPI_USERS_FILE,
    ])

  if not redist:
    files_all.update(constants.ALL_CERT_FILES)
    files_all.update(ssconf.SimpleStore().GetFileList())
  else:
    # we need to ship at least the RAPI certificate
    files_all.add(constants.RAPI_CERT_FILE)

  if cluster.modify_etc_hosts:
    files_all.add(constants.ETC_HOSTS)

  # Files which are optional, these must:
  # - be present in one other category as well
  # - either exist or not exist on all nodes of that category (mc, vm all)
  files_opt = set([
    constants.RAPI_USERS_FILE,
    ])

  # Files which should only be on master candidates
  files_mc = set()
  if not redist:
    files_mc.add(constants.CLUSTER_CONF_FILE)

  # Files which should only be on VM-capable nodes
  files_vm = set(filename
    for hv_name in cluster.enabled_hypervisors
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])

  files_opt |= set(filename
    for hv_name in cluster.enabled_hypervisors
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])

  # Filenames in each category must be unique
  all_files_set = files_all | files_mc | files_vm
  assert (len(all_files_set) ==
          sum(map(len, [files_all, files_mc, files_vm]))), \
    "Found file listed in more than one file list"

  # Optional files must be present in one other category
  assert all_files_set.issuperset(files_opt), \
    "Optional file not in a different required list"

  return (files_all, files_opt, files_mc, files_vm)
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to
  @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
  # Gather target nodes
  cluster = lu.cfg.GetClusterInfo()
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())

  online_nodes = lu.cfg.GetOnlineNodeList()
  vm_nodes = lu.cfg.GetVmCapableNodeList()

  if additional_nodes is not None:
    online_nodes.extend(additional_nodes)
    if additional_vm:
      vm_nodes.extend(additional_nodes)

  # Never distribute to master node
  for nodelist in [online_nodes, vm_nodes]:
    if master_info.name in nodelist:
      nodelist.remove(master_info.name)

  # Gather file lists
  (files_all, _, files_mc, files_vm) = \
    _ComputeAncillaryFiles(cluster, True)

  # Never re-distribute configuration file from here
  assert not (constants.CLUSTER_CONF_FILE in files_all or
              constants.CLUSTER_CONF_FILE in files_vm)
  assert not files_mc, "Master candidates not handled in this function"

  filemap = [
    (online_nodes, files_all),
    (vm_nodes, files_vm),
    ]

  # Upload the files
  for (node_list, files) in filemap:
    for fname in files:
      _UploadHelper(lu, node_list, fname)
class LUClusterRedistConf(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    # All nodes, shared, so running instances are not disturbed
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      }
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    # A no-op update triggers ConfigWriter's distribution of config/ssconf
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)
class LUClusterActivateMasterIp(NoHooksLU):
  """Activate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Activate the master IP.

    Asks the node daemon on the master node, via RPC, to bring up the
    cluster's master IP; a failed RPC is turned into an exception by
    C{result.Raise}.

    """
    master = self.cfg.GetMasterNode()
    result = self.rpc.call_node_activate_master_ip(master)
    result.Raise("Could not activate the master IP")
class LUClusterDeactivateMasterIp(NoHooksLU):
  """Deactivate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Deactivate the master IP.

    Asks the node daemon on the master node, via RPC, to take down the
    cluster's master IP; a failed RPC is turned into an exception by
    C{result.Raise}.

    """
    master = self.cfg.GetMasterNode()
    result = self.rpc.call_node_deactivate_master_ip(master)
    result.Raise("Could not deactivate the master IP")
def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  @param lu: the logical unit on whose behalf we poll
  @param instance: the instance whose disks we wait for
  @param disks: optional subset of disks to wait for; C{None} means all
  @type oneshot: boolean
  @param oneshot: if True, poll only once instead of looping until done
  @rtype: boolean
  @return: True if none of the polled disks ended up degraded

  """
  if not instance.disks or disks is not None and not disks:
    # nothing to wait for
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                      node, disks[i].iv_name)
        continue

      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (disks[i].iv_name, mstat.sync_percent, rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)

  return not cumul_degraded
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  @rtype: boolean
  @return: True if the device (and, recursively, its children) is
      found and healthy on the given node

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

  if dev.children:
    # recurse into children (e.g. the data/meta devices of a DRBD disk)
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)

  return result
4018 class LUOobCommand(NoHooksLU):
4019 """Logical unit for OOB handling.
4023 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4025 def ExpandNames(self):
4026 """Gather locks we need.
4029 if self.op.node_names:
4030 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4031 lock_names = self.op.node_names
4033 lock_names = locking.ALL_SET
4035 self.needed_locks = {
4036 locking.LEVEL_NODE: lock_names,
4039 def CheckPrereq(self):
4040 """Check prerequisites.
4043 - the node exists in the configuration
4046 Any errors are signaled by raising errors.OpPrereqError.
4050 self.master_node = self.cfg.GetMasterNode()
4052 assert self.op.power_delay >= 0.0
4054 if self.op.node_names:
4055 if (self.op.command in self._SKIP_MASTER and
4056 self.master_node in self.op.node_names):
4057 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4058 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4060 if master_oob_handler:
4061 additional_text = ("run '%s %s %s' if you want to operate on the"
4062 " master regardless") % (master_oob_handler,
4066 additional_text = "it does not support out-of-band operations"
4068 raise errors.OpPrereqError(("Operating on the master node %s is not"
4069 " allowed for %s; %s") %
4070 (self.master_node, self.op.command,
4071 additional_text), errors.ECODE_INVAL)
4073 self.op.node_names = self.cfg.GetNodeList()
4074 if self.op.command in self._SKIP_MASTER:
4075 self.op.node_names.remove(self.master_node)
4077 if self.op.command in self._SKIP_MASTER:
4078 assert self.master_node not in self.op.node_names
4080 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4082 raise errors.OpPrereqError("Node %s not found" % node_name,
4085 self.nodes.append(node)
4087 if (not self.op.ignore_status and
4088 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4089 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4090 " not marked offline") % node_name,
4093 def Exec(self, feedback_fn):
4094 """Execute OOB and return result if we expect any.
4097 master_node = self.master_node
4100 for idx, node in enumerate(utils.NiceSort(self.nodes,
4101 key=lambda node: node.name)):
4102 node_entry = [(constants.RS_NORMAL, node.name)]
4103 ret.append(node_entry)
4105 oob_program = _SupportsOob(self.cfg, node)
4108 node_entry.append((constants.RS_UNAVAIL, None))
4111 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4112 self.op.command, oob_program, node.name)
4113 result = self.rpc.call_run_oob(master_node, oob_program,
4114 self.op.command, node.name,
4118 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4119 node.name, result.fail_msg)
4120 node_entry.append((constants.RS_NODATA, None))
4123 self._CheckPayload(result)
4124 except errors.OpExecError, err:
4125 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4127 node_entry.append((constants.RS_NODATA, None))
4129 if self.op.command == constants.OOB_HEALTH:
4130 # For health we should log important events
4131 for item, status in result.payload:
4132 if status in [constants.OOB_STATUS_WARNING,
4133 constants.OOB_STATUS_CRITICAL]:
4134 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4135 item, node.name, status)
4137 if self.op.command == constants.OOB_POWER_ON:
4139 elif self.op.command == constants.OOB_POWER_OFF:
4140 node.powered = False
4141 elif self.op.command == constants.OOB_POWER_STATUS:
4142 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4143 if powered != node.powered:
4144 logging.warning(("Recorded power state (%s) of node '%s' does not"
4145 " match actual power state (%s)"), node.powered,
4148 # For configuration changing commands we should update the node
4149 if self.op.command in (constants.OOB_POWER_ON,
4150 constants.OOB_POWER_OFF):
4151 self.cfg.Update(node, feedback_fn)
4153 node_entry.append((constants.RS_NORMAL, result.payload))
4155 if (self.op.command == constants.OOB_POWER_ON and
4156 idx < len(self.nodes) - 1):
4157 time.sleep(self.op.power_delay)
4161 def _CheckPayload(self, result):
4162 """Checks if the payload is valid.
4164 @param result: RPC result
4165 @raises errors.OpExecError: If payload is not valid
4169 if self.op.command == constants.OOB_HEALTH:
4170 if not isinstance(result.payload, list):
4171 errs.append("command 'health' is expected to return a list but got %s" %
4172 type(result.payload))
4174 for item, status in result.payload:
4175 if status not in constants.OOB_STATUSES:
4176 errs.append("health item '%s' has invalid status '%s'" %
4179 if self.op.command == constants.OOB_POWER_STATUS:
4180 if not isinstance(result.payload, dict):
4181 errs.append("power-status is expected to return a dict but got %s" %
4182 type(result.payload))
4184 if self.op.command in [
4185 constants.OOB_POWER_ON,
4186 constants.OOB_POWER_OFF,
4187 constants.OOB_POWER_CYCLE,
4189 if result.payload is not None:
4190 errs.append("%s is expected to not return payload but got '%s'" %
4191 (self.op.command, result.payload))
4194 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4195 utils.CommaJoin(errs))
class _OsQuery(_QueryBase):
  FIELDS = query.OS_FIELDS

  def ExpandNames(self, lu):
    # Lock all nodes in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    lu.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    # The following variables interact with _QueryBase._GetNames
    if self.names:
      self.wanted = self.names
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = self.use_locking

  def DeclareLocks(self, lu, level):
    # No locks are acquired (see ExpandNames), so nothing to declare
    pass

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into an a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and tuples of (path, status, diagnose,
        variants, parameters, api_versions) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "", [], [])]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for (name, path, status, diagnose, variants,
           params, api_versions) in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        # convert params from [name, help] to (name, help)
        params = [tuple(v) for v in params]
        all_os[name][node_name].append((path, status, diagnose,
                                        variants, params, api_versions))
    return all_os

  def _GetQueryData(self, lu):
    """Computes the list of OSes and their attributes.

    """
    # Locking is not used; assert no lock above cluster level is held
    assert not (compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) or
                self.do_locking or self.use_locking)

    # Only online, VM-capable nodes contribute OS information
    valid_nodes = [node.name
                   for node in lu.cfg.GetAllNodesInfo().values()
                   if not node.offline and node.vm_capable]
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
    cluster = lu.cfg.GetClusterInfo()

    data = {}

    for (os_name, os_data) in pol.items():
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
                          hidden=(os_name in cluster.hidden_os),
                          blacklisted=(os_name in cluster.blacklisted_os))

      variants = set()
      parameters = set()
      api_versions = set()

      for idx, osl in enumerate(os_data.values()):
        # OS is only valid if valid on every node (osl[0][1] is the
        # per-node status flag)
        info.valid = bool(info.valid and osl and osl[0][1])
        if not info.valid:
          break

        (node_variants, node_params, node_api) = osl[0][3:6]
        if idx == 0:
          # First entry: seed the sets
          variants.update(node_variants)
          parameters.update(node_params)
          api_versions.update(node_api)
        else:
          # Filter out inconsistent values
          variants.intersection_update(node_variants)
          parameters.intersection_update(node_params)
          api_versions.intersection_update(node_api)

      info.variants = list(variants)
      info.parameters = list(parameters)
      info.api_versions = list(api_versions)

      data[os_name] = info

    # Prepare data in requested order
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
            if name in data]
class LUOsDiagnose(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  REQ_BGL = False

  @staticmethod
  def _BuildFilter(fields, names):
    """Builds a filter for querying OSes.

    @param fields: requested output fields
    @param names: OS names to restrict the query to (may be empty)
    @return: a query filter in qlang form, or None for no filtering

    """
    name_filter = qlang.MakeSimpleFilter("name", names)

    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
    # respective field is not requested
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
                     for fname in ["hidden", "blacklisted"]
                     if fname not in fields]
    if "valid" not in fields:
      status_filter.append([qlang.OP_TRUE, "valid"])

    if status_filter:
      status_filter.insert(0, qlang.OP_AND)
    else:
      status_filter = None

    if name_filter and status_filter:
      return [qlang.OP_AND, name_filter, status_filter]
    elif name_filter:
      return name_filter
    else:
      return status_filter

  def CheckArguments(self):
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
                       self.op.output_fields, False)

  def ExpandNames(self):
    self.oq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.oq.OldStyleQuery(self)
class LUNodeRemove(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      # Best effort: the node may already be gone from the configuration
      logging.warning("Node '%s', which is about to be removed, was not found"
                      " in the list of all nodes", self.op.node_name)
    return (all_nodes, all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node, failover to another"
                                 " node is required", errors.ECODE_INVAL)

    for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first" % instance_name,
                                   errors.ECODE_INVAL)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    _RunPostHook(self, node.name)

    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    if msg:
      # Non-fatal: the node is already removed from the configuration
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)

    # Remove node from our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_REMOVE,
                                              node.name, None)
      result.Raise("Can't update hosts file with new host data")
      _RedistributeAncillaryFiles(self)
class _NodeQuery(_QueryBase):
  FIELDS = query.NODE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedNodes(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    # Live data requires node locks; static data can be read lock-free
    self.do_locking = (self.use_locking and
                       query.NQ_LIVE in self.requested_data)

    if self.do_locking:
      # If any non-static field is requested we need to lock the nodes
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    all_info = lu.cfg.GetAllNodesInfo()

    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)

    # Gather data as requested
    if query.NQ_LIVE in self.requested_data:
      # filter out non-vm_capable nodes
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]

      node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
                                        lu.cfg.GetHypervisorType())
      live_data = dict((name, nresult.payload)
                       for (name, nresult) in node_data.items()
                       if not nresult.fail_msg and nresult.payload)
    else:
      live_data = None

    if query.NQ_INST in self.requested_data:
      node_to_primary = dict([(name, set()) for name in nodenames])
      node_to_secondary = dict([(name, set()) for name in nodenames])

      inst_data = lu.cfg.GetAllInstancesInfo()

      for inst in inst_data.values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)
    else:
      node_to_primary = None
      node_to_secondary = None

    if query.NQ_OOB in self.requested_data:
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
                         for name, node in all_info.iteritems())
    else:
      oob_support = None

    if query.NQ_GROUP in self.requested_data:
      groups = lu.cfg.GetAllNodeGroupsInfo()
    else:
      groups = {}

    return query.NodeQueryData([all_info[name] for name in nodenames],
                               live_data, lu.cfg.GetMasterNode(),
                               node_to_primary, node_to_secondary, groups,
                               oob_support, lu.cfg.GetClusterInfo())
class LUNodeQuery(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
                         self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.nq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.nq.OldStyleQuery(self)
class LUNodeQueryvols(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    nodenames = self.owned_locks(locking.LEVEL_NODE)
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = self.cfg.GetAllInstancesInfo()
    vol2inst = _MapInstanceDisksToNodes(ilist.values())

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        # Best effort: skip nodes whose RPC failed instead of aborting
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = sorted(nresult.payload,
                         key=operator.itemgetter("dev"))

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol["dev"]
          elif field == "vg":
            val = vol["vg"]
          elif field == "name":
            val = vol["name"]
          elif field == "size":
            val = int(float(vol["size"]))
          elif field == "instance":
            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output
class LUNodeQueryStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  REQ_BGL = False

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    self.nodes = self.owned_locks(locking.LEVEL_NODE)

    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields

    # Never ask for node or type as it's only known to the LU
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
      while extra in fields:
        fields.remove(extra)

    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        # Best effort: skip nodes whose RPC failed instead of aborting
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]

        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(str(val))

        result.append(out)

    return result
class _InstanceQuery(_QueryBase):
  FIELDS = query.INSTANCE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedInstances(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    # Live data requires locking; static data can be read lock-free
    self.do_locking = (self.use_locking and
                       query.IQ_LIVE in self.requested_data)
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      lu.needed_locks[locking.LEVEL_NODEGROUP] = []
      lu.needed_locks[locking.LEVEL_NODE] = []
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.do_grouplocks = (self.do_locking and
                          query.IQ_NODES in self.requested_data)

  def DeclareLocks(self, lu, level):
    if self.do_locking:
      if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
        assert not lu.needed_locks[locking.LEVEL_NODEGROUP]

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lu.needed_locks[locking.LEVEL_NODEGROUP] = \
          set(group_uuid
              for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
              for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
      elif level == locking.LEVEL_NODE:
        lu._LockInstancesNodes() # pylint: disable=W0212

  @staticmethod
  def _CheckGroupLocks(lu):
    owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))

    # Check if node groups for locked instances are still correct
    for instance_name in owned_instances:
      _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)

  def _GetQueryData(self, lu):
    """Computes the list of instances and their attributes.

    """
    if self.do_grouplocks:
      self._CheckGroupLocks(lu)

    cluster = lu.cfg.GetClusterInfo()
    all_info = lu.cfg.GetAllInstancesInfo()

    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)

    instance_list = [all_info[name] for name in instance_names]
    nodes = frozenset(itertools.chain(*(inst.all_nodes
                                        for inst in instance_list)))
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
    bad_nodes = []
    offline_nodes = []
    wrongnode_inst = set()

    # Gather data as requested
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
      live_data = {}
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          assert result.fail_msg
          offline_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        elif result.payload:
          for inst in result.payload:
            if inst in all_info:
              if all_info[inst].primary_node == name:
                live_data.update(result.payload)
              else:
                wrongnode_inst.add(inst)
            else:
              # orphan instance; we don't list it here as we don't
              # handle this case yet in the output of instance listing
              logging.warning("Orphan instance '%s' found on node %s",
                              inst, name)
        # else no instance is alive
    else:
      live_data = {}

    if query.IQ_DISKUSAGE in self.requested_data:
      disk_usage = dict((inst.name,
                         _ComputeDiskSize(inst.disk_template,
                                          [{constants.IDISK_SIZE: disk.size}
                                           for disk in inst.disks]))
                        for inst in instance_list)
    else:
      disk_usage = None

    if query.IQ_CONSOLE in self.requested_data:
      consinfo = {}
      for inst in instance_list:
        if inst.name in live_data:
          # Instance is running
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
        else:
          consinfo[inst.name] = None
      assert set(consinfo.keys()) == set(instance_names)
    else:
      consinfo = None

    if query.IQ_NODES in self.requested_data:
      node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
                                            instance_list)))
      nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
      groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
                    for uuid in set(map(operator.attrgetter("group"),
                                        nodes.values())))
    else:
      nodes = None
      groups = None

    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
                                   disk_usage, offline_nodes, bad_nodes,
                                   live_data, wrongnode_inst, consinfo,
                                   nodes, groups)
class LUQuery(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    qcls = _GetQueryImplementation(self.op.what)

    self.impl = qcls(self.op.filter, self.op.fields, self.op.use_locking)

  def ExpandNames(self):
    self.impl.ExpandNames(self)

  def DeclareLocks(self, level):
    self.impl.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.impl.NewStyleQuery(self)
class LUQueryFields(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.qcls = _GetQueryImplementation(self.op.what)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)
class LUNodeModifyStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(self.op.node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))
class LUNodeAdd(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _NFLAGS = ["master_capable", "vm_capable"]

  def CheckArguments(self):
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
    # validate/normalize the node name
    self.hostname = netutils.GetHostname(name=self.op.node_name,
                                         family=self.primary_ip_family)
    self.op.node_name = self.hostname.name

    if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
      raise errors.OpPrereqError("Cannot readd the master node",
                                 errors.ECODE_STATE)

    if self.op.readd and self.op.group:
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
                                 " being readded", errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # Exclude added node
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
    post_nodes = pre_nodes + [self.op.node_name, ]

    return (pre_nodes, post_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) matches the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    cfg = self.cfg
    hostname = self.hostname
    node = hostname.name
    primary_ip = self.op.primary_ip = hostname.ip
    if self.op.secondary_ip is None:
      if self.primary_ip_family == netutils.IP6Address.family:
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
                                   " IPv4 address must be given as secondary",
                                   errors.ECODE_INVAL)
      self.op.secondary_ip = primary_ip

    secondary_ip = self.op.secondary_ip
    if not netutils.IP4Address.IsValid(secondary_ip):
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                 " address" % secondary_ip, errors.ECODE_INVAL)

    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)

    self.changed_primary_ip = False

    for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
      if self.op.readd and node == existing_node_name:
        if existing_node.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # After this 'if' block, None is no longer a valid value for the
    # _capable op attributes
    if self.op.readd:
      old_node = self.cfg.GetNodeInfo(node)
      assert old_node is not None, "Can't retrieve locked node %s" % node
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, getattr(old_node, attr))
    else:
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, True)

    if self.op.readd and not self.op.vm_capable:
      pri, sec = cfg.GetNodeInstances(node)
      if pri or sec:
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
                                   " flag set to false, but it already holds"
                                   " instances" % node,
                                   errors.ECODE_STATE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no secondary ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a secondary ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)

    # checks reachability
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                              source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to node daemon port",
                                   errors.ECODE_ENVIRON)

    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    if self.op.master_capable:
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
    else:
      self.master_candidate = False

    if self.op.readd:
      self.new_node = old_node
    else:
      node_group = cfg.LookupNodeGroup(self.op.group)
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False,
                                   group=node_group)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

    # check connectivity
    result = self.rpc.call_version([self.new_node.name])[self.new_node.name]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpPrereqError("Version mismatch master version %s,"
                                 " node version %s" %
                                 (constants.PROTOCOL_VERSION, result.payload),
                                 errors.ECODE_ENVIRON)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    new_node = self.new_node
    node = new_node.name

    # We adding a new node so we assume it's powered
    new_node.powered = True

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # copy the master/vm_capable flags
    for attr in self._NFLAGS:
      setattr(new_node, attr, getattr(self.op, attr))

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    if self.op.ndparams:
      new_node.ndparams = self.op.ndparams
    else:
      new_node.ndparams = {}

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_ADD,
                                              self.hostname.name,
                                              self.hostname.ip)
      result.Raise("Can't update hosts file with new host data")

    if new_node.secondary_ip != new_node.primary_ip:
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
                               False)

    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: ([node], {}),
      # TODO: do a node-net-test as well?
      }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
                                  additional_vm=self.op.vm_capable)
      self.context.AddNode(new_node, self.proc.GetECId())
5183 class LUNodeSetParams(LogicalUnit):
5184 """Modifies the parameters of a node.
5186 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5187 to the node role (as _ROLE_*)
5188 @cvar _R2F: a dictionary from node role to tuples of flags
5189 @cvar _FLAGS: a list of attribute names corresponding to the flags
5192 HPATH = "node-modify"
5193 HTYPE = constants.HTYPE_NODE
5195 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5197 (True, False, False): _ROLE_CANDIDATE,
5198 (False, True, False): _ROLE_DRAINED,
5199 (False, False, True): _ROLE_OFFLINE,
5200 (False, False, False): _ROLE_REGULAR,
5202 _R2F = dict((v, k) for k, v in _F2R.items())
5203 _FLAGS = ["master_candidate", "drained", "offline"]
5205 def CheckArguments(self):
5206 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5207 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5208 self.op.master_capable, self.op.vm_capable,
5209 self.op.secondary_ip, self.op.ndparams]
5210 if all_mods.count(None) == len(all_mods):
5211 raise errors.OpPrereqError("Please pass at least one modification",
5213 if all_mods.count(True) > 1:
5214 raise errors.OpPrereqError("Can't set the node into more than one"
5215 " state at the same time",
5218 # Boolean value that tells us whether we might be demoting from MC
5219 self.might_demote = (self.op.master_candidate == False or
5220 self.op.offline == True or
5221 self.op.drained == True or
5222 self.op.master_capable == False)
5224 if self.op.secondary_ip:
5225 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5226 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5227 " address" % self.op.secondary_ip,
5230 self.lock_all = self.op.auto_promote and self.might_demote
5231 self.lock_instances = self.op.secondary_ip is not None
5233 def ExpandNames(self):
5235 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5237 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5239 if self.lock_instances:
5240 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
5242 def DeclareLocks(self, level):
5243 # If we have locked all instances, before waiting to lock nodes, release
5244 # all the ones living on nodes unrelated to the current operation.
5245 if level == locking.LEVEL_NODE and self.lock_instances:
5246 self.affected_instances = []
5247 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
5250 # Build list of instances to release
5251 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
5252 for instance_name, instance in self.cfg.GetMultiInstanceInfo(locked_i):
5253 if (instance.disk_template in constants.DTS_INT_MIRROR and
5254 self.op.node_name in instance.all_nodes):
5255 instances_keep.append(instance_name)
5256 self.affected_instances.append(instance)
5258 _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)
5260 assert (set(self.owned_locks(locking.LEVEL_INSTANCE)) ==
5261 set(instances_keep))
5263 def BuildHooksEnv(self):
5266 This runs on the master node.
5270 "OP_TARGET": self.op.node_name,
5271 "MASTER_CANDIDATE": str(self.op.master_candidate),
5272 "OFFLINE": str(self.op.offline),
5273 "DRAINED": str(self.op.drained),
5274 "MASTER_CAPABLE": str(self.op.master_capable),
5275 "VM_CAPABLE": str(self.op.vm_capable),
5278 def BuildHooksNodes(self):
5279 """Build hooks nodes.
5282 nl = [self.cfg.GetMasterNode(), self.op.node_name]
# NOTE(review): this chunk is an extraction artifact — each line still
# carries its original file line number, indentation is collapsed, and
# several original lines are missing (the embedded numbering jumps, e.g.
# 5300-5301, 5305-5306, 5309, 5345, 5383, 5387-5388, 5391-5392,
# 5400-5401, 5406, 5411, 5421). Code left byte-identical; comments only.
# Purpose: prerequisite checks for LUNodeSetParams — validates the
# requested node flag/role changes before Exec applies them.
5285 def CheckPrereq(self):
5286 """Check prerequisites.
5288 This only checks the instance list against the existing names.
5291 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
# The master's role flags (MC/drained/offline) may only change through
# master-failover, never through this LU.
5293 if (self.op.master_candidate is not None or
5294 self.op.drained is not None or
5295 self.op.offline is not None):
5296 # we can't change the master's node flags
5297 if self.op.node_name == self.cfg.GetMasterNode():
5298 raise errors.OpPrereqError("The master role can be changed"
5299 " only via master-failover",
# A node flagged not-master-capable cannot be promoted to candidate.
5302 if self.op.master_candidate and not node.master_capable:
5303 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5304 " it a master candidate" % node.name,
# vm_capable may only be unset when the node hosts no instances
# (primary or secondary). NOTE(review): the `if ipri or isec:` guard
# line (orig. 5309) is missing from this extraction.
5307 if self.op.vm_capable == False:
5308 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5310 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5311 " the vm_capable flag" % node.name,
# Demoting a master candidate must not drop the cluster below the
# configured candidate pool size unless auto-promotion (lock_all) is on.
5314 if node.master_candidate and self.might_demote and not self.lock_all:
5315 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5316 # check if after removing the current node, we're missing master
5318 (mc_remaining, mc_should, _) = \
5319 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5320 if mc_remaining < mc_should:
5321 raise errors.OpPrereqError("Not enough master candidates, please"
5322 " pass auto promote option to allow"
5323 " promotion", errors.ECODE_STATE)
# Snapshot the current (MC, drained, offline) flags and map them to a
# role via the class-level _F2R table.
5325 self.old_flags = old_flags = (node.master_candidate,
5326 node.drained, node.offline)
5327 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5328 self.old_role = old_role = self._F2R[old_flags]
5330 # Check for ineffective changes
5331 for attr in self._FLAGS:
5332 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5333 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5334 setattr(self.op, attr, None)
5336 # Past this point, any flag change to False means a transition
5337 # away from the respective state, as only real changes are kept
5339 # TODO: We might query the real power state if it supports OOB
# Out-of-band-capable nodes: refuse to de-offline a powered-off node;
# nodes without OOB support cannot have their powered state changed.
5340 if _SupportsOob(self.cfg, node):
5341 if self.op.offline is False and not (node.powered or
5342 self.op.powered == True):
5343 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5344 " offline status can be reset") %
5346 elif self.op.powered is not None:
5347 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5348 " as it does not support out-of-band"
5349 " handling") % self.op.node_name)
5351 # If we're being deofflined/drained, we'll MC ourself if needed
5352 if (self.op.drained == False or self.op.offline == False or
5353 (self.op.master_capable and not node.master_capable)):
5354 if _DecideSelfPromotion(self):
5355 self.op.master_candidate = True
5356 self.LogInfo("Auto-promoting node to master candidate")
5358 # If we're no longer master capable, we'll demote ourselves from MC
5359 if self.op.master_capable == False and node.master_candidate:
5360 self.LogInfo("Demoting from master candidate")
5361 self.op.master_candidate = False
# Compute the target role: at most one of the three flags may be newly
# set to True (asserted below); an explicit False means "unset".
5364 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5365 if self.op.master_candidate:
5366 new_role = self._ROLE_CANDIDATE
5367 elif self.op.drained:
5368 new_role = self._ROLE_DRAINED
5369 elif self.op.offline:
5370 new_role = self._ROLE_OFFLINE
5371 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5372 # False is still in new flags, which means we're un-setting (the
5374 new_role = self._ROLE_REGULAR
# NOTE(review): the `new_role = old_role` body of the else branch
# (orig. ~5376-5377) is missing from this extraction.
5375 else: # no new flags, nothing, keep old role
5378 self.new_role = new_role
# De-offlining: verify the node daemon is reachable (version RPC) and
# warn that re-add is the preferred path.
5380 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5381 # Trying to transition out of offline status
5382 result = self.rpc.call_version([node.name])[node.name]
5384 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5385 " to report its version: %s" %
5386 (node.name, result.fail_msg),
5389 self.LogWarning("Transitioning node from offline to online state"
5390 " without using re-add. Please make sure the node"
# Secondary IP changes: only valid on multi-homed clusters, only when
# no instance still uses the old address, and the new address must be
# reachable from the master's secondary IP.
5393 if self.op.secondary_ip:
5394 # Ok even without locking, because this can't be changed by any LU
5395 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5396 master_singlehomed = master.secondary_ip == master.primary_ip
5397 if master_singlehomed and self.op.secondary_ip:
5398 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5399 " homed cluster", errors.ECODE_INVAL)
5402 if self.affected_instances:
5403 raise errors.OpPrereqError("Cannot change secondary ip: offline"
5404 " node has instances (%s) configured"
5405 " to use it" % self.affected_instances)
5407 # On online nodes, check that no instances are running, and that
5408 # the node has the new ip and we can reach it.
5409 for instance in self.affected_instances:
5410 _CheckInstanceDown(self, instance, "cannot change secondary ip")
5412 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5413 if master.name != node.name:
5414 # check reachability from master secondary ip to new secondary ip
5415 if not netutils.TcpPing(self.op.secondary_ip,
5416 constants.DEFAULT_NODED_PORT,
5417 source=master.secondary_ip):
5418 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5419 " based ping to node daemon port",
5420 errors.ECODE_ENVIRON)
# Merge and type-check the new node parameters for Exec to apply.
5422 if self.op.ndparams:
5423 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5424 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5425 self.new_ndparams = new_ndparams
# NOTE(review): extraction artifact — embedded original line numbers,
# collapsed indentation, missing lines (orig. 5428-5431 docstring,
# 5434-5436 node/result setup, 5445 `if val is not None:`, 5453 `if msg:`,
# 5458 `if of != nf:`, 5463 `if self.lock_all:`, 5474 comment tail).
# Code left byte-identical; comments only.
# Purpose: apply the node-parameter changes validated in CheckPrereq.
5427 def Exec(self, feedback_fn):
# Roles were computed and stashed by CheckPrereq.
5432 old_role = self.old_role
5433 new_role = self.new_role
5437 if self.op.ndparams:
5438 node.ndparams = self.new_ndparams
5440 if self.op.powered is not None:
5441 node.powered = self.op.powered
# Apply the simple boolean capability flags and record them for the
# opcode result. NOTE(review): the `if val is not None:` guard
# (orig. 5445) is missing here.
5443 for attr in ["master_capable", "vm_capable"]:
5444 val = getattr(self.op, attr)
5446 setattr(node, attr, val)
5447 result.append((attr, str(val)))
5449 if new_role != old_role:
5450 # Tell the node to demote itself, if no longer MC and not offline
5451 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5452 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5454 self.LogWarning("Node failed to demote itself: %s", msg)
# Translate the new role back into the flag triple via _R2F and record
# each changed flag in the result list.
5456 new_flags = self._R2F[new_role]
5457 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5459 result.append((desc, str(nf)))
5460 (node.master_candidate, node.drained, node.offline) = new_flags
5462 # we locked all nodes, we adjust the CP before updating this node
5464 _AdjustCandidatePool(self, [node.name])
5466 if self.op.secondary_ip:
5467 node.secondary_ip = self.op.secondary_ip
5468 result.append(("secondary_ip", self.op.secondary_ip))
5470 # this will trigger configuration file update, if needed
5471 self.cfg.Update(node, feedback_fn)
5473 # this will trigger job queue propagation or cleanup if the mc
# Exactly one of old/new role being "candidate" means the node entered
# or left the master-candidate set → re-add it to the context.
5475 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5476 self.context.ReaddNode(node)
class LUNodePowercycle(NoHooksLU):
  """Powercycles a node.

  """
  # NOTE(review): restored from a mangled extraction (embedded line
  # numbers, dropped lines). REQ_BGL and the ECODE_INVAL argument were
  # among the dropped lines and have been reconstructed — confirm
  # against upstream.
  REQ_BGL = False

  def CheckArguments(self):
    # Refuse to powercycle the master node unless --force was given.
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Schedule the powercycle on the target node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload
class LUClusterQuery(NoHooksLU):
  """Query cluster configuration.

  """
  # NOTE(review): restored from a mangled extraction. The dropped lines
  # (`REQ_BGL`, `os_hvp = {}`, `result = {`, the `"os_hvp"` entry and
  # the final `return result`) were reconstructed — confirm against
  # upstream.
  REQ_BGL = False

  def ExpandNames(self):
    # Purely read-only query; no locks needed.
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    os_hvp = {}

    # Filter just for enabled hypervisors
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = {}
      for hv_name, hv_params in hv_dict.items():
        if hv_name in cluster.enabled_hypervisors:
          os_hvp[os_name][hv_name] = hv_params

    # Convert ip_family to ip_version
    primary_ip_version = constants.IP4_VERSION
    if cluster.primary_ip_family == netutils.IP6Address.family:
      primary_ip_version = constants.IP6_VERSION

    result = {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": runtime.GetArchInfo(),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      # the first enabled hypervisor is the cluster default
      "default_hypervisor": cluster.enabled_hypervisors[0],
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "nicparams": cluster.nicparams,
      "ndparams": cluster.ndparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      "reserved_lvs": cluster.reserved_lvs,
      "primary_ip_version": primary_ip_version,
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
      "hidden_os": cluster.hidden_os,
      "blacklisted_os": cluster.blacklisted_os,
      }

    return result
class LUClusterConfigQuery(NoHooksLU):
  """Return configuration values.

  """
  # NOTE(review): restored from a mangled extraction. The dropped lines
  # (`REQ_BGL`, `values = []`, the `else:` before ParameterError and
  # the final `return values`) were reconstructed — confirm upstream.
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause", "volume_group_name")

  def CheckArguments(self):
    # Validate the requested output fields against the supported sets.
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    # Read-only query; no locks needed.
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Dump a representation of the cluster config to the standard output.

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      elif field == "watcher_pause":
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
      elif field == "volume_group_name":
        entry = self.cfg.GetVGName()
      else:
        raise errors.ParameterError(field)
      values.append(entry)

    return values
class LUInstanceActivateDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  # NOTE(review): restored from a mangled extraction. The dropped lines
  # (`REQ_BGL`, the `if not disks_ok:` guard and `return disks_info`)
  # were reconstructed — confirm against upstream.
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
      _AssembleInstanceDisks(self, self.instance,
                             ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: False if the operation failed, otherwise a list of
      (host, instance_visible_name, node_visible_name)
      with the mapping from node devices to instance devices

  """
  # NOTE(review): restored from a mangled extraction; the dropped lines
  # (accumulator init, `if ignore_size:` / `if msg:` guards, 2nd-pass
  # `dev_path = None`, `continue`, `disks_ok = False`/`else:` branches)
  # were reconstructed — confirm against upstream.
  device_info = []
  disks_ok = True
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)

  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occured, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for idx, inst_disk in enumerate(disks):
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for idx, inst_disk in enumerate(disks):
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue

      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  Assembles the instance's disks; on failure the already-assembled
  disks are shut down again and an OpExecError is raised.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                       ignore_secondaries=force)
  # NOTE(review): the `if not disks_ok:` guard and the middle line of
  # the hint string were dropped by the extraction and have been
  # reconstructed — confirm against upstream.
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")
class LUInstanceDeactivateDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  # NOTE(review): restored from a mangled extraction. The dropped lines
  # (`REQ_BGL` and the `if self.op.force:`/`else:` pair in Exec) were
  # reconstructed — confirm against upstream.
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks

    """
    instance = self.instance
    if self.op.force:
      # forced shutdown skips the is-instance-down safety check
      _ShutdownInstanceDisks(self, instance)
    else:
      _SafeShutdownInstanceDisks(self, instance)
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance.

  This function checks if an instance is running, before calling
  _ShutdownInstanceDisks.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we shut down
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to shut down (or all, if None)

  """
  # Refuse to touch the disks while the instance is running.
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)
5810 def _ExpandCheckDisks(instance, disks):
5811 """Return the instance disks selected by the disks list
5813 @type disks: list of L{objects.Disk} or None
5814 @param disks: selected disks
5815 @rtype: list of L{objects.Disk}
5816 @return: selected instance disks to act on
5820 return instance.disks
5822 if not set(disks).issubset(instance.disks):
5823 raise errors.ProgrammerError("Can only act on disks belonging to the"
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If the ignore_primary is false, errors on the primary node are
  ignored.

  @rtype: boolean
  @return: True if all shutdowns that matter succeeded, False otherwise

  """
  # NOTE(review): the dropped lines (`all_result = True`, the outer
  # `for disk in disks:` loop header, the `if msg:` guard,
  # `all_result = False` and `return all_result`) were reconstructed
  # from the surviving fragments — confirm against upstream.
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        # failures on the primary count unless explicitly ignored;
        # failures on secondaries count only if the node is not offline
        if ((node == instance.primary_node and not ignore_primary) or
            (node != instance.primary_node and not result.offline)):
          all_result = False
  return all_result
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function check if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raise an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  # NOTE(review): the trailing `errors.ECODE_NORES)` argument of the
  # last raise was dropped by the extraction and has been
  # reconstructed — confirm against upstream.
  nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  free_mem = nodeinfo[node].payload.get("memory_free", None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)
5891 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
5892 """Checks if nodes have enough free disk space in the all VGs.
5894 This function check if all given nodes have the needed amount of
5895 free disk. In case any node has less disk or we cannot get the
5896 information from the node, this function raise an OpPrereqError
5899 @type lu: C{LogicalUnit}
5900 @param lu: a logical unit from which we get configuration data
5901 @type nodenames: C{list}
5902 @param nodenames: the list of node names to check
5903 @type req_sizes: C{dict}
5904 @param req_sizes: the hash of vg and corresponding amount of disk in
5906 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5907 or we cannot check the node
5910 for vg, req_size in req_sizes.items():
5911 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
  """Checks if nodes have enough free disk space in the specified VG.

  This function check if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raise an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type vg: C{str}
  @param vg: the volume group to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  # NOTE(review): the trailing `errors.ECODE_NORES)` argument of the
  # last raise was dropped by the extraction and has been
  # reconstructed — confirm against upstream.
  nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    vg_free = info.payload.get("vg_free", None)
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node"
                                 " %s for vg %s, result was '%s'" %
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
    if requested > vg_free:
      raise errors.OpPrereqError("Not enough disk space on target node %s"
                                 " vg %s: required %d MiB, available %d MiB" %
                                 (node, vg, requested, vg_free),
                                 errors.ECODE_NORES)
# NOTE(review): extraction artifact — embedded original line numbers,
# collapsed indentation, missing lines (e.g. orig. 5957-5958 REQ_BGL,
# 5972-5977 hooks env dict skeleton, 5979-5981 return, 5998-5999,
# 6026 RPC argument, 6034, 6050, 6057 `if msg:`). Code left
# byte-identical; comments only.
# Purpose: LU that starts an instance (disks + hypervisor start).
5951 class LUInstanceStartup(LogicalUnit):
5952 """Starts an instance.
5955 HPATH = "instance-start"
5956 HTYPE = constants.HTYPE_INSTANCE
5959 def CheckArguments(self):
# Type-check any overridden backend parameters early, locally.
5961 if self.op.beparams:
5962 # fill the beparams dict
5963 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5965 def ExpandNames(self):
5966 self._ExpandAndLockInstance()
5968 def BuildHooksEnv(self):
5971 This runs on master, primary and secondary nodes of the instance.
5975 "FORCE": self.op.force,
5978 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5982 def BuildHooksNodes(self):
5983 """Build hooks nodes.
5986 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5989 def CheckPrereq(self):
5990 """Check prerequisites.
5992 This checks that the instance is in the cluster.
5995 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5996 assert self.instance is not None, \
5997 "Cannot retrieve locked instance %s" % self.op.instance_name
# Syntax-check overridden hypervisor parameters against the cluster
# defaults before touching any node.
6000 if self.op.hvparams:
6001 # check hypervisor parameter syntax (locally)
6002 cluster = self.cfg.GetClusterInfo()
6003 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6004 filled_hvp = cluster.FillHV(instance)
6005 filled_hvp.update(self.op.hvparams)
6006 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6007 hv_type.CheckParameterSyntax(filled_hvp)
6008 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6010 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
# An offline primary is tolerated only with --ignore-offline-nodes;
# in that case parameter overrides cannot be applied and are dropped.
6012 if self.primary_offline and self.op.ignore_offline_nodes:
6013 self.proc.LogWarning("Ignoring offline primary node")
6015 if self.op.hvparams or self.op.beparams:
6016 self.proc.LogWarning("Overridden parameters are ignored")
6018 _CheckNodeOnline(self, instance.primary_node)
6020 bep = self.cfg.GetClusterInfo().FillBE(instance)
6022 # check bridges existence
6023 _CheckInstanceBridgesExist(self, instance)
# Only verify free memory if the instance is not already running.
6025 remote_info = self.rpc.call_instance_info(instance.primary_node,
6027 instance.hypervisor)
6028 remote_info.Raise("Error checking node %s" % instance.primary_node,
6029 prereq=True, ecode=errors.ECODE_ENVIRON)
6030 if not remote_info.payload: # not running already
6031 _CheckNodeFreeMemory(self, instance.primary_node,
6032 "starting instance %s" % instance.name,
6033 bep[constants.BE_MEMORY], instance.hypervisor)
6035 def Exec(self, feedback_fn):
6036 """Start the instance.
6039 instance = self.instance
6040 force = self.op.force
# Persist the desired "up" state unless --no-remember was given.
6042 if not self.op.no_remember:
6043 self.cfg.MarkInstanceUp(instance.name)
6045 if self.primary_offline:
6046 assert self.op.ignore_offline_nodes
6047 self.proc.LogInfo("Primary node offline, marked instance as started")
6049 node_current = instance.primary_node
6051 _StartInstanceDisks(self, instance, force)
6053 result = self.rpc.call_instance_start(node_current, instance,
6054 self.op.hvparams, self.op.beparams,
6055 self.op.startup_paused)
6056 msg = result.fail_msg
# On start failure, roll back the disk assembly before raising.
# NOTE(review): the `if msg:` guard (orig. 6057) is missing here.
6058 _ShutdownInstanceDisks(self, instance)
6059 raise errors.OpExecError("Could not start instance: %s" % msg)
# NOTE(review): extraction artifact — embedded original line numbers,
# collapsed indentation, missing lines (e.g. orig. 6068-6069 REQ_BGL,
# 6077-6079 env dict skeleton, 6120 RPC argument, 6132 reboot_type
# argument, 6141/6143 else-branch skeleton, 6146 start arguments,
# 6148 `if msg:`). Code left byte-identical; comments only.
# Purpose: LU that reboots an instance, either via the hypervisor
# (soft/hard) or via full shutdown + start (full reboot / stopped
# instance).
6062 class LUInstanceReboot(LogicalUnit):
6063 """Reboot an instance.
6066 HPATH = "instance-reboot"
6067 HTYPE = constants.HTYPE_INSTANCE
6070 def ExpandNames(self):
6071 self._ExpandAndLockInstance()
6073 def BuildHooksEnv(self):
6076 This runs on master, primary and secondary nodes of the instance.
6080 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6081 "REBOOT_TYPE": self.op.reboot_type,
6082 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6085 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6089 def BuildHooksNodes(self):
6090 """Build hooks nodes.
6093 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6096 def CheckPrereq(self):
6097 """Check prerequisites.
6099 This checks that the instance is in the cluster.
6102 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6103 assert self.instance is not None, \
6104 "Cannot retrieve locked instance %s" % self.op.instance_name
6106 _CheckNodeOnline(self, instance.primary_node)
6108 # check bridges existence
6109 _CheckInstanceBridgesExist(self, instance)
6111 def Exec(self, feedback_fn):
6112 """Reboot the instance.
6115 instance = self.instance
6116 ignore_secondaries = self.op.ignore_secondaries
6117 reboot_type = self.op.reboot_type
# Query the hypervisor to learn whether the instance is running; that
# decides between an in-hypervisor reboot and a stop/start cycle.
6119 remote_info = self.rpc.call_instance_info(instance.primary_node,
6121 instance.hypervisor)
6122 remote_info.Raise("Error checking node %s" % instance.primary_node)
6123 instance_running = bool(remote_info.payload)
6125 node_current = instance.primary_node
# Soft/hard reboot of a running instance: delegate to the node daemon.
6127 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6128 constants.INSTANCE_REBOOT_HARD]:
6129 for disk in instance.disks:
6130 self.cfg.SetDiskID(disk, node_current)
6131 result = self.rpc.call_instance_reboot(node_current, instance,
6133 self.op.shutdown_timeout)
6134 result.Raise("Could not reboot instance")
# Otherwise: full reboot — shut down (if running), recycle the disks,
# then start again.
6136 if instance_running:
6137 result = self.rpc.call_instance_shutdown(node_current, instance,
6138 self.op.shutdown_timeout)
6139 result.Raise("Could not shutdown instance for full reboot")
6140 _ShutdownInstanceDisks(self, instance)
6142 self.LogInfo("Instance %s was already stopped, starting now",
6144 _StartInstanceDisks(self, instance, ignore_secondaries)
6145 result = self.rpc.call_instance_start(node_current, instance,
6147 msg = result.fail_msg
# On start failure, roll back the disk assembly before raising.
# NOTE(review): the `if msg:` guard (orig. 6148) is missing here.
6149 _ShutdownInstanceDisks(self, instance)
6150 raise errors.OpExecError("Could not start instance for"
6151 " full reboot: %s" % msg)
6153 self.cfg.MarkInstanceUp(instance.name)
# NOTE(review): extraction artifact — embedded original line numbers,
# collapsed indentation, missing lines (e.g. orig. 6162-6163 REQ_BGL,
# 6175-6176 return env, 6199 `else:`, 6216 `else:`, 6219 `if msg:`,
# 6221 trailing return). Code left byte-identical; comments only.
# Purpose: LU that stops an instance and deactivates its disks.
6156 class LUInstanceShutdown(LogicalUnit):
6157 """Shutdown an instance.
6160 HPATH = "instance-stop"
6161 HTYPE = constants.HTYPE_INSTANCE
6164 def ExpandNames(self):
6165 self._ExpandAndLockInstance()
6167 def BuildHooksEnv(self):
6170 This runs on master, primary and secondary nodes of the instance.
6173 env = _BuildInstanceHookEnvByObject(self, self.instance)
6174 env["TIMEOUT"] = self.op.timeout
6177 def BuildHooksNodes(self):
6178 """Build hooks nodes.
6181 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6184 def CheckPrereq(self):
6185 """Check prerequisites.
6187 This checks that the instance is in the cluster.
6190 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6191 assert self.instance is not None, \
6192 "Cannot retrieve locked instance %s" % self.op.instance_name
# An offline primary is tolerated only with --ignore-offline-nodes.
6194 self.primary_offline = \
6195 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6197 if self.primary_offline and self.op.ignore_offline_nodes:
6198 self.proc.LogWarning("Ignoring offline primary node")
6200 _CheckNodeOnline(self, self.instance.primary_node)
6202 def Exec(self, feedback_fn):
6203 """Shutdown the instance.
6206 instance = self.instance
6207 node_current = instance.primary_node
6208 timeout = self.op.timeout
# Persist the desired "down" state unless --no-remember was given.
6210 if not self.op.no_remember:
6211 self.cfg.MarkInstanceDown(instance.name)
# With an offline primary there is nothing to contact; the config
# change above is all we can do.
6213 if self.primary_offline:
6214 assert self.op.ignore_offline_nodes
6215 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6217 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6218 msg = result.fail_msg
# Shutdown failure is only warned about; disk deactivation proceeds.
6220 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6222 _ShutdownInstanceDisks(self, instance)
# NOTE(review): extraction artifact — embedded original line numbers,
# collapsed indentation, missing lines (e.g. orig. 6231-6232 REQ_BGL,
# 6269 error code argument, 6273/6277 if/else skeleton around the
# os_type branch, 6286-6288 else branch setting os_inst). Code left
# byte-identical; comments only.
# Purpose: LU that re-runs the OS installation scripts for a stopped
# instance, optionally switching it to a different OS.
6225 class LUInstanceReinstall(LogicalUnit):
6226 """Reinstall an instance.
6229 HPATH = "instance-reinstall"
6230 HTYPE = constants.HTYPE_INSTANCE
6233 def ExpandNames(self):
6234 self._ExpandAndLockInstance()
6236 def BuildHooksEnv(self):
6239 This runs on master, primary and secondary nodes of the instance.
6242 return _BuildInstanceHookEnvByObject(self, self.instance)
6244 def BuildHooksNodes(self):
6245 """Build hooks nodes.
6248 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6251 def CheckPrereq(self):
6252 """Check prerequisites.
6254 This checks that the instance is in the cluster and is not running.
6257 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6258 assert instance is not None, \
6259 "Cannot retrieve locked instance %s" % self.op.instance_name
# All nodes of the instance must be reachable for a reinstall.
6260 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6261 " offline, cannot reinstall")
6262 for node in instance.secondary_nodes:
6263 _CheckNodeOnline(self, node, "Instance secondary node offline,"
6264 " cannot reinstall")
6266 if instance.disk_template == constants.DT_DISKLESS:
6267 raise errors.OpPrereqError("Instance '%s' has no disks" %
6268 self.op.instance_name,
6270 _CheckInstanceDown(self, instance, "cannot reinstall")
# When switching OS, the primary node must have the requested OS
# (and variant) available.
6272 if self.op.os_type is not None:
6274 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6275 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6276 instance_os = self.op.os_type
6278 instance_os = instance.os
6280 nodelist = list(instance.all_nodes)
# Merge and validate OS parameter overrides on every node.
6282 if self.op.osparams:
6283 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6284 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6285 self.os_inst = i_osdict # the new dict (without defaults)
6289 self.instance = instance
6291 def Exec(self, feedback_fn):
6292 """Reinstall the instance.
6295 inst = self.instance
# Record the OS change in the config before running create scripts.
6297 if self.op.os_type is not None:
6298 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6299 inst.os = self.op.os_type
6300 # Write to configuration
6301 self.cfg.Update(inst, feedback_fn)
6303 _StartInstanceDisks(self, inst, None)
# The OS create scripts need the disks assembled; they are shut down
# again afterwards (the instance itself stays stopped).
6305 feedback_fn("Running the instance OS create scripts...")
6306 # FIXME: pass debug option from opcode to backend
6307 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
6308 self.op.debug_level,
6309 osparams=self.os_inst)
6310 result.Raise("Could not install OS for instance %s on node %s" %
6311 (inst.name, inst.primary_node))
6313 _ShutdownInstanceDisks(self, inst)
6316 class LUInstanceRecreateDisks(LogicalUnit):
6317 """Recreate an instance's missing disks.
6320 HPATH = "instance-recreate-disks"
6321 HTYPE = constants.HTYPE_INSTANCE
6324 def CheckArguments(self):
6325 # normalise the disk list
6326 self.op.disks = sorted(frozenset(self.op.disks))
6328 def ExpandNames(self):
6329 self._ExpandAndLockInstance()
6330 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6332 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6333 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6335 self.needed_locks[locking.LEVEL_NODE] = []
6337 def DeclareLocks(self, level):
6338 if level == locking.LEVEL_NODE:
6339 # if we replace the nodes, we only need to lock the old primary,
6340 # otherwise we need to lock all nodes for disk re-creation
6341 primary_only = bool(self.op.nodes)
6342 self._LockInstancesNodes(primary_only=primary_only)
6344 def BuildHooksEnv(self):
6347 This runs on master, primary and secondary nodes of the instance.
6350 return _BuildInstanceHookEnvByObject(self, self.instance)
6352 def BuildHooksNodes(self):
6353 """Build hooks nodes.
6356 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
# Prerequisite checks for disk recreation: validates replacement node
# count against the disk template, instance state, and disk indices.
6359 def CheckPrereq(self):
6360 """Check prerequisites.
6362 This checks that the instance is in the cluster and is not running.
6365 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6366 assert instance is not None, \
6367 "Cannot retrieve locked instance %s" % self.op.instance_name
# Replacement node list, when given, must match the instance's node count.
6369 if len(self.op.nodes) != len(instance.all_nodes):
6370 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6371 " %d replacement nodes were specified" %
6372 (instance.name, len(instance.all_nodes),
6373 len(self.op.nodes)),
# DRBD8 needs exactly two nodes, plain LVM exactly one.
6375 assert instance.disk_template != constants.DT_DRBD8 or \
6376 len(self.op.nodes) == 2
6377 assert instance.disk_template != constants.DT_PLAIN or \
6378 len(self.op.nodes) == 1
# NOTE(review): the guard choosing between the two primary_node
# assignments (likely "if self.op.nodes: ... else:") is elided here.
6379 primary_node = self.op.nodes[0]
6381 primary_node = instance.primary_node
6382 _CheckNodeOnline(self, primary_node)
6384 if instance.disk_template == constants.DT_DISKLESS:
6385 raise errors.OpPrereqError("Instance '%s' has no disks" %
6386 self.op.instance_name, errors.ECODE_INVAL)
6387 # if we replace nodes *and* the old primary is offline, we don't
6389 assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
6390 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
# Skip the "instance must be down" check only when we are moving to new
# nodes and the old primary is unreachable anyway.
6391 if not (self.op.nodes and old_pnode.offline):
6392 _CheckInstanceDown(self, instance, "cannot recreate disks")
# Default to recreating all disks when none were specified.
6394 if not self.op.disks:
6395 self.op.disks = range(len(instance.disks))
6397 for idx in self.op.disks:
6398 if idx >= len(instance.disks):
6399 raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
# Partial recreation and node replacement are mutually exclusive.
6401 if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6402 raise errors.OpPrereqError("Can't recreate disks partially and"
6403 " change the nodes at the same time",
6405 self.instance = instance
# Recreate the selected disks, optionally rewriting DRBD logical ids and
# the primary node when replacement nodes were requested, then persist
# the config and create the devices.
6407 def Exec(self, feedback_fn):
6408 """Recreate the disks.
6411 instance = self.instance
6414 mods = [] # keeps track of needed logical_id changes
6416 for idx, disk in enumerate(instance.disks):
6417 if idx not in self.op.disks: # disk idx has not been passed in
6420 # update secondaries for disks, if needed
# NOTE(review): the branch body that populates to_skip for disks not
# being recreated is elided in this extract.
6422 if disk.dev_type == constants.LD_DRBD8:
6423 # need to update the nodes and minors
6424 assert len(self.op.nodes) == 2
6425 assert len(disk.logical_id) == 6 # otherwise disk internals
# DRBD logical_id layout: (nodeA, nodeB, port, minorA, minorB, secret).
6427 (_, _, old_port, _, _, old_secret) = disk.logical_id
6428 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6429 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6430 new_minors[0], new_minors[1], old_secret)
6431 assert len(disk.logical_id) == len(new_id)
6432 mods.append((idx, new_id))
6434 # now that we have passed all asserts above, we can apply the mods
6435 # in a single run (to avoid partial changes)
6436 for idx, new_id in mods:
6437 instance.disks[idx].logical_id = new_id
6439 # change primary node, if needed
6441 instance.primary_node = self.op.nodes[0]
6442 self.LogWarning("Changing the instance's nodes, you will have to"
6443 " remove any disks left on the older nodes manually")
6446 self.cfg.Update(instance, feedback_fn)
6448 _CreateDisks(self, instance, to_skip=to_skip)
# Logical unit renaming an instance: validates the new name (optionally
# resolving it and pinging its IP), renames it in the cluster config and
# lock manager, renames file-based storage, and runs the OS rename script.
6451 class LUInstanceRename(LogicalUnit):
6452 """Rename an instance.
6455 HPATH = "instance-rename"
6456 HTYPE = constants.HTYPE_INSTANCE
# The IP uniqueness check relies on name resolution, so it cannot be
# requested on its own.
6458 def CheckArguments(self):
6462 if self.op.ip_check and not self.op.name_check:
6463 # TODO: make the ip check more flexible and not depend on the name check
6464 raise errors.OpPrereqError("IP address check requires a name check",
6467 def BuildHooksEnv(self):
6470 This runs on master, primary and secondary nodes of the instance.
6473 env = _BuildInstanceHookEnvByObject(self, self.instance)
6474 env["INSTANCE_NEW_NAME"] = self.op.new_name
6477 def BuildHooksNodes(self):
6478 """Build hooks nodes.
6481 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6484 def CheckPrereq(self):
6485 """Check prerequisites.
6487 This checks that the instance is in the cluster and is not running.
6490 self.op.instance_name = _ExpandInstanceName(self.cfg,
6491 self.op.instance_name)
6492 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6493 assert instance is not None
6494 _CheckNodeOnline(self, instance.primary_node)
# Renaming a running instance is not allowed.
6495 _CheckInstanceDown(self, instance, "cannot rename")
6496 self.instance = instance
6498 new_name = self.op.new_name
6499 if self.op.name_check:
6500 hostname = netutils.GetHostname(name=new_name)
6501 if hostname.name != new_name:
6502 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6504 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6505 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6506 " same as given hostname '%s'") %
6507 (hostname.name, self.op.new_name),
6509 new_name = self.op.new_name = hostname.name
# A reachable IP at the new name means the address is already in use.
6510 if (self.op.ip_check and
6511 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6512 raise errors.OpPrereqError("IP %s of instance %s already in use" %
6513 (hostname.ip, new_name),
6514 errors.ECODE_NOTUNIQUE)
6516 instance_list = self.cfg.GetInstanceList()
6517 if new_name in instance_list and new_name != instance.name:
6518 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6519 new_name, errors.ECODE_EXISTS)
6521 def Exec(self, feedback_fn):
6522 """Rename the instance.
6525 inst = self.instance
6526 old_name = inst.name
# File-based templates store the instance name in the storage path, so
# the backing directory must be renamed as well.
6528 rename_file_storage = False
6529 if (inst.disk_template in constants.DTS_FILEBASED and
6530 self.op.new_name != inst.name):
6531 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6532 rename_file_storage = True
6534 self.cfg.RenameInstance(inst.name, self.op.new_name)
6535 # Change the instance lock. This is definitely safe while we hold the BGL.
6536 # Otherwise the new lock would have to be added in acquired mode.
6538 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6539 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6541 # re-read the instance from the configuration after rename
6542 inst = self.cfg.GetInstanceInfo(self.op.new_name)
6544 if rename_file_storage:
6545 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6546 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6547 old_file_storage_dir,
6548 new_file_storage_dir)
6549 result.Raise("Could not rename on node %s directory '%s' to '%s'"
6550 " (but the instance has been renamed in Ganeti)" %
6551 (inst.primary_node, old_file_storage_dir,
6552 new_file_storage_dir))
# Disks must be active for the OS rename script to run; a failure of the
# script is only warned about since the config rename already happened.
6554 _StartInstanceDisks(self, inst, None)
6556 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6557 old_name, self.op.debug_level)
6558 msg = result.fail_msg
6560 msg = ("Could not run OS rename script for instance %s on node %s"
6561 " (but the instance has been renamed in Ganeti): %s" %
6562 (inst.name, inst.primary_node, msg))
6563 self.proc.LogWarning(msg)
6565 _ShutdownInstanceDisks(self, inst)
# NOTE(review): the tail of Exec (likely a try/finally and a final
# "return inst.name") is elided in this extract — confirm upstream.
# Logical unit removing an instance: shuts it down on its primary node
# (optionally ignoring failures) and delegates the actual removal to the
# module-level _RemoveInstance() helper.
6570 class LUInstanceRemove(LogicalUnit):
6571 """Remove an instance.
6574 HPATH = "instance-remove"
6575 HTYPE = constants.HTYPE_INSTANCE
6578 def ExpandNames(self):
6579 self._ExpandAndLockInstance()
# Node locks are computed later, once the instance's nodes are known.
6580 self.needed_locks[locking.LEVEL_NODE] = []
6581 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6583 def DeclareLocks(self, level):
6584 if level == locking.LEVEL_NODE:
6585 self._LockInstancesNodes()
6587 def BuildHooksEnv(self):
6590 This runs on master, primary and secondary nodes of the instance.
6593 env = _BuildInstanceHookEnvByObject(self, self.instance)
6594 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6597 def BuildHooksNodes(self):
6598 """Build hooks nodes.
# Pre-hooks run only on the master; post-hooks also on the (by then
# removed) instance's nodes.
6601 nl = [self.cfg.GetMasterNode()]
6602 nl_post = list(self.instance.all_nodes) + nl
6603 return (nl, nl_post)
6605 def CheckPrereq(self):
6606 """Check prerequisites.
6608 This checks that the instance is in the cluster.
6611 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6612 assert self.instance is not None, \
6613 "Cannot retrieve locked instance %s" % self.op.instance_name
6615 def Exec(self, feedback_fn):
6616 """Remove the instance.
6619 instance = self.instance
6620 logging.info("Shutting down instance %s on node %s",
6621 instance.name, instance.primary_node)
6623 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6624 self.op.shutdown_timeout)
6625 msg = result.fail_msg
# A failed shutdown is either downgraded to a warning (ignore_failures)
# or aborts the removal.
6627 if self.op.ignore_failures:
6628 feedback_fn("Warning: can't shutdown instance: %s" % msg)
6630 raise errors.OpExecError("Could not shutdown instance %s on"
6632 (instance.name, instance.primary_node, msg))
6634 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
# Shared helper: removes the instance's disks (hard failure unless
# ignore_failures), drops it from the cluster config, and schedules the
# removal of its instance-level lock.
6637 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6638 """Utility function to remove an instance.
6641 logging.info("Removing block devices for instance %s", instance.name)
6643 if not _RemoveDisks(lu, instance):
6644 if not ignore_failures:
6645 raise errors.OpExecError("Can't remove instance's disks")
6646 feedback_fn("Warning: can't remove instance's disks")
6648 logging.info("Removing instance %s out of cluster config", instance.name)
6650 lu.cfg.RemoveInstance(instance.name)
6652 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6653 "Instance lock removal conflict"
6655 # Remove lock for the instance
6656 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
# Thin query LU: all work is delegated to an _InstanceQuery helper built
# from the opcode's name filter, output fields and locking flag.
6659 class LUInstanceQuery(NoHooksLU):
6660 """Logical unit for querying instances.
6663 # pylint: disable=W0142
6666 def CheckArguments(self):
6667 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6668 self.op.output_fields, self.op.use_locking)
6670 def ExpandNames(self):
6671 self.iq.ExpandNames(self)
6673 def DeclareLocks(self, level):
6674 self.iq.DeclareLocks(self, level)
6676 def Exec(self, feedback_fn):
6677 return self.iq.OldStyleQuery(self)
# Logical unit failing over an instance; the heavy lifting is done by a
# TLMigrateInstance tasklet created with failover semantics.
6680 class LUInstanceFailover(LogicalUnit):
6681 """Failover an instance.
6684 HPATH = "instance-failover"
6685 HTYPE = constants.HTYPE_INSTANCE
6688 def CheckArguments(self):
6689 """Check the arguments.
6692 self.iallocator = getattr(self.op, "iallocator", None)
6693 self.target_node = getattr(self.op, "target_node", None)
6695 def ExpandNames(self):
6696 self._ExpandAndLockInstance()
6698 if self.op.target_node is not None:
6699 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6701 self.needed_locks[locking.LEVEL_NODE] = []
6702 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6704 ignore_consistency = self.op.ignore_consistency
6705 shutdown_timeout = self.op.shutdown_timeout
# NOTE(review): some keyword arguments of this constructor call (e.g.
# failover=True) are elided in this extract.
6706 self._migrater = TLMigrateInstance(self, self.op.instance_name,
6709 ignore_consistency=ignore_consistency,
6710 shutdown_timeout=shutdown_timeout)
6711 self.tasklets = [self._migrater]
6713 def DeclareLocks(self, level):
6714 if level == locking.LEVEL_NODE:
6715 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
# Externally-mirrored templates can move anywhere: lock either all
# nodes (iallocator will pick) or just primary + explicit target.
6716 if instance.disk_template in constants.DTS_EXT_MIRROR:
6717 if self.op.target_node is None:
6718 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6720 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6721 self.op.target_node]
6722 del self.recalculate_locks[locking.LEVEL_NODE]
6724 self._LockInstancesNodes()
6726 def BuildHooksEnv(self):
6729 This runs on master, primary and secondary nodes of the instance.
6732 instance = self._migrater.instance
6733 source_node = instance.primary_node
6734 target_node = self.op.target_node
# NOTE(review): the "env = {" opener for this dict literal is elided.
6736 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6737 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6738 "OLD_PRIMARY": source_node,
6739 "NEW_PRIMARY": target_node,
6742 if instance.disk_template in constants.DTS_INT_MIRROR:
6743 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6744 env["NEW_SECONDARY"] = source_node
6746 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6748 env.update(_BuildInstanceHookEnvByObject(self, instance))
6752 def BuildHooksNodes(self):
6753 """Build hooks nodes.
6756 instance = self._migrater.instance
6757 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6758 return (nl, nl + [instance.primary_node])
# Logical unit live-migrating an instance (no shutdown, unlike failover);
# delegates to a TLMigrateInstance tasklet.
6761 class LUInstanceMigrate(LogicalUnit):
6762 """Migrate an instance.
6764 This is migration without shutting down, compared to the failover,
6765 which is done with shutdown.
6768 HPATH = "instance-migrate"
6769 HTYPE = constants.HTYPE_INSTANCE
6772 def ExpandNames(self):
6773 self._ExpandAndLockInstance()
6775 if self.op.target_node is not None:
6776 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6778 self.needed_locks[locking.LEVEL_NODE] = []
6779 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6781 self._migrater = TLMigrateInstance(self, self.op.instance_name,
6782 cleanup=self.op.cleanup,
6784 fallback=self.op.allow_failover)
6785 self.tasklets = [self._migrater]
# Same node-lock strategy as LUInstanceFailover: externally-mirrored
# templates lock ALL_SET or primary+target, internal ones use the
# standard instance-node recalculation.
6787 def DeclareLocks(self, level):
6788 if level == locking.LEVEL_NODE:
6789 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6790 if instance.disk_template in constants.DTS_EXT_MIRROR:
6791 if self.op.target_node is None:
6792 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6794 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6795 self.op.target_node]
6796 del self.recalculate_locks[locking.LEVEL_NODE]
6798 self._LockInstancesNodes()
6800 def BuildHooksEnv(self):
6803 This runs on master, primary and secondary nodes of the instance.
6806 instance = self._migrater.instance
6807 source_node = instance.primary_node
6808 target_node = self.op.target_node
6809 env = _BuildInstanceHookEnvByObject(self, instance)
6811 "MIGRATE_LIVE": self._migrater.live,
6812 "MIGRATE_CLEANUP": self.op.cleanup,
6813 "OLD_PRIMARY": source_node,
6814 "NEW_PRIMARY": target_node,
6817 if instance.disk_template in constants.DTS_INT_MIRROR:
6818 env["OLD_SECONDARY"] = target_node
6819 env["NEW_SECONDARY"] = source_node
6821 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6825 def BuildHooksNodes(self):
6826 """Build hooks nodes.
6829 instance = self._migrater.instance
6830 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6831 return (nl, nl + [instance.primary_node])
# Logical unit moving an instance by copying its disk data to a target
# node: shutdown on source, create disks on target, blockdev export/copy,
# update config, remove source disks, restart if the instance was up.
6834 class LUInstanceMove(LogicalUnit):
6835 """Move an instance by data-copying.
6838 HPATH = "instance-move"
6839 HTYPE = constants.HTYPE_INSTANCE
6842 def ExpandNames(self):
6843 self._ExpandAndLockInstance()
6844 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6845 self.op.target_node = target_node
6846 self.needed_locks[locking.LEVEL_NODE] = [target_node]
6847 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6849 def DeclareLocks(self, level):
6850 if level == locking.LEVEL_NODE:
# Only the primary is needed beyond the already-locked target node.
6851 self._LockInstancesNodes(primary_only=True)
6853 def BuildHooksEnv(self):
6856 This runs on master, primary and secondary nodes of the instance.
6860 "TARGET_NODE": self.op.target_node,
6861 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6863 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6866 def BuildHooksNodes(self):
6867 """Build hooks nodes.
6871 self.cfg.GetMasterNode(),
6872 self.instance.primary_node,
6873 self.op.target_node,
6877 def CheckPrereq(self):
6878 """Check prerequisites.
6880 This checks that the instance is in the cluster.
6883 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6884 assert self.instance is not None, \
6885 "Cannot retrieve locked instance %s" % self.op.instance_name
6887 node = self.cfg.GetNodeInfo(self.op.target_node)
6888 assert node is not None, \
6889 "Cannot retrieve locked node %s" % self.op.target_node
6891 self.target_node = target_node = node.name
6893 if target_node == instance.primary_node:
6894 raise errors.OpPrereqError("Instance %s is already on the node %s" %
6895 (instance.name, target_node),
6898 bep = self.cfg.GetClusterInfo().FillBE(instance)
# Only simple layouts (plain LVs or files) can be moved by copying.
6900 for idx, dsk in enumerate(instance.disks):
6901 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6902 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6903 " cannot copy" % idx, errors.ECODE_STATE)
6905 _CheckNodeOnline(self, target_node)
6906 _CheckNodeNotDrained(self, target_node)
6907 _CheckNodeVmCapable(self, target_node)
6909 if instance.admin_up:
6910 # check memory requirements on the secondary node
6911 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6912 instance.name, bep[constants.BE_MEMORY],
6913 instance.hypervisor)
6915 self.LogInfo("Not checking memory on the secondary node as"
6916 " instance will not be started")
6918 # check bridge existance
6919 _CheckInstanceBridgesExist(self, instance, node=target_node)
6921 def Exec(self, feedback_fn):
6922 """Move an instance.
6924 The move is done by shutting it down on its present node, copying
6925 the data over (slow) and starting it on the new node.
6928 instance = self.instance
6930 source_node = instance.primary_node
6931 target_node = self.target_node
6933 self.LogInfo("Shutting down instance %s on source node %s",
6934 instance.name, source_node)
6936 result = self.rpc.call_instance_shutdown(source_node, instance,
6937 self.op.shutdown_timeout)
6938 msg = result.fail_msg
6940 if self.op.ignore_consistency:
6941 self.proc.LogWarning("Could not shutdown instance %s on node %s."
6942 " Proceeding anyway. Please make sure node"
6943 " %s is down. Error details: %s",
6944 instance.name, source_node, source_node, msg)
6946 raise errors.OpExecError("Could not shutdown instance %s on"
6948 (instance.name, source_node, msg))
6950 # create the target disks
# On creation failure, clean up any partially-created target disks and
# release reserved DRBD minors before re-raising.
6952 _CreateDisks(self, instance, target_node=target_node)
6953 except errors.OpExecError:
6954 self.LogWarning("Device creation failed, reverting...")
6956 _RemoveDisks(self, instance, target_node=target_node)
6958 self.cfg.ReleaseDRBDMinors(instance.name)
6961 cluster_name = self.cfg.GetClusterInfo().cluster_name
6964 # activate, get path, copy the data over
# Copy errors are collected in errs rather than aborting per-disk.
6965 for idx, disk in enumerate(instance.disks):
6966 self.LogInfo("Copying data for disk %d", idx)
6967 result = self.rpc.call_blockdev_assemble(target_node, disk,
6968 instance.name, True, idx)
6970 self.LogWarning("Can't assemble newly created disk %d: %s",
6971 idx, result.fail_msg)
6972 errs.append(result.fail_msg)
6974 dev_path = result.payload
6975 result = self.rpc.call_blockdev_export(source_node, disk,
6976 target_node, dev_path,
6979 self.LogWarning("Can't copy data over for disk %d: %s",
6980 idx, result.fail_msg)
6981 errs.append(result.fail_msg)
6985 self.LogWarning("Some disks failed to copy, aborting")
6987 _RemoveDisks(self, instance, target_node=target_node)
6989 self.cfg.ReleaseDRBDMinors(instance.name)
6990 raise errors.OpExecError("Errors during disk copy: %s" %
# Point the config at the new primary only after a successful copy.
6993 instance.primary_node = target_node
6994 self.cfg.Update(instance, feedback_fn)
6996 self.LogInfo("Removing the disks on the original node")
6997 _RemoveDisks(self, instance, target_node=source_node)
6999 # Only start the instance if it's marked as up
7000 if instance.admin_up:
7001 self.LogInfo("Starting instance %s on node %s",
7002 instance.name, target_node)
7004 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7005 ignore_secondaries=True)
7007 _ShutdownInstanceDisks(self, instance)
7008 raise errors.OpExecError("Can't activate the instance's disks")
7010 result = self.rpc.call_instance_start(target_node, instance,
7012 msg = result.fail_msg
7014 _ShutdownInstanceDisks(self, instance)
7015 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7016 (instance.name, target_node, msg))
# Logical unit migrating all primary instances off a node by submitting
# one OpInstanceMigrate job per instance (returned via ResultWithJobs).
7019 class LUNodeMigrate(LogicalUnit):
7020 """Migrate all instances from a node.
7023 HPATH = "node-migrate"
7024 HTYPE = constants.HTYPE_NODE
7027 def CheckArguments(self):
7030 def ExpandNames(self):
7031 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
# All locks shared: this LU only reads state and submits jobs.
7033 self.share_locks = _ShareAll()
7034 self.needed_locks = {
7035 locking.LEVEL_NODE: [self.op.node_name],
7038 def BuildHooksEnv(self):
7041 This runs on the master, the primary and all the secondaries.
7045 "NODE_NAME": self.op.node_name,
7048 def BuildHooksNodes(self):
7049 """Build hooks nodes.
7052 nl = [self.cfg.GetMasterNode()]
7055 def CheckPrereq(self):
7058 def Exec(self, feedback_fn):
7059 # Prepare jobs for migration instances
7061 [opcodes.OpInstanceMigrate(instance_name=inst.name,
7064 iallocator=self.op.iallocator,
7065 target_node=self.op.target_node)]
7066 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7069 # TODO: Run iallocator in this opcode and pass correct placement options to
7070 # OpInstanceMigrate. Since other jobs can modify the cluster between
7071 # running the iallocator and the actual migration, a good consistency model
7072 # will have to be found.
7074 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7075 frozenset([self.op.node_name]))
7077 return ResultWithJobs(jobs)
7080 class TLMigrateInstance(Tasklet):
7081 """Tasklet class for instance migration.
7084 @ivar live: whether the migration will be done live or non-live;
7085 this variable is initalized only after CheckPrereq has run
7086 @type cleanup: boolean
7087 @ivar cleanup: Wheater we cleanup from a failed migration
7088 @type iallocator: string
7089 @ivar iallocator: The iallocator used to determine target_node
7090 @type target_node: string
7091 @ivar target_node: If given, the target_node to reallocate the instance to
7092 @type failover: boolean
7093 @ivar failover: Whether operation results in failover or migration
7094 @type fallback: boolean
7095 @ivar fallback: Whether fallback to failover is allowed if migration not
7097 @type ignore_consistency: boolean
7098 @ivar ignore_consistency: Wheter we should ignore consistency between source
7100 @type shutdown_timeout: int
7101 @ivar shutdown_timeout: In case of failover timeout of the shutdown
# Tasklet constructor: stores the migration/failover parameters verbatim;
# self.live is decided later in CheckPrereq from the opcode/hypervisor.
7104 def __init__(self, lu, instance_name, cleanup=False,
7105 failover=False, fallback=False,
7106 ignore_consistency=False,
7107 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
7108 """Initializes this class.
7111 Tasklet.__init__(self, lu)
7114 self.instance_name = instance_name
7115 self.cleanup = cleanup
7116 self.live = False # will be overridden later
7117 self.failover = failover
7118 self.fallback = fallback
7119 self.ignore_consistency = ignore_consistency
7120 self.shutdown_timeout = shutdown_timeout
# Prerequisite checks for migration/failover: resolves the instance,
# possibly downgrades migration to failover, determines the target node
# (iallocator, explicit, or DRBD secondary) and the live/non-live mode.
7122 def CheckPrereq(self):
7123 """Check prerequisites.
7125 This checks that the instance is in the cluster.
7128 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7129 instance = self.cfg.GetInstanceInfo(instance_name)
7130 assert instance is not None
7131 self.instance = instance
# A down instance cannot be live-migrated; with fallback allowed we
# silently switch to failover instead.
7133 if (not self.cleanup and not instance.admin_up and not self.failover and
7135 self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
7137 self.failover = True
7139 if instance.disk_template not in constants.DTS_MIRRORED:
7144 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7145 " %s" % (instance.disk_template, text),
7148 if instance.disk_template in constants.DTS_EXT_MIRROR:
7149 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7151 if self.lu.op.iallocator:
7152 self._RunAllocator()
7154 # We set set self.target_node as it is required by
7156 self.target_node = self.lu.op.target_node
7158 # self.target_node is already populated, either directly or by the
7160 target_node = self.target_node
7161 if self.target_node == instance.primary_node:
7162 raise errors.OpPrereqError("Cannot migrate instance %s"
7163 " to its primary (%s)" %
7164 (instance.name, instance.primary_node))
7166 if len(self.lu.tasklets) == 1:
7167 # It is safe to release locks only when we're the only tasklet
7169 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7170 keep=[instance.primary_node, self.target_node])
# Internally-mirrored (DRBD) case: the target is the fixed secondary,
# so neither an iallocator nor an arbitrary target node is accepted.
7173 secondary_nodes = instance.secondary_nodes
7174 if not secondary_nodes:
7175 raise errors.ConfigurationError("No secondary node but using"
7176 " %s disk template" %
7177 instance.disk_template)
7178 target_node = secondary_nodes[0]
7179 if self.lu.op.iallocator or (self.lu.op.target_node and
7180 self.lu.op.target_node != target_node):
7182 text = "failed over"
7185 raise errors.OpPrereqError("Instances with disk template %s cannot"
7186 " be %s to arbitrary nodes"
7187 " (neither an iallocator nor a target"
7188 " node can be passed)" %
7189 (instance.disk_template, text),
7192 i_be = self.cfg.GetClusterInfo().FillBE(instance)
7194 # check memory requirements on the secondary node
7195 if not self.cleanup and (not self.failover or instance.admin_up):
7196 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7197 instance.name, i_be[constants.BE_MEMORY],
7198 instance.hypervisor)
7200 self.lu.LogInfo("Not checking memory on the secondary node as"
7201 " instance will not be started")
7203 # check bridge existance
7204 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7206 if not self.cleanup:
7207 _CheckNodeNotDrained(self.lu, target_node)
7208 if not self.failover:
7209 result = self.rpc.call_instance_migratable(instance.primary_node,
# Non-migratable instance: fall back to failover if allowed, else fail.
7211 if result.fail_msg and self.fallback:
7212 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7214 self.failover = True
7216 result.Raise("Can't migrate, please use failover",
7217 prereq=True, ecode=errors.ECODE_STATE)
7219 assert not (self.failover and self.cleanup)
# Determine live/non-live mode: 'live' and 'mode' are mutually
# exclusive; absent both, the hypervisor default is used.
7221 if not self.failover:
7222 if self.lu.op.live is not None and self.lu.op.mode is not None:
7223 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7224 " parameters are accepted",
7226 if self.lu.op.live is not None:
7228 self.lu.op.mode = constants.HT_MIGRATION_LIVE
7230 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7231 # reset the 'live' parameter to None so that repeated
7232 # invocations of CheckPrereq do not raise an exception
7233 self.lu.op.live = None
7234 elif self.lu.op.mode is None:
7235 # read the default value from the hypervisor
7236 i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7238 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7240 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7242 # Failover is never live
# NOTE(review): the "self.live = False" assignment of the else branch
# is elided in this extract.
# Run the configured iallocator in relocation mode and store the chosen
# node in self.target_node; raises OpPrereqError on allocator failure.
7245 def _RunAllocator(self):
7246 """Run the allocator based on input opcode.
7249 ial = IAllocator(self.cfg, self.rpc,
7250 mode=constants.IALLOCATOR_MODE_RELOC,
7251 name=self.instance_name,
7252 # TODO See why hail breaks with a single node below
7253 relocate_from=[self.instance.primary_node,
7254 self.instance.primary_node],
7257 ial.Run(self.lu.op.iallocator)
7260 raise errors.OpPrereqError("Can't compute nodes using"
7261 " iallocator '%s': %s" %
7262 (self.lu.op.iallocator, ial.info),
7264 if len(ial.result) != ial.required_nodes:
7265 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7266 " of nodes (%s), required %s" %
7267 (self.lu.op.iallocator, len(ial.result),
7268 ial.required_nodes), errors.ECODE_FAULT)
7269 self.target_node = ial.result[0]
7270 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7271 self.instance_name, self.lu.op.iallocator,
7272 utils.CommaJoin(ial.result))
# Poll both nodes with the drbd_wait_sync RPC until all report the disks
# as fully synchronized, reporting the minimum progress percentage.
7274 def _WaitUntilSync(self):
7275 """Poll with custom rpc for disk sync.
7277 This uses our own step-based rpc call.
7280 self.feedback_fn("* wait until resync is done")
# NOTE(review): the loop header and the all_done/min_percent
# initialisation are elided in this extract.
7284 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7286 self.instance.disks)
7288 for node, nres in result.items():
7289 nres.Raise("Cannot resync disks on node %s" % node)
7290 node_done, node_percent = nres.payload
7291 all_done = all_done and node_done
7292 if node_percent is not None:
7293 min_percent = min(min_percent, node_percent)
7295 if min_percent < 100:
7296 self.feedback_fn(" - progress: %.1f%%" % min_percent)
# Demote the given node to DRBD secondary by closing all of the
# instance's block devices on it.
7299 def _EnsureSecondary(self, node):
7300 """Demote a node to secondary.
7303 self.feedback_fn("* switching node %s to secondary mode" % node)
7305 for dev in self.instance.disks:
7306 self.cfg.SetDiskID(dev, node)
7308 result = self.rpc.call_blockdev_close(node, self.instance.name,
7309 self.instance.disks)
7310 result.Raise("Cannot change disk to secondary on node %s" % node)
# Detach the instance's DRBD devices from the network on all involved
# nodes (standalone mode).
7312 def _GoStandalone(self):
7313 """Disconnect from the network.
7316 self.feedback_fn("* changing into standalone mode")
7317 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7318 self.instance.disks)
7319 for node, nres in result.items():
7320 nres.Raise("Cannot disconnect disks node %s" % node)
# Reattach the DRBD devices to the network, in multi-master mode during
# the migration itself or single-master mode afterwards.
7322 def _GoReconnect(self, multimaster):
7323 """Reconnect to the network.
# NOTE(review): the "if multimaster: msg = ..." branch preceding this
# else-value is elided in this extract.
7329 msg = "single-master"
7330 self.feedback_fn("* changing disks into %s mode" % msg)
7331 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7332 self.instance.disks,
7333 self.instance.name, multimaster)
7334 for node, nres in result.items():
7335 nres.Raise("Cannot change disks config on node %s" % node)
# Recovery path after a failed migration: figure out which node actually
# runs the instance, fix the config accordingly, then demote the other
# node and force the disks back into a consistent single-master state.
7337 def _ExecCleanup(self):
7338 """Try to cleanup after a failed migration.
7340 The cleanup is done by:
7341 - check that the instance is running only on one node
7342 (and update the config if needed)
7343 - change disks on its secondary node to secondary
7344 - wait until disks are fully synchronized
7345 - disconnect from the network
7346 - change disks into single-master mode
7347 - wait again until disks are fully synchronized
7350 instance = self.instance
7351 target_node = self.target_node
7352 source_node = self.source_node
7354 # check running on only one node
7355 self.feedback_fn("* checking where the instance actually runs"
7356 " (if this hangs, the hypervisor might be in"
7358 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7359 for node, result in ins_l.items():
7360 result.Raise("Can't contact node %s" % node)
7362 runningon_source = instance.name in ins_l[source_node].payload
7363 runningon_target = instance.name in ins_l[target_node].payload
# Running on both or on neither node cannot be repaired automatically.
7365 if runningon_source and runningon_target:
7366 raise errors.OpExecError("Instance seems to be running on two nodes,"
7367 " or the hypervisor is confused; you will have"
7368 " to ensure manually that it runs only on one"
7369 " and restart this operation")
7371 if not (runningon_source or runningon_target):
7372 raise errors.OpExecError("Instance does not seem to be running at all;"
7373 " in this case it's safer to repair by"
7374 " running 'gnt-instance stop' to ensure disk"
7375 " shutdown, and then restarting it")
7377 if runningon_target:
7378 # the migration has actually succeeded, we need to update the config
7379 self.feedback_fn("* instance running on secondary node (%s),"
7380 " updating config" % target_node)
7381 instance.primary_node = target_node
7382 self.cfg.Update(instance, self.feedback_fn)
7383 demoted_node = source_node
7385 self.feedback_fn("* instance confirmed to be running on its"
7386 " primary node (%s)" % source_node)
7387 demoted_node = target_node
7389 if instance.disk_template in constants.DTS_INT_MIRROR:
7390 self._EnsureSecondary(demoted_node)
7392 self._WaitUntilSync()
7393 except errors.OpExecError:
7394 # we ignore here errors, since if the device is standalone, it
7395 # won't be able to sync
7397 self._GoStandalone()
7398 self._GoReconnect(False)
7399 self._WaitUntilSync()
7401 self.feedback_fn("* done")
# Best-effort disk rollback after a failed migration: demote the target
# node and reconnect in single-master mode; failures are only warned
# about, leaving manual recovery to the operator.
7403 def _RevertDiskStatus(self):
7404 """Try to revert the disk status after a failed migration.
7407 target_node = self.target_node
# Externally-mirrored templates need no DRBD state rollback.
7408 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7412 self._EnsureSecondary(target_node)
7413 self._GoStandalone()
7414 self._GoReconnect(False)
7415 self._WaitUntilSync()
7416 except errors.OpExecError, err:
7417 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7418 " please try to recover the instance manually;"
7419 " error '%s'" % str(err))
# Ask the target node's hypervisor to abort an in-flight migration;
# deliberately does not raise so that disk rollback can still proceed.
7421 def _AbortMigration(self):
7422 """Call the hypervisor code to abort a started migration.
7425 instance = self.instance
7426 target_node = self.target_node
7427 migration_info = self.migration_info
7429 abort_result = self.rpc.call_finalize_migration(target_node,
7433 abort_msg = abort_result.fail_msg
7435 logging.error("Aborting migration failed on target node %s: %s",
7436 target_node, abort_msg)
7437 # Don't raise an exception here, as we stil have to try to revert the
7438 # disk status, even if this step failed.
  def _ExecMigration(self):
    """Migrate an instance.

    The migrate is done by:
      - change the disks into dual-master mode
      - wait until disks are fully synchronized again
      - migrate the instance
      - change disks on the new secondary node (the old primary) to secondary
      - wait until disks are fully synchronized
      - change disks into single-master mode

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # Check for hypervisor version mismatch and warn the user.
    nodeinfo = self.rpc.call_node_info([source_node, target_node],
                                       None, self.instance.hypervisor)
    src_info = nodeinfo[source_node]
    dst_info = nodeinfo[target_node]

    if ((constants.HV_NODEINFO_KEY_VERSION in src_info.payload) and
        (constants.HV_NODEINFO_KEY_VERSION in dst_info.payload)):
      src_version = src_info.payload[constants.HV_NODEINFO_KEY_VERSION]
      dst_version = dst_info.payload[constants.HV_NODEINFO_KEY_VERSION]
      if src_version != dst_version:
        # A version mismatch is only a warning: live migration between
        # differing hypervisor versions may still work.
        self.feedback_fn("* warning: hypervisor version mismatch between"
                         " source (%s) and target (%s) node" %
                         (src_version, dst_version))

    self.feedback_fn("* checking disk consistency between source and target")
    for dev in instance.disks:
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
        raise errors.OpExecError("Disk %s is degraded or not fully"
                                 " synchronized on target node,"
                                 " aborting migration" % dev.iv_name)

    # First get the migration information from the remote node
    result = self.rpc.call_migration_info(source_node, instance)
    msg = result.fail_msg
      log_err = ("Failed fetching source migration information from %s: %s" %
      logging.error(log_err)
      raise errors.OpExecError(log_err)

    self.migration_info = migration_info = result.payload

    # Externally-mirrored disks (e.g. shared storage) need no DRBD mode switch
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
      # Then switch the disks to master/master mode
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(True)
      self._WaitUntilSync()

    self.feedback_fn("* preparing %s to accept the instance" % target_node)
    result = self.rpc.call_accept_instance(target_node,
                                           self.nodes_ip[target_node])
    msg = result.fail_msg
      logging.error("Instance pre-migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Pre-migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* migrating instance to %s" % target_node)
    result = self.rpc.call_instance_migrate(source_node, instance,
                                            self.nodes_ip[target_node],
    msg = result.fail_msg
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (instance.name, msg))

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    result = self.rpc.call_finalize_migration(target_node,
    msg = result.fail_msg
      logging.error("Instance migration succeeded, but finalization failed:"
      raise errors.OpExecError("Could not finalize instance migration: %s" %

    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
      # Demote the old primary back to secondary and return to
      # single-master mode.
      self._EnsureSecondary(source_node)
      self._WaitUntilSync()
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()

    self.feedback_fn("* done")
  def _ExecFailover(self):
    """Failover an instance.

    The failover is done by shutting it down on its present node and
    starting it on the secondary.

    """
    instance = self.instance
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)

    source_node = instance.primary_node
    target_node = self.target_node

    if instance.admin_up:
      self.feedback_fn("* checking disk consistency between source and target")
      for dev in instance.disks:
        # for drbd, these are drbd over lvm
        if not _CheckDiskConsistency(self.lu, dev, target_node, False):
          if primary_node.offline:
            # An offline primary cannot report accurate disk status;
            # proceed but tell the user which disk is suspect.
            self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
                             (primary_node.name, dev.iv_name, target_node))
          elif not self.ignore_consistency:
            raise errors.OpExecError("Disk %s is degraded on target node,"
                                     " aborting failover" % dev.iv_name)
      self.feedback_fn("* not checking disk consistency as instance is not"

    self.feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.shutdown_timeout)
    msg = result.fail_msg
      if self.ignore_consistency or primary_node.offline:
        # Best effort: the user asked to ignore consistency or the node
        # is already gone, so continue the failover regardless.
        self.lu.LogWarning("Could not shutdown instance %s on node %s,"
                           " proceeding anyway; please make sure node"
                           " %s is down; error details: %s",
                           instance.name, source_node, source_node, msg)
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 (instance.name, source_node, msg))

    self.feedback_fn("* deactivating the instance's disks on source node")
    if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
      raise errors.OpExecError("Can't shut down the instance's disks")

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      self.feedback_fn("* activating the instance's disks on target node %s" %
      logging.info("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
                                           ignore_secondaries=True)
        # Roll back disk activation before bailing out
        _ShutdownInstanceDisks(self.lu, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      self.feedback_fn("* starting the instance on the target node %s" %
      result = self.rpc.call_instance_start(target_node, instance, None, None,
      msg = result.fail_msg
        _ShutdownInstanceDisks(self.lu, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))
  def Exec(self, feedback_fn):
    """Perform the migration.

    Dispatches to failover, cleanup or live-migration execution after
    computing the source/target nodes and their secondary IPs.

    """
    self.feedback_fn = feedback_fn
    self.source_node = self.instance.primary_node

    # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
    if self.instance.disk_template in constants.DTS_INT_MIRROR:
      # Internally-mirrored templates can only migrate to their secondary
      self.target_node = self.instance.secondary_nodes[0]
      # Otherwise self.target_node has been populated either
      # directly, or through an iallocator.

    self.all_nodes = [self.source_node, self.target_node]
    self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
                         in self.cfg.GetMultiNodeInfo(self.all_nodes))

      feedback_fn("Failover instance %s" % self.instance.name)
      self._ExecFailover()
      feedback_fn("Migrating instance %s" % self.instance.name)
        return self._ExecCleanup()
        return self._ExecMigration()
def _CreateBlockDev(lu, node, instance, device, force_create,
  """Create a tree of block devices on a given node.

  If this device type has to be created on secondaries, create it and
  all its children.

  If not, just recurse to children keeping the same 'force' value.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be change to True whenever we find a device which has
      CreateOnSecondary() attribute
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device own Open() execution

  """
  if device.CreateOnSecondary():
  # Children are created first, bottom-up, so the parent device can be
  # assembled on top of them.
  for child in device.children:
    _CreateBlockDev(lu, node, instance, child, force_create,
  if not force_create:
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
7699 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
7700 """Create a single block device on a given node.
7702 This will not recurse over children of the device, so they must be
7705 @param lu: the lu on whose behalf we execute
7706 @param node: the node on which to create the device
7707 @type instance: L{objects.Instance}
7708 @param instance: the instance which owns the device
7709 @type device: L{objects.Disk}
7710 @param device: the device to create
7711 @param info: the extra 'metadata' we should attach to the device
7712 (this will be represented as a LVM tag)
7713 @type force_open: boolean
7714 @param force_open: this parameter will be passes to the
7715 L{backend.BlockdevCreate} function where it specifies
7716 whether we run on primary or not, and it affects both
7717 the child assembly and the device own Open() execution
7720 lu.cfg.SetDiskID(device, node)
7721 result = lu.rpc.call_blockdev_create(node, device, device.size,
7722 instance.name, force_open, info)
7723 result.Raise("Can't create block device %s on"
7724 " node %s for instance %s" % (device, node, instance.name))
7725 if device.physical_id is None:
7726 device.physical_id = result.payload
def _GenerateUniqueNames(lu, exts):
  """Generate a suitable LV name.

  This will generate a logical volume name for the given instance.

  @param lu: the lu on whose behalf we execute
  @param exts: list of suffixes; one name is generated per suffix

  """
    # Each name is a cluster-wide unique ID followed by the suffix
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
    results.append("%s%s" % (new_id, val))
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
                         iv_name, p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  Builds one DRBD8 disk backed by two LVs (data and metadata), with the
  network port and shared secret allocated from the cluster config.

  """
  assert len(vgnames) == len(names) == 2
  port = lu.cfg.AllocatePort()
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgnames[0], names[0]))
  # The DRBD metadata LV has a fixed size of 128 MiB
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                          logical_id=(vgnames[1], names[1]))
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                          children=[dev_data, dev_meta],
def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index, feedback_fn):
  """Generate the entire disk layout for a given template type.

  Dispatches on C{template_name} and builds the list of
  L{objects.Disk} objects describing the instance's disks.

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  if template_name == constants.DT_DISKLESS:
  elif template_name == constants.DT_PLAIN:
    # Plain LVM: one LV per disk, no secondaries allowed
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                      for i in range(disk_count)])
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      vg = disk.get(constants.IDISK_VG, vgname)
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
      disk_dev = objects.Disk(dev_type=constants.LD_LV,
                              size=disk[constants.IDISK_SIZE],
                              logical_id=(vg, names[idx]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk[constants.IDISK_MODE])
      disks.append(disk_dev)
  elif template_name == constants.DT_DRBD8:
    # DRBD8: exactly one secondary; two minors (primary+secondary) and
    # two LVs (data+meta) per disk
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      data_vg = disk.get(constants.IDISK_VG, vgname)
      meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk[constants.IDISK_SIZE],
                                      names[idx * 2:idx * 2 + 2],
                                      "disk/%d" % disk_index,
                                      minors[idx * 2], minors[idx * 2 + 1])
      disk_dev.mode = disk[constants.IDISK_MODE]
      disks.append(disk_dev)
  elif template_name == constants.DT_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    opcodes.RequireFileStorage()

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
                              size=disk[constants.IDISK_SIZE],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                              mode=disk[constants.IDISK_MODE])
      disks.append(disk_dev)
  elif template_name == constants.DT_SHARED_FILE:
    # Same layout as DT_FILE but on shared file storage
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    opcodes.RequireSharedFileStorage()

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
                              size=disk[constants.IDISK_SIZE],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                              mode=disk[constants.IDISK_MODE])
      disks.append(disk_dev)
  elif template_name == constants.DT_BLOCK:
    # Adopted block devices: the device path comes from the adopt parameter
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
                              size=disk[constants.IDISK_SIZE],
                              logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
                                          disk[constants.IDISK_ADOPT]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk[constants.IDISK_MODE])
      disks.append(disk_dev)
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
7869 def _GetInstanceInfoText(instance):
7870 """Compute that text that should be added to the disk's metadata.
7873 return "originstname+%s" % instance.name
7876 def _CalcEta(time_taken, written, total_size):
7877 """Calculates the ETA based on size written and total size.
7879 @param time_taken: The time taken so far
7880 @param written: amount written so far
7881 @param total_size: The total size of data to be written
7882 @return: The remaining time in seconds
7885 avg_time = time_taken / float(written)
7886 return (total_size - written) * avg_time
def _WipeDisks(lu, instance):
  """Wipes instance disks.

  Pauses DRBD sync, wipes (part of) each disk in chunks on the primary
  node with periodic progress/ETA feedback, then resumes sync.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @return: the success of the wipe

  """
  node = instance.primary_node

  for device in instance.disks:
    lu.cfg.SetDiskID(device, node)

  logging.info("Pause sync of instance %s disks", instance.name)
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)

  for idx, success in enumerate(result.payload):
      logging.warn("pause-sync of instance %s for disks %d failed",

    for idx, device in enumerate(instance.disks):
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
      # MAX_WIPE_CHUNK at max
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
                            constants.MIN_WIPE_CHUNK_PERCENT)
      # we _must_ make this an int, otherwise rounding errors will
      wipe_chunk_size = int(wipe_chunk_size)

      lu.LogInfo("* Wiping disk %d", idx)
      logging.info("Wiping disk %d for instance %s, node %s using"
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)

      start_time = time.time()
      while offset < size:
        wipe_size = min(wipe_chunk_size, size - offset)
        logging.debug("Wiping disk %d, offset %s, chunk %s",
                      idx, offset, wipe_size)
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
                     (idx, offset, wipe_size))
        # Emit a progress line at most once a minute
        if now - last_output >= 60:
          eta = _CalcEta(now - start_time, offset, size)
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
  logging.info("Resume sync of instance %s disks", instance.name)

  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)

  for idx, success in enumerate(result.payload):
      lu.LogWarning("Resume sync of disk %d failed, please have a"
                    " look at the status and troubleshoot the issue", idx)
      logging.warn("resume-sync of instance %s for disks %d failed",
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation

  @return: the success of the creation

  """
  info = _GetInstanceInfoText(instance)
  if target_node is None:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes

  if instance.disk_template in constants.DTS_FILEBASED:
    # File-based disks live under a common directory that must exist first
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    result.Raise("Failed to create directory '%s' on"
                 " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUInstanceSetParams
  for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    for node in all_nodes:
      # force creation/opening only on the primary node
      f_create = node == pnode
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks

  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  for device in instance.disks:
      edata = [(target_node, device)]
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
        # best-effort removal: log and continue with the remaining devices
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)

    # if this is a DRBD disk, return its port to the pool
    if device.dev_type in constants.LDS_DRBD:
      tcp_port = device.logical_id[2]
      lu.cfg.AddTcpUdpPort(tcp_port)

  if instance.disk_template == constants.DT_FILE:
    # Also remove the per-instance file storage directory
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, instance.primary_node, result.fail_msg)
def _ComputeDiskSizePerVG(disk_template, disks):
  """Compute disk size requirements in the volume group

  @type disk_template: string
  @param disk_template: the instance's disk template
  @type disks: list of dicts
  @param disks: disk definitions carrying C{constants.IDISK_VG} and
      C{constants.IDISK_SIZE} keys
  @rtype: dict
  @return: mapping of volume-group name to required size (MiB); empty
      for templates that use no volume group
  @raise errors.ProgrammerError: for an unknown disk template

  """
  def _compute(disks, payload):
    """Universal algorithm.

    Sums the disk sizes per volume group, adding C{payload} MiB of
    overhead per disk.

    """
    vgs = {}
    for disk in disks:
      vg_name = disk[constants.IDISK_VG]
      # Bugfix: accumulate under the disk's own VG name; the previous
      # code did vgs.get(constants.IDISK_VG, 0), looking up the constant
      # key itself, so sizes within one VG never added up.
      vgs[vg_name] = \
        vgs.get(vg_name, 0) + disk[constants.IDISK_SIZE] + payload
    return vgs

  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: {},
    constants.DT_PLAIN: _compute(disks, 0),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: _compute(disks, 128),
    constants.DT_FILE: {},
    constants.DT_SHARED_FILE: {},
    constants.DT_BLOCK: {},
    }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  @rtype: int or None
  @return: total required local size in MiB for LVM-backed templates;
      None or 0 for templates without a local space requirement

  """
  # Required free disk space as a function of disk and swap space
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
    constants.DT_FILE: None,
    constants.DT_SHARED_FILE: 0,
    constants.DT_BLOCK: 0,

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
8111 def _FilterVmNodes(lu, nodenames):
8112 """Filters out non-vm_capable nodes from a list.
8114 @type lu: L{LogicalUnit}
8115 @param lu: the logical unit for which we check
8116 @type nodenames: list
8117 @param nodenames: the list of nodes on which we should check
8119 @return: the list of vm-capable nodes
8122 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
8123 return [name for name in nodenames if name not in vm_nodes]
def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  # Only vm_capable nodes can validate hypervisor parameters
  nodenames = _FilterVmNodes(lu, nodenames)
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
  for node in nodenames:
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """OS parameters validation.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the OS we should use
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)
  result = lu.rpc.call_os_validate(required, nodenames, osname,
                                   [constants.OS_VALIDATE_PARAMETERS],
  for node, nres in result.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    nres.Raise("OS Parameters validation failed on node %s" % node)
    if not nres.payload:
      # OS missing on this node but not required: just inform the user
      lu.LogInfo("OS %s not found on node %s, validation skipped",
8184 class LUInstanceCreate(LogicalUnit):
8185 """Create an instance.
8188 HPATH = "instance-add"
8189 HTYPE = constants.HTYPE_INSTANCE
  def CheckArguments(self):
    """Check arguments.

    Validates and normalizes the opcode arguments for instance
    creation: instance name, NIC/disk parameter types, adoption
    strategy, file storage settings, node/iallocator choice, and the
    mode-specific (create/import/remote-import) requirements.

    """
    # do not require name_check to ease forward/backward compatibility
    if self.op.no_install and self.op.start:
      self.LogInfo("No-installation mode selected, disabling startup")
      self.op.start = False
    # validate/normalize the instance name
    self.op.instance_name = \
      netutils.Hostname.GetNormalizedName(self.op.instance_name)

    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do IP address check without a name"
                                 " check", errors.ECODE_INVAL)

    # check nics' parameter names
    for nic in self.op.nics:
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)

    # check disks. parameter names and consistent adopt/no-adopt strategy
    has_adopt = has_no_adopt = False
    for disk in self.op.disks:
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
      if constants.IDISK_ADOPT in disk:
    if has_adopt and has_no_adopt:
      # mixing adopted and newly-created disks is not supported
      raise errors.OpPrereqError("Either all disks are adopted or none is",
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
        raise errors.OpPrereqError("Disk adoption is not supported for the"
                                   " '%s' disk template" %
                                   self.op.disk_template,
      if self.op.iallocator is not None:
        raise errors.OpPrereqError("Disk adoption not allowed with an"
                                   " iallocator script", errors.ECODE_INVAL)
      if self.op.mode == constants.INSTANCE_IMPORT:
        raise errors.OpPrereqError("Disk adoption not allowed for"
                                   " instance import", errors.ECODE_INVAL)

      if self.op.disk_template in constants.DTS_MUST_ADOPT:
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
                                   " but no 'adopt' parameter given" %
                                   self.op.disk_template,

    self.adopt_disks = has_adopt

    # instance name verification
    if self.op.name_check:
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
      self.op.instance_name = self.hostname1.name
      # used in CheckPrereq for ip ping check
      self.check_ip = self.hostname1.ip
      self.check_ip = None

    # file storage checks
    if (self.op.file_driver and
        not self.op.file_driver in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver, errors.ECODE_INVAL)

    if self.op.disk_template == constants.DT_FILE:
      opcodes.RequireFileStorage()
    elif self.op.disk_template == constants.DT_SHARED_FILE:
      opcodes.RequireSharedFileStorage()

    ### Node/iallocator related checks
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")

    if self.op.pnode is not None:
      if self.op.disk_template in constants.DTS_INT_MIRROR:
        if self.op.snode is None:
          raise errors.OpPrereqError("The networked disk templates need"
                                     " a mirror node", errors.ECODE_INVAL)
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
        self.op.snode = None

    self._cds = _GetClusterDomainSecret()

    if self.op.mode == constants.INSTANCE_IMPORT:
      # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      self.op.force_variant = True

      if self.op.no_install:
        self.LogInfo("No-installation mode has no effect during import")

    elif self.op.mode == constants.INSTANCE_CREATE:
      if self.op.os_type is None:
        raise errors.OpPrereqError("No guest OS specified",
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
                                   " installation" % self.op.os_type,
      if self.op.disk_template is None:
        raise errors.OpPrereqError("No disk template specified",

    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
      # Check handshake to ensure both clusters have the same domain secret
      src_handshake = self.op.source_handshake
      if not src_handshake:
        raise errors.OpPrereqError("Missing source handshake",

      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,

      # Load and check source CA
      self.source_x509_ca_pem = self.op.source_x509_ca
      if not self.source_x509_ca_pem:
        raise errors.OpPrereqError("Missing source X509 CA",

        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),

      self.source_x509_ca = cert

      src_instance_name = self.op.source_instance_name
      if not src_instance_name:
        raise errors.OpPrereqError("Missing source instance name",

      self.source_instance_name = \
        netutils.GetHostname(name=src_instance_name).name

      raise errors.OpPrereqError("Invalid instance creation mode %r" %
                                 self.op.mode, errors.ECODE_INVAL)
  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation.

    """
    self.needed_locks = {}

    instance_name = self.op.instance_name
    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name, errors.ECODE_EXISTS)

    self.add_locks[locking.LEVEL_INSTANCE] = instance_name

    if self.op.iallocator:
      # the allocator may pick any node, so lock them all
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist

    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = self.op.src_node
      src_path = self.op.src_path

      if src_path is None:
        # default the export path to the instance name
        self.op.src_path = src_path = self.op.instance_name

      if src_node is None:
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from a path"
                                     " requires a source node option",
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        if not os.path.isabs(src_path):
          # relative paths are resolved inside the cluster export directory
          self.op.src_path = src_path = \
            utils.PathJoin(constants.EXPORT_DIR, src_path)
  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    On success, fills in C{self.op.pnode} (and C{self.op.snode} when a
    second node is required) from the allocator's result.

    """
    nics = [n.ToDict() for n in self.nics]
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_ALLOC,
                     name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     vcpus=self.be_full[constants.BE_VCPUS],
                     memory=self.be_full[constants.BE_MEMORY],
                     hypervisor=self.op.hypervisor,

    ial.Run(self.op.iallocator)

      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.result),
                                  ial.required_nodes), errors.ECODE_FAULT)
    self.op.pnode = ial.result[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(ial.result))
    if ial.required_nodes == 2:
      # mirrored templates also need a secondary node
      self.op.snode = ial.result[1]
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
      "ADD_MODE": self.op.mode,
    if self.op.mode == constants.INSTANCE_IMPORT:
      # import-specific environment entries
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      memory=self.be_full[constants.BE_MEMORY],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
             for d in self.disks],
      hypervisor_name=self.op.hypervisor,
# Purpose: nodes on which the hooks run — master, primary and secondaries.
# NOTE(review): the trailing "return (nl, nl)" (or similar) is elided from
# this dump (numbering jumps from 8472 to 8475) — confirm against the full
# source. Tokens below kept byte-identical.
8468 def BuildHooksNodes(self):
8469 """Build hooks nodes.
8472 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
# NOTE(review): partially-elided numbered dump; tokens kept byte-identical,
# only comments added.
#
# Purpose (import mode only): locate the export on the cluster (searching all
# locked nodes when no source node was given), fetch its metadata via RPC and
# return it as a parsed config object, validating the export version.
8475 def _ReadExportInfo(self):
8476 """Reads the export information from disk.
8478 It will override the opcode source node and path with the actual
8479 information, if these two were not specified before.
8481 @return: the export information
8484 assert self.op.mode == constants.INSTANCE_IMPORT
8486 src_node = self.op.src_node
8487 src_path = self.op.src_path
# No explicit source node: search every locked node's export list for the
# (relative) path.
8489 if src_node is None:
8490 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
8491 exp_list = self.rpc.call_export_list(locked_nodes)
8493 for node in exp_list:
8494 if exp_list[node].fail_msg:
# NOTE(review): the body of this fail_msg branch (presumably "continue")
# is elided — gap 8495 in the numbering.
8496 if src_path in exp_list[node].payload:
# NOTE(review): line 8497 elided (presumably a "found = True" flag); the
# loop-exit/else structure around lines 8498-8503 is also partially elided.
8498 self.op.src_node = src_node = node
8499 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
8503 raise errors.OpPrereqError("No export found for relative path %s" %
8504 src_path, errors.ECODE_INVAL)
8506 _CheckNodeOnline(self, src_node)
8507 result = self.rpc.call_export_info(src_node, src_path)
8508 result.Raise("No export or invalid export found in dir %s" % src_path)
8510 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8511 if not export_info.has_section(constants.INISECT_EXP):
8512 raise errors.ProgrammerError("Corrupted export config",
8513 errors.ECODE_ENVIRON)
# Refuse exports written by an incompatible Ganeti export format version.
8515 ei_version = export_info.get(constants.INISECT_EXP, "version")
8516 if (int(ei_version) != constants.EXPORT_VERSION):
8517 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8518 (ei_version, constants.EXPORT_VERSION),
8519 errors.ECODE_ENVIRON)
# NOTE(review): partially-elided numbered dump; tokens kept byte-identical,
# only comments added.
#
# Purpose: for any instance parameter the opcode left unset, fall back to the
# value recorded in the export metadata (einfo). Opcode-specified values
# always win; export values only fill the gaps.
8522 def _ReadExportParams(self, einfo):
8523 """Use export parameters as defaults.
8525 In case the opcode doesn't specify (as in override) some instance
8526 parameters, then try to use them from the export information, if
# NOTE(review): the guard before this line (presumably
# "if self.op.os_type is None:") is elided — gap before 8530.
8530 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
8532 if self.op.disk_template is None:
8533 if einfo.has_option(constants.INISECT_INS, "disk_template"):
8534 self.op.disk_template = einfo.get(constants.INISECT_INS,
# NOTE(review): the "else:" between 8534's continuation and this raise is
# elided (gap 8535-8536).
8537 raise errors.OpPrereqError("No disk template specified and the export"
8538 " is missing the disk_template information",
8541 if not self.op.disks:
8542 if einfo.has_option(constants.INISECT_INS, "disk_count"):
# NOTE(review): "disks = []" initializer (line 8543) elided.
8544 # TODO: import the disk iv_name too
8545 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
8546 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
8547 disks.append({constants.IDISK_SIZE: disk_sz})
8548 self.op.disks = disks
# NOTE(review): the matching "else:" (line 8549) is elided.
8550 raise errors.OpPrereqError("No disk info specified and the export"
8551 " is missing the disk information",
8554 if (not self.op.nics and
8555 einfo.has_option(constants.INISECT_INS, "nic_count")):
# Rebuild the NIC list from the per-index nicN_* options in the export.
8557 for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
8559 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
8560 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
# NOTE(review): lines 8561-8564 elided (presumably storing v into the nic
# dict and appending to self.op.nics) — confirm against the full source.
8565 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
8566 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
8568 if (self.op.hypervisor is None and
8569 einfo.has_option(constants.INISECT_INS, "hypervisor")):
8570 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
8572 if einfo.has_section(constants.INISECT_HYP):
8573 # use the export parameters but do not override the ones
8574 # specified by the user
8575 for name, value in einfo.items(constants.INISECT_HYP):
8576 if name not in self.op.hvparams:
8577 self.op.hvparams[name] = value
8579 if einfo.has_section(constants.INISECT_BEP):
8580 # use the parameters, without overriding
8581 for name, value in einfo.items(constants.INISECT_BEP):
8582 if name not in self.op.beparams:
8583 self.op.beparams[name] = value
# NOTE(review): the "else:" pairing this legacy fallback with the section
# check above is elided (gap 8584).
8585 # try to read the parameters old style, from the main section
8586 for name in constants.BES_PARAMETERS:
8587 if (name not in self.op.beparams and
8588 einfo.has_option(constants.INISECT_INS, name)):
8589 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
8591 if einfo.has_section(constants.INISECT_OSP):
8592 # use the parameters, without overriding
8593 for name, value in einfo.items(constants.INISECT_OSP):
8594 if name not in self.op.osparams:
8595 self.op.osparams[name] = value
# NOTE(review): partially-elided numbered dump; tokens kept byte-identical,
# only comments added.
#
# Purpose: strip from the opcode any hv/be/nic/os parameter whose value equals
# the cluster default, so the instance stores only real overrides (used by
# identify_defaults, e.g. on import).
8597 def _RevertToDefaults(self, cluster):
8598 """Revert the instance parameters to the default values.
8602 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
8603 for name in self.op.hvparams.keys():
8604 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
8605 del self.op.hvparams[name]
8607 be_defs = cluster.SimpleFillBE({})
8608 for name in self.op.beparams.keys():
8609 if name in be_defs and be_defs[name] == self.op.beparams[name]:
8610 del self.op.beparams[name]
8612 nic_defs = cluster.SimpleFillNIC({})
8613 for nic in self.op.nics:
8614 for name in constants.NICS_PARAMETERS:
8615 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
# NOTE(review): the deletion statement inside this branch (presumably
# "del nic[name]", lines 8616-8617) is elided — confirm against the source.
8618 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
8619 for name in self.op.osparams.keys():
8620 if name in os_defs and os_defs[name] == self.op.osparams[name]:
8621 del self.op.osparams[name]
# NOTE(review): partially-elided numbered dump; tokens kept byte-identical,
# only comments added.
#
# Purpose: compute self.instance_file_storage_dir for file-based disk
# templates by joining the cluster storage root, the optional opcode-provided
# subdirectory and the instance name. Stays None for non-file templates.
8623 def _CalculateFileStorageDir(self):
8624 """Calculate final instance file storage dir.
8627 # file storage dir calculation/check
8628 self.instance_file_storage_dir = None
8629 if self.op.disk_template in constants.DTS_FILEBASED:
8630 # build the full file storage dir path
# NOTE(review): the "joinargs = []" initializer (gap 8631-8632) is elided.
# Shared-file storage uses a different cluster-level root directory.
8633 if self.op.disk_template == constants.DT_SHARED_FILE:
8634 get_fsd_fn = self.cfg.GetSharedFileStorageDir
8636 get_fsd_fn = self.cfg.GetFileStorageDir
8638 cfg_storagedir = get_fsd_fn()
8639 if not cfg_storagedir:
8640 raise errors.OpPrereqError("Cluster file storage dir not defined")
8641 joinargs.append(cfg_storagedir)
8643 if self.op.file_storage_dir is not None:
8644 joinargs.append(self.op.file_storage_dir)
8646 joinargs.append(self.op.instance_name)
8648 # pylint: disable=W0142
8649 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
# NOTE(review): partially-elided numbered dump; tokens kept byte-identical,
# only comments added. This method is long and heavily elided — any structure
# marked "presumably" must be confirmed against the full cmdlib.py.
#
# Purpose: validate everything about the instance-creation request before any
# state is changed: storage dir, import metadata, hypervisor/be/os params,
# NICs, disks (including adoption data), node status and free resources.
8651 def CheckPrereq(self):
8652 """Check prerequisites.
8655 self._CalculateFileStorageDir()
# Import mode: pull defaults from the export metadata first.
8657 if self.op.mode == constants.INSTANCE_IMPORT:
8658 export_info = self._ReadExportInfo()
8659 self._ReadExportParams(export_info)
8661 if (not self.cfg.GetVGName() and
8662 self.op.disk_template not in constants.DTS_NOT_LVM):
8663 raise errors.OpPrereqError("Cluster does not support lvm-based"
8664 " instances", errors.ECODE_STATE)
8666 if self.op.hypervisor is None:
8667 self.op.hypervisor = self.cfg.GetHypervisorType()
8669 cluster = self.cfg.GetClusterInfo()
8670 enabled_hvs = cluster.enabled_hypervisors
8671 if self.op.hypervisor not in enabled_hvs:
8672 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
8673 " cluster (%s)" % (self.op.hypervisor,
8674 ",".join(enabled_hvs)),
8677 # Check tag validity
8678 for tag in self.op.tags:
8679 objects.TaggableObject.ValidateTag(tag)
8681 # check hypervisor parameter syntax (locally)
8682 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
8683 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
8685 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
8686 hv_type.CheckParameterSyntax(filled_hvp)
8687 self.hv_full = filled_hvp
8688 # check that we don't specify global parameters on an instance
8689 _CheckGlobalHvParams(self.op.hvparams)
8691 # fill and remember the beparams dict
8692 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
8693 self.be_full = cluster.SimpleFillBE(self.op.beparams)
8695 # build os parameters
8696 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
8698 # now that hvp/bep are in final format, let's reset to defaults,
8700 if self.op.identify_defaults:
8701 self._RevertToDefaults(cluster)
# NOTE(review): "self.nics = []" initializer presumably elided (gap
# 8702-8704) — the loop below appends to self.nics.
8705 for idx, nic in enumerate(self.op.nics):
8706 nic_mode_req = nic.get(constants.INIC_MODE, None)
8707 nic_mode = nic_mode_req
8708 if nic_mode is None:
8709 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
8711 # in routed mode, for the first nic, the default ip is 'auto'
8712 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
8713 default_ip_mode = constants.VALUE_AUTO
8715 default_ip_mode = constants.VALUE_NONE
8717 # ip validity checks
8718 ip = nic.get(constants.INIC_IP, default_ip_mode)
8719 if ip is None or ip.lower() == constants.VALUE_NONE:
# NOTE(review): "nic_ip = None" presumably elided here (gap 8720).
8721 elif ip.lower() == constants.VALUE_AUTO:
8722 if not self.op.name_check:
8723 raise errors.OpPrereqError("IP address set to auto but name checks"
8724 " have been skipped",
8726 nic_ip = self.hostname1.ip
8728 if not netutils.IPAddress.IsValid(ip):
8729 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
# NOTE(review): "nic_ip = ip" presumably elided (gap 8730-8732).
8733 # TODO: check the ip address for uniqueness
8734 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
8735 raise errors.OpPrereqError("Routed nic mode requires an ip address",
8738 # MAC address verification
8739 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
8740 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8741 mac = utils.NormalizeAndValidateMac(mac)
# NOTE(review): the "try:" opening this reservation attempt is elided
# (gap 8742-8743).
8744 self.cfg.ReserveMAC(mac, self.proc.GetECId())
8745 except errors.ReservationError:
8746 raise errors.OpPrereqError("MAC address %s already in use"
8747 " in cluster" % mac,
8748 errors.ECODE_NOTUNIQUE)
8750 # Build nic parameters
8751 link = nic.get(constants.INIC_LINK, None)
# NOTE(review): "nicparams = {}" and the guards around the two assignments
# below are elided (gaps 8752-8753, 8755, 8757).
8754 nicparams[constants.NIC_MODE] = nic_mode_req
8756 nicparams[constants.NIC_LINK] = link
8758 check_params = cluster.SimpleFillNIC(nicparams)
8759 objects.NIC.CheckParameterSyntax(check_params)
8760 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
8762 # disk checks/pre-build
8763 default_vg = self.cfg.GetVGName()
8765 for disk in self.op.disks:
8766 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
8767 if mode not in constants.DISK_ACCESS_SET:
8768 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
8769 mode, errors.ECODE_INVAL)
8770 size = disk.get(constants.IDISK_SIZE, None)
8772 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
# NOTE(review): the try/int-conversion around the size value is elided
# (gaps 8771, 8773-8774).
8775 except (TypeError, ValueError):
8776 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
8779 data_vg = disk.get(constants.IDISK_VG, default_vg)
# NOTE(review): "new_disk = {" opener elided (gap 8780).
8781 constants.IDISK_SIZE: size,
8782 constants.IDISK_MODE: mode,
8783 constants.IDISK_VG: data_vg,
8784 constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
8786 if constants.IDISK_ADOPT in disk:
8787 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
8788 self.disks.append(new_disk)
8790 if self.op.mode == constants.INSTANCE_IMPORT:
8792 # Check that the new instance doesn't have less disks than the export
8793 instance_disks = len(self.disks)
8794 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
8795 if instance_disks < export_disks:
8796 raise errors.OpPrereqError("Not enough disks to import."
8797 " (instance: %d, export: %d)" %
8798 (instance_disks, export_disks),
# NOTE(review): "disk_images = []" initializer elided (gap 8799-8801).
8802 for idx in range(export_disks):
8803 option = "disk%d_dump" % idx
8804 if export_info.has_option(constants.INISECT_INS, option):
8805 # FIXME: are the old os-es, disk sizes, etc. useful?
8806 export_name = export_info.get(constants.INISECT_INS, option)
8807 image = utils.PathJoin(self.op.src_path, export_name)
8808 disk_images.append(image)
# False marks a disk with no dump in the export (nothing to import).
8810 disk_images.append(False)
8812 self.src_images = disk_images
8814 old_name = export_info.get(constants.INISECT_INS, "name")
8816 exp_nic_count = export_info.getint(constants.INISECT_INS, "nic_count")
8817 except (TypeError, ValueError), err:
8818 raise errors.OpPrereqError("Invalid export file, nic_count is not"
8819 " an integer: %s" % str(err),
# When re-importing under the same name, reuse the exported MAC addresses
# for NICs left on "auto".
8821 if self.op.instance_name == old_name:
8822 for idx, nic in enumerate(self.nics):
8823 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
8824 nic_mac_ini = "nic%d_mac" % idx
8825 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8827 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8829 # ip ping checks (we use the same ip that was resolved in ExpandNames)
8830 if self.op.ip_check:
8831 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8832 raise errors.OpPrereqError("IP %s of instance %s already in use" %
8833 (self.check_ip, self.op.instance_name),
8834 errors.ECODE_NOTUNIQUE)
8836 #### mac address generation
8837 # By generating here the mac address both the allocator and the hooks get
8838 # the real final mac address rather than the 'auto' or 'generate' value.
8839 # There is a race condition between the generation and the instance object
8840 # creation, which means that we know the mac is valid now, but we're not
8841 # sure it will be when we actually add the instance. If things go bad
8842 # adding the instance will abort because of a duplicate mac, and the
8843 # creation job will fail.
8844 for nic in self.nics:
8845 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8846 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
8850 if self.op.iallocator is not None:
8851 self._RunAllocator()
8853 # Release all unneeded node locks
8854 _ReleaseLocks(self, locking.LEVEL_NODE,
8855 keep=filter(None, [self.op.pnode, self.op.snode,
8858 #### node related checks
8860 # check primary node
8861 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8862 assert self.pnode is not None, \
8863 "Cannot retrieve locked node %s" % self.op.pnode
# NOTE(review): the "if pnode.offline:" / "if pnode.drained:" guards before
# these raises are elided (gaps 8864, 8867).
8865 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8866 pnode.name, errors.ECODE_STATE)
8868 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8869 pnode.name, errors.ECODE_STATE)
8870 if not pnode.vm_capable:
8871 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8872 " '%s'" % pnode.name, errors.ECODE_STATE)
8874 self.secondaries = []
8876 # mirror node verification
8877 if self.op.disk_template in constants.DTS_INT_MIRROR:
8878 if self.op.snode == pnode.name:
8879 raise errors.OpPrereqError("The secondary node cannot be the"
8880 " primary node", errors.ECODE_INVAL)
8881 _CheckNodeOnline(self, self.op.snode)
8882 _CheckNodeNotDrained(self, self.op.snode)
8883 _CheckNodeVmCapable(self, self.op.snode)
8884 self.secondaries.append(self.op.snode)
8886 nodenames = [pnode.name] + self.secondaries
8888 if not self.adopt_disks:
8889 # Check lv size requirements, if not adopting
8890 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
8891 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
8893 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
8894 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
8895 disk[constants.IDISK_ADOPT])
8896 for disk in self.disks])
8897 if len(all_lvs) != len(self.disks):
8898 raise errors.OpPrereqError("Duplicate volume names given for adoption",
8900 for lv_name in all_lvs:
# NOTE(review): "try:" opener elided (gap 8901).
8902 # FIXME: lv_name here is "vg/lv" need to ensure that other calls
8903 # to ReserveLV uses the same syntax
8904 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
8905 except errors.ReservationError:
8906 raise errors.OpPrereqError("LV named %s used by another instance" %
8907 lv_name, errors.ECODE_NOTUNIQUE)
8909 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
8910 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
8912 node_lvs = self.rpc.call_lv_list([pnode.name],
8913 vg_names.payload.keys())[pnode.name]
8914 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
8915 node_lvs = node_lvs.payload
8917 delta = all_lvs.difference(node_lvs.keys())
# NOTE(review): "if delta:" guard elided (gap 8918); same pattern for the
# online-LV check below (gap 8923).
8919 raise errors.OpPrereqError("Missing logical volume(s): %s" %
8920 utils.CommaJoin(delta),
8922 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
8924 raise errors.OpPrereqError("Online logical volumes found, cannot"
8925 " adopt: %s" % utils.CommaJoin(online_lvs),
8927 # update the size of disk based on what is found
8928 for dsk in self.disks:
8929 dsk[constants.IDISK_SIZE] = \
8930 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
8931 dsk[constants.IDISK_ADOPT])][0]))
8933 elif self.op.disk_template == constants.DT_BLOCK:
8934 # Normalize and de-duplicate device paths
8935 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
8936 for disk in self.disks])
8937 if len(all_disks) != len(self.disks):
8938 raise errors.OpPrereqError("Duplicate disk names given for adoption",
8940 baddisks = [d for d in all_disks
8941 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
# NOTE(review): "if baddisks:" guard elided (gap 8942).
8943 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
8944 " cannot be adopted" %
8945 (", ".join(baddisks),
8946 constants.ADOPTABLE_BLOCKDEV_ROOT),
8949 node_disks = self.rpc.call_bdev_sizes([pnode.name],
8950 list(all_disks))[pnode.name]
8951 node_disks.Raise("Cannot get block device information from node %s" %
8953 node_disks = node_disks.payload
8954 delta = all_disks.difference(node_disks.keys())
# NOTE(review): "if delta:" guard elided (gap 8955).
8956 raise errors.OpPrereqError("Missing block device(s): %s" %
8957 utils.CommaJoin(delta),
8959 for dsk in self.disks:
8960 dsk[constants.IDISK_SIZE] = \
8961 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
8963 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
8965 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
8966 # check OS parameters (remotely)
8967 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
8969 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
8971 # memory check on primary node
# NOTE(review): guard (presumably "if self.op.start:") elided (gap 8972).
8973 _CheckNodeFreeMemory(self, self.pnode.name,
8974 "creating instance %s" % self.op.instance_name,
8975 self.be_full[constants.BE_MEMORY],
8978 self.dry_run_result = list(nodenames)
# NOTE(review): partially-elided numbered dump; tokens kept byte-identical,
# only comments added. Several guards and initializers are missing (numbering
# gaps) — confirm structure against the full cmdlib.py before editing.
#
# Purpose: actually create the instance — generate/adopt disks, add the
# instance to the cluster config, wipe/sync disks as configured, run the OS
# create/import scripts per self.op.mode, and optionally start the instance.
8980 def Exec(self, feedback_fn):
8981 """Create and add the instance to the cluster.
8984 instance = self.op.instance_name
8985 pnode_name = self.pnode.name
8987 ht_kind = self.op.hypervisor
# Only some hypervisors (e.g. VNC-capable ones) need a network port.
8988 if ht_kind in constants.HTS_REQ_PORT:
8989 network_port = self.cfg.AllocatePort()
# NOTE(review): the "else: network_port = None" branch is elided
# (gap 8990-8992).
8993 disks = _GenerateDiskTemplate(self,
8994 self.op.disk_template,
8995 instance, pnode_name,
8998 self.instance_file_storage_dir,
8999 self.op.file_driver,
9003 iobj = objects.Instance(name=instance, os=self.op.os_type,
9004 primary_node=pnode_name,
9005 nics=self.nics, disks=disks,
9006 disk_template=self.op.disk_template,
9008 network_port=network_port,
9009 beparams=self.op.beparams,
9010 hvparams=self.op.hvparams,
9011 hypervisor=self.op.hypervisor,
9012 osparams=self.op.osparams,
9016 for tag in self.op.tags:
# NOTE(review): the tag-adding call inside this loop (presumably
# "iobj.AddTag(tag)") is elided (gaps 9017-9018).
9019 if self.adopt_disks:
9020 if self.op.disk_template == constants.DT_PLAIN:
9021 # rename LVs to the newly-generated names; we need to construct
9022 # 'fake' LV disks with the old data, plus the new unique_id
9023 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
9025 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
9026 rename_to.append(t_dsk.logical_id)
9027 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
9028 self.cfg.SetDiskID(t_dsk, pnode_name)
9029 result = self.rpc.call_blockdev_rename(pnode_name,
9030 zip(tmp_disks, rename_to))
# NOTE(review): pre-existing typo "adoped" in this runtime string — left
# untouched here because runtime strings are behavior.
9031 result.Raise("Failed to rename adoped LVs")
9033 feedback_fn("* creating instance disks...")
# NOTE(review): "try:" opener elided (gap 9034).
9035 _CreateDisks(self, iobj)
9036 except errors.OpExecError:
9037 self.LogWarning("Device creation failed, reverting...")
9039 _RemoveDisks(self, iobj)
9041 self.cfg.ReleaseDRBDMinors(instance)
# NOTE(review): the re-raise ending this except block is elided
# (gaps 9038, 9040, 9042-9043).
9044 feedback_fn("adding instance %s to cluster config" % instance)
9046 self.cfg.AddInstance(iobj, self.proc.GetECId())
9048 # Declare that we don't want to remove the instance lock anymore, as we've
9049 # added the instance to the config
9050 del self.remove_locks[locking.LEVEL_INSTANCE]
9052 if self.op.mode == constants.INSTANCE_IMPORT:
9053 # Release unused nodes
9054 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
# NOTE(review): the matching "else:" branch header is elided (gap
# 9055-9056); this release covers the non-import case.
9057 _ReleaseLocks(self, locking.LEVEL_NODE)
# NOTE(review): "disk_abort = False" initializer presumably elided
# (gap 9058-9059).
9060 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
9061 feedback_fn("* wiping instance disks...")
9063 _WipeDisks(self, iobj)
9064 except errors.OpExecError, err:
9065 logging.exception("Wiping disks failed")
9066 self.LogWarning("Wiping instance disks failed (%s)", err)
9070 # Something is already wrong with the disks, don't do anything else
# NOTE(review): lines 9067-9069 and 9071 elided (disk_abort handling).
9072 elif self.op.wait_for_sync:
9073 disk_abort = not _WaitForSync(self, iobj)
9074 elif iobj.disk_template in constants.DTS_INT_MIRROR:
9075 # make sure the disks are not degraded (still sync-ing is ok)
9076 feedback_fn("* checking mirrors status")
9077 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
# NOTE(review): "if disk_abort:" guard elided (gaps 9078-9081).
9082 _RemoveDisks(self, iobj)
9083 self.cfg.RemoveInstance(iobj.name)
9084 # Make sure the instance lock gets removed
9085 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
9086 raise errors.OpExecError("There are some degraded disks for"
9089 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
9090 if self.op.mode == constants.INSTANCE_CREATE:
9091 if not self.op.no_install:
9092 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
9093 not self.op.wait_for_sync)
# NOTE(review): "if pause_sync:" guard elided (gap 9094).
9095 feedback_fn("* pausing disk sync to install instance OS")
9096 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9098 for idx, success in enumerate(result.payload):
# NOTE(review): "if not success:" guard elided (gap 9099).
9100 logging.warn("pause-sync of instance %s for disk %d failed",
9103 feedback_fn("* running the instance OS create scripts...")
9104 # FIXME: pass debug option from opcode to backend
# NOTE(review): "os_add_result = \" assignment opener elided (gap 9105).
9106 self.rpc.call_instance_os_add(pnode_name, iobj, False,
9107 self.op.debug_level)
# NOTE(review): "if pause_sync:" guard elided (gap 9108).
9109 feedback_fn("* resuming disk sync")
9110 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9112 for idx, success in enumerate(result.payload):
9114 logging.warn("resume-sync of instance %s for disk %d failed",
9117 os_add_result.Raise("Could not add os for instance %s"
9118 " on node %s" % (instance, pnode_name))
9120 elif self.op.mode == constants.INSTANCE_IMPORT:
9121 feedback_fn("* running the instance OS import scripts...")
# NOTE(review): "transfers = []" initializer elided (gaps 9122-9124).
9125 for idx, image in enumerate(self.src_images):
# NOTE(review): "if not image: continue" presumably elided (9126-9128).
9129 # FIXME: pass debug option from opcode to backend
9130 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
9131 constants.IEIO_FILE, (image, ),
9132 constants.IEIO_SCRIPT,
9133 (iobj.disks[idx], idx),
9135 transfers.append(dt)
# NOTE(review): "import_result = \" assignment opener elided
# (gaps 9136-9137).
9138 masterd.instance.TransferInstanceData(self, feedback_fn,
9139 self.op.src_node, pnode_name,
9140 self.pnode.secondary_ip,
9142 if not compat.all(import_result):
9143 self.LogWarning("Some disks for instance %s on node %s were not"
9144 " imported successfully" % (instance, pnode_name))
9146 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9147 feedback_fn("* preparing remote import...")
9148 # The source cluster will stop the instance before attempting to make a
9149 # connection. In some cases stopping an instance can take a long time,
9150 # hence the shutdown timeout is added to the connection timeout.
9151 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
9152 self.op.source_shutdown_timeout)
9153 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9155 assert iobj.primary_node == self.pnode.name
# NOTE(review): "disk_results = \" assignment opener elided (gap 9156).
9157 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
9158 self.source_x509_ca,
9159 self._cds, timeouts)
9160 if not compat.all(disk_results):
9161 # TODO: Should the instance still be started, even if some disks
9162 # failed to import (valid for local imports, too)?
9163 self.LogWarning("Some disks for instance %s on node %s were not"
9164 " imported successfully" % (instance, pnode_name))
9166 # Run rename script on newly imported instance
9167 assert iobj.name == instance
9168 feedback_fn("Running rename script for %s" % instance)
9169 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
9170 self.source_instance_name,
9171 self.op.debug_level)
# NOTE(review): "if result.fail_msg:" guard elided (gap 9172).
9173 self.LogWarning("Failed to run rename script for %s on node"
9174 " %s: %s" % (instance, pnode_name, result.fail_msg))
# NOTE(review): the final "else:" of the mode dispatch is elided
# (gaps 9175-9176).
9177 # also checked in the prereq part
9178 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
# NOTE(review): "if self.op.start:" guard presumably elided (9179-9181).
9182 iobj.admin_up = True
9183 self.cfg.Update(iobj, feedback_fn)
9184 logging.info("Starting instance %s on node %s", instance, pnode_name)
9185 feedback_fn("* starting instance...")
9186 result = self.rpc.call_instance_start(pnode_name, iobj,
9188 result.Raise("Could not start instance")
9190 return list(iobj.all_nodes)
# NOTE(review): partially-elided numbered dump; tokens kept byte-identical,
# only comments added.
#
# Logical unit returning the information needed to connect to an instance's
# console; the actual connection happens client-side.
9193 class LUInstanceConsole(NoHooksLU):
9194 """Connect to an instance's console.
9196 This is somewhat special in that it returns the command line that
9197 you need to run on the master node in order to connect to the
9203 def ExpandNames(self):
9204 self._ExpandAndLockInstance()
9206 def CheckPrereq(self):
9207 """Check prerequisites.
9209 This checks that the instance is in the cluster.
9212 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9213 assert self.instance is not None, \
9214 "Cannot retrieve locked instance %s" % self.op.instance_name
9215 _CheckNodeOnline(self, self.instance.primary_node)
9217 def Exec(self, feedback_fn):
9218 """Connect to the console of an instance
9221 instance = self.instance
9222 node = instance.primary_node
9224 node_insts = self.rpc.call_instance_list([node],
9225 [instance.hypervisor])[node]
9226 node_insts.Raise("Can't get node information from %s" % node)
# Instance not running on its primary: report a state-aware error.
9228 if instance.name not in node_insts.payload:
9229 if instance.admin_up:
9230 state = constants.INSTST_ERRORDOWN
# NOTE(review): the "else:" between these two assignments is elided
# (gap 9231 in the numbering).
9232 state = constants.INSTST_ADMINDOWN
9233 raise errors.OpExecError("Instance %s is not running (state %s)" %
9234 (instance.name, state))
9236 logging.debug("Connecting to console of %s on %s", instance.name, node)
9238 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
# NOTE(review): partially-elided numbered dump; tokens kept byte-identical,
# only comments added (the docstring's closing quotes are among the elided
# lines — gap 9246-9248).
#
# Purpose: build and validate the hypervisor-specific console descriptor for
# an instance and return it as a plain dict.
9241 def _GetInstanceConsole(cluster, instance):
9242 """Returns console information for an instance.
9244 @type cluster: L{objects.Cluster}
9245 @type instance: L{objects.Instance}
9249 hyper = hypervisor.GetHypervisor(instance.hypervisor)
9250 # beparams and hvparams are passed separately, to avoid editing the
9251 # instance and then saving the defaults in the instance itself.
9252 hvparams = cluster.FillHV(instance)
9253 beparams = cluster.FillBE(instance)
9254 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
9256 assert console.instance == instance.name
9257 assert console.Validate()
9259 return console.ToDict()
# NOTE(review): partially-elided numbered dump; tokens kept byte-identical,
# only comments added.
#
# Logical unit driving disk replacement for a (DRBD) instance; the real work
# is delegated to the TLReplaceDisks tasklet.
9262 class LUInstanceReplaceDisks(LogicalUnit):
9263 """Replace the disks of an instance.
9266 HPATH = "mirrors-replace"
9267 HTYPE = constants.HTYPE_INSTANCE
9270 def CheckArguments(self):
9271 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
9274 def ExpandNames(self):
9275 self._ExpandAndLockInstance()
9277 assert locking.LEVEL_NODE not in self.needed_locks
9278 assert locking.LEVEL_NODEGROUP not in self.needed_locks
# remote_node and iallocator are mutually exclusive ways to pick the new
# secondary; CheckArguments enforces this earlier.
9280 assert self.op.iallocator is None or self.op.remote_node is None, \
9281 "Conflicting options"
9283 if self.op.remote_node is not None:
9284 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9286 # Warning: do not remove the locking of the new secondary here
9287 # unless DRBD8.AddChildren is changed to work in parallel;
9288 # currently it doesn't since parallel invocations of
9289 # FindUnusedMinor will conflict
9290 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
9291 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
# NOTE(review): the "else:" pairing this with the remote_node branch is
# elided (gap 9292 in the numbering).
9293 self.needed_locks[locking.LEVEL_NODE] = []
9294 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9296 if self.op.iallocator is not None:
9297 # iallocator will select a new node in the same group
9298 self.needed_locks[locking.LEVEL_NODEGROUP] = []
9300 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
9301 self.op.iallocator, self.op.remote_node,
9302 self.op.disks, False, self.op.early_release)
9304 self.tasklets = [self.replacer]
9306 def DeclareLocks(self, level):
9307 if level == locking.LEVEL_NODEGROUP:
9308 assert self.op.remote_node is None
9309 assert self.op.iallocator is not None
9310 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
9312 self.share_locks[locking.LEVEL_NODEGROUP] = 1
9313 self.needed_locks[locking.LEVEL_NODEGROUP] = \
9314 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9316 elif level == locking.LEVEL_NODE:
9317 if self.op.iallocator is not None:
9318 assert self.op.remote_node is None
9319 assert not self.needed_locks[locking.LEVEL_NODE]
9321 # Lock member nodes of all locked groups
9322 self.needed_locks[locking.LEVEL_NODE] = [node_name
9323 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
9324 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
# NOTE(review): the "else:" before this fallback is elided (gap 9325).
9326 self._LockInstancesNodes()
9328 def BuildHooksEnv(self):
9331 This runs on the master, the primary and all the secondaries.
# NOTE(review): the "env = {" opener is elided (gaps 9329-9330, 9335).
9334 instance = self.replacer.instance
9336 "MODE": self.op.mode,
9337 "NEW_SECONDARY": self.op.remote_node,
9338 "OLD_SECONDARY": instance.secondary_nodes[0],
9340 env.update(_BuildInstanceHookEnvByObject(self, instance))
# NOTE(review): "return env" elided (gaps 9341-9342).
9343 def BuildHooksNodes(self):
9344 """Build hooks nodes.
9347 instance = self.replacer.instance
9349 self.cfg.GetMasterNode(),
9350 instance.primary_node,
9352 if self.op.remote_node is not None:
9353 nl.append(self.op.remote_node)
# NOTE(review): the "nl = [" opener (gap 9348) and the return statement
# (gaps 9354-9355) are elided.
9356 def CheckPrereq(self):
9357 """Check prerequisites.
9360 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
9361 self.op.iallocator is None)
9363 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
# NOTE(review): the "if owned_groups:" guard is elided (gap 9364).
9365 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
9367 return LogicalUnit.CheckPrereq(self)
9370 class TLReplaceDisks(Tasklet):
9371 """Replaces disks for an instance.
9373 Note: Locking is not within the scope of this class.
9376 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9377 disks, delay_iallocator, early_release):
9378 """Initializes this class.
9381 Tasklet.__init__(self, lu)
9384 self.instance_name = instance_name
9386 self.iallocator_name = iallocator_name
9387 self.remote_node = remote_node
9389 self.delay_iallocator = delay_iallocator
9390 self.early_release = early_release
9393 self.instance = None
9394 self.new_node = None
9395 self.target_node = None
9396 self.other_node = None
9397 self.remote_node_info = None
9398 self.node_secondary_ip = None
9401 def CheckArguments(mode, remote_node, iallocator):
9402 """Helper function for users of this class.
9405 # check for valid parameter combination
9406 if mode == constants.REPLACE_DISK_CHG:
9407 if remote_node is None and iallocator is None:
9408 raise errors.OpPrereqError("When changing the secondary either an"
9409 " iallocator script must be used or the"
9410 " new node given", errors.ECODE_INVAL)
9412 if remote_node is not None and iallocator is not None:
9413 raise errors.OpPrereqError("Give either the iallocator or the new"
9414 " secondary, not both", errors.ECODE_INVAL)
9416 elif remote_node is not None or iallocator is not None:
9417 # Not replacing the secondary
9418 raise errors.OpPrereqError("The iallocator and new node options can"
9419 " only be used when changing the"
9420 " secondary node", errors.ECODE_INVAL)
9423 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9424 """Compute a new secondary node using an IAllocator.
9427 ial = IAllocator(lu.cfg, lu.rpc,
9428 mode=constants.IALLOCATOR_MODE_RELOC,
9430 relocate_from=list(relocate_from))
9432 ial.Run(iallocator_name)
9435 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9436 " %s" % (iallocator_name, ial.info),
9439 if len(ial.result) != ial.required_nodes:
9440 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9441 " of nodes (%s), required %s" %
9443 len(ial.result), ial.required_nodes),
9446 remote_node_name = ial.result[0]
9448 lu.LogInfo("Selected new secondary for instance '%s': %s",
9449 instance_name, remote_node_name)
9451 return remote_node_name
9453 def _FindFaultyDisks(self, node_name):
9454 """Wrapper for L{_FindFaultyInstanceDisks}.
9457 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
  def _CheckDisksActivated(self, instance):
    """Checks if the instance disks are activated.

    @param instance: The instance to check disks
    @return: True if they are activated, False otherwise

    """
    nodes = instance.all_nodes

    # check each disk on each of the instance's nodes
    for idx, dev in enumerate(instance.disks):
      self.lu.LogInfo("Checking disk/%d on %s", idx, node)
      self.cfg.SetDiskID(dev, node)

      result = self.rpc.call_blockdev_find(node, dev)

      # a failed RPC or an empty payload means the disk is not activated
      elif result.fail_msg or not result.payload:
  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.instance_name

    # disk replacement is only implemented for the DRBD8 disk template
    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
                                 " instances", errors.ECODE_INVAL)

    # a DRBD8 instance must have exactly one secondary node
    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("The instance has a strange layout,"
                                 " expected one secondary but found %d" %
                                 len(instance.secondary_nodes),

    # the second half of the checks may be delayed until Exec, see
    # _CheckPrereq2's docstring
    if not self.delay_iallocator:
      self._CheckPrereq2()
  def _CheckPrereq2(self):
    """Check prerequisites, second part.

    This function should always be part of CheckPrereq. It was separated and is
    now called from Exec because during node evacuation iallocator was only
    called with an unmodified cluster model, not taking planned changes into
    account.

    """
    instance = self.instance
    secondary_node = instance.secondary_nodes[0]

    # determine the new secondary: either given explicitly or computed
    # by the iallocator script
    if self.iallocator_name is None:
      remote_node = self.remote_node
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)

    if remote_node is None:
      self.remote_node_info = None

      assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
        "Remote node '%s' is not locked" % remote_node

      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node

    # the new secondary must differ from both current nodes
    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance", errors.ECODE_INVAL)

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance",

    # explicit disk lists only make sense for in-place replacement
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced",

    if self.mode == constants.REPLACE_DISK_AUTO:
      if not self._CheckDisksActivated(instance):
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
                                   " first" % self.instance_name,

      faulty_primary = self._FindFaultyDisks(instance.primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      # automatic repair can only handle faults on a single node
      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name,

        self.disks = faulty_primary
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        self.new_node = remote_node
        self.other_node = instance.primary_node
        self.target_node = secondary_node
        check_nodes = [self.new_node, self.other_node]

        # the new secondary must be usable
        _CheckNodeNotDrained(self.lu, remote_node)
        _CheckNodeVmCapable(self.lu, remote_node)

        old_node_info = self.cfg.GetNodeInfo(secondary_node)
        assert old_node_info is not None
        if old_node_info.offline and not self.early_release:
          # doesn't make sense to delay the release
          self.early_release = True
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
                          " early-release mode", secondary_node)

        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %

      # If not specified all disks should be replaced
      self.disks = range(len(self.instance.disks))

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    # nodes that remain involved in the operation
    touched_nodes = frozenset(node_name for node_name in [self.new_node,
                              if node_name is not None)

    # Release unneeded node locks
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)

    # Release any owned node group
    if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
      _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)

    # Check whether disks are valid
    for disk_idx in self.disks:
      instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
                                  in self.cfg.GetMultiNodeInfo(touched_nodes))
  def Exec(self, feedback_fn):
    """Execute disk replacement.

    This dispatches the disk replacement to the appropriate handler.

    """
    # if prerequisite checks were delayed, run them now (see CheckPrereq)
    if self.delay_iallocator:
      self._CheckPrereq2()

    # Verify owned locks before starting operation
    owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
    assert set(owned_nodes) == set(self.node_secondary_ip), \
      ("Incorrect node locks, owning %s, expected %s" %
       (owned_nodes, self.node_secondary_ip.keys()))

    owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
    assert list(owned_instances) == [self.instance_name], \
      "Instance '%s' not locked" % self.instance_name

    assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
      "Should not own any node group lock at this point"

    feedback_fn("No disks need replacement")

    feedback_fn("Replacing disk(s) %s for %s" %
                (utils.CommaJoin(self.disks), self.instance.name))

    # remember whether the instance was down, to restore that state later
    activate_disks = (not self.instance.admin_up)

    # Activate the instance disks if we're replacing them on a down instance
    _StartInstanceDisks(self.lu, self.instance, True)

    # Should we replace the secondary node?
    if self.new_node is not None:
      fn = self._ExecDrbd8Secondary
      fn = self._ExecDrbd8DiskOnly

    result = fn(feedback_fn)

    # Deactivate the instance disks if we're replacing them on a
    # down instance
    _SafeShutdownInstanceDisks(self.lu, self.instance)

    # Verify owned locks
    owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
    nodes = frozenset(self.node_secondary_ip)
    assert ((self.early_release and not owned_nodes) or
            (not self.early_release and not (set(owned_nodes) - nodes))), \
      ("Not owning the correct locks, early_release=%s, owned=%r,"
       " nodes=%r" % (self.early_release, owned_nodes, nodes))
  def _CheckVolumeGroup(self, nodes):
    """Verify the configured volume group exists on the given nodes."""
    self.lu.LogInfo("Checking volume groups")

    vgname = self.cfg.GetVGName()

    # Make sure volume group exists on all involved nodes
    results = self.rpc.call_vg_list(nodes)
    raise errors.OpExecError("Can't list volume groups on the nodes")

    res.Raise("Error checking node %s" % node)
    if vgname not in res.payload:
      raise errors.OpExecError("Volume group '%s' not found on node %s" %
  def _CheckDisksExistence(self, nodes):
    """Verify the disks selected for replacement exist on the given nodes."""
    # Check disk existence
    for idx, dev in enumerate(self.instance.disks):
      # only the disks selected for replacement are checked
      if idx not in self.disks:

        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
        self.cfg.SetDiskID(dev, node)

        result = self.rpc.call_blockdev_find(node, dev)

        msg = result.fail_msg
        if msg or not result.payload:
          # a clean RPC with an empty payload still means a missing disk
          msg = "disk not found"
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
    """Abort if any selected disk on the given node is degraded."""
    for idx, dev in enumerate(self.instance.disks):
      # only the disks selected for replacement are checked
      if idx not in self.disks:

        self.lu.LogInfo("Checking disk/%d consistency on node %s" %

        if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
          raise errors.OpExecError("Node %s has degraded storage, unsafe to"
                                   " replace disks for instance %s" %
                                   (node_name, self.instance.name))
  def _CreateNewStorage(self, node_name):
    """Create new storage on the primary or secondary node.

    This is only used for same-node replaces, not for changing the
    secondary node, hence we don't want to modify the existing disk.

    """
    for idx, dev in enumerate(self.instance.disks):
      # only the disks selected for replacement get new storage
      if idx not in self.disks:

        self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))

        self.cfg.SetDiskID(dev, node_name)

        # generate unique names for the new data and metadata LVs
        lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
        names = _GenerateUniqueNames(self.lu, lv_names)

        # the new LVs go into the same volume groups as the existing ones
        vg_data = dev.children[0].logical_id[0]
        lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
                               logical_id=(vg_data, names[0]))
        vg_meta = dev.children[1].logical_id[0]
        lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                               logical_id=(vg_meta, names[1]))

        new_lvs = [lv_data, lv_meta]
        # keep copies of the old children so they can be removed later
        old_lvs = [child.Copy() for child in dev.children]
        iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)

        # we pass force_create=True to force the LVM creation
        for new_lv in new_lvs:
          _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
                          _GetInstanceInfoText(self.instance), False)
  def _CheckDevices(self, node_name, iv_names):
    """Verify the DRBD devices in iv_names exist and are not degraded."""
    for name, (dev, _, _) in iv_names.iteritems():
      self.cfg.SetDiskID(dev, node_name)

      result = self.rpc.call_blockdev_find(node_name, dev)

      msg = result.fail_msg
      if msg or not result.payload:
        # a clean RPC with an empty payload still means a missing device
        msg = "disk not found"
        raise errors.OpExecError("Can't find DRBD device %s: %s" %

      if result.payload.is_degraded:
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
  def _RemoveOldStorage(self, node_name, iv_names):
    """Remove the replaced logical volumes; failures are only warned about."""
    for name, (_, old_lvs, _) in iv_names.iteritems():
      self.lu.LogInfo("Remove logical volumes for %s" % name)

      self.cfg.SetDiskID(lv, node_name)

      msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg

      # removal failure is not fatal: the instance keeps working
      self.lu.LogWarning("Can't remove old LV: %s" % msg,
                         hint="remove unused LVs manually")
  def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
    """Replace a disk on the primary or secondary for DRBD 8.

    The algorithm for replace is quite complicated:

      1. for each disk to be replaced:

        1. create new LVs on the target node with unique names
        1. detach old LVs from the drbd device
        1. rename old LVs to name_replaced.<time_t>
        1. rename new LVs to old LVs
        1. attach the new LVs (with the old names now) to the drbd device

      1. wait for sync across all devices

      1. for each modified disk:

        1. remove old LVs (which have the name name_replaces.<time_t>)

    Failures are not very well handled.

    """
    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.other_node, self.target_node])
    self._CheckVolumeGroup([self.target_node, self.other_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.other_node,
                                self.other_node == self.instance.primary_node,

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    iv_names = self._CreateNewStorage(self.target_node)

    # Step: for each lv, detach+rename*2+attach
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    for dev, old_lvs, new_lvs in iv_names.itervalues():
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)

      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
      result.Raise("Can't detach drbd from local storage on node"
                   " %s for device %s" % (self.target_node, dev.iv_name))
      #cfg.Update(instance)

      # ok, we created the new LVs, so now we know we have the needed
      # storage; as such, we proceed on the target node to rename
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
      # using the assumption that logical_id == physical_id (which in
      # turn is the unique_id on that node)

      # FIXME(iustin): use a better name for the replaced LVs
      temp_suffix = int(time.time())
      ren_fn = lambda d, suff: (d.physical_id[0],
                                d.physical_id[1] + "_replaced-%s" % suff)

      # Build the rename list based on what LVs exist on the node
      rename_old_to_new = []
      for to_ren in old_lvs:
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
        if not result.fail_msg and result.payload:
          # only LVs actually present on the node get renamed
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))

      self.lu.LogInfo("Renaming the old LVs on the target node")
      result = self.rpc.call_blockdev_rename(self.target_node,
      result.Raise("Can't rename old LVs on node %s" % self.target_node)

      # Now we rename the new LVs to the old LVs
      self.lu.LogInfo("Renaming the new LVs on the target node")
      rename_new_to_old = [(new, old.physical_id)
                           for old, new in zip(old_lvs, new_lvs)]
      result = self.rpc.call_blockdev_rename(self.target_node,
      result.Raise("Can't rename new LVs on node %s" % self.target_node)

      # Intermediate steps of in memory modifications
      for old, new in zip(old_lvs, new_lvs):
        new.logical_id = old.logical_id
        self.cfg.SetDiskID(new, self.target_node)

      # We need to modify old_lvs so that removal later removes the
      # right LVs, not the newly added ones; note that old_lvs is a
      # copy (see _CreateNewStorage)
      for disk in old_lvs:
        disk.logical_id = ren_fn(disk, temp_suffix)
        self.cfg.SetDiskID(disk, self.target_node)

      # Now that the new lvs have the old name, we can add them to the device
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,

      msg = result.fail_msg
      # on failure, try to roll back by removing the just-created LVs
      for new_lv in new_lvs:
        msg2 = self.rpc.call_blockdev_remove(self.target_node,
        self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                           hint=("cleanup manually the unused logical"
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)

    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")

      self._RemoveOldStorage(self.target_node, iv_names)
      # WARNING: we release both node locks here, do not do other RPCs
      # than WaitForSync to the primary node
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
                    names=[self.target_node, self.other_node])

    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")

    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")

      self._RemoveOldStorage(self.target_node, iv_names)
  def _ExecDrbd8Secondary(self, feedback_fn):
    """Replace the secondary node for DRBD 8.

    The algorithm for replace is quite complicated:
      - for all disks of the instance:
        - create new LVs on the new node with same names
        - shutdown the drbd device on the old secondary
        - disconnect the drbd network on the primary
        - create the drbd device on the new secondary
        - network attach the drbd on the primary, using an artifice:
          the drbd code for Attach() will connect to the network if it
          finds a device which is connected to the good local disks
      - wait for sync across all devices
      - remove all disks from the old secondary

    Failures are not very well handled.

    """
    pnode = self.instance.primary_node

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.instance.primary_node])
    self._CheckVolumeGroup([self.instance.primary_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.instance.primary_node, True, True)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
                      (self.new_node, idx))
      # we pass force_create=True to force LVM creation
      for new_lv in dev.children:
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    # Step 4: dbrd minors and drbd setups changes
    # after this, we must manually remove the drbd minors on both the
    # error and the success paths
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    minors = self.cfg.AllocateDRBDMinor([self.new_node
                                         for dev in self.instance.disks],
    logging.debug("Allocated minors %r", minors)

    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
                      (self.new_node, idx))
      # create new devices on new_node; note that we create two IDs:
      # one without port, so the drbd will be activated without
      # networking information on the new node at this stage, and one
      # with network, for the latter activation in step 4
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
      if self.instance.primary_node == o_node1:
        assert self.instance.primary_node == o_node2, "Three-node instance?"

      new_alone_id = (self.instance.primary_node, self.new_node, None,
                      p_minor, new_minor, o_secret)
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
                    p_minor, new_minor, o_secret)

      iv_names[idx] = (dev, dev.children, new_net_id)
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                              logical_id=new_alone_id,
                              children=dev.children,
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
                              _GetInstanceInfoText(self.instance), False)
      except errors.GenericError:
        # creation failed: give the reserved minors back before aborting
        self.cfg.ReleaseDRBDMinors(self.instance.name)

    # We have new devices, shutdown the drbd on the old secondary
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
      self.cfg.SetDiskID(dev, self.target_node)
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg

      self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                         "node: %s" % (idx, msg),
                         hint=("Please cleanup this device manually as"
                               " soon as possible"))

    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
    result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
                                               self.instance.disks)[pnode]

    msg = result.fail_msg

    # detaches didn't succeed (unlikely)
    self.cfg.ReleaseDRBDMinors(self.instance.name)
    raise errors.OpExecError("Can't detach the disks from the network on"
                             " old node: %s" % (msg,))

    # if we managed to detach at least one, we update all the disks of
    # the instance to point to the new secondary
    self.lu.LogInfo("Updating instance configuration")
    for dev, _, new_logical_id in iv_names.itervalues():
      dev.logical_id = new_logical_id
      self.cfg.SetDiskID(dev, self.instance.primary_node)

    self.cfg.Update(self.instance, feedback_fn)

    # and now perform the drbd attach
    self.lu.LogInfo("Attaching primary drbds to new secondary"
                    " (standalone => connected)")
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
                                           self.node_secondary_ip,
                                           self.instance.disks,
                                           self.instance.name,
    for to_node, to_result in result.items():
      msg = to_result.fail_msg

      # attach failures are only warned about, syncing will show problems
      self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
                         hint=("please do a gnt-instance info to see the"
                               " status of disks"))

    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")

      self._RemoveOldStorage(self.target_node, iv_names)
      # WARNING: we release all node locks here, do not do other RPCs
      # than WaitForSync to the primary node
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
                    names=[self.instance.primary_node,

    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")

    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)
class LURepairNodeStorage(NoHooksLU):
  """Repairs the volume group on a node.

  """
  def CheckArguments(self):
    """Verify the node exists and the storage type supports repair."""
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    if (constants.SO_FIX_CONSISTENCY not in
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " repaired" % storage_type,
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    # only the target node needs to be locked
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],

  def _CheckFaultyDisks(self, instance, node_name):
    """Ensure faulty disks abort the opcode or at least warn."""
    if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
      raise errors.OpPrereqError("Instance '%s' has faulty disks on"
                                 " node '%s'" % (instance.name, node_name),
                                 errors.ECODE_STATE)
    except errors.OpPrereqError, err:
      if self.op.ignore_consistency:
        # downgrade the error to a warning when explicitly requested
        self.proc.LogWarning(str(err.args[0]))

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # Check whether any instance on this node has faulty disks
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
      if not inst.admin_up:

      check_nodes = set(inst.all_nodes)
      check_nodes.discard(self.op.node_name)
      for inst_node_name in check_nodes:
        self._CheckFaultyDisks(inst, inst_node_name)

  def Exec(self, feedback_fn):
    """Run the repair operation on the target node."""
    feedback_fn("Repairing storage unit '%s' on %s ..." %
                (self.op.name, self.op.node_name))

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_execute(self.op.node_name,
                                           self.op.storage_type, st_args,
                                           constants.SO_FIX_CONSISTENCY)
    result.Raise("Failed to repair storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))
class LUNodeEvacuate(NoHooksLU):
  """Evacuates instances off a list of nodes.

  """
  # maps opcode evacuation modes to the corresponding iallocator modes
  _MODE2IALLOCATOR = {
    constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
    constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
    constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
  assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
  assert (frozenset(_MODE2IALLOCATOR.values()) ==
          constants.IALLOCATOR_NEVAC_MODES)

  def CheckArguments(self):
    # exactly one of iallocator/remote_node must be given
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      assert self.op.remote_node

      if self.op.remote_node == self.op.node_name:
        raise errors.OpPrereqError("Can not use evacuated node as a new"
                                   " secondary node", errors.ECODE_INVAL)

      if self.op.mode != constants.NODE_EVAC_SEC:
        raise errors.OpPrereqError("Without the use of an iallocator only"
                                   " secondary instances can be evacuated",
                                   errors.ECODE_INVAL)

    # all locks are acquired in shared mode
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],

    # Determine nodes (via group) optimistically, needs verification once locks
    # have been acquired
    self.lock_nodes = self._DetermineNodes()

  def _DetermineNodes(self):
    """Gets the list of nodes to operate on.

    """
    if self.op.remote_node is None:
      # Iallocator will choose any node(s) in the same group
      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
      group_nodes = frozenset([self.op.remote_node])

    # Determine nodes to be locked
    return set([self.op.node_name]) | group_nodes

  def _DetermineInstances(self):
    """Builds list of instances to operate on.

    """
    assert self.op.mode in constants.NODE_EVAC_MODES

    if self.op.mode == constants.NODE_EVAC_PRI:
      # Primary instances only
      inst_fn = _GetNodePrimaryInstances
      assert self.op.remote_node is None, \
        "Evacuating primary instances requires iallocator"
    elif self.op.mode == constants.NODE_EVAC_SEC:
      # Secondary instances only
      inst_fn = _GetNodeSecondaryInstances

      assert self.op.mode == constants.NODE_EVAC_ALL
      inst_fn = _GetNodeInstances
      # TODO: In 2.6, change the iallocator interface to take an evacuation mode
      raise errors.OpPrereqError("Due to an issue with the iallocator"
                                 " interface it is not possible to evacuate"
                                 " all instances at once; specify explicitly"
                                 " whether to evacuate primary or secondary"
                                 errors.ECODE_INVAL)

    return inst_fn(self.cfg, self.op.node_name)

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        set(i.name for i in self._DetermineInstances())

    elif level == locking.LEVEL_NODEGROUP:
      # Lock node groups for all potential target nodes optimistically, needs
      # verification once nodes have been acquired
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)

    elif level == locking.LEVEL_NODE:
      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes

  def CheckPrereq(self):
    # verify that the optimistically-determined locks are still correct
    owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
    owned_nodes = self.owned_locks(locking.LEVEL_NODE)
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)

    need_nodes = self._DetermineNodes()

    if not owned_nodes.issuperset(need_nodes):
      raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
                                 " locks were acquired, current nodes are"
                                 " are '%s', used to be '%s'; retry the"
                                 (self.op.node_name,
                                  utils.CommaJoin(need_nodes),
                                  utils.CommaJoin(owned_nodes)),
                                 errors.ECODE_STATE)

    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
    if owned_groups != wanted_groups:
      raise errors.OpExecError("Node groups changed since locks were acquired,"
                               " current groups are '%s', used to be '%s';"
                               " retry the operation" %
                               (utils.CommaJoin(wanted_groups),
                                utils.CommaJoin(owned_groups)))

    # Determine affected instances
    self.instances = self._DetermineInstances()
    self.instance_names = [i.name for i in self.instances]

    if set(self.instance_names) != owned_instances:
      raise errors.OpExecError("Instances on node '%s' changed since locks"
                               " were acquired, current instances are '%s',"
                               " used to be '%s'; retry the operation" %
                               (self.op.node_name,
                                utils.CommaJoin(self.instance_names),
                                utils.CommaJoin(owned_instances)))

    if self.instance_names:
      self.LogInfo("Evacuating instances from node '%s': %s",
                   utils.CommaJoin(utils.NiceSort(self.instance_names)))

      self.LogInfo("No instances to evacuate from node '%s'",

    if self.op.remote_node is not None:
      # the explicit target node must not already be a primary
      for i in self.instances:
        if i.primary_node == self.op.remote_node:
          raise errors.OpPrereqError("Node %s is the primary node of"
                                     " instance %s, cannot use it as"
                                     (self.op.remote_node, i.name),
                                     errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)

    if not self.instance_names:
      # No instances to evacuate

    elif self.op.iallocator is not None:
      # TODO: Implement relocation to other group
      ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
                       evac_mode=self._MODE2IALLOCATOR[self.op.mode],
                       instances=list(self.instance_names))

      ial.Run(self.op.iallocator)

      if not ial.success:
        raise errors.OpPrereqError("Can't compute node evacuation using"
                                   " iallocator '%s': %s" %
                                   (self.op.iallocator, ial.info),
                                   errors.ECODE_NORES)

      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)

    elif self.op.remote_node is not None:
      assert self.op.mode == constants.NODE_EVAC_SEC
      # one replace-disks job per instance, changing the secondary
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
                                        remote_node=self.op.remote_node,
                                        mode=constants.REPLACE_DISK_CHG,
                                        early_release=self.op.early_release)]
        for instance_name in self.instance_names

      raise errors.ProgrammerError("No iallocator or remote node")

    return ResultWithJobs(jobs)
def _SetOpEarlyRelease(early_release, op):
  """Sets C{early_release} flag on opcodes if available.

  """
  op.early_release = early_release
  except AttributeError:
    # replace-disks opcodes are expected to always have the attribute
    assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
def _NodeEvacDest(use_nodes, group, nodes):
  """Returns group or nodes depending on caller's choice.

  """
  return utils.CommaJoin(nodes)
def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
  """Unpacks the result of change-group and node-evacuate iallocator requests.

  Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
  L{constants.IALLOCATOR_MODE_CHG_GROUP}.

  @type lu: L{LogicalUnit}
  @param lu: Logical unit instance
  @type alloc_result: tuple/list
  @param alloc_result: Result from iallocator
  @type early_release: bool
  @param early_release: Whether to release locks early if possible
  @type use_nodes: bool
  @param use_nodes: Whether to display node names instead of groups

  """
  (moved, failed, jobs) = alloc_result

  # report instances the allocator could not place
  failreason = utils.CommaJoin("%s (%s)" % (name, reason)
                               for (name, reason) in failed)
  lu.LogWarning("Unable to evacuate instances %s", failreason)
  raise errors.OpExecError("Unable to evacuate instances %s" % failreason)

  lu.LogInfo("Instances to be moved: %s",
             utils.CommaJoin("%s (to %s)" %
                             (name, _NodeEvacDest(use_nodes, group, nodes))
                             for (name, group, nodes) in moved))

  # deserialize the opcodes and propagate the early_release flag to each
  return [map(compat.partial(_SetOpEarlyRelease, early_release),
              map(opcodes.OpCode.LoadOpCode, ops))
class LUInstanceGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  # hook path and hook type for this logical unit
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  def ExpandNames(self):
    self._ExpandAndLockInstance()
    # node locks are computed later from the instance's nodes
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10440 def DeclareLocks(self, level):
10441 if level == locking.LEVEL_NODE:
10442 self._LockInstancesNodes()
10444 def BuildHooksEnv(self):
10445 """Build hooks env.
10447 This runs on the master, the primary and all the secondaries.
10451 "DISK": self.op.disk,
10452 "AMOUNT": self.op.amount,
10454 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10457 def BuildHooksNodes(self):
10458 """Build hooks nodes.
10461 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10464 def CheckPrereq(self):
10465 """Check prerequisites.
10467 This checks that the instance is in the cluster.
10470 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10471 assert instance is not None, \
10472 "Cannot retrieve locked instance %s" % self.op.instance_name
10473 nodenames = list(instance.all_nodes)
10474 for node in nodenames:
10475 _CheckNodeOnline(self, node)
10477 self.instance = instance
10479 if instance.disk_template not in constants.DTS_GROWABLE:
10480 raise errors.OpPrereqError("Instance's disk layout does not support"
10481 " growing", errors.ECODE_INVAL)
10483 self.disk = instance.FindDisk(self.op.disk)
10485 if instance.disk_template not in (constants.DT_FILE,
10486 constants.DT_SHARED_FILE):
10487 # TODO: check the free disk space for file, when that feature will be
10489 _CheckNodesFreeDiskPerVG(self, nodenames,
10490 self.disk.ComputeGrowth(self.op.amount))
10492 def Exec(self, feedback_fn):
10493 """Execute disk grow.
10496 instance = self.instance
10499 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
10501 raise errors.OpExecError("Cannot activate block device to grow")
10503 # First run all grow ops in dry-run mode
10504 for node in instance.all_nodes:
10505 self.cfg.SetDiskID(disk, node)
10506 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
10507 result.Raise("Grow request failed to node %s" % node)
10509 # We know that (as far as we can test) operations across different
10510 # nodes will succeed, time to run it for real
10511 for node in instance.all_nodes:
10512 self.cfg.SetDiskID(disk, node)
10513 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
10514 result.Raise("Grow request failed to node %s" % node)
10516 # TODO: Rewrite code to work properly
10517 # DRBD goes into sync mode for a short amount of time after executing the
10518 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
10519 # calling "resize" in sync mode fails. Sleeping for a short amount of
10520 # time is a work-around.
10523 disk.RecordGrow(self.op.amount)
10524 self.cfg.Update(instance, feedback_fn)
10525 if self.op.wait_for_sync:
10526 disk_abort = not _WaitForSync(self, instance, disks=[disk])
10528 self.proc.LogWarning("Disk sync-ing has not returned a good"
10529 " status; please check the instance")
10530 if not instance.admin_up:
10531 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
10532 elif not instance.admin_up:
10533 self.proc.LogWarning("Not shutting down the disk even if the instance is"
10534 " not supposed to be running because no wait for"
10535 " sync mode was requested")
# NOTE(review): mangled line-numbered extraction — leading "105xx"/"106xx"
# numbers are artifacts, indentation is lost, and intermediate lines are
# missing from this view.  Annotating in place only.
# LU returning detailed runtime and configuration data about instances.
10538 class LUInstanceQueryData(NoHooksLU):
10539 """Query runtime instance data.
# Locking: non-static queries force use_locking; either the named
# instances or (under locks) all instances are selected.
10544 def ExpandNames(self):
10545 self.needed_locks = {}
10547 # Use locking if requested or when non-static information is wanted
10548 if not (self.op.static or self.op.use_locking):
10549 self.LogWarning("Non-static data requested, locks need to be acquired")
10550 self.op.use_locking = True
10552 if self.op.instances or not self.op.use_locking:
10553 # Expand instance names right here
10554 self.wanted_names = _GetWantedInstances(self, self.op.instances)
10556 # Will use acquired locks
10557 self.wanted_names = None
10559 if self.op.use_locking:
10560 self.share_locks = _ShareAll()
10562 if self.wanted_names is None:
10563 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
10565 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
10567 self.needed_locks[locking.LEVEL_NODEGROUP] = []
10568 self.needed_locks[locking.LEVEL_NODE] = []
10569 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10571 def DeclareLocks(self, level):
10572 if self.op.use_locking:
10573 if level == locking.LEVEL_NODEGROUP:
10574 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
10576 # Lock all groups used by instances optimistically; this requires going
10577 # via the node before it's locked, requiring verification later on
10578 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10579 frozenset(group_uuid
10580 for instance_name in owned_instances
10582 self.cfg.GetInstanceNodeGroups(instance_name))
10584 elif level == locking.LEVEL_NODE:
10585 self._LockInstancesNodes()
# Prereq: resolve wanted names from the owned locks (if locking) and
# verify the optimistically-acquired group locks are still correct.
10587 def CheckPrereq(self):
10588 """Check prerequisites.
10590 This only checks the optional instance list against the existing names.
10593 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
10594 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
10595 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
10597 if self.wanted_names is None:
10598 assert self.op.use_locking, "Locking was not used"
10599 self.wanted_names = owned_instances
10601 instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
10603 if self.op.use_locking:
10604 _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
10607 assert not (owned_instances or owned_groups or owned_nodes)
10609 self.wanted_instances = instances.values()
# Helper: live status of one block device via RPC, or None-ish static
# data when op.static or no node is given.
10611 def _ComputeBlockdevStatus(self, node, instance_name, dev):
10612 """Returns the status of a block device
10615 if self.op.static or not node:
10618 self.cfg.SetDiskID(dev, node)
10620 result = self.rpc.call_blockdev_find(node, dev)
10624 result.Raise("Can't compute disk status for %s" % instance_name)
10626 status = result.payload
10630 return (status.dev_path, status.major, status.minor,
10631 status.sync_percent, status.estimated_time,
10632 status.is_degraded, status.ldisk_status)
# Helper: recursive per-disk status dict; for DRBD the secondary-status
# node is taken from the disk's logical_id.
10634 def _ComputeDiskStatus(self, instance, snode, dev):
10635 """Compute block device status.
10638 if dev.dev_type in constants.LDS_DRBD:
10639 # we change the snode then (otherwise we use the one passed in)
10640 if dev.logical_id[0] == instance.primary_node:
10641 snode = dev.logical_id[1]
10643 snode = dev.logical_id[0]
10645 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
10646 instance.name, dev)
10647 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
10650 dev_children = map(compat.partial(self._ComputeDiskStatus,
10657 "iv_name": dev.iv_name,
10658 "dev_type": dev.dev_type,
10659 "logical_id": dev.logical_id,
10660 "physical_id": dev.physical_id,
10661 "pstatus": dev_pstatus,
10662 "sstatus": dev_sstatus,
10663 "children": dev_children,
# Exec: for each wanted instance, gather config state, live run state
# (unless static or the primary node is offline) and per-disk status,
# returning one dict per instance keyed by instance name.
10668 def Exec(self, feedback_fn):
10669 """Gather and return data"""
10672 cluster = self.cfg.GetClusterInfo()
10674 node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
10675 nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
10677 groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
10678 for node in nodes.values()))
10680 group2name_fn = lambda uuid: groups[uuid].name
10682 for instance in self.wanted_instances:
10683 pnode = nodes[instance.primary_node]
10685 if self.op.static or pnode.offline:
10686 remote_state = None
10688 self.LogWarning("Primary node %s is marked offline, returning static"
10689 " information only for instance %s" %
10690 (pnode.name, instance.name))
10692 remote_info = self.rpc.call_instance_info(instance.primary_node,
10694 instance.hypervisor)
10695 remote_info.Raise("Error checking node %s" % instance.primary_node)
10696 remote_info = remote_info.payload
10697 if remote_info and "state" in remote_info:
10698 remote_state = "up"
10700 remote_state = "down"
10702 if instance.admin_up:
10703 config_state = "up"
10705 config_state = "down"
10707 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
10710 snodes_group_uuids = [nodes[snode_name].group
10711 for snode_name in instance.secondary_nodes]
10713 result[instance.name] = {
10714 "name": instance.name,
10715 "config_state": config_state,
10716 "run_state": remote_state,
10717 "pnode": instance.primary_node,
10718 "pnode_group_uuid": pnode.group,
10719 "pnode_group_name": group2name_fn(pnode.group),
10720 "snodes": instance.secondary_nodes,
10721 "snodes_group_uuids": snodes_group_uuids,
10722 "snodes_group_names": map(group2name_fn, snodes_group_uuids),
10724 # this happens to be the same format used for hooks
10725 "nics": _NICListToTuple(self, instance.nics),
10726 "disk_template": instance.disk_template,
10728 "hypervisor": instance.hypervisor,
10729 "network_port": instance.network_port,
10730 "hv_instance": instance.hvparams,
10731 "hv_actual": cluster.FillHV(instance, skip_globals=True),
10732 "be_instance": instance.beparams,
10733 "be_actual": cluster.FillBE(instance),
10734 "os_instance": instance.osparams,
10735 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
10736 "serial_no": instance.serial_no,
10737 "mtime": instance.mtime,
10738 "ctime": instance.ctime,
10739 "uuid": instance.uuid,
# NOTE(review): mangled line-numbered extraction — leading "107xx"-"114xx"
# numbers are artifacts, indentation is lost, and many intermediate lines
# ("else:"/"try:" branches, blank lines, closing brackets) are missing from
# this view.  The logic is too intricate and too order-dependent to rewrite
# safely from a partial listing, so it is annotated in place only.
# LU modifying an instance's parameters: NICs, disks, disk template,
# hypervisor/backend/OS parameters (gnt-instance modify).
10745 class LUInstanceSetParams(LogicalUnit):
10746 """Modifies an instances's parameters.
10749 HPATH = "instance-modify"
10750 HTYPE = constants.HTYPE_INSTANCE
# Static syntax checks on the opcode: at least one change requested, valid
# disk and NIC modification lists, and consistent disk-template options.
10753 def CheckArguments(self):
10754 if not (self.op.nics or self.op.disks or self.op.disk_template or
10755 self.op.hvparams or self.op.beparams or self.op.os_name):
10756 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
10758 if self.op.hvparams:
10759 _CheckGlobalHvParams(self.op.hvparams)
# Disk modifications: each entry is (op, dict) where op is DDM_ADD,
# DDM_REMOVE or an integer index for in-place modification.
10763 for disk_op, disk_dict in self.op.disks:
10764 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
10765 if disk_op == constants.DDM_REMOVE:
10766 disk_addremove += 1
10768 elif disk_op == constants.DDM_ADD:
10769 disk_addremove += 1
10771 if not isinstance(disk_op, int):
10772 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
10773 if not isinstance(disk_dict, dict):
10774 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
10775 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10777 if disk_op == constants.DDM_ADD:
10778 mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
10779 if mode not in constants.DISK_ACCESS_SET:
10780 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
10781 errors.ECODE_INVAL)
10782 size = disk_dict.get(constants.IDISK_SIZE, None)
10784 raise errors.OpPrereqError("Required disk parameter size missing",
10785 errors.ECODE_INVAL)
10788 except (TypeError, ValueError), err:
10789 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
10790 str(err), errors.ECODE_INVAL)
10791 disk_dict[constants.IDISK_SIZE] = size
10793 # modification of disk
10794 if constants.IDISK_SIZE in disk_dict:
10795 raise errors.OpPrereqError("Disk size change not possible, use"
10796 " grow-disk", errors.ECODE_INVAL)
10798 if disk_addremove > 1:
10799 raise errors.OpPrereqError("Only one disk add or remove operation"
10800 " supported at a time", errors.ECODE_INVAL)
10802 if self.op.disks and self.op.disk_template is not None:
10803 raise errors.OpPrereqError("Disk template conversion and other disk"
10804 " changes not supported at the same time",
10805 errors.ECODE_INVAL)
10807 if (self.op.disk_template and
10808 self.op.disk_template in constants.DTS_INT_MIRROR and
10809 self.op.remote_node is None):
10810 raise errors.OpPrereqError("Changing the disk template to a mirrored"
10811 " one requires specifying a secondary node",
10812 errors.ECODE_INVAL)
# NIC modifications: same (op, dict) scheme as disks; normalizes "none"
# values and validates IP/MAC syntax.
10816 for nic_op, nic_dict in self.op.nics:
10817 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
10818 if nic_op == constants.DDM_REMOVE:
10821 elif nic_op == constants.DDM_ADD:
10824 if not isinstance(nic_op, int):
10825 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
10826 if not isinstance(nic_dict, dict):
10827 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
10828 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10830 # nic_dict should be a dict
10831 nic_ip = nic_dict.get(constants.INIC_IP, None)
10832 if nic_ip is not None:
10833 if nic_ip.lower() == constants.VALUE_NONE:
10834 nic_dict[constants.INIC_IP] = None
10836 if not netutils.IPAddress.IsValid(nic_ip):
10837 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
10838 errors.ECODE_INVAL)
10840 nic_bridge = nic_dict.get("bridge", None)
10841 nic_link = nic_dict.get(constants.INIC_LINK, None)
10842 if nic_bridge and nic_link:
10843 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
10844 " at the same time", errors.ECODE_INVAL)
10845 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
10846 nic_dict["bridge"] = None
10847 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
10848 nic_dict[constants.INIC_LINK] = None
10850 if nic_op == constants.DDM_ADD:
10851 nic_mac = nic_dict.get(constants.INIC_MAC, None)
10852 if nic_mac is None:
10853 nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
10855 if constants.INIC_MAC in nic_dict:
10856 nic_mac = nic_dict[constants.INIC_MAC]
10857 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10858 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
10860 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
10861 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
10862 " modifying an existing nic",
10863 errors.ECODE_INVAL)
10865 if nic_addremove > 1:
10866 raise errors.OpPrereqError("Only one NIC add or remove operation"
10867 " supported at a time", errors.ECODE_INVAL)
10869 def ExpandNames(self):
10870 self._ExpandAndLockInstance()
10871 self.needed_locks[locking.LEVEL_NODE] = []
10872 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
# Also lock the target secondary node when converting to a mirrored
# disk template.
10874 def DeclareLocks(self, level):
10875 if level == locking.LEVEL_NODE:
10876 self._LockInstancesNodes()
10877 if self.op.disk_template and self.op.remote_node:
10878 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10879 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
# Hooks environment: new memory/vcpus values and the NIC list with the
# requested overrides applied.
10881 def BuildHooksEnv(self):
10882 """Build hooks env.
10884 This runs on the master, primary and secondaries.
10888 if constants.BE_MEMORY in self.be_new:
10889 args["memory"] = self.be_new[constants.BE_MEMORY]
10890 if constants.BE_VCPUS in self.be_new:
10891 args["vcpus"] = self.be_new[constants.BE_VCPUS]
10892 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
10893 # information at all.
10896 nic_override = dict(self.op.nics)
10897 for idx, nic in enumerate(self.instance.nics):
10898 if idx in nic_override:
10899 this_nic_override = nic_override[idx]
10901 this_nic_override = {}
10902 if constants.INIC_IP in this_nic_override:
10903 ip = this_nic_override[constants.INIC_IP]
10906 if constants.INIC_MAC in this_nic_override:
10907 mac = this_nic_override[constants.INIC_MAC]
10910 if idx in self.nic_pnew:
10911 nicparams = self.nic_pnew[idx]
10913 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
10914 mode = nicparams[constants.NIC_MODE]
10915 link = nicparams[constants.NIC_LINK]
10916 args["nics"].append((ip, mac, mode, link))
10917 if constants.DDM_ADD in nic_override:
10918 ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
10919 mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
10920 nicparams = self.nic_pnew[constants.DDM_ADD]
10921 mode = nicparams[constants.NIC_MODE]
10922 link = nicparams[constants.NIC_LINK]
10923 args["nics"].append((ip, mac, mode, link))
10924 elif constants.DDM_REMOVE in nic_override:
10925 del args["nics"][-1]
10927 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
10928 if self.op.disk_template:
10929 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
10933 def BuildHooksNodes(self):
10934 """Build hooks nodes.
10937 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
# Prereq: validates OS, disk-template conversion, hv/be/os parameters,
# free memory for a memory increase, and each NIC/disk change against the
# live cluster state.  Intermediate values computed here (hv_new, be_new,
# nic_pinst/nic_pnew, warn, ...) are consumed by Exec and BuildHooksEnv.
10940 def CheckPrereq(self):
10941 """Check prerequisites.
10943 This only checks the instance list against the existing names.
10946 # checking the new params on the primary/secondary nodes
10948 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10949 cluster = self.cluster = self.cfg.GetClusterInfo()
10950 assert self.instance is not None, \
10951 "Cannot retrieve locked instance %s" % self.op.instance_name
10952 pnode = instance.primary_node
10953 nodelist = list(instance.all_nodes)
10956 if self.op.os_name and not self.op.force:
10957 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
10958 self.op.force_variant)
10959 instance_os = self.op.os_name
10961 instance_os = instance.os
10963 if self.op.disk_template:
10964 if instance.disk_template == self.op.disk_template:
10965 raise errors.OpPrereqError("Instance already has disk template %s" %
10966 instance.disk_template, errors.ECODE_INVAL)
10968 if (instance.disk_template,
10969 self.op.disk_template) not in self._DISK_CONVERSIONS:
10970 raise errors.OpPrereqError("Unsupported disk template conversion from"
10971 " %s to %s" % (instance.disk_template,
10972 self.op.disk_template),
10973 errors.ECODE_INVAL)
10974 _CheckInstanceDown(self, instance, "cannot change disk template")
10975 if self.op.disk_template in constants.DTS_INT_MIRROR:
10976 if self.op.remote_node == pnode:
10977 raise errors.OpPrereqError("Given new secondary node %s is the same"
10978 " as the primary node of the instance" %
10979 self.op.remote_node, errors.ECODE_STATE)
10980 _CheckNodeOnline(self, self.op.remote_node)
10981 _CheckNodeNotDrained(self, self.op.remote_node)
10982 # FIXME: here we assume that the old instance type is DT_PLAIN
10983 assert instance.disk_template == constants.DT_PLAIN
10984 disks = [{constants.IDISK_SIZE: d.size,
10985 constants.IDISK_VG: d.logical_id[0]}
10986 for d in instance.disks]
10987 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
10988 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
10990 # hvparams processing
10991 if self.op.hvparams:
10992 hv_type = instance.hypervisor
10993 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
10994 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
10995 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
10998 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
10999 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
11000 self.hv_new = hv_new # the new actual values
11001 self.hv_inst = i_hvdict # the new dict (without defaults)
11003 self.hv_new = self.hv_inst = {}
11005 # beparams processing
11006 if self.op.beparams:
11007 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
11009 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
11010 be_new = cluster.SimpleFillBE(i_bedict)
11011 self.be_new = be_new # the new actual values
11012 self.be_inst = i_bedict # the new dict (without defaults)
11014 self.be_new = self.be_inst = {}
11015 be_old = cluster.FillBE(instance)
11017 # osparams processing
11018 if self.op.osparams:
11019 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
11020 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
11021 self.os_inst = i_osdict # the new dict (without defaults)
# Memory increase: check the primary (and, with auto_balance, the
# secondaries) have enough free memory; failures to query nodes are
# collected as warnings rather than hard errors where reasonable.
11027 if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
11028 be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
11029 mem_check_list = [pnode]
11030 if be_new[constants.BE_AUTO_BALANCE]:
11031 # either we changed auto_balance to yes or it was from before
11032 mem_check_list.extend(instance.secondary_nodes)
11033 instance_info = self.rpc.call_instance_info(pnode, instance.name,
11034 instance.hypervisor)
11035 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
11036 instance.hypervisor)
11037 pninfo = nodeinfo[pnode]
11038 msg = pninfo.fail_msg
11040 # Assume the primary node is unreachable and go ahead
11041 self.warn.append("Can't get info from primary node %s: %s" %
11043 elif not isinstance(pninfo.payload.get("memory_free", None), int):
11044 self.warn.append("Node data from primary node %s doesn't contain"
11045 " free memory information" % pnode)
11046 elif instance_info.fail_msg:
11047 self.warn.append("Can't get instance runtime information: %s" %
11048 instance_info.fail_msg)
11050 if instance_info.payload:
11051 current_mem = int(instance_info.payload["memory"])
11053 # Assume instance not running
11054 # (there is a slight race condition here, but it's not very probable,
11055 # and we have no other way to check)
11057 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
11058 pninfo.payload["memory_free"])
11060 raise errors.OpPrereqError("This change will prevent the instance"
11061 " from starting, due to %d MB of memory"
11062 " missing on its primary node" % miss_mem,
11063 errors.ECODE_NORES)
11065 if be_new[constants.BE_AUTO_BALANCE]:
11066 for node, nres in nodeinfo.items():
11067 if node not in instance.secondary_nodes:
11069 nres.Raise("Can't get info from secondary node %s" % node,
11070 prereq=True, ecode=errors.ECODE_STATE)
11071 if not isinstance(nres.payload.get("memory_free", None), int):
11072 raise errors.OpPrereqError("Secondary node %s didn't return free"
11073 " memory information" % node,
11074 errors.ECODE_STATE)
11075 elif be_new[constants.BE_MEMORY] > nres.payload["memory_free"]:
11076 raise errors.OpPrereqError("This change will prevent the instance"
11077 " from failover to its secondary node"
11078 " %s, due to not enough memory" % node,
11079 errors.ECODE_STATE)
# Per-NIC prereq checks: index validity, merged nicparams validation,
# bridge existence for bridged NICs, IP requirements for routed NICs,
# MAC generation/reservation.
11083 self.nic_pinst = {}
11084 for nic_op, nic_dict in self.op.nics:
11085 if nic_op == constants.DDM_REMOVE:
11086 if not instance.nics:
11087 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
11088 errors.ECODE_INVAL)
11090 if nic_op != constants.DDM_ADD:
11092 if not instance.nics:
11093 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
11094 " no NICs" % nic_op,
11095 errors.ECODE_INVAL)
11096 if nic_op < 0 or nic_op >= len(instance.nics):
11097 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
11099 (nic_op, len(instance.nics) - 1),
11100 errors.ECODE_INVAL)
11101 old_nic_params = instance.nics[nic_op].nicparams
11102 old_nic_ip = instance.nics[nic_op].ip
11104 old_nic_params = {}
11107 update_params_dict = dict([(key, nic_dict[key])
11108 for key in constants.NICS_PARAMETERS
11109 if key in nic_dict])
11111 if "bridge" in nic_dict:
11112 update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
11114 new_nic_params = _GetUpdatedParams(old_nic_params,
11115 update_params_dict)
11116 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
11117 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
11118 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
11119 self.nic_pinst[nic_op] = new_nic_params
11120 self.nic_pnew[nic_op] = new_filled_nic_params
11121 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
11123 if new_nic_mode == constants.NIC_MODE_BRIDGED:
11124 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
11125 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
11127 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
11129 self.warn.append(msg)
11131 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
11132 if new_nic_mode == constants.NIC_MODE_ROUTED:
11133 if constants.INIC_IP in nic_dict:
11134 nic_ip = nic_dict[constants.INIC_IP]
11136 nic_ip = old_nic_ip
11138 raise errors.OpPrereqError("Cannot set the nic ip to None"
11139 " on a routed nic", errors.ECODE_INVAL)
11140 if constants.INIC_MAC in nic_dict:
11141 nic_mac = nic_dict[constants.INIC_MAC]
11142 if nic_mac is None:
11143 raise errors.OpPrereqError("Cannot set the nic mac to None",
11144 errors.ECODE_INVAL)
11145 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11146 # otherwise generate the mac
11147 nic_dict[constants.INIC_MAC] = \
11148 self.cfg.GenerateMAC(self.proc.GetECId())
11150 # or validate/reserve the current one
11152 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
11153 except errors.ReservationError:
11154 raise errors.OpPrereqError("MAC address %s already in use"
11155 " in cluster" % nic_mac,
11156 errors.ECODE_NOTUNIQUE)
# Per-disk prereq checks: diskless instances have no disks to modify, at
# least one disk must remain, and the MAX_DISKS limit is honoured.
11159 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
11160 raise errors.OpPrereqError("Disk operations not supported for"
11161 " diskless instances",
11162 errors.ECODE_INVAL)
11163 for disk_op, _ in self.op.disks:
11164 if disk_op == constants.DDM_REMOVE:
11165 if len(instance.disks) == 1:
11166 raise errors.OpPrereqError("Cannot remove the last disk of"
11167 " an instance", errors.ECODE_INVAL)
11168 _CheckInstanceDown(self, instance, "cannot remove disks")
11170 if (disk_op == constants.DDM_ADD and
11171 len(instance.disks) >= constants.MAX_DISKS):
11172 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
11173 " add more" % constants.MAX_DISKS,
11174 errors.ECODE_STATE)
11175 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
11177 if disk_op < 0 or disk_op >= len(instance.disks):
11178 raise errors.OpPrereqError("Invalid disk index %s, valid values"
11180 (disk_op, len(instance.disks)),
11181 errors.ECODE_INVAL)
# Conversion helper plain -> drbd: generate the new DRBD disk tree,
# create the missing LVs, rename the originals into place, create the
# DRBD devices and wait for the initial sync.
11185 def _ConvertPlainToDrbd(self, feedback_fn):
11186 """Converts an instance from plain to drbd.
11189 feedback_fn("Converting template to drbd")
11190 instance = self.instance
11191 pnode = instance.primary_node
11192 snode = self.op.remote_node
11194 # create a fake disk info for _GenerateDiskTemplate
11195 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
11196 constants.IDISK_VG: d.logical_id[0]}
11197 for d in instance.disks]
11198 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
11199 instance.name, pnode, [snode],
11200 disk_info, None, None, 0, feedback_fn)
11201 info = _GetInstanceInfoText(instance)
11202 feedback_fn("Creating aditional volumes...")
11203 # first, create the missing data and meta devices
11204 for disk in new_disks:
11205 # unfortunately this is... not too nice
11206 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
11208 for child in disk.children:
11209 _CreateSingleBlockDev(self, snode, instance, child, info, True)
11210 # at this stage, all new LVs have been created, we can rename the
11212 feedback_fn("Renaming original volumes...")
11213 rename_list = [(o, n.children[0].logical_id)
11214 for (o, n) in zip(instance.disks, new_disks)]
11215 result = self.rpc.call_blockdev_rename(pnode, rename_list)
11216 result.Raise("Failed to rename original LVs")
11218 feedback_fn("Initializing DRBD devices...")
11219 # all child devices are in place, we can now create the DRBD devices
11220 for disk in new_disks:
11221 for node in [pnode, snode]:
11222 f_create = node == pnode
11223 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
11225 # at this point, the instance has been modified
11226 instance.disk_template = constants.DT_DRBD8
11227 instance.disks = new_disks
11228 self.cfg.Update(instance, feedback_fn)
11230 # disks are created, waiting for sync
11231 disk_abort = not _WaitForSync(self, instance,
11232 oneshot=not self.op.wait_for_sync)
11234 raise errors.OpExecError("There are some degraded disks for"
11235 " this instance, please cleanup manually")
# Conversion helper drbd -> plain: promote the data LVs to top-level
# disks, update the config, then remove the now-unused secondary volumes,
# the metadata LVs and return the DRBD TCP ports to the pool.
11237 def _ConvertDrbdToPlain(self, feedback_fn):
11238 """Converts an instance from drbd to plain.
11241 instance = self.instance
11242 assert len(instance.secondary_nodes) == 1
11243 pnode = instance.primary_node
11244 snode = instance.secondary_nodes[0]
11245 feedback_fn("Converting template to plain")
11247 old_disks = instance.disks
11248 new_disks = [d.children[0] for d in old_disks]
11250 # copy over size and mode
11251 for parent, child in zip(old_disks, new_disks):
11252 child.size = parent.size
11253 child.mode = parent.mode
11255 # update instance structure
11256 instance.disks = new_disks
11257 instance.disk_template = constants.DT_PLAIN
11258 self.cfg.Update(instance, feedback_fn)
11260 feedback_fn("Removing volumes on the secondary node...")
11261 for disk in old_disks:
11262 self.cfg.SetDiskID(disk, snode)
11263 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
11265 self.LogWarning("Could not remove block device %s on node %s,"
11266 " continuing anyway: %s", disk.iv_name, snode, msg)
11268 feedback_fn("Removing unneeded volumes on the primary node...")
11269 for idx, disk in enumerate(old_disks):
11270 meta = disk.children[1]
11271 self.cfg.SetDiskID(meta, pnode)
11272 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
11274 self.LogWarning("Could not remove metadata for disk %d on node %s,"
11275 " continuing anyway: %s", idx, pnode, msg)
11277 # this is a DRBD disk, return its port to the pool
11278 for disk in old_disks:
11279 tcp_port = disk.logical_id[2]
11280 self.cfg.AddTcpUdpPort(tcp_port)
# Exec: applies the validated changes in order (disks, disk template,
# NICs, hv/be/os params), accumulating a list of (name, value) change
# descriptions which is returned to the caller.
11282 def Exec(self, feedback_fn):
11283 """Modifies an instance.
11285 All parameters take effect only at the next restart of the instance.
11288 # Process here the warnings from CheckPrereq, as we don't have a
11289 # feedback_fn there.
11290 for warn in self.warn:
11291 feedback_fn("WARNING: %s" % warn)
11294 instance = self.instance
11296 for disk_op, disk_dict in self.op.disks:
11297 if disk_op == constants.DDM_REMOVE:
11298 # remove the last disk
11299 device = instance.disks.pop()
11300 device_idx = len(instance.disks)
11301 for node, disk in device.ComputeNodeTree(instance.primary_node):
11302 self.cfg.SetDiskID(disk, node)
11303 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
11305 self.LogWarning("Could not remove disk/%d on node %s: %s,"
11306 " continuing anyway", device_idx, node, msg)
11307 result.append(("disk/%d" % device_idx, "remove"))
11309 # if this is a DRBD disk, return its port to the pool
11310 if device.dev_type in constants.LDS_DRBD:
11311 tcp_port = device.logical_id[2]
11312 self.cfg.AddTcpUdpPort(tcp_port)
11313 elif disk_op == constants.DDM_ADD:
11315 if instance.disk_template in (constants.DT_FILE,
11316 constants.DT_SHARED_FILE):
11317 file_driver, file_path = instance.disks[0].logical_id
11318 file_path = os.path.dirname(file_path)
11320 file_driver = file_path = None
11321 disk_idx_base = len(instance.disks)
11322 new_disk = _GenerateDiskTemplate(self,
11323 instance.disk_template,
11324 instance.name, instance.primary_node,
11325 instance.secondary_nodes,
11329 disk_idx_base, feedback_fn)[0]
11330 instance.disks.append(new_disk)
11331 info = _GetInstanceInfoText(instance)
11333 logging.info("Creating volume %s for instance %s",
11334 new_disk.iv_name, instance.name)
11335 # Note: this needs to be kept in sync with _CreateDisks
11337 for node in instance.all_nodes:
11338 f_create = node == instance.primary_node
11340 _CreateBlockDev(self, node, instance, new_disk,
11341 f_create, info, f_create)
11342 except errors.OpExecError, err:
11343 self.LogWarning("Failed to create volume %s (%s) on"
11345 new_disk.iv_name, new_disk, node, err)
11346 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
11347 (new_disk.size, new_disk.mode)))
11349 # change a given disk
11350 instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
11351 result.append(("disk.mode/%d" % disk_op,
11352 disk_dict[constants.IDISK_MODE]))
11354 if self.op.disk_template:
11355 r_shut = _ShutdownInstanceDisks(self, instance)
11357 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
11358 " proceed with disk template conversion")
11359 mode = (instance.disk_template, self.op.disk_template)
11361 self._DISK_CONVERSIONS[mode](self, feedback_fn)
11363 self.cfg.ReleaseDRBDMinors(instance.name)
11365 result.append(("disk_template", self.op.disk_template))
11368 for nic_op, nic_dict in self.op.nics:
11369 if nic_op == constants.DDM_REMOVE:
11370 # remove the last nic
11371 del instance.nics[-1]
11372 result.append(("nic.%d" % len(instance.nics), "remove"))
11373 elif nic_op == constants.DDM_ADD:
11374 # mac and bridge should be set, by now
11375 mac = nic_dict[constants.INIC_MAC]
11376 ip = nic_dict.get(constants.INIC_IP, None)
11377 nicparams = self.nic_pinst[constants.DDM_ADD]
11378 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
11379 instance.nics.append(new_nic)
11380 result.append(("nic.%d" % (len(instance.nics) - 1),
11381 "add:mac=%s,ip=%s,mode=%s,link=%s" %
11382 (new_nic.mac, new_nic.ip,
11383 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
11384 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
11387 for key in (constants.INIC_MAC, constants.INIC_IP):
11388 if key in nic_dict:
11389 setattr(instance.nics[nic_op], key, nic_dict[key])
11390 if nic_op in self.nic_pinst:
11391 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
11392 for key, val in nic_dict.iteritems():
11393 result.append(("nic.%s/%d" % (key, nic_op), val))
11396 if self.op.hvparams:
11397 instance.hvparams = self.hv_inst
11398 for key, val in self.op.hvparams.iteritems():
11399 result.append(("hv/%s" % key, val))
11402 if self.op.beparams:
11403 instance.beparams = self.be_inst
11404 for key, val in self.op.beparams.iteritems():
11405 result.append(("be/%s" % key, val))
11408 if self.op.os_name:
11409 instance.os = self.op.os_name
11412 if self.op.osparams:
11413 instance.osparams = self.os_inst
11414 for key, val in self.op.osparams.iteritems():
11415 result.append(("os/%s" % key, val))
11417 self.cfg.Update(instance, feedback_fn)
# Dispatch table for supported disk-template conversions; values are the
# unbound conversion helpers above, called as fn(self, feedback_fn).
11421 _DISK_CONVERSIONS = {
11422 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
11423 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
class LUInstanceChangeGroup(LogicalUnit):
  HPATH = "instance-change-group"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    # All locks are shared; the iallocator-generated jobs do the real work.
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

    self._ExpandAndLockInstance()

    if self.op.target_groups:
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                                  self.op.target_groups)
    else:
      # No explicit targets: decided later, once group locks are held
      self.req_target_uuids = None

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if self.req_target_uuids:
        lock_groups = set(self.req_target_uuids)

        # Lock all groups used by instance optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
        lock_groups.update(instance_groups)
      else:
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET

      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups

    elif level == locking.LEVEL_NODE:
      if self.req_target_uuids:
        # Lock all nodes used by instances
        self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
        self._LockInstancesNodes()

        # Lock all nodes in all potential target groups
        lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
                       self.cfg.GetInstanceNodeGroups(self.op.instance_name))
        member_nodes = [node_name
                        for group in lock_groups
                        for node_name in self.cfg.GetNodeGroup(group).members]
        self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
      else:
        # Lock all nodes as all groups are potential targets
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.

    Verifies the optimistically-acquired locks still match reality and
    computes the final set of target group UUIDs.

    """
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert (self.req_target_uuids is None or
            owned_groups.issuperset(self.req_target_uuids))
    assert owned_instances == set([self.op.instance_name])

    # Get instance information
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)

    # Check if node groups for locked instance are still correct
    assert owned_nodes.issuperset(self.instance.all_nodes), \
      ("Instance %s's nodes changed while we kept the lock" %
       self.op.instance_name)

    inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
                                           owned_groups)

    if self.req_target_uuids:
      # User requested specific target groups
      self.target_uuids = frozenset(self.req_target_uuids)
    else:
      # All groups except those used by the instance are potential targets
      self.target_uuids = owned_groups - inst_groups

    conflicting_groups = self.target_uuids & inst_groups
    if conflicting_groups:
      raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
                                 " used by the instance '%s'" %
                                 (utils.CommaJoin(conflicting_groups),
                                  self.op.instance_name),
                                 errors.ECODE_INVAL)

    if not self.target_uuids:
      raise errors.OpPrereqError("There are no possible target groups",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    assert self.target_uuids

    env = {
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    # Delegate placement to the iallocator; the returned jobs perform the
    # actual instance moves.
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert instances == [self.op.instance_name], "Instance not locked"

    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
                     instances=instances, target_groups=list(self.target_uuids))

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute solution for changing group of"
                                 " instance '%s' using iallocator '%s': %s" %
                                 (self.op.instance_name, self.op.iallocator,
                                  ial.info),
                                 errors.ECODE_NORES)

    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for changing group of"
                 " instance '%s'", len(jobs), self.op.instance_name)

    return ResultWithJobs(jobs)
class LUBackupQuery(NoHooksLU):
  """Query the exports list

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    # An empty node list means "query every node in the cluster"
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node; nodes whose export-list RPC failed map to C{False}

    """
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
    rpcresult = self.rpc.call_export_list(self.nodes)
    result = {}
    for node in rpcresult:
      if rpcresult[node].fail_msg:
        # RPC failure is reported as False rather than aborting the query
        result[node] = False
      else:
        result[node] = rpcresult[node].payload

    return result
class LUBackupPrepare(NoHooksLU):
  """Prepares an instance for an export and returns useful information.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self._cds = _GetClusterDomainSecret()

  def Exec(self, feedback_fn):
    """Prepares an instance for an export.

    For remote-mode exports, generates an X509 certificate on the primary
    node and returns the handshake/key/CA material signed with the cluster
    domain secret; local-mode exports need no preparation.

    """
    instance = self.instance

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      salt = utils.GenerateSecret(8)

      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
      result = self.rpc.call_x509_cert_create(instance.primary_node,
                                              constants.RIE_CERT_VALIDITY)
      result.Raise("Can't create X509 key and certificate on %s" % result.node)

      (name, cert_pem) = result.payload

      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                             cert_pem)

      return {
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
        # HMAC over the key name proves it originated from this cluster
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
                          salt),
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
        }

    return None
11655 class LUBackupExport(LogicalUnit):
11656 """Export an instance to an image in the cluster.
11659 HPATH = "instance-export"
11660 HTYPE = constants.HTYPE_INSTANCE
11663 def CheckArguments(self):
11664 """Check the arguments.
11667 self.x509_key_name = self.op.x509_key_name
11668 self.dest_x509_ca_pem = self.op.destination_x509_ca
11670 if self.op.mode == constants.EXPORT_MODE_REMOTE:
11671 if not self.x509_key_name:
11672 raise errors.OpPrereqError("Missing X509 key name for encryption",
11673 errors.ECODE_INVAL)
11675 if not self.dest_x509_ca_pem:
11676 raise errors.OpPrereqError("Missing destination X509 CA",
11677 errors.ECODE_INVAL)
11679 def ExpandNames(self):
11680 self._ExpandAndLockInstance()
11682 # Lock all nodes for local exports
11683 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11684 # FIXME: lock only instance primary and destination node
11686 # Sad but true, for now we have do lock all nodes, as we don't know where
11687 # the previous export might be, and in this LU we search for it and
11688 # remove it from its current node. In the future we could fix this by:
11689 # - making a tasklet to search (share-lock all), then create the
11690 # new one, then one to remove, after
11691 # - removing the removal operation altogether
11692 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11694 def DeclareLocks(self, level):
11695 """Last minute lock declaration."""
11696 # All nodes are locked anyway, so nothing to do here.
11698 def BuildHooksEnv(self):
11699 """Build hooks env.
11701 This will run on the master, primary node and target node.
11705 "EXPORT_MODE": self.op.mode,
11706 "EXPORT_NODE": self.op.target_node,
11707 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
11708 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
11709 # TODO: Generic function for boolean env variables
11710 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
11713 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11717 def BuildHooksNodes(self):
11718 """Build hooks nodes.
11721 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
11723 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11724 nl.append(self.op.target_node)
11728 def CheckPrereq(self):
11729 """Check prerequisites.
11731 This checks that the instance and node names are valid.
11734 instance_name = self.op.instance_name
11736 self.instance = self.cfg.GetInstanceInfo(instance_name)
11737 assert self.instance is not None, \
11738 "Cannot retrieve locked instance %s" % self.op.instance_name
11739 _CheckNodeOnline(self, self.instance.primary_node)
11741 if (self.op.remove_instance and self.instance.admin_up and
11742 not self.op.shutdown):
11743 raise errors.OpPrereqError("Can not remove instance without shutting it"
11746 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11747 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
11748 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
11749 assert self.dst_node is not None
11751 _CheckNodeOnline(self, self.dst_node.name)
11752 _CheckNodeNotDrained(self, self.dst_node.name)
11755 self.dest_disk_info = None
11756 self.dest_x509_ca = None
11758 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11759 self.dst_node = None
11761 if len(self.op.target_node) != len(self.instance.disks):
11762 raise errors.OpPrereqError(("Received destination information for %s"
11763 " disks, but instance %s has %s disks") %
11764 (len(self.op.target_node), instance_name,
11765 len(self.instance.disks)),
11766 errors.ECODE_INVAL)
11768 cds = _GetClusterDomainSecret()
11770 # Check X509 key name
11772 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
11773 except (TypeError, ValueError), err:
11774 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
11776 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
11777 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
11778 errors.ECODE_INVAL)
11780 # Load and verify CA
11782 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
11783 except OpenSSL.crypto.Error, err:
11784 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
11785 (err, ), errors.ECODE_INVAL)
11787 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
11788 if errcode is not None:
11789 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
11790 (msg, ), errors.ECODE_INVAL)
11792 self.dest_x509_ca = cert
11794 # Verify target information
11796 for idx, disk_data in enumerate(self.op.target_node):
11798 (host, port, magic) = \
11799 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
11800 except errors.GenericError, err:
11801 raise errors.OpPrereqError("Target info for disk %s: %s" %
11802 (idx, err), errors.ECODE_INVAL)
11804 disk_info.append((host, port, magic))
11806 assert len(disk_info) == len(self.op.target_node)
11807 self.dest_disk_info = disk_info
11810 raise errors.ProgrammerError("Unhandled export mode %r" %
11813 # instance disk type verification
11814 # TODO: Implement export support for file-based disks
11815 for disk in self.instance.disks:
11816 if disk.dev_type == constants.LD_FILE:
11817 raise errors.OpPrereqError("Export not supported for instances with"
11818 " file-based disks", errors.ECODE_INVAL)
11820 def _CleanupExports(self, feedback_fn):
11821 """Removes exports of current instance from all other nodes.
11823 If an instance in a cluster with nodes A..D was exported to node C, its
11824 exports will be removed from the nodes A, B and D.
11827 assert self.op.mode != constants.EXPORT_MODE_REMOTE
11829 nodelist = self.cfg.GetNodeList()
11830 nodelist.remove(self.dst_node.name)
11832 # on one-node clusters nodelist will be empty after the removal
11833 # if we proceed the backup would be removed because OpBackupQuery
11834 # substitutes an empty list with the full cluster node list.
11835 iname = self.instance.name
11837 feedback_fn("Removing old exports for instance %s" % iname)
11838 exportlist = self.rpc.call_export_list(nodelist)
11839 for node in exportlist:
11840 if exportlist[node].fail_msg:
11842 if iname in exportlist[node].payload:
11843 msg = self.rpc.call_export_remove(node, iname).fail_msg
11845 self.LogWarning("Could not remove older export for instance %s"
11846 " on node %s: %s", iname, node, msg)
11848 def Exec(self, feedback_fn):
11849 """Export an instance to an image in the cluster.
11852 assert self.op.mode in constants.EXPORT_MODES
11854 instance = self.instance
11855 src_node = instance.primary_node
11857 if self.op.shutdown:
11858 # shutdown the instance, but not the disks
11859 feedback_fn("Shutting down instance %s" % instance.name)
11860 result = self.rpc.call_instance_shutdown(src_node, instance,
11861 self.op.shutdown_timeout)
11862 # TODO: Maybe ignore failures if ignore_remove_failures is set
11863 result.Raise("Could not shutdown instance %s on"
11864 " node %s" % (instance.name, src_node))
11866 # set the disks ID correctly since call_instance_start needs the
11867 # correct drbd minor to create the symlinks
11868 for disk in instance.disks:
11869 self.cfg.SetDiskID(disk, src_node)
11871 activate_disks = (not instance.admin_up)
11874 # Activate the instance disks if we'exporting a stopped instance
11875 feedback_fn("Activating disks for %s" % instance.name)
11876 _StartInstanceDisks(self, instance, None)
11879 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
11882 helper.CreateSnapshots()
11884 if (self.op.shutdown and instance.admin_up and
11885 not self.op.remove_instance):
11886 assert not activate_disks
11887 feedback_fn("Starting instance %s" % instance.name)
11888 result = self.rpc.call_instance_start(src_node, instance,
11890 msg = result.fail_msg
11892 feedback_fn("Failed to start instance: %s" % msg)
11893 _ShutdownInstanceDisks(self, instance)
11894 raise errors.OpExecError("Could not start instance: %s" % msg)
11896 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11897 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
11898 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11899 connect_timeout = constants.RIE_CONNECT_TIMEOUT
11900 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11902 (key_name, _, _) = self.x509_key_name
11905 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
11908 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
11909 key_name, dest_ca_pem,
11914 # Check for backwards compatibility
11915 assert len(dresults) == len(instance.disks)
11916 assert compat.all(isinstance(i, bool) for i in dresults), \
11917 "Not all results are boolean: %r" % dresults
11921 feedback_fn("Deactivating disks for %s" % instance.name)
11922 _ShutdownInstanceDisks(self, instance)
11924 if not (compat.all(dresults) and fin_resu):
11927 failures.append("export finalization")
11928 if not compat.all(dresults):
11929 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
11931 failures.append("disk export: disk(s) %s" % fdsk)
11933 raise errors.OpExecError("Export failed, errors in %s" %
11934 utils.CommaJoin(failures))
11936 # At this point, the export was successful, we can cleanup/finish
11938 # Remove instance if requested
11939 if self.op.remove_instance:
11940 feedback_fn("Removing instance %s" % instance.name)
11941 _RemoveInstance(self, feedback_fn, instance,
11942 self.op.ignore_remove_failures)
11944 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11945 self._CleanupExports(feedback_fn)
11947 return fin_resu, dresults
class LUBackupRemove(NoHooksLU):
  """Remove exports related to the named instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Remove any export.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = False
    if not instance_name:
      fqdn_warn = True
      instance_name = self.op.instance_name

    locked_nodes = self.owned_locks(locking.LEVEL_NODE)
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node in exportlist:
      msg = exportlist[node].fail_msg
      if msg:
        # Unreachable nodes are skipped; the removal stays best-effort
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
        continue
      if instance_name in exportlist[node].payload:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Name.")
class LUGroupAdd(LogicalUnit):
  """Logical unit for creating node groups.

  """
  HPATH = "group-add"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # We need the new group's UUID here so that we can create and acquire the
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
    # that it should not check whether the UUID exists in the configuration.
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
    self.needed_locks = {}
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name is not an existing node group
    already.

    """
    try:
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    except errors.OpPrereqError:
      # Lookup failing means the name is free, which is what we want
      pass
    else:
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
                                 " node group (UUID: %s)" %
                                 (self.op.group_name, existing_uuid),
                                 errors.ECODE_EXISTS)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Add the node group to the cluster.

    """
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
                                  uuid=self.group_uuid,
                                  alloc_policy=self.op.alloc_policy,
                                  ndparams=self.op.ndparams)

    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
    # Keep the lock on success; it now refers to a real group
    del self.remove_locks[locking.LEVEL_NODEGROUP]
class LUGroupAssignNodes(NoHooksLU):
  """Logical unit for assigning nodes to groups.

  """
  REQ_BGL = False

  def ExpandNames(self):
    # These raise errors.OpPrereqError on their own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)

    # We want to lock all the affected nodes and groups. We have readily
    # available the list of nodes, and the *destination* group. To gather the
    # list of "source" groups, we need to fetch node information later on.
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: set([self.group_uuid]),
      locking.LEVEL_NODE: self.op.nodes,
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1

      # Try to get all affected nodes' groups without having the group or node
      # lock yet. Needs verification later in the code flow.
      groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)

      self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
            frozenset(self.op.nodes))

    # Verify the optimistic group set from DeclareLocks is still accurate
    expected_locks = (set([self.group_uuid]) |
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
    actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
    if actual_locks != expected_locks:
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
                               " current groups are '%s', used to be '%s'" %
                               (utils.CommaJoin(expected_locks),
                                utils.CommaJoin(actual_locks)))

    self.node_data = self.cfg.GetAllNodesInfo()
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    instance_data = self.cfg.GetAllInstancesInfo()

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    (new_splits, previous_splits) = \
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
                                             for node in self.op.nodes],
                                            self.node_data, instance_data)

    if new_splits:
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))

      if not self.op.force:
        raise errors.OpExecError("The following instances get split by this"
                                 " change and --force was not given: %s" %
                                 fmt_new_splits)
      else:
        self.LogWarning("This operation will split the following instances: %s",
                        fmt_new_splits)

        if previous_splits:
          self.LogWarning("In addition, these already-split instances continue"
                          " to be split across groups: %s",
                          utils.CommaJoin(utils.NiceSort(previous_splits)))

  def Exec(self, feedback_fn):
    """Assign nodes to a new group.

    """
    mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]

    self.cfg.AssignGroupNodes(mods)

  @staticmethod
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
    """Check for split instances after a node assignment.

    This method considers a series of node assignments as an atomic operation,
    and returns information about split instances after applying the set of
    changes.

    In particular, it returns information about newly split instances, and
    instances that were already split, and remain so after the change.

    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
    considered.

    @type changes: list of (node_name, new_group_uuid) pairs.
    @param changes: list of node assignments to consider.
    @param node_data: a dict with data for all nodes
    @param instance_data: a dict with all instances to consider
    @rtype: a two-tuple
    @return: a list of instances that were previously okay and result split as a
      consequence of this change, and a list of instances that were previously
      split and this change does not fix.

    """
    # Only assignments that actually move a node to a different group matter
    changed_nodes = dict((node, group) for node, group in changes
                         if node_data[node].group != group)

    all_split_instances = set()
    previously_split_instances = set()

    def InstanceNodes(instance):
      return [instance.primary_node] + list(instance.secondary_nodes)

    for inst in instance_data.values():
      if inst.disk_template not in constants.DTS_INT_MIRROR:
        continue

      instance_nodes = InstanceNodes(inst)

      # Split before the change: nodes already span more than one group
      if len(set(node_data[node].group for node in instance_nodes)) > 1:
        previously_split_instances.add(inst.name)

      # Split after the change: evaluate groups with the assignments applied
      if len(set(changed_nodes.get(node, node_data[node].group)
                 for node in instance_nodes)) > 1:
        all_split_instances.add(inst.name)

    return (list(all_split_instances - previously_split_instances),
            list(previously_split_instances & all_split_instances))
class _GroupQuery(_QueryBase):
  FIELDS = query.GROUP_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}

    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())

    if not self.names:
      # No names given: return all groups, sorted by name
      self.wanted = [name_to_uuid[name]
                     for name in utils.NiceSort(name_to_uuid.keys())]
    else:
      # Accept names to be either names or UUIDs.
      missing = []
      self.wanted = []
      all_uuid = frozenset(self._all_groups.keys())

      for name in self.names:
        if name in all_uuid:
          self.wanted.append(name)
        elif name in name_to_uuid:
          self.wanted.append(name_to_uuid[name])
        else:
          missing.append(name)

      if missing:
        raise errors.OpPrereqError("Some groups do not exist: %s" %
                                   utils.CommaJoin(missing),
                                   errors.ECODE_NOENT)

  def DeclareLocks(self, lu, level):
    # Query works on the in-memory configuration; no locks needed
    pass

  def _GetQueryData(self, lu):
    """Computes the list of node groups and their attributes.

    """
    do_nodes = query.GQ_NODE in self.requested_data
    do_instances = query.GQ_INST in self.requested_data

    group_to_nodes = None
    group_to_instances = None

    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
    # latter GetAllInstancesInfo() is not enough, for we have to go through
    # instance->node. Hence, we will need to process nodes even if we only need
    # instance information.
    if do_nodes or do_instances:
      all_nodes = lu.cfg.GetAllNodesInfo()
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
      node_to_group = {}

      for node in all_nodes.values():
        if node.group in group_to_nodes:
          group_to_nodes[node.group].append(node.name)
          node_to_group[node.name] = node.group

      if do_instances:
        all_instances = lu.cfg.GetAllInstancesInfo()
        group_to_instances = dict((uuid, []) for uuid in self.wanted)

        for instance in all_instances.values():
          node = instance.primary_node
          if node in node_to_group:
            group_to_instances[node_to_group[node]].append(instance.name)

        if not do_nodes:
          # Do not pass on node information if it was not requested.
          group_to_nodes = None

    return query.GroupQueryData([self._all_groups[uuid]
                                 for uuid in self.wanted],
                                group_to_nodes, group_to_instances)
class LUGroupQuery(NoHooksLU):
  """Logical unit for querying node groups.

  """
  REQ_BGL = False

  def CheckArguments(self):
    # Delegate all query logic to _GroupQuery
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
                          self.op.output_fields, False)

  def ExpandNames(self):
    self.gq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.gq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.gq.OldStyleQuery(self)
class LUGroupSetParams(LogicalUnit):
  """Modifies the parameters of a node group.

  """
  HPATH = "group-modify"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def CheckArguments(self):
    all_changes = [
      self.op.ndparams,
      self.op.alloc_policy,
      ]

    # Refuse a no-op modification request
    if all_changes.count(None) == len(all_changes):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self.group = self.cfg.GetNodeGroup(self.group_uuid)

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
      # NOTE(review): type-checks the submitted delta (self.op.ndparams), not
      # the merged new_ndparams actually stored below — confirm this is
      # intended
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Modifies the node group.

    """
    result = []

    if self.op.ndparams:
      self.group.ndparams = self.new_ndparams
      result.append(("ndparams", str(self.group.ndparams)))

    if self.op.alloc_policy:
      self.group.alloc_policy = self.op.alloc_policy

    self.cfg.Update(self.group, feedback_fn)

    return result
class LUGroupRemove(LogicalUnit):
  HPATH = "group-remove"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This will raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name exists as a node group, that is
    empty (i.e., contains no nodes), and that is not the last group of the
    cluster.

    """
    # Verify that the group is empty.
    group_nodes = [node.name
                   for node in self.cfg.GetAllNodesInfo().values()
                   if node.group == self.group_uuid]

    if group_nodes:
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
                                 " nodes: %s" %
                                 (self.op.group_name,
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
                                 errors.ECODE_STATE)

    # Verify the cluster would not be left group-less.
    if len(self.cfg.GetNodeGroupList()) == 1:
      raise errors.OpPrereqError("Group '%s' is the only group,"
                                 " cannot be removed" %
                                 self.op.group_name,
                                 errors.ECODE_STATE)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Remove the node group.

    """
    try:
      self.cfg.RemoveNodeGroup(self.group_uuid)
    except errors.ConfigurationError:
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
                               (self.op.group_name, self.group_uuid))

    # Release the now-stale group lock when the LU finishes
    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
class LUGroupRename(LogicalUnit):
  HPATH = "group-rename"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    Ensures requested new name is not yet used.

    """
    try:
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
    except errors.OpPrereqError:
      # Lookup failing means the new name is free
      pass
    else:
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
                                 " node group (UUID: %s)" %
                                 (self.op.new_name, new_name_uuid),
                                 errors.ECODE_EXISTS)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OLD_NAME": self.op.group_name,
      "NEW_NAME": self.op.new_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()

    all_nodes = self.cfg.GetAllNodesInfo()
    all_nodes.pop(mn, None)

    # Hooks run on the master plus every node in the renamed group
    run_nodes = [mn]
    run_nodes.extend(node.name for node in all_nodes.values()
                     if node.group == self.group_uuid)

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    """Rename the node group.

    """
    group = self.cfg.GetNodeGroup(self.group_uuid)

    if group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    group.name = self.op.new_name
    self.cfg.Update(group, feedback_fn)

    return self.op.new_name
class LUGroupEvacuate(LogicalUnit):
  HPATH = "group-evacuate"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    if self.op.target_groups:
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                                  self.op.target_groups)
    else:
      self.req_target_uuids = []

    if self.group_uuid in self.req_target_uuids:
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
                                 " as a target group (targets are %s)" %
                                 (self.group_uuid,
                                  utils.CommaJoin(self.req_target_uuids)),
                                 errors.ECODE_INVAL)

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if self.req_target_uuids:
        lock_groups = set([self.group_uuid] + self.req_target_uuids)

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lock_groups.update(group_uuid
                           for instance_name in
                             self.owned_locks(locking.LEVEL_INSTANCE)
                           for group_uuid in
                             self.cfg.GetInstanceNodeGroups(instance_name))
      else:
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET

      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be evacuated which
      # contain actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be evacuated and target groups
      owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
      assert self.group_uuid in owned_groups
      member_nodes = [node_name
                      for group in owned_groups
                      for node_name in self.cfg.GetNodeGroup(group).members]
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)

  def CheckPrereq(self):
    """Verify optimistically acquired locks and compute target groups.

    """
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert owned_groups.issuperset(self.req_target_uuids)
    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))

    # Check if node groups for locked instances are still correct
    _CheckInstancesNodeGroups(self.cfg, self.instances,
                              owned_groups, owned_nodes, self.group_uuid)

    if self.req_target_uuids:
      # User requested specific target groups
      self.target_uuids = self.req_target_uuids
    else:
      # All groups except the one to be evacuated are potential targets
      self.target_uuids = [group_uuid for group_uuid in owned_groups
                           if group_uuid != self.group_uuid]

      if not self.target_uuids:
        raise errors.OpPrereqError("There are no possible target groups",
                                   errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()

    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)

    run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert self.group_uuid not in self.target_uuids

    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
                     instances=instances, target_groups=self.target_uuids)

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute group evacuation using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)

    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
                 len(jobs), self.op.group_name)

    return ResultWithJobs(jobs)
class TagsLU(NoHooksLU): # pylint: disable=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """
  def ExpandNames(self):
    self.group_uuid = None
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)

    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
    # not possible to acquire the BGL based on opcode parameters)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)
class LUTagsGet(TagsLU):
  """Returns the tags of a given object.

  """
  REQ_BGL = False

  def ExpandNames(self):
    TagsLU.ExpandNames(self)

    # Share locks as this is only a read operation
    self.share_locks = _ShareAll()

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())
12706 class LUTagsSearch(NoHooksLU):
12707 """Searches the tags for a given pattern.
12712 def ExpandNames(self):
12713 self.needed_locks = {}
12715 def CheckPrereq(self):
12716 """Check prerequisites.
12718 This checks the pattern passed for validity by compiling it.
12722 self.re = re.compile(self.op.pattern)
12723 except re.error, err:
12724 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
12725 (self.op.pattern, err), errors.ECODE_INVAL)
12727 def Exec(self, feedback_fn):
12728 """Returns the tag list.
12732 tgts = [("/cluster", cfg.GetClusterInfo())]
12733 ilist = cfg.GetAllInstancesInfo().values()
12734 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
12735 nlist = cfg.GetAllNodesInfo().values()
12736 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
12737 tgts.extend(("/nodegroup/%s" % n.name, n)
12738 for n in cfg.GetAllNodeGroupsInfo().values())
12740 for path, target in tgts:
12741 for tag in target.GetTags():
12742 if self.re.search(tag):
12743 results.append((path, tag))
12747 class LUTagsSet(TagsLU):
12748 """Sets a tag on a given object.
12753 def CheckPrereq(self):
12754 """Check prerequisites.
12756 This checks the type and length of the tag name and value.
12759 TagsLU.CheckPrereq(self)
12760 for tag in self.op.tags:
12761 objects.TaggableObject.ValidateTag(tag)
12763 def Exec(self, feedback_fn):
12768 for tag in self.op.tags:
12769 self.target.AddTag(tag)
12770 except errors.TagError, err:
12771 raise errors.OpExecError("Error while setting tag: %s" % str(err))
12772 self.cfg.Update(self.target, feedback_fn)
class LUTagsDel(TagsLU):
  """Delete a list of tags from a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()

    # All requested tags must currently be present on the target
    diff_tags = del_tags - cur_tags
    if diff_tags:
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (utils.CommaJoin(diff_names), ),
                                 errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)
class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()
12857 class LUTestJqueue(NoHooksLU):
12858 """Utility LU to test some aspects of the job queue.
12863 # Must be lower than default timeout for WaitForJobChange to see whether it
12864 # notices changed jobs
12865 _CLIENT_CONNECT_TIMEOUT = 20.0
12866 _CLIENT_CONFIRM_TIMEOUT = 60.0
12869 def _NotifyUsingSocket(cls, cb, errcls):
12870 """Opens a Unix socket and waits for another program to connect.
12873 @param cb: Callback to send socket name to client
12874 @type errcls: class
12875 @param errcls: Exception class to use for errors
12878 # Using a temporary directory as there's no easy way to create temporary
12879 # sockets without writing a custom loop around tempfile.mktemp and
12881 tmpdir = tempfile.mkdtemp()
12883 tmpsock = utils.PathJoin(tmpdir, "sock")
12885 logging.debug("Creating temporary socket at %s", tmpsock)
12886 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
12891 # Send details to client
12894 # Wait for client to connect before continuing
12895 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
12897 (conn, _) = sock.accept()
12898 except socket.error, err:
12899 raise errcls("Client didn't connect in time (%s)" % err)
12903 # Remove as soon as client is connected
12904 shutil.rmtree(tmpdir)
12906 # Wait for client to close
12909 # pylint: disable=E1101
12910 # Instance of '_socketobject' has no ... member
12911 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
12913 except socket.error, err:
12914 raise errcls("Client failed to confirm notification (%s)" % err)
12918 def _SendNotification(self, test, arg, sockname):
12919 """Sends a notification to the client.
12922 @param test: Test name
12923 @param arg: Test argument (depends on test)
12924 @type sockname: string
12925 @param sockname: Socket path
12928 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
12930 def _Notify(self, prereq, test, arg):
12931 """Notifies the client of a test.
12934 @param prereq: Whether this is a prereq-phase test
12936 @param test: Test name
12937 @param arg: Test argument (depends on test)
12941 errcls = errors.OpPrereqError
12943 errcls = errors.OpExecError
12945 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
12949 def CheckArguments(self):
12950 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
12951 self.expandnames_calls = 0
12953 def ExpandNames(self):
12954 checkargs_calls = getattr(self, "checkargs_calls", 0)
12955 if checkargs_calls < 1:
12956 raise errors.ProgrammerError("CheckArguments was not called")
12958 self.expandnames_calls += 1
12960 if self.op.notify_waitlock:
12961 self._Notify(True, constants.JQT_EXPANDNAMES, None)
12963 self.LogInfo("Expanding names")
12965 # Get lock on master node (just to get a lock, not for a particular reason)
12966 self.needed_locks = {
12967 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
12970 def Exec(self, feedback_fn):
12971 if self.expandnames_calls < 1:
12972 raise errors.ProgrammerError("ExpandNames was not called")
12974 if self.op.notify_exec:
12975 self._Notify(False, constants.JQT_EXEC, None)
12977 self.LogInfo("Executing")
12979 if self.op.log_messages:
12980 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
12981 for idx, msg in enumerate(self.op.log_messages):
12982 self.LogInfo("Sending log message %s", idx + 1)
12983 feedback_fn(constants.JQT_MSGPREFIX + msg)
12984 # Report how many test messages have been sent
12985 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
12988 raise errors.OpExecError("Opcode failure was requested")
class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has three sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, nodes) for
      easy usage

  """
  # pylint: disable=R0902
  # lots of instance attributes

  def __init__(self, cfg, rpc, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    self.mode = mode
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.name = None
    self.memory = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.instances = None
    self.evac_mode = None
    self.target_groups = []
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None

    try:
      (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
    except KeyError:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)

    keyset = [n for (n, _) in keydata]

    # All keyword arguments must be declared for the selected mode...
    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    # ...and all declared parameters must be supplied
    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(compat.partial(fn, self), keydata)
13049 def _ComputeClusterData(self):
13050 """Compute the generic allocator input data.
13052 This is the data that is independent of the actual operation.
13056 cluster_info = cfg.GetClusterInfo()
13059 "version": constants.IALLOCATOR_VERSION,
13060 "cluster_name": cfg.GetClusterName(),
13061 "cluster_tags": list(cluster_info.GetTags()),
13062 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
13063 # we don't have job IDs
13065 ninfo = cfg.GetAllNodesInfo()
13066 iinfo = cfg.GetAllInstancesInfo().values()
13067 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
13070 node_list = [n.name for n in ninfo.values() if n.vm_capable]
13072 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
13073 hypervisor_name = self.hypervisor
13074 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
13075 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
13077 hypervisor_name = cluster_info.enabled_hypervisors[0]
13079 node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
13082 self.rpc.call_all_instances_info(node_list,
13083 cluster_info.enabled_hypervisors)
13085 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
13087 config_ndata = self._ComputeBasicNodeData(ninfo)
13088 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
13089 i_list, config_ndata)
13090 assert len(data["nodes"]) == len(ninfo), \
13091 "Incomplete node data computed"
13093 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
13095 self.in_data = data
13098 def _ComputeNodeGroupData(cfg):
13099 """Compute node groups data.
13102 ng = dict((guuid, {
13103 "name": gdata.name,
13104 "alloc_policy": gdata.alloc_policy,
13106 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
13111 def _ComputeBasicNodeData(node_cfg):
13112 """Compute global node data.
13115 @returns: a dict of name: (node dict, node config)
13118 # fill in static (config-based) values
13119 node_results = dict((ninfo.name, {
13120 "tags": list(ninfo.GetTags()),
13121 "primary_ip": ninfo.primary_ip,
13122 "secondary_ip": ninfo.secondary_ip,
13123 "offline": ninfo.offline,
13124 "drained": ninfo.drained,
13125 "master_candidate": ninfo.master_candidate,
13126 "group": ninfo.group,
13127 "master_capable": ninfo.master_capable,
13128 "vm_capable": ninfo.vm_capable,
13130 for ninfo in node_cfg.values())
13132 return node_results
13135 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
13137 """Compute global node data.
13139 @param node_results: the basic node structures as filled from the config
13142 # make a copy of the current dict
13143 node_results = dict(node_results)
13144 for nname, nresult in node_data.items():
13145 assert nname in node_results, "Missing basic data for node %s" % nname
13146 ninfo = node_cfg[nname]
13148 if not (ninfo.offline or ninfo.drained):
13149 nresult.Raise("Can't get data for node %s" % nname)
13150 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
13152 remote_info = nresult.payload
13154 for attr in ["memory_total", "memory_free", "memory_dom0",
13155 "vg_size", "vg_free", "cpu_total"]:
13156 if attr not in remote_info:
13157 raise errors.OpExecError("Node '%s' didn't return attribute"
13158 " '%s'" % (nname, attr))
13159 if not isinstance(remote_info[attr], int):
13160 raise errors.OpExecError("Node '%s' returned invalid value"
13162 (nname, attr, remote_info[attr]))
13163 # compute memory used by primary instances
13164 i_p_mem = i_p_up_mem = 0
13165 for iinfo, beinfo in i_list:
13166 if iinfo.primary_node == nname:
13167 i_p_mem += beinfo[constants.BE_MEMORY]
13168 if iinfo.name not in node_iinfo[nname].payload:
13171 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
13172 i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
13173 remote_info["memory_free"] -= max(0, i_mem_diff)
13176 i_p_up_mem += beinfo[constants.BE_MEMORY]
13178 # compute memory used by instances
13180 "total_memory": remote_info["memory_total"],
13181 "reserved_memory": remote_info["memory_dom0"],
13182 "free_memory": remote_info["memory_free"],
13183 "total_disk": remote_info["vg_size"],
13184 "free_disk": remote_info["vg_free"],
13185 "total_cpus": remote_info["cpu_total"],
13186 "i_pri_memory": i_p_mem,
13187 "i_pri_up_memory": i_p_up_mem,
13189 pnr_dyn.update(node_results[nname])
13190 node_results[nname] = pnr_dyn
13192 return node_results
13195 def _ComputeInstanceData(cluster_info, i_list):
13196 """Compute global instance data.
13200 for iinfo, beinfo in i_list:
13202 for nic in iinfo.nics:
13203 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
13207 "mode": filled_params[constants.NIC_MODE],
13208 "link": filled_params[constants.NIC_LINK],
13210 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
13211 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
13212 nic_data.append(nic_dict)
13214 "tags": list(iinfo.GetTags()),
13215 "admin_up": iinfo.admin_up,
13216 "vcpus": beinfo[constants.BE_VCPUS],
13217 "memory": beinfo[constants.BE_MEMORY],
13219 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
13221 "disks": [{constants.IDISK_SIZE: dsk.size,
13222 constants.IDISK_MODE: dsk.mode}
13223 for dsk in iinfo.disks],
13224 "disk_template": iinfo.disk_template,
13225 "hypervisor": iinfo.hypervisor,
13227 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
13229 instance_data[iinfo.name] = pir
13231 return instance_data
13233 def _AddNewInstance(self):
13234 """Add new instance data to allocator structure.
13236 This in combination with _AllocatorGetClusterData will create the
13237 correct structure needed as input for the allocator.
13239 The checks for the completeness of the opcode must have already been
13243 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
13245 if self.disk_template in constants.DTS_INT_MIRROR:
13246 self.required_nodes = 2
13248 self.required_nodes = 1
13252 "disk_template": self.disk_template,
13255 "vcpus": self.vcpus,
13256 "memory": self.memory,
13257 "disks": self.disks,
13258 "disk_space_total": disk_space,
13260 "required_nodes": self.required_nodes,
13261 "hypervisor": self.hypervisor,
13266 def _AddRelocateInstance(self):
13267 """Add relocate instance data to allocator structure.
13269 This in combination with _IAllocatorGetClusterData will create the
13270 correct structure needed as input for the allocator.
13272 The checks for the completeness of the opcode must have already been
13276 instance = self.cfg.GetInstanceInfo(self.name)
13277 if instance is None:
13278 raise errors.ProgrammerError("Unknown instance '%s' passed to"
13279 " IAllocator" % self.name)
13281 if instance.disk_template not in constants.DTS_MIRRORED:
13282 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
13283 errors.ECODE_INVAL)
13285 if instance.disk_template in constants.DTS_INT_MIRROR and \
13286 len(instance.secondary_nodes) != 1:
13287 raise errors.OpPrereqError("Instance has not exactly one secondary node",
13288 errors.ECODE_STATE)
13290 self.required_nodes = 1
13291 disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
13292 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
13296 "disk_space_total": disk_space,
13297 "required_nodes": self.required_nodes,
13298 "relocate_from": self.relocate_from,
13302 def _AddNodeEvacuate(self):
13303 """Get data for node-evacuate requests.
13307 "instances": self.instances,
13308 "evac_mode": self.evac_mode,
13311 def _AddChangeGroup(self):
13312 """Get data for node-evacuate requests.
13316 "instances": self.instances,
13317 "target_groups": self.target_groups,
13320 def _BuildInputData(self, fn, keydata):
13321 """Build input data structures.
13324 self._ComputeClusterData()
13327 request["type"] = self.mode
13328 for keyname, keytype in keydata:
13329 if keyname not in request:
13330 raise errors.ProgrammerError("Request parameter %s is missing" %
13332 val = request[keyname]
13333 if not keytype(val):
13334 raise errors.ProgrammerError("Request parameter %s doesn't pass"
13335 " validation, value %s, expected"
13336 " type %s" % (keyname, val, keytype))
13337 self.in_data["request"] = request
13339 self.in_text = serializer.Dump(self.in_data)
13341 _STRING_LIST = ht.TListOf(ht.TString)
13342 _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
13343 # pylint: disable=E1101
13344 # Class '...' has no 'OP_ID' member
13345 "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
13346 opcodes.OpInstanceMigrate.OP_ID,
13347 opcodes.OpInstanceReplaceDisks.OP_ID])
13351 ht.TListOf(ht.TAnd(ht.TIsLength(3),
13352 ht.TItems([ht.TNonEmptyString,
13353 ht.TNonEmptyString,
13354 ht.TListOf(ht.TNonEmptyString),
13357 ht.TListOf(ht.TAnd(ht.TIsLength(2),
13358 ht.TItems([ht.TNonEmptyString,
13361 _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
13362 ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
13365 constants.IALLOCATOR_MODE_ALLOC:
13368 ("name", ht.TString),
13369 ("memory", ht.TInt),
13370 ("disks", ht.TListOf(ht.TDict)),
13371 ("disk_template", ht.TString),
13372 ("os", ht.TString),
13373 ("tags", _STRING_LIST),
13374 ("nics", ht.TListOf(ht.TDict)),
13375 ("vcpus", ht.TInt),
13376 ("hypervisor", ht.TString),
13378 constants.IALLOCATOR_MODE_RELOC:
13379 (_AddRelocateInstance,
13380 [("name", ht.TString), ("relocate_from", _STRING_LIST)],
13382 constants.IALLOCATOR_MODE_NODE_EVAC:
13383 (_AddNodeEvacuate, [
13384 ("instances", _STRING_LIST),
13385 ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
13387 constants.IALLOCATOR_MODE_CHG_GROUP:
13388 (_AddChangeGroup, [
13389 ("instances", _STRING_LIST),
13390 ("target_groups", _STRING_LIST),
13394 def Run(self, name, validate=True, call_fn=None):
13395 """Run an instance allocator and return the results.
13398 if call_fn is None:
13399 call_fn = self.rpc.call_iallocator_runner
13401 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
13402 result.Raise("Failure while running the iallocator script")
13404 self.out_text = result.payload
13406 self._ValidateResult()
13408 def _ValidateResult(self):
13409 """Process the allocator results.
13411 This will process and if successful save the result in
13412 self.out_data and the other parameters.
13416 rdict = serializer.Load(self.out_text)
13417 except Exception, err:
13418 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
13420 if not isinstance(rdict, dict):
13421 raise errors.OpExecError("Can't parse iallocator results: not a dict")
13423 # TODO: remove backwards compatiblity in later versions
13424 if "nodes" in rdict and "result" not in rdict:
13425 rdict["result"] = rdict["nodes"]
13428 for key in "success", "info", "result":
13429 if key not in rdict:
13430 raise errors.OpExecError("Can't parse iallocator results:"
13431 " missing key '%s'" % key)
13432 setattr(self, key, rdict[key])
13434 if not self._result_check(self.result):
13435 raise errors.OpExecError("Iallocator returned invalid result,"
13436 " expected %s, got %s" %
13437 (self._result_check, self.result),
13438 errors.ECODE_INVAL)
13440 if self.mode == constants.IALLOCATOR_MODE_RELOC:
13441 assert self.relocate_from is not None
13442 assert self.required_nodes == 1
13444 node2group = dict((name, ndata["group"])
13445 for (name, ndata) in self.in_data["nodes"].items())
13447 fn = compat.partial(self._NodesToGroups, node2group,
13448 self.in_data["nodegroups"])
13450 instance = self.cfg.GetInstanceInfo(self.name)
13451 request_groups = fn(self.relocate_from + [instance.primary_node])
13452 result_groups = fn(rdict["result"] + [instance.primary_node])
13454 if self.success and not set(result_groups).issubset(request_groups):
13455 raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
13456 " differ from original groups (%s)" %
13457 (utils.CommaJoin(result_groups),
13458 utils.CommaJoin(request_groups)))
13460 elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
13461 assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
13463 self.out_data = rdict
13466 def _NodesToGroups(node2group, groups, nodes):
13467 """Returns a list of unique group names for a list of nodes.
13469 @type node2group: dict
13470 @param node2group: Map from node name to group UUID
13472 @param groups: Group information
13474 @param nodes: Node names
13481 group_uuid = node2group[node]
13483 # Ignore unknown node
13487 group = groups[group_uuid]
13489 # Can't find group, let's use UUID
13490 group_name = group_uuid
13492 group_name = group["name"]
13494 result.add(group_name)
13496 return sorted(result)
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the director and mode test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
          list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      raise errors.ProgrammerError("Uncatched mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result
#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

# Every resource queryable via an opcode must have an implementation here
assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)