4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
44 from ganeti import ssh
45 from ganeti import utils
46 from ganeti import errors
47 from ganeti import hypervisor
48 from ganeti import locking
49 from ganeti import constants
50 from ganeti import objects
51 from ganeti import serializer
52 from ganeti import ssconf
53 from ganeti import uidpool
54 from ganeti import compat
55 from ganeti import masterd
56 from ganeti import netutils
57 from ganeti import query
58 from ganeti import qlang
59 from ganeti import opcodes
61 from ganeti import runtime
63 import ganeti.masterd.instance # pylint: disable=W0611
67 """Data container for LU results with jobs.
69 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
70 by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
71 contained in the C{jobs} attribute and include the job IDs in the opcode
75 def __init__(self, jobs, **kwargs):
76 """Initializes this class.
78 Additional return values can be specified as keyword arguments.
80 @type jobs: list of lists of L{opcodes.OpCode}
81 @param jobs: A list of lists of opcode objects
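# Illustrative sketch (not part of the original module): an LU's Exec can hand
# follow-up work back to the processor by returning ResultWithJobs. The job
# list and the "info" keyword below are hypothetical examples.
#
#   def Exec(self, feedback_fn):
#     jobs = [[opcodes.OpTestDelay(duration=1.0)]]  # one job with one opcode
#     return ResultWithJobs(jobs, info="follow-up job submitted")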
88 class LogicalUnit(object):
89 """Logical Unit base class.
91 Subclasses must follow these rules:
92 - implement ExpandNames
93 - implement CheckPrereq (except when tasklets are used)
94 - implement Exec (except when tasklets are used)
95 - implement BuildHooksEnv
96 - implement BuildHooksNodes
97 - redefine HPATH and HTYPE
98 - optionally redefine their run requirements:
99 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
101 Note that all commands require root permissions.
103 @ivar dry_run_result: the value (if any) that will be returned to the caller
104 in dry-run mode (signalled by opcode dry_run parameter)
111 def __init__(self, processor, op, context, rpc):
112 """Constructor for LogicalUnit.
114 This needs to be overridden in derived classes in order to check op
118 self.proc = processor
120 self.cfg = context.cfg
121 self.glm = context.glm
123 self.owned_locks = context.glm.list_owned
124 self.context = context
126 # Dicts used to declare locking needs to mcpu
127 self.needed_locks = None
128 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
130 self.remove_locks = {}
131 # Used to force good behavior when calling helper functions
132 self.recalculate_locks = {}
134 self.Log = processor.Log # pylint: disable=C0103
135 self.LogWarning = processor.LogWarning # pylint: disable=C0103
136 self.LogInfo = processor.LogInfo # pylint: disable=C0103
137 self.LogStep = processor.LogStep # pylint: disable=C0103
138 # support for dry-run
139 self.dry_run_result = None
140 # support for generic debug attribute
141 if (not hasattr(self.op, "debug_level") or
142 not isinstance(self.op.debug_level, int)):
143 self.op.debug_level = 0
148 # Validate opcode parameters and set defaults
149 self.op.Validate(True)
151 self.CheckArguments()
153 def CheckArguments(self):
154 """Check syntactic validity for the opcode arguments.
156 This method is for doing a simple syntactic check and ensuring
157 validity of opcode parameters, without any cluster-related
158 checks. While the same can be accomplished in ExpandNames and/or
159 CheckPrereq, doing these separately is better because:
161 - ExpandNames is left as a purely lock-related function
162 - CheckPrereq is run after we have acquired locks (and possible
165 The function is allowed to change the self.op attribute so that
166 later methods need no longer worry about missing parameters.
171 def ExpandNames(self):
172 """Expand names for this LU.
174 This method is called before starting to execute the opcode, and it should
175 update all the parameters of the opcode to their canonical form (e.g. a
176 short node name must be fully expanded after this method has successfully
177 completed). This way locking, hooks, logging, etc. can work correctly.
179 LUs which implement this method must also populate the self.needed_locks
180 member, as a dict with lock levels as keys, and a list of needed lock names
183 - use an empty dict if you don't need any lock
184 - if you don't need any lock at a particular level omit that level
185 - don't put anything for the BGL level
186 - if you want all locks at a level use locking.ALL_SET as a value
188 If you need to share locks (rather than acquire them exclusively) at one
189 level you can modify self.share_locks, setting a true value (usually 1) for
190 that level. By default locks are not shared.
192 This function can also define a list of tasklets, which then will be
193 executed in order instead of the usual LU-level CheckPrereq and Exec
194 functions, if those are not defined by the LU.
198 # Acquire all nodes and one instance
199 self.needed_locks = {
200 locking.LEVEL_NODE: locking.ALL_SET,
201 locking.LEVEL_INSTANCE: ['instance1.example.com'],
203 # Acquire just two nodes
204 self.needed_locks = {
205 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
208 self.needed_locks = {} # No, you can't leave it to the default value None
211 # The implementation of this method is mandatory only if the new LU is
212 # concurrent, so that old LUs don't need to be changed all at the same
215 self.needed_locks = {} # Exclusive LUs don't need locks.
217 raise NotImplementedError
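# A minimal concrete implementation could look like this (sketch, assuming an
# instance-level LU that recalculates its node locks later in DeclareLocks):
#
#   def ExpandNames(self):
#     self._ExpandAndLockInstance()
#     self.needed_locks[locking.LEVEL_NODE] = []
#     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE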
219 def DeclareLocks(self, level):
220 """Declare LU locking needs for a level
222 While most LUs can just declare their locking needs at ExpandNames time,
223 sometimes there's the need to calculate some locks after having acquired
224 the ones before. This function is called just before acquiring locks at a
225 particular level, but after acquiring the ones at lower levels, and permits
226 such calculations. It can be used to modify self.needed_locks, and by
227 default it does nothing.
229 This function is only called if you have something already set in
230 self.needed_locks for the level.
232 @param level: Locking level which is going to be locked
233 @type level: member of ganeti.locking.LEVELS
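# Typical pattern (sketch): once the instance locks acquired via ExpandNames
# are held, compute the node locks from them with the helper defined below.
#
#   def DeclareLocks(self, level):
#     if level == locking.LEVEL_NODE:
#       self._LockInstancesNodes()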
237 def CheckPrereq(self):
238 """Check prerequisites for this LU.
240 This method should check that the prerequisites for the execution
241 of this LU are fulfilled. It can do internode communication, but
242 it should be idempotent - no cluster or system changes are
245 The method should raise errors.OpPrereqError in case something is
246 not fulfilled. Its return value is ignored.
248 This method should also update all the parameters of the opcode to
249 their canonical form if it hasn't been done by ExpandNames before.
252 if self.tasklets is not None:
253 for (idx, tl) in enumerate(self.tasklets):
254 logging.debug("Checking prerequisites for tasklet %s/%s",
255 idx + 1, len(self.tasklets))
260 def Exec(self, feedback_fn):
263 This method should implement the actual work. It should raise
264 errors.OpExecError for failures that are somewhat dealt with in
268 if self.tasklets is not None:
269 for (idx, tl) in enumerate(self.tasklets):
270 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
273 raise NotImplementedError
275 def BuildHooksEnv(self):
276 """Build hooks environment for this LU.
279 @return: Dictionary containing the environment that will be used for
280 running the hooks for this LU. The keys of the dict must not be prefixed
281 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
282 will extend the environment with additional variables. If no environment
283 should be defined, an empty dictionary should be returned (not C{None}).
284 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
288 raise NotImplementedError
290 def BuildHooksNodes(self):
291 """Build list of nodes to run LU's hooks.
293 @rtype: tuple; (list, list)
294 @return: Tuple containing a list of node names on which the hook
295 should run before the execution and a list of node names on which the
296 hook should run after the execution. "No nodes" should be returned as an
297 empty list (and not None).
298 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
302 raise NotImplementedError
304 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
305 """Notify the LU about the results of its hooks.
307 This method is called every time a hooks phase is executed, and notifies
308 the Logical Unit about the hooks' result. The LU can then use it to alter
309 its result based on the hooks. By default the method does nothing and the
310 previous result is passed back unchanged but any LU can define it if it
311 wants to use the local cluster hook-scripts somehow.
313 @param phase: one of L{constants.HOOKS_PHASE_POST} or
314 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
315 @param hook_results: the results of the multi-node hooks rpc call
316 @param feedback_fn: function used to send feedback back to the caller
317 @param lu_result: the previous Exec result this LU had, or None
319 @return: the new Exec result, based on the previous result
323 # API must be kept, thus we ignore the "unused argument" and "could
324 # be a function" warnings
325 # pylint: disable=W0613,R0201
328 def _ExpandAndLockInstance(self):
329 """Helper function to expand and lock an instance.
331 Many LUs that work on an instance take its name in self.op.instance_name
332 and need to expand it and then declare the expanded name for locking. This
333 function does it, and then updates self.op.instance_name to the expanded
334 name. It also initializes needed_locks as a dict, if this hasn't been done
338 if self.needed_locks is None:
339 self.needed_locks = {}
341 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
342 "_ExpandAndLockInstance called with instance-level locks set"
343 self.op.instance_name = _ExpandInstanceName(self.cfg,
344 self.op.instance_name)
345 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
347 def _LockInstancesNodes(self, primary_only=False):
348 """Helper function to declare instances' nodes for locking.
350 This function should be called after locking one or more instances to lock
351 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
352 with all primary or secondary nodes for instances already locked and
353 present in self.needed_locks[locking.LEVEL_INSTANCE].
355 It should be called from DeclareLocks, and for safety only works if
356 self.recalculate_locks[locking.LEVEL_NODE] is set.
358 In the future it may grow parameters to just lock some instance's nodes, or
359 to just lock primaries or secondary nodes, if needed.
361 It should be called in DeclareLocks in a way similar to::
363 if level == locking.LEVEL_NODE:
364 self._LockInstancesNodes()
366 @type primary_only: boolean
367 @param primary_only: only lock primary nodes of locked instances
370 assert locking.LEVEL_NODE in self.recalculate_locks, \
371 "_LockInstancesNodes helper function called with no nodes to recalculate"
373 # TODO: check if we've really been called with the instance locks held
375 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
376 # future we might want to have different behaviors depending on the value
377 # of self.recalculate_locks[locking.LEVEL_NODE]
379 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
380 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
381 wanted_nodes.append(instance.primary_node)
383 wanted_nodes.extend(instance.secondary_nodes)
385 if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
386 self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
387 elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
388 self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
390 del self.recalculate_locks[locking.LEVEL_NODE]
393 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
394 """Simple LU which runs no hooks.
396 This LU is intended as a parent for other LogicalUnits which will
397 run no hooks, in order to reduce duplicate code.
403 def BuildHooksEnv(self):
404 """Empty BuildHooksEnv for NoHooksLu.
406 This just raises an error.
409 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
411 def BuildHooksNodes(self):
412 """Empty BuildHooksNodes for NoHooksLU.
415 raise AssertionError("BuildHooksNodes called for NoHooksLU")
419 """Tasklet base class.
421 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
422 they can mix legacy code with tasklets. Locking needs to be done in the LU,
423 tasklets know nothing about locks.
425 Subclasses must follow these rules:
426 - Implement CheckPrereq
430 def __init__(self, lu):
437 def CheckPrereq(self):
438 """Check prerequisites for this tasklets.
440 This method should check whether the prerequisites for the execution of
441 this tasklet are fulfilled. It can do internode communication, but it
442 should be idempotent - no cluster or system changes are allowed.
444 The method should raise errors.OpPrereqError in case something is not
445 fulfilled. Its return value is ignored.
447 This method should also update all parameters to their canonical form if it
448 hasn't been done before.
453 def Exec(self, feedback_fn):
454 """Execute the tasklet.
456 This method should implement the actual work. It should raise
457 errors.OpExecError for failures that are somewhat dealt with in code, or
461 raise NotImplementedError
465 """Base for query utility classes.
468 #: Attribute holding field definitions
471 def __init__(self, filter_, fields, use_locking):
472 """Initializes this class.
475 self.use_locking = use_locking
477 self.query = query.Query(self.FIELDS, fields, filter_=filter_,
479 self.requested_data = self.query.RequestedData()
480 self.names = self.query.RequestedNames()
482 # Sort only if no names were requested
483 self.sort_by_name = not self.names
485 self.do_locking = None
488 def _GetNames(self, lu, all_names, lock_level):
489 """Helper function to determine names asked for in the query.
493 names = lu.owned_locks(lock_level)
497 if self.wanted == locking.ALL_SET:
498 assert not self.names
499 # caller didn't specify names, so ordering is not important
500 return utils.NiceSort(names)
502 # caller specified names and we must keep the same order
504 assert not self.do_locking or lu.glm.is_owned(lock_level)
506 missing = set(self.wanted).difference(names)
508 raise errors.OpExecError("Some items were removed before retrieving"
509 " their data: %s" % missing)
511 # Return expanded names
514 def ExpandNames(self, lu):
515 """Expand names for this query.
517 See L{LogicalUnit.ExpandNames}.
520 raise NotImplementedError()
522 def DeclareLocks(self, lu, level):
523 """Declare locks for this query.
525 See L{LogicalUnit.DeclareLocks}.
528 raise NotImplementedError()
530 def _GetQueryData(self, lu):
531 """Collects all data for this query.
533 @return: Query data object
536 raise NotImplementedError()
538 def NewStyleQuery(self, lu):
539 """Collect data and execute query.
542 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
543 sort_by_name=self.sort_by_name)
545 def OldStyleQuery(self, lu):
546 """Collect data and execute query.
549 return self.query.OldStyleQuery(self._GetQueryData(lu),
550 sort_by_name=self.sort_by_name)
554 """Returns a dict declaring all lock levels shared.
557 return dict.fromkeys(locking.LEVELS, 1)
560 def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
562 """Checks if node groups for locked instances are still correct.
564 @type cfg: L{config.ConfigWriter}
565 @param cfg: Cluster configuration
566 @type instances: dict; string as key, L{objects.Instance} as value
567 @param instances: Dictionary, instance name as key, instance object as value
568 @type owned_groups: iterable of string
569 @param owned_groups: List of owned groups
570 @type owned_nodes: iterable of string
571 @param owned_nodes: List of owned nodes
572 @type cur_group_uuid: string or None
573 @param cur_group_uuid: Optional group UUID to check against instance's groups
576 for (name, inst) in instances.items():
577 assert owned_nodes.issuperset(inst.all_nodes), \
578 "Instance %s's nodes changed while we kept the lock" % name
580 inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)
582 assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
583 "Instance %s has no node in group %s" % (name, cur_group_uuid)
586 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
587 """Checks if the owned node groups are still correct for an instance.
589 @type cfg: L{config.ConfigWriter}
590 @param cfg: The cluster configuration
591 @type instance_name: string
592 @param instance_name: Instance name
593 @type owned_groups: set or frozenset
594 @param owned_groups: List of currently owned node groups
597 inst_groups = cfg.GetInstanceNodeGroups(instance_name)
599 if not owned_groups.issuperset(inst_groups):
600 raise errors.OpPrereqError("Instance %s's node groups changed since"
601 " locks were acquired, current groups are"
602 " are '%s', owning groups '%s'; retry the"
605 utils.CommaJoin(inst_groups),
606 utils.CommaJoin(owned_groups)),
612 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
613 """Checks if the instances in a node group are still correct.
615 @type cfg: L{config.ConfigWriter}
616 @param cfg: The cluster configuration
617 @type group_uuid: string
618 @param group_uuid: Node group UUID
619 @type owned_instances: set or frozenset
620 @param owned_instances: List of currently owned instances
623 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
624 if owned_instances != wanted_instances:
625 raise errors.OpPrereqError("Instances in node group '%s' changed since"
626 " locks were acquired, wanted '%s', have '%s';"
627 " retry the operation" %
629 utils.CommaJoin(wanted_instances),
630 utils.CommaJoin(owned_instances)),
633 return wanted_instances
636 def _SupportsOob(cfg, node):
637 """Tells if node supports OOB.
639 @type cfg: L{config.ConfigWriter}
640 @param cfg: The cluster configuration
641 @type node: L{objects.Node}
642 @param node: The node
643 @return: The OOB script if supported or an empty string otherwise
646 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
649 def _GetWantedNodes(lu, nodes):
650 """Returns list of checked and expanded node names.
652 @type lu: L{LogicalUnit}
653 @param lu: the logical unit on whose behalf we execute
655 @param nodes: list of node names or None for all nodes
657 @return: the list of nodes, sorted
658 @raise errors.ProgrammerError: if the nodes parameter is wrong type
662 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
664 return utils.NiceSort(lu.cfg.GetNodeList())
667 def _GetWantedInstances(lu, instances):
668 """Returns list of checked and expanded instance names.
670 @type lu: L{LogicalUnit}
671 @param lu: the logical unit on whose behalf we execute
672 @type instances: list
673 @param instances: list of instance names or None for all instances
675 @return: the list of instances, sorted
676 @raise errors.OpPrereqError: if the instances parameter is wrong type
677 @raise errors.OpPrereqError: if any of the passed instances is not found
681 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
683 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
687 def _GetUpdatedParams(old_params, update_dict,
688 use_default=True, use_none=False):
689 """Return the new version of a parameter dictionary.
691 @type old_params: dict
692 @param old_params: old parameters
693 @type update_dict: dict
694 @param update_dict: dict containing new parameter values, or
695 constants.VALUE_DEFAULT to reset the parameter to its default
697 @type use_default: boolean
698 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
699 values as 'to be deleted' values
700 @type use_none: boolean
701 @param use_none: whether to recognise C{None} values as 'to be
704 @return: the new parameter dictionary
707 params_copy = copy.deepcopy(old_params)
708 for key, val in update_dict.iteritems():
709 if ((use_default and val == constants.VALUE_DEFAULT) or
710 (use_none and val is None)):
716 params_copy[key] = val
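# Usage sketch with hypothetical values: VALUE_DEFAULT removes a key so it
# falls back to the cluster default, while other keys are overwritten in the
# returned copy.
#
#   old = {"memory": 512, "vcpus": 2}
#   upd = {"memory": constants.VALUE_DEFAULT, "vcpus": 4}
#   _GetUpdatedParams(old, upd)  # -> {"vcpus": 4}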
720 def _ReleaseLocks(lu, level, names=None, keep=None):
721 """Releases locks owned by an LU.
723 @type lu: L{LogicalUnit}
724 @param level: Lock level
725 @type names: list or None
726 @param names: Names of locks to release
727 @type keep: list or None
728 @param keep: Names of locks to retain
731 assert not (keep is not None and names is not None), \
732 "Only one of the 'names' and the 'keep' parameters can be given"
734 if names is not None:
735 should_release = names.__contains__
737 should_release = lambda name: name not in keep
739 should_release = None
745 # Determine which locks to release
746 for name in lu.owned_locks(level):
747 if should_release(name):
752 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
754 # Release just some locks
755 lu.glm.release(level, names=release)
757 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
760 lu.glm.release(level)
762 assert not lu.glm.is_owned(level), "No locks should be owned"
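# Usage sketch (hypothetical lock names): an LU that has narrowed down the
# nodes it actually needs can give the extra node locks back early; 'names'
# and 'keep' are mutually exclusive.
#
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.node_name])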
765 def _MapInstanceDisksToNodes(instances):
766 """Creates a map from (node, volume) to instance name.
768 @type instances: list of L{objects.Instance}
769 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
772 return dict(((node, vol), inst.name)
773 for inst in instances
774 for (node, vols) in inst.MapLVsByNode().items()
778 def _RunPostHook(lu, node_name):
779 """Runs the post-hook for an opcode on a single node.
782 hm = lu.proc.BuildHooksManager(lu)
784 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
786 # pylint: disable=W0702
787 lu.LogWarning("Errors occurred running hooks on %s" % node_name)
790 def _CheckOutputFields(static, dynamic, selected):
791 """Checks whether all selected fields are valid.
793 @type static: L{utils.FieldSet}
794 @param static: static fields set
795 @type dynamic: L{utils.FieldSet}
796 @param dynamic: dynamic fields set
803 delta = f.NonMatching(selected)
805 raise errors.OpPrereqError("Unknown output fields selected: %s"
806 % ",".join(delta), errors.ECODE_INVAL)
809 def _CheckGlobalHvParams(params):
810 """Validates that given hypervisor params are not global ones.
812 This will ensure that instances don't get customised versions of
816 used_globals = constants.HVC_GLOBALS.intersection(params)
818 msg = ("The following hypervisor parameters are global and cannot"
819 " be customized at instance level, please modify them at"
820 " cluster level: %s" % utils.CommaJoin(used_globals))
821 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
824 def _CheckNodeOnline(lu, node, msg=None):
825 """Ensure that a given node is online.
827 @param lu: the LU on behalf of which we make the check
828 @param node: the node to check
829 @param msg: if passed, should be a message to replace the default one
830 @raise errors.OpPrereqError: if the node is offline
834 msg = "Can't use offline node"
835 if lu.cfg.GetNodeInfo(node).offline:
836 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
839 def _CheckNodeNotDrained(lu, node):
840 """Ensure that a given node is not drained.
842 @param lu: the LU on behalf of which we make the check
843 @param node: the node to check
844 @raise errors.OpPrereqError: if the node is drained
847 if lu.cfg.GetNodeInfo(node).drained:
848 raise errors.OpPrereqError("Can't use drained node %s" % node,
852 def _CheckNodeVmCapable(lu, node):
853 """Ensure that a given node is vm capable.
855 @param lu: the LU on behalf of which we make the check
856 @param node: the node to check
857 @raise errors.OpPrereqError: if the node is not vm capable
860 if not lu.cfg.GetNodeInfo(node).vm_capable:
861 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
865 def _CheckNodeHasOS(lu, node, os_name, force_variant):
866 """Ensure that a node supports a given OS.
868 @param lu: the LU on behalf of which we make the check
869 @param node: the node to check
870 @param os_name: the OS to query about
871 @param force_variant: whether to ignore variant errors
872 @raise errors.OpPrereqError: if the node does not support the OS
875 result = lu.rpc.call_os_get(node, os_name)
876 result.Raise("OS '%s' not in supported OS list for node %s" %
878 prereq=True, ecode=errors.ECODE_INVAL)
879 if not force_variant:
880 _CheckOSVariant(result.payload, os_name)
883 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
884 """Ensure that a node has the given secondary ip.
886 @type lu: L{LogicalUnit}
887 @param lu: the LU on behalf of which we make the check
889 @param node: the node to check
890 @type secondary_ip: string
891 @param secondary_ip: the ip to check
892 @type prereq: boolean
893 @param prereq: whether to throw a prerequisite or an execute error
894 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
895 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
898 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
899 result.Raise("Failure checking secondary ip on node %s" % node,
900 prereq=prereq, ecode=errors.ECODE_ENVIRON)
901 if not result.payload:
902 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
903 " please fix and re-run this command" % secondary_ip)
905 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
907 raise errors.OpExecError(msg)
910 def _GetClusterDomainSecret():
911 """Reads the cluster domain secret.
914 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
918 def _CheckInstanceDown(lu, instance, reason):
919 """Ensure that an instance is not running."""
920 if instance.admin_up:
921 raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
922 (instance.name, reason), errors.ECODE_STATE)
924 pnode = instance.primary_node
925 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
926 ins_l.Raise("Can't contact node %s for instance information" % pnode,
927 prereq=True, ecode=errors.ECODE_ENVIRON)
929 if instance.name in ins_l.payload:
930 raise errors.OpPrereqError("Instance %s is running, %s" %
931 (instance.name, reason), errors.ECODE_STATE)
934 def _ExpandItemName(fn, name, kind):
935 """Expand an item name.
937 @param fn: the function to use for expansion
938 @param name: requested item name
939 @param kind: text description ('Node' or 'Instance')
940 @return: the resolved (full) name
941 @raise errors.OpPrereqError: if the item is not found
945 if full_name is None:
946 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
951 def _ExpandNodeName(cfg, name):
952 """Wrapper over L{_ExpandItemName} for nodes."""
953 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
956 def _ExpandInstanceName(cfg, name):
957 """Wrapper over L{_ExpandItemName} for instance."""
958 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
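# Sketch with hypothetical names: short names are canonicalised against the
# cluster configuration, unknown names raise OpPrereqError.
#
#   _ExpandNodeName(self.cfg, "node1")          # -> "node1.example.com"
#   _ExpandInstanceName(self.cfg, "no-such-vm") # raises errors.OpPrereqError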
961 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
962 memory, vcpus, nics, disk_template, disks,
963 bep, hvp, hypervisor_name, tags):
964 """Builds instance related env variables for hooks
966 This builds the hook environment from individual variables.
969 @param name: the name of the instance
970 @type primary_node: string
971 @param primary_node: the name of the instance's primary node
972 @type secondary_nodes: list
973 @param secondary_nodes: list of secondary nodes as strings
974 @type os_type: string
975 @param os_type: the name of the instance's OS
976 @type status: boolean
977 @param status: the should_run status of the instance
979 @param memory: the memory size of the instance
981 @param vcpus: the count of VCPUs the instance has
983 @param nics: list of tuples (ip, mac, mode, link) representing
984 the NICs the instance has
985 @type disk_template: string
986 @param disk_template: the disk template of the instance
988 @param disks: the list of (size, mode) pairs
990 @param bep: the backend parameters for the instance
992 @param hvp: the hypervisor parameters for the instance
993 @type hypervisor_name: string
994 @param hypervisor_name: the hypervisor for the instance
996 @param tags: list of instance tags as strings
998 @return: the hook environment for this instance
1007 "INSTANCE_NAME": name,
1008 "INSTANCE_PRIMARY": primary_node,
1009 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1010 "INSTANCE_OS_TYPE": os_type,
1011 "INSTANCE_STATUS": str_status,
1012 "INSTANCE_MEMORY": memory,
1013 "INSTANCE_VCPUS": vcpus,
1014 "INSTANCE_DISK_TEMPLATE": disk_template,
1015 "INSTANCE_HYPERVISOR": hypervisor_name,
1019 nic_count = len(nics)
1020 for idx, (ip, mac, mode, link) in enumerate(nics):
1023 env["INSTANCE_NIC%d_IP" % idx] = ip
1024 env["INSTANCE_NIC%d_MAC" % idx] = mac
1025 env["INSTANCE_NIC%d_MODE" % idx] = mode
1026 env["INSTANCE_NIC%d_LINK" % idx] = link
1027 if mode == constants.NIC_MODE_BRIDGED:
1028 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1032 env["INSTANCE_NIC_COUNT"] = nic_count
1035 disk_count = len(disks)
1036 for idx, (size, mode) in enumerate(disks):
1037 env["INSTANCE_DISK%d_SIZE" % idx] = size
1038 env["INSTANCE_DISK%d_MODE" % idx] = mode
1042 env["INSTANCE_DISK_COUNT"] = disk_count
1047 env["INSTANCE_TAGS"] = " ".join(tags)
1049 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1050 for key, value in source.items():
1051 env["INSTANCE_%s_%s" % (kind, key)] = value
1056 def _NICListToTuple(lu, nics):
1057 """Build a list of nic information tuples.
1059 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1060 value in LUInstanceQueryData.
1062 @type lu: L{LogicalUnit}
1063 @param lu: the logical unit on whose behalf we execute
1064 @type nics: list of L{objects.NIC}
1065 @param nics: list of nics to convert to hooks tuples
1069 cluster = lu.cfg.GetClusterInfo()
1073 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1074 mode = filled_params[constants.NIC_MODE]
1075 link = filled_params[constants.NIC_LINK]
1076 hooks_nics.append((ip, mac, mode, link))
1080 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1081 """Builds instance related env variables for hooks from an object.
1083 @type lu: L{LogicalUnit}
1084 @param lu: the logical unit on whose behalf we execute
1085 @type instance: L{objects.Instance}
1086 @param instance: the instance for which we should build the
1088 @type override: dict
1089 @param override: dictionary with key/values that will override
1092 @return: the hook environment dictionary
1095 cluster = lu.cfg.GetClusterInfo()
1096 bep = cluster.FillBE(instance)
1097 hvp = cluster.FillHV(instance)
1099 "name": instance.name,
1100 "primary_node": instance.primary_node,
1101 "secondary_nodes": instance.secondary_nodes,
1102 "os_type": instance.os,
1103 "status": instance.admin_up,
1104 "memory": bep[constants.BE_MEMORY],
1105 "vcpus": bep[constants.BE_VCPUS],
1106 "nics": _NICListToTuple(lu, instance.nics),
1107 "disk_template": instance.disk_template,
1108 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1111 "hypervisor_name": instance.hypervisor,
1112 "tags": instance.tags,
1115 args.update(override)
1116 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1119 def _AdjustCandidatePool(lu, exceptions):
1120 """Adjust the candidate pool after node operations.
1123 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1125 lu.LogInfo("Promoted nodes to master candidate role: %s",
1126 utils.CommaJoin(node.name for node in mod_list))
1127 for name in mod_list:
1128 lu.context.ReaddNode(name)
1129 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1131 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1135 def _DecideSelfPromotion(lu, exceptions=None):
1136 """Decide whether I should promote myself as a master candidate.
1139 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1140 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1141 # the new node will increase mc_max with one, so:
1142 mc_should = min(mc_should + 1, cp_size)
1143 return mc_now < mc_should
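# Worked example (hypothetical numbers): with candidate_pool_size = 10 and
# GetMasterCandidateStats reporting mc_now = 3, mc_should = 3, adding this
# node gives mc_should = min(3 + 1, 10) = 4, so 3 < 4 and the node should
# promote itself.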
1146 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1147 """Check that the brigdes needed by a list of nics exist.
1150 cluster = lu.cfg.GetClusterInfo()
1151 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1152 brlist = [params[constants.NIC_LINK] for params in paramslist
1153 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1155 result = lu.rpc.call_bridges_exist(target_node, brlist)
1156 result.Raise("Error checking bridges on destination node '%s'" %
1157 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1160 def _CheckInstanceBridgesExist(lu, instance, node=None):
1161 """Check that the brigdes needed by an instance exist.
1165 node = instance.primary_node
1166 _CheckNicsBridgesExist(lu, instance.nics, node)
1169 def _CheckOSVariant(os_obj, name):
1170 """Check whether an OS name conforms to the os variants specification.
1172 @type os_obj: L{objects.OS}
1173 @param os_obj: OS object to check
1175 @param name: OS name passed by the user, to check for validity
1178 variant = objects.OS.GetVariant(name)
1179 if not os_obj.supported_variants:
1181 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1182 " passed)" % (os_obj.name, variant),
1186 raise errors.OpPrereqError("OS name must include a variant",
1189 if variant not in os_obj.supported_variants:
1190 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1193 def _GetNodeInstancesInner(cfg, fn):
1194 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1197 def _GetNodeInstances(cfg, node_name):
1198 """Returns a list of all primary and secondary instances on a node.
1202 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1205 def _GetNodePrimaryInstances(cfg, node_name):
1206 """Returns primary instances on a node.
1209 return _GetNodeInstancesInner(cfg,
1210 lambda inst: node_name == inst.primary_node)
1213 def _GetNodeSecondaryInstances(cfg, node_name):
1214 """Returns secondary instances on a node.
1217 return _GetNodeInstancesInner(cfg,
1218 lambda inst: node_name in inst.secondary_nodes)
1221 def _GetStorageTypeArgs(cfg, storage_type):
1222 """Returns the arguments for a storage type.
1225 # Special case for file storage
1226 if storage_type == constants.ST_FILE:
1227 # storage.FileStorage wants a list of storage directories
1228 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1233 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1236 for dev in instance.disks:
1237 cfg.SetDiskID(dev, node_name)
1239 result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1240 result.Raise("Failed to get disk status from node %s" % node_name,
1241 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1243 for idx, bdev_status in enumerate(result.payload):
1244 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1250 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1251 """Check the sanity of iallocator and node arguments and use the
1252 cluster-wide iallocator if appropriate.
1254 Check that at most one of (iallocator, node) is specified. If none is
1255 specified, then the LU's opcode's iallocator slot is filled with the
1256 cluster-wide default iallocator.
1258 @type iallocator_slot: string
1259 @param iallocator_slot: the name of the opcode iallocator slot
1260 @type node_slot: string
1261 @param node_slot: the name of the opcode target node slot
1264 node = getattr(lu.op, node_slot, None)
1265 iallocator = getattr(lu.op, iallocator_slot, None)
1267 if node is not None and iallocator is not None:
1268 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1270 elif node is None and iallocator is None:
1271 default_iallocator = lu.cfg.GetDefaultIAllocator()
1272 if default_iallocator:
1273 setattr(lu.op, iallocator_slot, default_iallocator)
1275 raise errors.OpPrereqError("No iallocator or node given and no"
1276 " cluster-wide default iallocator found;"
1277 " please specify either an iallocator or a"
1278 " node, or set a cluster-wide default"
1282 def _GetDefaultIAllocator(cfg, iallocator):
1283 """Decides on which iallocator to use.
1285 @type cfg: L{config.ConfigWriter}
1286 @param cfg: Cluster configuration object
1287 @type iallocator: string or None
1288 @param iallocator: Iallocator specified in opcode
1290 @return: Iallocator name
1294 # Use default iallocator
1295 iallocator = cfg.GetDefaultIAllocator()
1298 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1299 " opcode nor as a cluster-wide default",
1305 class LUClusterPostInit(LogicalUnit):
1306 """Logical unit for running hooks after cluster initialization.
1309 HPATH = "cluster-init"
1310 HTYPE = constants.HTYPE_CLUSTER
1312 def BuildHooksEnv(self):
1317 "OP_TARGET": self.cfg.GetClusterName(),
1320 def BuildHooksNodes(self):
1321 """Build hooks nodes.
1324 return ([], [self.cfg.GetMasterNode()])
1326 def Exec(self, feedback_fn):
1333 class LUClusterDestroy(LogicalUnit):
1334 """Logical unit for destroying the cluster.
1337 HPATH = "cluster-destroy"
1338 HTYPE = constants.HTYPE_CLUSTER
1340 def BuildHooksEnv(self):
1345 "OP_TARGET": self.cfg.GetClusterName(),
1348 def BuildHooksNodes(self):
1349 """Build hooks nodes.
1354 def CheckPrereq(self):
1355 """Check prerequisites.
1357 This checks whether the cluster is empty.
1359 Any errors are signaled by raising errors.OpPrereqError.
1362 master = self.cfg.GetMasterNode()
1364 nodelist = self.cfg.GetNodeList()
1365 if len(nodelist) != 1 or nodelist[0] != master:
1366 raise errors.OpPrereqError("There are still %d node(s) in"
1367 " this cluster." % (len(nodelist) - 1),
1369 instancelist = self.cfg.GetInstanceList()
1371 raise errors.OpPrereqError("There are still %d instance(s) in"
1372 " this cluster." % len(instancelist),
1375 def Exec(self, feedback_fn):
1376 """Destroys the cluster.
1379 master = self.cfg.GetMasterNode()
1381 # Run post hooks on master node before it's removed
1382 _RunPostHook(self, master)
1384 result = self.rpc.call_node_deactivate_master_ip(master)
1385 result.Raise("Could not disable the master role")
1390 def _VerifyCertificate(filename):
1391 """Verifies a certificate for L{LUClusterVerifyConfig}.
1393 @type filename: string
1394 @param filename: Path to PEM file
1398 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1399 utils.ReadFile(filename))
1400 except Exception, err: # pylint: disable=W0703
1401 return (LUClusterVerifyConfig.ETYPE_ERROR,
1402 "Failed to load X509 certificate %s: %s" % (filename, err))
1405 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1406 constants.SSL_CERT_EXPIRATION_ERROR)
1409 fnamemsg = "While verifying %s: %s" % (filename, msg)
1414 return (None, fnamemsg)
1415 elif errcode == utils.CERT_WARNING:
1416 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1417 elif errcode == utils.CERT_ERROR:
1418 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1420 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1423 def _GetAllHypervisorParameters(cluster, instances):
1424 """Compute the set of all hypervisor parameters.
1426 @type cluster: L{objects.Cluster}
1427 @param cluster: the cluster object
1428 @type instances: list of L{objects.Instance}
1429 @param instances: additional instances from which to obtain parameters
1430 @rtype: list of (origin, hypervisor, parameters)
1431 @return: a list with all parameters found, indicating the hypervisor they
1432 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1437 for hv_name in cluster.enabled_hypervisors:
1438 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1440 for os_name, os_hvp in cluster.os_hvp.items():
1441 for hv_name, hv_params in os_hvp.items():
1443 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1444 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1446 # TODO: collapse identical parameter values in a single one
1447 for instance in instances:
1448 if instance.hvparams:
1449 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1450 cluster.FillHV(instance)))
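# The collected list looks like this (hypothetical entries):
#
#   [("cluster", "xen-pvm", {...}),
#    ("os debian-image", "xen-pvm", {...}),
#    ("instance inst1.example.com", "kvm", {...})]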
1455 class _VerifyErrors(object):
1456 """Mix-in for cluster/group verify LUs.
1458 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1459 self.op and self._feedback_fn to be available.)
1462 TCLUSTER = "cluster"
1464 TINSTANCE = "instance"
1466 ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1467 ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1468 ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
1469 ECLUSTERDANGLINGNODES = (TNODE, "ECLUSTERDANGLINGNODES")
1470 ECLUSTERDANGLINGINST = (TNODE, "ECLUSTERDANGLINGINST")
1471 EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1472 EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1473 EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1474 EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1475 EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1476 EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1477 EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
1478 ENODEDRBD = (TNODE, "ENODEDRBD")
1479 ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1480 ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1481 ENODEHOOKS = (TNODE, "ENODEHOOKS")
1482 ENODEHV = (TNODE, "ENODEHV")
1483 ENODELVM = (TNODE, "ENODELVM")
1484 ENODEN1 = (TNODE, "ENODEN1")
1485 ENODENET = (TNODE, "ENODENET")
1486 ENODEOS = (TNODE, "ENODEOS")
1487 ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1488 ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1489 ENODERPC = (TNODE, "ENODERPC")
1490 ENODESSH = (TNODE, "ENODESSH")
1491 ENODEVERSION = (TNODE, "ENODEVERSION")
1492 ENODESETUP = (TNODE, "ENODESETUP")
1493 ENODETIME = (TNODE, "ENODETIME")
1494 ENODEOOBPATH = (TNODE, "ENODEOOBPATH")
1496 ETYPE_FIELD = "code"
1497 ETYPE_ERROR = "ERROR"
1498 ETYPE_WARNING = "WARNING"
1500 def _Error(self, ecode, item, msg, *args, **kwargs):
1501 """Format an error message.
1503 Based on the opcode's error_codes parameter, either format a
1504 parseable error code, or a simpler error string.
1506 This must be called only from Exec and functions called from Exec.
1509 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1511 # first complete the msg
1514 # then format the whole message
1515 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1516 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1522 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1523 # and finally report it via the feedback_fn
1524 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
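# Example output (hypothetical node): with the opcode's error_codes enabled
# the line is machine-parseable, otherwise it stays human-readable:
#
#   "ERROR:ENODELVM:node:node1.example.com:unable to check volume groups"
#   "ERROR: node node1.example.com: unable to check volume groups"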
1526 def _ErrorIf(self, cond, *args, **kwargs):
1527 """Log an error message if the passed condition is True.
1531 or self.op.debug_simulate_errors) # pylint: disable=E1101
1533 self._Error(*args, **kwargs)
1534 # do not mark the operation as failed if it is only a warning
1535 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1536 self.bad = self.bad or cond
1539 class LUClusterVerify(NoHooksLU):
1540 """Submits all jobs necessary to verify the cluster.
1545 def ExpandNames(self):
1546 self.needed_locks = {}
1548 def Exec(self, feedback_fn):
1551 if self.op.group_name:
1552 groups = [self.op.group_name]
1553 depends_fn = lambda: None
1555 groups = self.cfg.GetNodeGroupList()
1557 # Verify global configuration
1558 jobs.append([opcodes.OpClusterVerifyConfig()])
1560 # Always depend on global verification
1561 depends_fn = lambda: [(-len(jobs), [])]
1563 jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1564 depends=depends_fn())]
1565 for group in groups)
1567 # Fix up all parameters
1568 for op in itertools.chain(*jobs): # pylint: disable=W0142
1569 op.debug_simulate_errors = self.op.debug_simulate_errors
1570 op.verbose = self.op.verbose
1571 op.error_codes = self.op.error_codes
1573 op.skip_checks = self.op.skip_checks
1574 except AttributeError:
1575 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1577 return ResultWithJobs(jobs)
1580 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1581 """Verifies the cluster config.
1586 def _VerifyHVP(self, hvp_data):
1587 """Verifies locally the syntax of the hypervisor parameters.
1590 for item, hv_name, hv_params in hvp_data:
1591 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1594 hv_class = hypervisor.GetHypervisor(hv_name)
1595 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1596 hv_class.CheckParameterSyntax(hv_params)
1597 except errors.GenericError, err:
1598 self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
1600 def ExpandNames(self):
1601 self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
1602 self.share_locks = _ShareAll()
1604 def CheckPrereq(self):
1605 """Check prerequisites.
1608 # Retrieve all information
1609 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1610 self.all_node_info = self.cfg.GetAllNodesInfo()
1611 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1613 def Exec(self, feedback_fn):
1614 """Verify integrity of cluster, performing various test on nodes.
1618 self._feedback_fn = feedback_fn
1620 feedback_fn("* Verifying cluster config")
1622 for msg in self.cfg.VerifyConfig():
1623 self._ErrorIf(True, self.ECLUSTERCFG, None, msg)
1625 feedback_fn("* Verifying cluster certificate files")
1627 for cert_filename in constants.ALL_CERT_FILES:
1628 (errcode, msg) = _VerifyCertificate(cert_filename)
1629 self._ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1631 feedback_fn("* Verifying hypervisor parameters")
1633 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1634 self.all_inst_info.values()))
1636 feedback_fn("* Verifying all nodes belong to an existing group")
1638 # We do this verification here because, should this bogus circumstance
1639 # occur, it would never be caught by VerifyGroup, which only acts on
1640 # nodes/instances reachable from existing node groups.
1642 dangling_nodes = set(node.name for node in self.all_node_info.values()
1643 if node.group not in self.all_group_info)
1645 dangling_instances = {}
1646 no_node_instances = []
1648 for inst in self.all_inst_info.values():
1649 if inst.primary_node in dangling_nodes:
1650 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1651 elif inst.primary_node not in self.all_node_info:
1652 no_node_instances.append(inst.name)
1657 utils.CommaJoin(dangling_instances.get(node.name,
1659 for node in dangling_nodes]
1661 self._ErrorIf(bool(dangling_nodes), self.ECLUSTERDANGLINGNODES, None,
1662 "the following nodes (and their instances) belong to a non"
1663 " existing group: %s", utils.CommaJoin(pretty_dangling))
1665 self._ErrorIf(bool(no_node_instances), self.ECLUSTERDANGLINGINST, None,
1666 "the following instances have a non-existing primary-node:"
1667 " %s", utils.CommaJoin(no_node_instances))
1672 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1673 """Verifies the status of a node group.
1676 HPATH = "cluster-verify"
1677 HTYPE = constants.HTYPE_CLUSTER
1680 _HOOKS_INDENT_RE = re.compile("^", re.M)
1682 class NodeImage(object):
1683 """A class representing the logical and physical status of a node.
1686 @ivar name: the node name to which this object refers
1687 @ivar volumes: a structure as returned from
1688 L{ganeti.backend.GetVolumeList} (runtime)
1689 @ivar instances: a list of running instances (runtime)
1690 @ivar pinst: list of configured primary instances (config)
1691 @ivar sinst: list of configured secondary instances (config)
1692 @ivar sbp: dictionary of {primary-node: list of instances} for all
1693 instances for which this node is secondary (config)
1694 @ivar mfree: free memory, as reported by hypervisor (runtime)
1695 @ivar dfree: free disk, as reported by the node (runtime)
1696 @ivar offline: the offline status (config)
1697 @type rpc_fail: boolean
1698 @ivar rpc_fail: whether the RPC verify call was successful (overall,
1699 not whether the individual keys were correct) (runtime)
1700 @type lvm_fail: boolean
1701 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1702 @type hyp_fail: boolean
1703 @ivar hyp_fail: whether the RPC call didn't return the instance list
1704 @type ghost: boolean
1705 @ivar ghost: whether this is a known node or not (config)
1706 @type os_fail: boolean
1707 @ivar os_fail: whether the RPC call didn't return valid OS data
1709 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1710 @type vm_capable: boolean
1711 @ivar vm_capable: whether the node can host instances
1714 def __init__(self, offline=False, name=None, vm_capable=True):
1723 self.offline = offline
1724 self.vm_capable = vm_capable
1725 self.rpc_fail = False
1726 self.lvm_fail = False
1727 self.hyp_fail = False
1729 self.os_fail = False
1732 def ExpandNames(self):
1733 # This raises errors.OpPrereqError on its own:
1734 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1736 # Get instances in node group; this is unsafe and needs verification later
1738 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
1740 self.needed_locks = {
1741 locking.LEVEL_INSTANCE: inst_names,
1742 locking.LEVEL_NODEGROUP: [self.group_uuid],
1743 locking.LEVEL_NODE: [],
1746 self.share_locks = _ShareAll()
1748 def DeclareLocks(self, level):
1749 if level == locking.LEVEL_NODE:
1750 # Get members of node group; this is unsafe and needs verification later
1751 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1753 all_inst_info = self.cfg.GetAllInstancesInfo()
1755 # In Exec(), we warn about mirrored instances that have primary and
1756 # secondary living in separate node groups. To fully verify that
1757 # volumes for these instances are healthy, we will need to do an
1758 # extra call to their secondaries. We ensure here those nodes will
1760 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
1761 # Important: access only the instances whose lock is owned
1762 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1763 nodes.update(all_inst_info[inst].secondary_nodes)
1765 self.needed_locks[locking.LEVEL_NODE] = nodes
1767 def CheckPrereq(self):
1768 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
1769 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
1771 group_nodes = set(self.group_info.members)
1773 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
1776 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1778 unlocked_instances = \
1779 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
1782 raise errors.OpPrereqError("Missing lock for nodes: %s" %
1783 utils.CommaJoin(unlocked_nodes),
1786 if unlocked_instances:
1787 raise errors.OpPrereqError("Missing lock for instances: %s" %
1788 utils.CommaJoin(unlocked_instances),
1791 self.all_node_info = self.cfg.GetAllNodesInfo()
1792 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1794 self.my_node_names = utils.NiceSort(group_nodes)
1795 self.my_inst_names = utils.NiceSort(group_instances)
1797 self.my_node_info = dict((name, self.all_node_info[name])
1798 for name in self.my_node_names)
1800 self.my_inst_info = dict((name, self.all_inst_info[name])
1801 for name in self.my_inst_names)
1803 # We detect here the nodes that will need the extra RPC calls for verifying
1804 # split LV volumes; they should be locked.
1805 extra_lv_nodes = set()
1807 for inst in self.my_inst_info.values():
1808 if inst.disk_template in constants.DTS_INT_MIRROR:
1809 for nname in inst.all_nodes:
1810 if self.all_node_info[nname].group != self.group_uuid:
1811 extra_lv_nodes.add(nname)
1813 unlocked_lv_nodes = \
1814 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1816 if unlocked_lv_nodes:
1817 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
1818 utils.CommaJoin(unlocked_lv_nodes),
1820 self.extra_lv_nodes = list(extra_lv_nodes)
1822 def _VerifyNode(self, ninfo, nresult):
1823 """Perform some basic validation on data returned from a node.
1825 - check the result data structure is well formed and has all the
1827 - check ganeti version
1829 @type ninfo: L{objects.Node}
1830 @param ninfo: the node to check
1831 @param nresult: the results from the node
1833 @return: whether overall this call was successful (and we can expect
1834 reasonable values in the response)
1838 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1840 # main result, nresult should be a non-empty dict
1841 test = not nresult or not isinstance(nresult, dict)
1842 _ErrorIf(test, self.ENODERPC, node,
1843 "unable to verify node: no data returned")
1847 # compares ganeti version
1848 local_version = constants.PROTOCOL_VERSION
1849 remote_version = nresult.get("version", None)
1850 test = not (remote_version and
1851 isinstance(remote_version, (list, tuple)) and
1852 len(remote_version) == 2)
1853 _ErrorIf(test, self.ENODERPC, node,
1854 "connection to node returned invalid data")
1858 test = local_version != remote_version[0]
1859 _ErrorIf(test, self.ENODEVERSION, node,
1860 "incompatible protocol versions: master %s,"
1861 " node %s", local_version, remote_version[0])
1865 # node seems compatible, we can actually try to look into its results
1867 # full package version
1868 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1869 self.ENODEVERSION, node,
1870 "software version mismatch: master %s, node %s",
1871 constants.RELEASE_VERSION, remote_version[1],
1872 code=self.ETYPE_WARNING)
1874 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1875 if ninfo.vm_capable and isinstance(hyp_result, dict):
1876 for hv_name, hv_result in hyp_result.iteritems():
1877 test = hv_result is not None
1878 _ErrorIf(test, self.ENODEHV, node,
1879 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1881 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1882 if ninfo.vm_capable and isinstance(hvp_result, list):
1883 for item, hv_name, hv_result in hvp_result:
1884 _ErrorIf(True, self.ENODEHV, node,
1885 "hypervisor %s parameter verify failure (source %s): %s",
1886 hv_name, item, hv_result)
1888 test = nresult.get(constants.NV_NODESETUP,
1889 ["Missing NODESETUP results"])
1890 _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1895 def _VerifyNodeTime(self, ninfo, nresult,
1896 nvinfo_starttime, nvinfo_endtime):
1897 """Check the node time.
1899 @type ninfo: L{objects.Node}
1900 @param ninfo: the node to check
1901 @param nresult: the remote results for the node
1902 @param nvinfo_starttime: the start time of the RPC call
1903 @param nvinfo_endtime: the end time of the RPC call
1907 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1909 ntime = nresult.get(constants.NV_TIME, None)
1911 ntime_merged = utils.MergeTime(ntime)
1912 except (ValueError, TypeError):
1913 _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1916 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1917 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1918 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1919 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1923 _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1924 "Node time diverges by at least %s from master node time",
1927 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1928 """Check the node LVM results.
1930 @type ninfo: L{objects.Node}
1931 @param ninfo: the node to check
1932 @param nresult: the remote results for the node
1933 @param vg_name: the configured VG name
1940 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1942 # checks vg existence and size > 20G
1943 vglist = nresult.get(constants.NV_VGLIST, None)
1945 _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1947 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1948 constants.MIN_VG_SIZE)
1949 _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1952 pvlist = nresult.get(constants.NV_PVLIST, None)
1953 test = pvlist is None
1954 _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1956 # check that ':' is not present in PV names, since it's a
1957 # special character for lvcreate (denotes the range of PEs to
1959 for _, pvname, owner_vg in pvlist:
1960 test = ":" in pvname
1961 _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1962 " '%s' of VG '%s'", pvname, owner_vg)
1964 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
1965 """Check the node bridges.
1967 @type ninfo: L{objects.Node}
1968 @param ninfo: the node to check
1969 @param nresult: the remote results for the node
1970 @param bridges: the expected list of bridges
1977 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1979 missing = nresult.get(constants.NV_BRIDGES, None)
1980 test = not isinstance(missing, list)
1981 _ErrorIf(test, self.ENODENET, node,
1982 "did not return valid bridge information")
1984 _ErrorIf(bool(missing), self.ENODENET, node, "missing bridges: %s" %
1985 utils.CommaJoin(sorted(missing)))
1987 def _VerifyNodeNetwork(self, ninfo, nresult):
1988 """Check the node network connectivity results.
1990 @type ninfo: L{objects.Node}
1991 @param ninfo: the node to check
1992 @param nresult: the remote results for the node
1996 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1998 test = constants.NV_NODELIST not in nresult
1999 _ErrorIf(test, self.ENODESSH, node,
2000 "node hasn't returned node ssh connectivity data")
2002 if nresult[constants.NV_NODELIST]:
2003 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2004 _ErrorIf(True, self.ENODESSH, node,
2005 "ssh communication with node '%s': %s", a_node, a_msg)
2007 test = constants.NV_NODENETTEST not in nresult
2008 _ErrorIf(test, self.ENODENET, node,
2009 "node hasn't returned node tcp connectivity data")
2011 if nresult[constants.NV_NODENETTEST]:
2012 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2014 _ErrorIf(True, self.ENODENET, node,
2015 "tcp communication with node '%s': %s",
2016 anode, nresult[constants.NV_NODENETTEST][anode])
2018 test = constants.NV_MASTERIP not in nresult
2019 _ErrorIf(test, self.ENODENET, node,
2020 "node hasn't returned node master IP reachability data")
2022 if not nresult[constants.NV_MASTERIP]:
2023 if node == self.master_node:
2024 msg = "the master node cannot reach the master IP (not configured?)"
2026 msg = "cannot reach the master IP"
2027 _ErrorIf(True, self.ENODENET, node, msg)
2029 def _VerifyInstance(self, instance, instanceconfig, node_image,
2031 """Verify an instance.
2033 This function checks whether the required block devices are
2034 available on the instance's nodes.
2037 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2038 node_current = instanceconfig.primary_node
2040 node_vol_should = {}
2041 instanceconfig.MapLVsByNode(node_vol_should)
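# node_vol_should now maps each node name to the logical volumes this
# instance is expected to have on that node; the loop below compares it
# against the volumes actually reported by the nodes.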
2043 for node in node_vol_should:
2044 n_img = node_image[node]
2045 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2046 # ignore missing volumes on offline or broken nodes
2048 for volume in node_vol_should[node]:
2049 test = volume not in n_img.volumes
2050 _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
2051 "volume %s missing on node %s", volume, node)
2053 if instanceconfig.admin_up:
2054 pri_img = node_image[node_current]
2055 test = instance not in pri_img.instances and not pri_img.offline
2056 _ErrorIf(test, self.EINSTANCEDOWN, instance,
2057 "instance not running on its primary node %s",
2060 diskdata = [(nname, success, status, idx)
2061 for (nname, disks) in diskstatus.items()
2062 for idx, (success, status) in enumerate(disks)]
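# Each diskdata entry is (node_name, success, bdev_status, disk_index),
# flattened out of the per-node, per-disk results collected in diskstatus.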
2064 for nname, success, bdev_status, idx in diskdata:
2065 # the 'ghost node' construction in Exec() ensures that we have a
2067 snode = node_image[nname]
2068 bad_snode = snode.ghost or snode.offline
2069 _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
2070 self.EINSTANCEFAULTYDISK, instance,
2071 "couldn't retrieve status for disk/%s on %s: %s",
2072 idx, nname, bdev_status)
2073 _ErrorIf((instanceconfig.admin_up and success and
2074 bdev_status.ldisk_status == constants.LDS_FAULTY),
2075 self.EINSTANCEFAULTYDISK, instance,
2076 "disk/%s on %s is faulty", idx, nname)
2078 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2079 """Verify if there are any unknown volumes in the cluster.
2081 The .os, .swap and backup volumes are ignored. All other volumes are
2082 reported as unknown.
2084 @type reserved: L{ganeti.utils.FieldSet}
2085 @param reserved: a FieldSet of reserved volume names
2088 for node, n_img in node_image.items():
2089 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2090 self.all_node_info[node].group != self.group_uuid):
2091 # skip non-healthy nodes
2093 for volume in n_img.volumes:
2094 test = ((node not in node_vol_should or
2095 volume not in node_vol_should[node]) and
2096 not reserved.Matches(volume))
2097 self._ErrorIf(test, self.ENODEORPHANLV, node,
2098 "volume %s is unknown", volume)
2100 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2101 """Verify N+1 Memory Resilience.
2103 Check that if one single node dies we can still start all the
2104 instances it was primary for.
2107 cluster_info = self.cfg.GetClusterInfo()
2108 for node, n_img in node_image.items():
2109 # This code checks that every node which is now listed as
2110 # secondary has enough memory to host all instances it is
2111 # supposed to, should a single other node in the cluster fail.
2112 # FIXME: not ready for failover to an arbitrary node
2113 # FIXME: does not support file-backed instances
2114 # WARNING: we currently take into account down instances as well
2115 # as up ones, considering that even if they're down someone
2116 # might want to start them even in the event of a node failure.
2117 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2118 # we're skipping nodes marked offline and nodes in other groups from
2119 # the N+1 warning, since most likely we don't have good memory
2120 # information from them; we already list instances living on such
2121 # nodes, and that's enough warning
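# Illustrative example (not from any real configuration): if this node is
# secondary for two auto-balanced instances of the same primary, needing
# 2048 and 1024 MiB of memory, it must have at least 3072 MiB free to
# absorb a failover of that primary.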
2123 for prinode, instances in n_img.sbp.items():
2125 for instance in instances:
2126 bep = cluster_info.FillBE(instance_cfg[instance])
2127 if bep[constants.BE_AUTO_BALANCE]:
2128 needed_mem += bep[constants.BE_MEMORY]
2129 test = n_img.mfree < needed_mem
2130 self._ErrorIf(test, self.ENODEN1, node,
2131 "not enough memory to accomodate instance failovers"
2132 " should node %s fail (%dMiB needed, %dMiB available)",
2133 prinode, needed_mem, n_img.mfree)
2136 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2137 (files_all, files_opt, files_mc, files_vm)):
2138 """Verifies file checksums collected from all nodes.
2140 @param errorif: Callback for reporting errors
2141 @param nodeinfo: List of L{objects.Node} objects
2142 @param master_node: Name of master node
2143 @param all_nvinfo: RPC results
2146 # Define functions determining which nodes to consider for a file
2149 (files_mc, lambda node: (node.master_candidate or
2150 node.name == master_node)),
2151 (files_vm, lambda node: node.vm_capable),
2154 # Build mapping from filename to list of nodes which should have the file
2156 for (files, fn) in files2nodefn:
2158 filenodes = nodeinfo
2160 filenodes = filter(fn, nodeinfo)
2161 nodefiles.update((filename,
2162 frozenset(map(operator.attrgetter("name"), filenodes)))
2163 for filename in files)
2165 assert set(nodefiles) == (files_all | files_mc | files_vm)
2167 fileinfo = dict((filename, {}) for filename in nodefiles)
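# fileinfo maps each filename to {checksum: set of node names}; a file
# ending up with more than one checksum key means the nodes disagree about
# its contents, which is reported further down.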
2168 ignore_nodes = set()
2170 for node in nodeinfo:
2172 ignore_nodes.add(node.name)
2175 nresult = all_nvinfo[node.name]
2177 if nresult.fail_msg or not nresult.payload:
2180 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2182 test = not (node_files and isinstance(node_files, dict))
2183 errorif(test, cls.ENODEFILECHECK, node.name,
2184 "Node did not return file checksum data")
2186 ignore_nodes.add(node.name)
2189 # Build per-checksum mapping from filename to nodes having it
2190 for (filename, checksum) in node_files.items():
2191 assert filename in nodefiles
2192 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2194 for (filename, checksums) in fileinfo.items():
2195 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2197 # Nodes having the file
2198 with_file = frozenset(node_name
2199 for nodes in fileinfo[filename].values()
2200 for node_name in nodes) - ignore_nodes
2202 expected_nodes = nodefiles[filename] - ignore_nodes
2204 # Nodes missing file
2205 missing_file = expected_nodes - with_file
2207 if filename in files_opt:
2209 errorif(missing_file and missing_file != expected_nodes,
2210 cls.ECLUSTERFILECHECK, None,
2211 "File %s is optional, but it must exist on all or no"
2212 " nodes (not found on %s)",
2213 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2215 # Non-optional files
2216 errorif(missing_file, cls.ECLUSTERFILECHECK, None,
2217 "File %s is missing from node(s) %s", filename,
2218 utils.CommaJoin(utils.NiceSort(missing_file)))
2220 # Warn if a node has a file it shouldn't
2221 unexpected = with_file - expected_nodes
2223 cls.ECLUSTERFILECHECK, None,
2224 "File %s should not exist on node(s) %s",
2225 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2227 # See if there are multiple versions of the file
2228 test = len(checksums) > 1
2230 variants = ["variant %s on %s" %
2231 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2232 for (idx, (checksum, nodes)) in
2233 enumerate(sorted(checksums.items()))]
2237 errorif(test, cls.ECLUSTERFILECHECK, None,
2238 "File %s found with %s different checksums (%s)",
2239 filename, len(checksums), "; ".join(variants))
2241 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2243 """Verifies and the node DRBD status.
2245 @type ninfo: L{objects.Node}
2246 @param ninfo: the node to check
2247 @param nresult: the remote results for the node
2248 @param instanceinfo: the dict of instances
2249 @param drbd_helper: the configured DRBD usermode helper
2250 @param drbd_map: the DRBD map as returned by
2251 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2255 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2258 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2259 test = (helper_result is None)
2260 _ErrorIf(test, self.ENODEDRBDHELPER, node,
2261 "no drbd usermode helper returned")
2263 status, payload = helper_result
2265 _ErrorIf(test, self.ENODEDRBDHELPER, node,
2266 "drbd usermode helper check unsuccessful: %s", payload)
2267 test = status and (payload != drbd_helper)
2268 _ErrorIf(test, self.ENODEDRBDHELPER, node,
2269 "wrong drbd usermode helper: %s", payload)
2271 # compute the DRBD minors
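# node_drbd maps each minor assigned to this node in the cluster-wide DRBD
# map to (instance name, whether the minor must be active), so unallocated
# or inactive minors can be flagged below.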
2273 for minor, instance in drbd_map[node].items():
2274 test = instance not in instanceinfo
2275 _ErrorIf(test, self.ECLUSTERCFG, None,
2276 "ghost instance '%s' in temporary DRBD map", instance)
2277 # ghost instance should not be running, but otherwise we
2278 # don't give double warnings (both ghost instance and
2279 # unallocated minor in use)
2281 node_drbd[minor] = (instance, False)
2283 instance = instanceinfo[instance]
2284 node_drbd[minor] = (instance.name, instance.admin_up)
2286 # and now check them
2287 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2288 test = not isinstance(used_minors, (tuple, list))
2289 _ErrorIf(test, self.ENODEDRBD, node,
2290 "cannot parse drbd status file: %s", str(used_minors))
2292 # we cannot check drbd status
2295 for minor, (iname, must_exist) in node_drbd.items():
2296 test = minor not in used_minors and must_exist
2297 _ErrorIf(test, self.ENODEDRBD, node,
2298 "drbd minor %d of instance %s is not active", minor, iname)
2299 for minor in used_minors:
2300 test = minor not in node_drbd
2301 _ErrorIf(test, self.ENODEDRBD, node,
2302 "unallocated drbd minor %d is in use", minor)
2304 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2305 """Builds the node OS structures.
2307 @type ninfo: L{objects.Node}
2308 @param ninfo: the node to check
2309 @param nresult: the remote results for the node
2310 @param nimg: the node image object
2314 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2316 remote_os = nresult.get(constants.NV_OSLIST, None)
2317 test = (not isinstance(remote_os, list) or
2318 not compat.all(isinstance(v, list) and len(v) == 7
2319 for v in remote_os))
2321 _ErrorIf(test, self.ENODEOS, node,
2322 "node hasn't returned valid OS data")
2331 for (name, os_path, status, diagnose,
2332 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2334 if name not in os_dict:
2337 # parameters is a list of lists instead of a list of tuples due to
2338 # JSON lacking a real tuple type, fix it:
2339 parameters = [tuple(v) for v in parameters]
2340 os_dict[name].append((os_path, status, diagnose,
2341 set(variants), set(parameters), set(api_ver)))
2343 nimg.oslist = os_dict
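# nimg.oslist: OS name -> list of (path, status, diagnose, variants,
# parameters, api_versions) tuples, one entry per location the OS was
# found in on this node; only the first entry per name is effective.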
2345 def _VerifyNodeOS(self, ninfo, nimg, base):
2346 """Verifies the node OS list.
2348 @type ninfo: L{objects.Node}
2349 @param ninfo: the node to check
2350 @param nimg: the node image object
2351 @param base: the 'template' node we match against (e.g. from the master)
2355 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2357 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2359 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2360 for os_name, os_data in nimg.oslist.items():
2361 assert os_data, "Empty OS status for OS %s?!" % os_name
2362 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2363 _ErrorIf(not f_status, self.ENODEOS, node,
2364 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2365 _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
2366 "OS '%s' has multiple entries (first one shadows the rest): %s",
2367 os_name, utils.CommaJoin([v[0] for v in os_data]))
2368 # comparisons with the 'base' image
2369 test = os_name not in base.oslist
2370 _ErrorIf(test, self.ENODEOS, node,
2371 "Extra OS %s not present on reference node (%s)",
2375 assert base.oslist[os_name], "Base node has empty OS status?"
2376 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2378 # base OS is invalid, skipping
2380 for kind, a, b in [("API version", f_api, b_api),
2381 ("variants list", f_var, b_var),
2382 ("parameters", beautify_params(f_param),
2383 beautify_params(b_param))]:
2384 _ErrorIf(a != b, self.ENODEOS, node,
2385 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2386 kind, os_name, base.name,
2387 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2389 # check any missing OSes
2390 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2391 _ErrorIf(missing, self.ENODEOS, node,
2392 "OSes present on reference node %s but missing on this node: %s",
2393 base.name, utils.CommaJoin(missing))
2395 def _VerifyOob(self, ninfo, nresult):
2396 """Verifies out of band functionality of a node.
2398 @type ninfo: L{objects.Node}
2399 @param ninfo: the node to check
2400 @param nresult: the remote results for the node
2404 # We just have to verify the paths on master and/or master candidates
2405 # as the oob helper is invoked on the master
2406 if ((ninfo.master_candidate or ninfo.master_capable) and
2407 constants.NV_OOB_PATHS in nresult):
2408 for path_result in nresult[constants.NV_OOB_PATHS]:
2409 self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
2411 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2412 """Verifies and updates the node volume data.
2414 This function will update a L{NodeImage}'s internal structures
2415 with data from the remote call.
2417 @type ninfo: L{objects.Node}
2418 @param ninfo: the node to check
2419 @param nresult: the remote results for the node
2420 @param nimg: the node image object
2421 @param vg_name: the configured VG name
2425 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2427 nimg.lvm_fail = True
2428 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2431 elif isinstance(lvdata, basestring):
2432 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
2433 utils.SafeEncode(lvdata))
2434 elif not isinstance(lvdata, dict):
2435 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
2437 nimg.volumes = lvdata
2438 nimg.lvm_fail = False
2440 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2441 """Verifies and updates the node instance list.
2443 If the listing was successful, then updates this node's instance
2444 list. Otherwise, it marks the RPC call as failed for the instance
2447 @type ninfo: L{objects.Node}
2448 @param ninfo: the node to check
2449 @param nresult: the remote results for the node
2450 @param nimg: the node image object
2453 idata = nresult.get(constants.NV_INSTANCELIST, None)
2454 test = not isinstance(idata, list)
2455 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
2456 " (instancelist): %s", utils.SafeEncode(str(idata)))
2458 nimg.hyp_fail = True
2460 nimg.instances = idata
2462 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2463 """Verifies and computes a node information map
2465 @type ninfo: L{objects.Node}
2466 @param ninfo: the node to check
2467 @param nresult: the remote results for the node
2468 @param nimg: the node image object
2469 @param vg_name: the configured VG name
2473 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2475 # try to read free memory (from the hypervisor)
2476 hv_info = nresult.get(constants.NV_HVINFO, None)
2477 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2478 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
2481 nimg.mfree = int(hv_info["memory_free"])
2482 except (ValueError, TypeError):
2483 _ErrorIf(True, self.ENODERPC, node,
2484 "node returned invalid nodeinfo, check hypervisor")
2486 # FIXME: devise a free space model for file based instances as well
2487 if vg_name is not None:
2488 test = (constants.NV_VGLIST not in nresult or
2489 vg_name not in nresult[constants.NV_VGLIST])
2490 _ErrorIf(test, self.ENODELVM, node,
2491 "node didn't return data for the volume group '%s'"
2492 " - it is either missing or broken", vg_name)
2495 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2496 except (ValueError, TypeError):
2497 _ErrorIf(True, self.ENODERPC, node,
2498 "node returned invalid LVM info, check LVM status")
2500 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2501 """Gets per-disk status information for all instances.
2503 @type nodelist: list of strings
2504 @param nodelist: Node names
2505 @type node_image: dict of (name, L{objects.Node})
2506 @param node_image: Node objects
2507 @type instanceinfo: dict of (name, L{objects.Instance})
2508 @param instanceinfo: Instance objects
2509 @rtype: {instance: {node: [(success, payload)]}}
2510 @return: a dictionary of per-instance dictionaries with nodes as
2511 keys and disk information as values; the disk information is a
2512 list of tuples (success, payload)
2515 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2518 node_disks_devonly = {}
2519 diskless_instances = set()
2520 diskless = constants.DT_DISKLESS
2522 for nname in nodelist:
2523 node_instances = list(itertools.chain(node_image[nname].pinst,
2524 node_image[nname].sinst))
2525 diskless_instances.update(inst for inst in node_instances
2526 if instanceinfo[inst].disk_template == diskless)
2527 disks = [(inst, disk)
2528 for inst in node_instances
2529 for disk in instanceinfo[inst].disks]
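# disks pairs every disk residing on this node with its owning instance:
# a list of (instance name, disk object) tuples.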
2532 # No need to collect data
2535 node_disks[nname] = disks
2537 # Creating copies as SetDiskID below will modify the objects and that can
2538 # lead to incorrect data returned from nodes
2539 devonly = [dev.Copy() for (_, dev) in disks]
2542 self.cfg.SetDiskID(dev, nname)
2544 node_disks_devonly[nname] = devonly
2546 assert len(node_disks) == len(node_disks_devonly)
2548 # Collect data from all nodes with disks
2549 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2552 assert len(result) == len(node_disks)
2556 for (nname, nres) in result.items():
2557 disks = node_disks[nname]
2560 # No data from this node
2561 data = len(disks) * [(False, "node offline")]
2564 _ErrorIf(msg, self.ENODERPC, nname,
2565 "while getting disk information: %s", msg)
2567 # No data from this node
2568 data = len(disks) * [(False, msg)]
2571 for idx, i in enumerate(nres.payload):
2572 if isinstance(i, (tuple, list)) and len(i) == 2:
2575 logging.warning("Invalid result from node %s, entry %d: %s",
2577 data.append((False, "Invalid result from the remote node"))
2579 for ((inst, _), status) in zip(disks, data):
2580 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2582 # Add empty entries for diskless instances.
2583 for inst in diskless_instances:
2584 assert inst not in instdisk
2587 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2588 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2589 compat.all(isinstance(s, (tuple, list)) and
2590 len(s) == 2 for s in statuses)
2591 for inst, nnames in instdisk.items()
2592 for nname, statuses in nnames.items())
2593 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2598 def _SshNodeSelector(group_uuid, all_nodes):
2599 """Create endless iterators for all potential SSH check hosts.
2602 nodes = [node for node in all_nodes
2603 if (node.group != group_uuid and
2605 keyfunc = operator.attrgetter("group")
2607 return map(itertools.cycle,
2608 [sorted(map(operator.attrgetter("name"), names))
2609 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2613 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2614 """Choose which nodes should talk to which other nodes.
2616 We will make nodes contact all nodes in their group, and one node from
2619 @warning: This algorithm has a known issue if one node group is much
2620 smaller than others (e.g. just one node). In such a case all other
2621 nodes will talk to the single node.
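Illustrative example: with groups A = {node1, node2} and B = {node3},
verifying group A makes node1 and node2 each check SSH connectivity to all
online nodes of A plus one node of B, cycling through B's members so the
cross-group checks are spread out.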
2624 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2625 sel = cls._SshNodeSelector(group_uuid, all_nodes)
2627 return (online_nodes,
2628 dict((name, sorted([i.next() for i in sel]))
2629 for name in online_nodes))
2631 def BuildHooksEnv(self):
2634 Cluster-Verify hooks just ran in the post phase and their failure makes
2635 the output be logged in the verify output and the verification to fail.
2639 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2642 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2643 for node in self.my_node_info.values())
2647 def BuildHooksNodes(self):
2648 """Build hooks nodes.
2651 return ([], self.my_node_names)
2653 def Exec(self, feedback_fn):
2654 """Verify integrity of the node group, performing various test on nodes.
2657 # This method has too many local variables. pylint: disable=R0914
2658 feedback_fn("* Verifying group '%s'" % self.group_info.name)
2660 if not self.my_node_names:
2662 feedback_fn("* Empty node group, skipping verification")
2666 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2667 verbose = self.op.verbose
2668 self._feedback_fn = feedback_fn
2670 vg_name = self.cfg.GetVGName()
2671 drbd_helper = self.cfg.GetDRBDHelper()
2672 cluster = self.cfg.GetClusterInfo()
2673 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2674 hypervisors = cluster.enabled_hypervisors
2675 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2677 i_non_redundant = [] # Non redundant instances
2678 i_non_a_balanced = [] # Non auto-balanced instances
2679 n_offline = 0 # Count of offline nodes
2680 n_drained = 0 # Count of nodes being drained
2681 node_vol_should = {}
2683 # FIXME: verify OS list
2686 filemap = _ComputeAncillaryFiles(cluster, False)
2688 # do local checksums
2689 master_node = self.master_node = self.cfg.GetMasterNode()
2690 master_ip = self.cfg.GetMasterIP()
2692 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2694 node_verify_param = {
2695 constants.NV_FILELIST:
2696 utils.UniqueSequence(filename
2697 for files in filemap
2698 for filename in files),
2699 constants.NV_NODELIST:
2700 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
2701 self.all_node_info.values()),
2702 constants.NV_HYPERVISOR: hypervisors,
2703 constants.NV_HVPARAMS:
2704 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2705 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2706 for node in node_data_list
2707 if not node.offline],
2708 constants.NV_INSTANCELIST: hypervisors,
2709 constants.NV_VERSION: None,
2710 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2711 constants.NV_NODESETUP: None,
2712 constants.NV_TIME: None,
2713 constants.NV_MASTERIP: (master_node, master_ip),
2714 constants.NV_OSLIST: None,
2715 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2718 if vg_name is not None:
2719 node_verify_param[constants.NV_VGLIST] = None
2720 node_verify_param[constants.NV_LVLIST] = vg_name
2721 node_verify_param[constants.NV_PVLIST] = [vg_name]
2722 node_verify_param[constants.NV_DRBDLIST] = None
2725 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2728 # FIXME: this needs to be changed per node-group, not cluster-wide
2730 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2731 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2732 bridges.add(default_nicpp[constants.NIC_LINK])
2733 for instance in self.my_inst_info.values():
2734 for nic in instance.nics:
2735 full_nic = cluster.SimpleFillNIC(nic.nicparams)
2736 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2737 bridges.add(full_nic[constants.NIC_LINK])
2740 node_verify_param[constants.NV_BRIDGES] = list(bridges)
2742 # Build our expected cluster state
2743 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2745 vm_capable=node.vm_capable))
2746 for node in node_data_list)
2750 for node in self.all_node_info.values():
2751 path = _SupportsOob(self.cfg, node)
2752 if path and path not in oob_paths:
2753 oob_paths.append(path)
2756 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2758 for instance in self.my_inst_names:
2759 inst_config = self.my_inst_info[instance]
2761 for nname in inst_config.all_nodes:
2762 if nname not in node_image:
2763 gnode = self.NodeImage(name=nname)
2764 gnode.ghost = (nname not in self.all_node_info)
2765 node_image[nname] = gnode
2767 inst_config.MapLVsByNode(node_vol_should)
2769 pnode = inst_config.primary_node
2770 node_image[pnode].pinst.append(instance)
2772 for snode in inst_config.secondary_nodes:
2773 nimg = node_image[snode]
2774 nimg.sinst.append(instance)
2775 if pnode not in nimg.sbp:
2776 nimg.sbp[pnode] = []
2777 nimg.sbp[pnode].append(instance)
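# sbp on the secondary node's image maps a primary node name to the
# instances for which this node acts as secondary; it later drives the
# N+1 memory check.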
2779 # At this point, we have the in-memory data structures complete,
2780 # except for the runtime information, which we'll gather next
2782 # Due to the way our RPC system works, exact response times cannot be
2783 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2784 # time before and after executing the request, we can at least have a time
2786 nvinfo_starttime = time.time()
2787 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2789 self.cfg.GetClusterName())
2790 nvinfo_endtime = time.time()
2792 if self.extra_lv_nodes and vg_name is not None:
2794 self.rpc.call_node_verify(self.extra_lv_nodes,
2795 {constants.NV_LVLIST: vg_name},
2796 self.cfg.GetClusterName())
2798 extra_lv_nvinfo = {}
2800 all_drbd_map = self.cfg.ComputeDRBDMap()
2802 feedback_fn("* Gathering disk information (%s nodes)" %
2803 len(self.my_node_names))
2804 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2807 feedback_fn("* Verifying configuration file consistency")
2809 # If not all nodes are being checked, we need to make sure the master node
2810 # and a non-checked vm_capable node are in the list.
2811 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2813 vf_nvinfo = all_nvinfo.copy()
2814 vf_node_info = list(self.my_node_info.values())
2815 additional_nodes = []
2816 if master_node not in self.my_node_info:
2817 additional_nodes.append(master_node)
2818 vf_node_info.append(self.all_node_info[master_node])
2819 # Add the first vm_capable node we find which is not included
2820 for node in absent_nodes:
2821 nodeinfo = self.all_node_info[node]
2822 if nodeinfo.vm_capable and not nodeinfo.offline:
2823 additional_nodes.append(node)
2824 vf_node_info.append(self.all_node_info[node])
2826 key = constants.NV_FILELIST
2827 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2828 {key: node_verify_param[key]},
2829 self.cfg.GetClusterName()))
2831 vf_nvinfo = all_nvinfo
2832 vf_node_info = self.my_node_info.values()
2834 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2836 feedback_fn("* Verifying node status")
2840 for node_i in node_data_list:
2842 nimg = node_image[node]
2846 feedback_fn("* Skipping offline node %s" % (node,))
2850 if node == master_node:
2852 elif node_i.master_candidate:
2853 ntype = "master candidate"
2854 elif node_i.drained:
2860 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2862 msg = all_nvinfo[node].fail_msg
2863 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2865 nimg.rpc_fail = True
2868 nresult = all_nvinfo[node].payload
2870 nimg.call_ok = self._VerifyNode(node_i, nresult)
2871 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2872 self._VerifyNodeNetwork(node_i, nresult)
2873 self._VerifyOob(node_i, nresult)
2876 self._VerifyNodeLVM(node_i, nresult, vg_name)
2877 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2880 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2881 self._UpdateNodeInstances(node_i, nresult, nimg)
2882 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2883 self._UpdateNodeOS(node_i, nresult, nimg)
2885 if not nimg.os_fail:
2886 if refos_img is None:
2888 self._VerifyNodeOS(node_i, nimg, refos_img)
2889 self._VerifyNodeBridges(node_i, nresult, bridges)
2891 # Check whether all running instances are primary for the node. (This
2892 # can no longer be done from _VerifyInstance below, since some of the
2893 # wrong instances could be from other node groups.)
2894 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2896 for inst in non_primary_inst:
2897 test = inst in self.all_inst_info
2898 _ErrorIf(test, self.EINSTANCEWRONGNODE, inst,
2899 "instance should not run on node %s", node_i.name)
2900 _ErrorIf(not test, self.ENODEORPHANINSTANCE, node_i.name,
2901 "node is running unknown instance %s", inst)
2903 for node, result in extra_lv_nvinfo.items():
2904 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
2905 node_image[node], vg_name)
2907 feedback_fn("* Verifying instance status")
2908 for instance in self.my_inst_names:
2910 feedback_fn("* Verifying instance %s" % instance)
2911 inst_config = self.my_inst_info[instance]
2912 self._VerifyInstance(instance, inst_config, node_image,
2914 inst_nodes_offline = []
2916 pnode = inst_config.primary_node
2917 pnode_img = node_image[pnode]
2918 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2919 self.ENODERPC, pnode, "instance %s, connection to"
2920 " primary node failed", instance)
2922 _ErrorIf(inst_config.admin_up and pnode_img.offline,
2923 self.EINSTANCEBADNODE, instance,
2924 "instance is marked as running and lives on offline node %s",
2925 inst_config.primary_node)
2927 # If the instance is non-redundant we cannot survive losing its primary
2928 # node, so we are not N+1 compliant. On the other hand we have no disk
2929 # templates with more than one secondary so that situation is not well
2931 # FIXME: does not support file-backed instances
2932 if not inst_config.secondary_nodes:
2933 i_non_redundant.append(instance)
2935 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2936 instance, "instance has multiple secondary nodes: %s",
2937 utils.CommaJoin(inst_config.secondary_nodes),
2938 code=self.ETYPE_WARNING)
2940 if inst_config.disk_template in constants.DTS_INT_MIRROR:
2941 pnode = inst_config.primary_node
2942 instance_nodes = utils.NiceSort(inst_config.all_nodes)
2943 instance_groups = {}
2945 for node in instance_nodes:
2946 instance_groups.setdefault(self.all_node_info[node].group,
2950 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2951 # Sort so that we always list the primary node first.
2952 for group, nodes in sorted(instance_groups.items(),
2953 key=lambda (_, nodes): pnode in nodes,
2956 self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2957 instance, "instance has primary and secondary nodes in"
2958 " different groups: %s", utils.CommaJoin(pretty_list),
2959 code=self.ETYPE_WARNING)
2961 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2962 i_non_a_balanced.append(instance)
2964 for snode in inst_config.secondary_nodes:
2965 s_img = node_image[snode]
2966 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2967 "instance %s, connection to secondary node failed", instance)
2970 inst_nodes_offline.append(snode)
2972 # warn that the instance lives on offline nodes
2973 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2974 "instance has offline secondary node(s) %s",
2975 utils.CommaJoin(inst_nodes_offline))
2976 # ... or ghost/non-vm_capable nodes
2977 for node in inst_config.all_nodes:
2978 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2979 "instance lives on ghost node %s", node)
2980 _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2981 instance, "instance lives on non-vm_capable node %s", node)
2983 feedback_fn("* Verifying orphan volumes")
2984 reserved = utils.FieldSet(*cluster.reserved_lvs)
2986 # We will get spurious "unknown volume" warnings if any node of this group
2987 # is secondary for an instance whose primary is in another group. To avoid
2988 # them, we find these instances and add their volumes to node_vol_should.
2989 for inst in self.all_inst_info.values():
2990 for secondary in inst.secondary_nodes:
2991 if (secondary in self.my_node_info
2992 and inst.name not in self.my_inst_info):
2993 inst.MapLVsByNode(node_vol_should)
2996 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2998 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2999 feedback_fn("* Verifying N+1 Memory redundancy")
3000 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3002 feedback_fn("* Other Notes")
3004 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3005 % len(i_non_redundant))
3007 if i_non_a_balanced:
3008 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3009 % len(i_non_a_balanced))
3012 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3015 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3019 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3020 """Analyze the post-hooks' result
3022 This method analyses the hook result, handles it, and sends some
3023 nicely-formatted feedback back to the user.
3025 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3026 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3027 @param hooks_results: the results of the multi-node hooks rpc call
3028 @param feedback_fn: function used to send feedback back to the caller
3029 @param lu_result: previous Exec result
3030 @return: the new Exec result, based on the previous result
3034 # We only really run POST phase hooks, only for non-empty groups,
3035 # and are only interested in their results
3036 if not self.my_node_names:
3039 elif phase == constants.HOOKS_PHASE_POST:
3040 # Used to change hooks' output to proper indentation
3041 feedback_fn("* Hooks Results")
3042 assert hooks_results, "invalid result from hooks"
3044 for node_name in hooks_results:
3045 res = hooks_results[node_name]
3047 test = msg and not res.offline
3048 self._ErrorIf(test, self.ENODEHOOKS, node_name,
3049 "Communication failure in hooks execution: %s", msg)
3050 if res.offline or msg:
3051 # No need to investigate payload if node is offline or gave
3054 for script, hkr, output in res.payload:
3055 test = hkr == constants.HKR_FAIL
3056 self._ErrorIf(test, self.ENODEHOOKS, node_name,
3057 "Script %s failed, output:", script)
3059 output = self._HOOKS_INDENT_RE.sub(" ", output)
3060 feedback_fn("%s" % output)
3066 class LUClusterVerifyDisks(NoHooksLU):
3067 """Verifies the cluster disks status.
3072 def ExpandNames(self):
3073 self.share_locks = _ShareAll()
3074 self.needed_locks = {
3075 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3078 def Exec(self, feedback_fn):
3079 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3081 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3082 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3083 for group in group_names])
3086 class LUGroupVerifyDisks(NoHooksLU):
3087 """Verifies the status of all disks in a node group.
3092 def ExpandNames(self):
3093 # Raises errors.OpPrereqError on its own if group can't be found
3094 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3096 self.share_locks = _ShareAll()
3097 self.needed_locks = {
3098 locking.LEVEL_INSTANCE: [],
3099 locking.LEVEL_NODEGROUP: [],
3100 locking.LEVEL_NODE: [],
3103 def DeclareLocks(self, level):
3104 if level == locking.LEVEL_INSTANCE:
3105 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3107 # Lock instances optimistically, needs verification once node and group
3108 # locks have been acquired
3109 self.needed_locks[locking.LEVEL_INSTANCE] = \
3110 self.cfg.GetNodeGroupInstances(self.group_uuid)
3112 elif level == locking.LEVEL_NODEGROUP:
3113 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3115 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3116 set([self.group_uuid] +
3117 # Lock all groups used by instances optimistically; this requires
3118 # going via the node before it's locked, requiring verification
3121 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3122 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3124 elif level == locking.LEVEL_NODE:
3125 # This will only lock the nodes in the group to be verified which contain
3127 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3128 self._LockInstancesNodes()
3130 # Lock all nodes in group to be verified
3131 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3132 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3133 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3135 def CheckPrereq(self):
3136 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3137 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3138 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3140 assert self.group_uuid in owned_groups
3142 # Check if locked instances are still correct
3143 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3145 # Get instance information
3146 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3148 # Check if node groups for locked instances are still correct
3149 _CheckInstancesNodeGroups(self.cfg, self.instances,
3150 owned_groups, owned_nodes, self.group_uuid)
3152 def Exec(self, feedback_fn):
3153 """Verify integrity of cluster disks.
3155 @rtype: tuple of three items
3156 @return: a tuple of (dict of node-to-node_error, list of instances
3157 which need activate-disks, dict of instance: (node, volume) for
3162 res_instances = set()
3165 nv_dict = _MapInstanceDisksToNodes([inst
3166 for inst in self.instances.values()
3170 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3171 set(self.cfg.GetVmCapableNodeList()))
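# nv_dict maps (node name, LV name) to the owning instance; any entry still
# left after the LV scan below was not reported by a node and is therefore
# a missing volume.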
3173 node_lvs = self.rpc.call_lv_list(nodes, [])
3175 for (node, node_res) in node_lvs.items():
3176 if node_res.offline:
3179 msg = node_res.fail_msg
3181 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3182 res_nodes[node] = msg
3185 for lv_name, (_, _, lv_online) in node_res.payload.items():
3186 inst = nv_dict.pop((node, lv_name), None)
3187 if not (lv_online or inst is None):
3188 res_instances.add(inst)
3190 # any leftover items in nv_dict are missing LVs, let's arrange the data
3192 for key, inst in nv_dict.iteritems():
3193 res_missing.setdefault(inst, []).append(list(key))
3195 return (res_nodes, list(res_instances), res_missing)
3198 class LUClusterRepairDiskSizes(NoHooksLU):
3199 """Verifies the cluster disks sizes.
3204 def ExpandNames(self):
3205 if self.op.instances:
3206 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3207 self.needed_locks = {
3208 locking.LEVEL_NODE: [],
3209 locking.LEVEL_INSTANCE: self.wanted_names,
3211 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3213 self.wanted_names = None
3214 self.needed_locks = {
3215 locking.LEVEL_NODE: locking.ALL_SET,
3216 locking.LEVEL_INSTANCE: locking.ALL_SET,
3218 self.share_locks = {
3219 locking.LEVEL_NODE: 1,
3220 locking.LEVEL_INSTANCE: 0,
3223 def DeclareLocks(self, level):
3224 if level == locking.LEVEL_NODE and self.wanted_names is not None:
3225 self._LockInstancesNodes(primary_only=True)
3227 def CheckPrereq(self):
3228 """Check prerequisites.
3230 This only checks the optional instance list against the existing names.
3233 if self.wanted_names is None:
3234 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3236 self.wanted_instances = \
3237 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3239 def _EnsureChildSizes(self, disk):
3240 """Ensure children of the disk have the needed disk size.
3242 This is valid mainly for DRBD8 and fixes an issue where the
3243 children have a smaller disk size than the parent.
3245 @param disk: an L{ganeti.objects.Disk} object
3248 if disk.dev_type == constants.LD_DRBD8:
3249 assert disk.children, "Empty children for DRBD8?"
3250 fchild = disk.children[0]
3251 mismatch = fchild.size < disk.size
3253 self.LogInfo("Child disk has size %d, parent %d, fixing",
3254 fchild.size, disk.size)
3255 fchild.size = disk.size
3257 # and we recurse on this child only, not on the metadev
3258 return self._EnsureChildSizes(fchild) or mismatch
3262 def Exec(self, feedback_fn):
3263 """Verify the size of cluster disks.
3266 # TODO: check child disks too
3267 # TODO: check differences in size between primary/secondary nodes
3269 for instance in self.wanted_instances:
3270 pnode = instance.primary_node
3271 if pnode not in per_node_disks:
3272 per_node_disks[pnode] = []
3273 for idx, disk in enumerate(instance.disks):
3274 per_node_disks[pnode].append((instance, idx, disk))
3277 for node, dskl in per_node_disks.items():
3278 newl = [v[2].Copy() for v in dskl]
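# Work on copies of the disk objects: SetDiskID modifies them in place
# before the RPC call (same reasoning as in _CollectDiskInfo above).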
3280 self.cfg.SetDiskID(dsk, node)
3281 result = self.rpc.call_blockdev_getsize(node, newl)
3283 self.LogWarning("Failure in blockdev_getsize call to node"
3284 " %s, ignoring", node)
3286 if len(result.payload) != len(dskl):
3287 logging.warning("Invalid result from node %s: len(dksl)=%d,"
3288 " result.payload=%s", node, len(dskl), result.payload)
3289 self.LogWarning("Invalid result from node %s, ignoring node results",
3292 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3294 self.LogWarning("Disk %d of instance %s did not return size"
3295 " information, ignoring", idx, instance.name)
3297 if not isinstance(size, (int, long)):
3298 self.LogWarning("Disk %d of instance %s did not return valid"
3299 " size information, ignoring", idx, instance.name)
3302 if size != disk.size:
3303 self.LogInfo("Disk %d of instance %s has mismatched size,"
3304 " correcting: recorded %d, actual %d", idx,
3305 instance.name, disk.size, size)
3307 self.cfg.Update(instance, feedback_fn)
3308 changed.append((instance.name, idx, size))
3309 if self._EnsureChildSizes(disk):
3310 self.cfg.Update(instance, feedback_fn)
3311 changed.append((instance.name, idx, disk.size))
3315 class LUClusterRename(LogicalUnit):
3316 """Rename the cluster.
3319 HPATH = "cluster-rename"
3320 HTYPE = constants.HTYPE_CLUSTER
3322 def BuildHooksEnv(self):
3327 "OP_TARGET": self.cfg.GetClusterName(),
3328 "NEW_NAME": self.op.name,
3331 def BuildHooksNodes(self):
3332 """Build hooks nodes.
3335 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3337 def CheckPrereq(self):
3338 """Verify that the passed name is a valid one.
3341 hostname = netutils.GetHostname(name=self.op.name,
3342 family=self.cfg.GetPrimaryIPFamily())
3344 new_name = hostname.name
3345 self.ip = new_ip = hostname.ip
3346 old_name = self.cfg.GetClusterName()
3347 old_ip = self.cfg.GetMasterIP()
3348 if new_name == old_name and new_ip == old_ip:
3349 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3350 " cluster has changed",
3352 if new_ip != old_ip:
3353 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3354 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3355 " reachable on the network" %
3356 new_ip, errors.ECODE_NOTUNIQUE)
3358 self.op.name = new_name
3360 def Exec(self, feedback_fn):
3361 """Rename the cluster.
3364 clustername = self.op.name
3367 # shutdown the master IP
3368 master = self.cfg.GetMasterNode()
3369 result = self.rpc.call_node_deactivate_master_ip(master)
3370 result.Raise("Could not disable the master role")
3373 cluster = self.cfg.GetClusterInfo()
3374 cluster.cluster_name = clustername
3375 cluster.master_ip = ip
3376 self.cfg.Update(cluster, feedback_fn)
3378 # update the known hosts file
3379 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3380 node_list = self.cfg.GetOnlineNodeList()
3382 node_list.remove(master)
3385 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3387 result = self.rpc.call_node_activate_master_ip(master)
3388 msg = result.fail_msg
3390 self.LogWarning("Could not re-enable the master role on"
3391 " the master, please restart manually: %s", msg)
3396 class LUClusterSetParams(LogicalUnit):
3397 """Change the parameters of the cluster.
3400 HPATH = "cluster-modify"
3401 HTYPE = constants.HTYPE_CLUSTER
3404 def CheckArguments(self):
3408 if self.op.uid_pool:
3409 uidpool.CheckUidPool(self.op.uid_pool)
3411 if self.op.add_uids:
3412 uidpool.CheckUidPool(self.op.add_uids)
3414 if self.op.remove_uids:
3415 uidpool.CheckUidPool(self.op.remove_uids)
3417 def ExpandNames(self):
3418 # FIXME: in the future, modifying other cluster params may not require
3419 # checking on all nodes.
3420 self.needed_locks = {
3421 locking.LEVEL_NODE: locking.ALL_SET,
3423 self.share_locks[locking.LEVEL_NODE] = 1
3425 def BuildHooksEnv(self):
3430 "OP_TARGET": self.cfg.GetClusterName(),
3431 "NEW_VG_NAME": self.op.vg_name,
3434 def BuildHooksNodes(self):
3435 """Build hooks nodes.
3438 mn = self.cfg.GetMasterNode()
3441 def CheckPrereq(self):
3442 """Check prerequisites.
3444 This checks that the given parameters don't conflict and
3445 that the given volume group is valid.
3448 if self.op.vg_name is not None and not self.op.vg_name:
3449 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3450 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3451 " instances exist", errors.ECODE_INVAL)
3453 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3454 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3455 raise errors.OpPrereqError("Cannot disable drbd helper while"
3456 " drbd-based instances exist",
3459 node_list = self.owned_locks(locking.LEVEL_NODE)
3461 # if vg_name is not None, check the given volume group on all nodes
3463 vglist = self.rpc.call_vg_list(node_list)
3464 for node in node_list:
3465 msg = vglist[node].fail_msg
3467 # ignoring down node
3468 self.LogWarning("Error while gathering data on node %s"
3469 " (ignoring node): %s", node, msg)
3471 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3473 constants.MIN_VG_SIZE)
3475 raise errors.OpPrereqError("Error on node '%s': %s" %
3476 (node, vgstatus), errors.ECODE_ENVIRON)
3478 if self.op.drbd_helper:
3479 # checks given drbd helper on all nodes
3480 helpers = self.rpc.call_drbd_helper(node_list)
3481 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3483 self.LogInfo("Not checking drbd helper on offline node %s", node)
3485 msg = helpers[node].fail_msg
3487 raise errors.OpPrereqError("Error checking drbd helper on node"
3488 " '%s': %s" % (node, msg),
3489 errors.ECODE_ENVIRON)
3490 node_helper = helpers[node].payload
3491 if node_helper != self.op.drbd_helper:
3492 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3493 (node, node_helper), errors.ECODE_ENVIRON)
3495 self.cluster = cluster = self.cfg.GetClusterInfo()
3496 # validate params changes
3497 if self.op.beparams:
3498 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3499 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3501 if self.op.ndparams:
3502 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3503 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3505 # TODO: we need a more general way to handle resetting
3506 # cluster-level parameters to default values
3507 if self.new_ndparams["oob_program"] == "":
3508 self.new_ndparams["oob_program"] = \
3509 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3511 if self.op.nicparams:
3512 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3513 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3514 objects.NIC.CheckParameterSyntax(self.new_nicparams)
3517 # check all instances for consistency
3518 for instance in self.cfg.GetAllInstancesInfo().values():
3519 for nic_idx, nic in enumerate(instance.nics):
3520 params_copy = copy.deepcopy(nic.nicparams)
3521 params_filled = objects.FillDict(self.new_nicparams, params_copy)
3523 # check parameter syntax
3525 objects.NIC.CheckParameterSyntax(params_filled)
3526 except errors.ConfigurationError, err:
3527 nic_errors.append("Instance %s, nic/%d: %s" %
3528 (instance.name, nic_idx, err))
3530 # if we're moving instances to routed, check that they have an ip
3531 target_mode = params_filled[constants.NIC_MODE]
3532 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3533 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3534 " address" % (instance.name, nic_idx))
3536 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3537 "\n".join(nic_errors))
3539 # hypervisor list/parameters
3540 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3541 if self.op.hvparams:
3542 for hv_name, hv_dict in self.op.hvparams.items():
3543 if hv_name not in self.new_hvparams:
3544 self.new_hvparams[hv_name] = hv_dict
3546 self.new_hvparams[hv_name].update(hv_dict)
3548 # os hypervisor parameters
3549 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3551 for os_name, hvs in self.op.os_hvp.items():
3552 if os_name not in self.new_os_hvp:
3553 self.new_os_hvp[os_name] = hvs
3555 for hv_name, hv_dict in hvs.items():
3556 if hv_name not in self.new_os_hvp[os_name]:
3557 self.new_os_hvp[os_name][hv_name] = hv_dict
3559 self.new_os_hvp[os_name][hv_name].update(hv_dict)
3562 self.new_osp = objects.FillDict(cluster.osparams, {})
3563 if self.op.osparams:
3564 for os_name, osp in self.op.osparams.items():
3565 if os_name not in self.new_osp:
3566 self.new_osp[os_name] = {}
3568 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3571 if not self.new_osp[os_name]:
3572 # we removed all parameters
3573 del self.new_osp[os_name]
3575 # check the parameter validity (remote check)
3576 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3577 os_name, self.new_osp[os_name])
3579 # changes to the hypervisor list
3580 if self.op.enabled_hypervisors is not None:
3581 self.hv_list = self.op.enabled_hypervisors
3582 for hv in self.hv_list:
3583 # if the hypervisor doesn't already exist in the cluster
3584 # hvparams, we initialize it to empty, and then (in both
3585 # cases) we make sure to fill the defaults, as we might not
3586 # have a complete defaults list if the hypervisor wasn't
3588 if hv not in new_hvp:
3590 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3591 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3593 self.hv_list = cluster.enabled_hypervisors
3595 if self.op.hvparams or self.op.enabled_hypervisors is not None:
3596 # either the enabled list has changed, or the parameters have, validate
3597 for hv_name, hv_params in self.new_hvparams.items():
3598 if ((self.op.hvparams and hv_name in self.op.hvparams) or
3599 (self.op.enabled_hypervisors and
3600 hv_name in self.op.enabled_hypervisors)):
3601 # either this is a new hypervisor, or its parameters have changed
3602 hv_class = hypervisor.GetHypervisor(hv_name)
3603 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3604 hv_class.CheckParameterSyntax(hv_params)
3605 _CheckHVParams(self, node_list, hv_name, hv_params)
3608 # no need to check any newly-enabled hypervisors, since the
3609 # defaults have already been checked in the above code-block
3610 for os_name, os_hvp in self.new_os_hvp.items():
3611 for hv_name, hv_params in os_hvp.items():
3612 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3613 # we need to fill in the new os_hvp on top of the actual hv_p
3614 cluster_defaults = self.new_hvparams.get(hv_name, {})
3615 new_osp = objects.FillDict(cluster_defaults, hv_params)
3616 hv_class = hypervisor.GetHypervisor(hv_name)
3617 hv_class.CheckParameterSyntax(new_osp)
3618 _CheckHVParams(self, node_list, hv_name, new_osp)
3620 if self.op.default_iallocator:
3621 alloc_script = utils.FindFile(self.op.default_iallocator,
3622 constants.IALLOCATOR_SEARCH_PATH,
3624 if alloc_script is None:
3625 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3626 " specified" % self.op.default_iallocator,
3629 def Exec(self, feedback_fn):
3630 """Change the parameters of the cluster.
3633 if self.op.vg_name is not None:
3634 new_volume = self.op.vg_name
3637 if new_volume != self.cfg.GetVGName():
3638 self.cfg.SetVGName(new_volume)
3640 feedback_fn("Cluster LVM configuration already in desired"
3641 " state, not changing")
3642 if self.op.drbd_helper is not None:
3643 new_helper = self.op.drbd_helper
3646 if new_helper != self.cfg.GetDRBDHelper():
3647 self.cfg.SetDRBDHelper(new_helper)
3649 feedback_fn("Cluster DRBD helper already in desired state,"
3651 if self.op.hvparams:
3652 self.cluster.hvparams = self.new_hvparams
3654 self.cluster.os_hvp = self.new_os_hvp
3655 if self.op.enabled_hypervisors is not None:
3656 self.cluster.hvparams = self.new_hvparams
3657 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3658 if self.op.beparams:
3659 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3660 if self.op.nicparams:
3661 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3662 if self.op.osparams:
3663 self.cluster.osparams = self.new_osp
3664 if self.op.ndparams:
3665 self.cluster.ndparams = self.new_ndparams
3667 if self.op.candidate_pool_size is not None:
3668 self.cluster.candidate_pool_size = self.op.candidate_pool_size
3669 # we need to update the pool size here, otherwise the save will fail
3670 _AdjustCandidatePool(self, [])
3672 if self.op.maintain_node_health is not None:
3673 self.cluster.maintain_node_health = self.op.maintain_node_health
3675 if self.op.prealloc_wipe_disks is not None:
3676 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3678 if self.op.add_uids is not None:
3679 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3681 if self.op.remove_uids is not None:
3682 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3684 if self.op.uid_pool is not None:
3685 self.cluster.uid_pool = self.op.uid_pool
3687 if self.op.default_iallocator is not None:
3688 self.cluster.default_iallocator = self.op.default_iallocator
3690 if self.op.reserved_lvs is not None:
3691 self.cluster.reserved_lvs = self.op.reserved_lvs
3693 def helper_os(aname, mods, desc):
3695 lst = getattr(self.cluster, aname)
3696 for key, val in mods:
3697 if key == constants.DDM_ADD:
3699 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3702 elif key == constants.DDM_REMOVE:
3706 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3708 raise errors.ProgrammerError("Invalid modification '%s'" % key)
3710 if self.op.hidden_os:
3711 helper_os("hidden_os", self.op.hidden_os, "hidden")
3713 if self.op.blacklisted_os:
3714 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3716 if self.op.master_netdev:
3717 master = self.cfg.GetMasterNode()
3718 feedback_fn("Shutting down master ip on the current netdev (%s)" %
3719 self.cluster.master_netdev)
3720 result = self.rpc.call_node_deactivate_master_ip(master)
3721 result.Raise("Could not disable the master ip")
3722 feedback_fn("Changing master_netdev from %s to %s" %
3723 (self.cluster.master_netdev, self.op.master_netdev))
3724 self.cluster.master_netdev = self.op.master_netdev
3726 self.cfg.Update(self.cluster, feedback_fn)
3728 if self.op.master_netdev:
3729 feedback_fn("Starting the master ip on the new master netdev (%s)" %
3730 self.op.master_netdev)
3731 result = self.rpc.call_node_activate_master_ip(master)
3733 self.LogWarning("Could not re-enable the master ip on"
3734 " the master, please restart manually: %s",
3738 def _UploadHelper(lu, nodes, fname):
3739 """Helper for uploading a file and showing warnings.
3742 if os.path.exists(fname):
3743 result = lu.rpc.call_upload_file(nodes, fname)
3744 for to_node, to_result in result.items():
3745 msg = to_result.fail_msg
3747 msg = ("Copy of file %s to node %s failed: %s" %
3748 (fname, to_node, msg))
3749 lu.proc.LogWarning(msg)
3752 def _ComputeAncillaryFiles(cluster, redist):
3753 """Compute files external to Ganeti which need to be consistent.
3755 @type redist: boolean
3756 @param redist: Whether to include files which need to be redistributed
3759 # Compute files for all nodes
3761 constants.SSH_KNOWN_HOSTS_FILE,
3762 constants.CONFD_HMAC_KEY,
3763 constants.CLUSTER_DOMAIN_SECRET_FILE,
3764 constants.RAPI_USERS_FILE,
3768 files_all.update(constants.ALL_CERT_FILES)
3769 files_all.update(ssconf.SimpleStore().GetFileList())
3771 # we need to ship at least the RAPI certificate
3772 files_all.add(constants.RAPI_CERT_FILE)
3774 if cluster.modify_etc_hosts:
3775 files_all.add(constants.ETC_HOSTS)
3777 # Files which are optional, these must:
3778 # - be present in one other category as well
3779 # - either exist or not exist on all nodes of that category (mc, vm all)
3781 constants.RAPI_USERS_FILE,
3784 # Files which should only be on master candidates
3787 files_mc.add(constants.CLUSTER_CONF_FILE)
3789 # Files which should only be on VM-capable nodes
3790 files_vm = set(filename
3791 for hv_name in cluster.enabled_hypervisors
3792 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
3794 files_opt |= set(filename
3795 for hv_name in cluster.enabled_hypervisors
3796 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
3798 # Filenames in each category must be unique
3799 all_files_set = files_all | files_mc | files_vm
3800 assert (len(all_files_set) ==
3801 sum(map(len, [files_all, files_mc, files_vm]))), \
3802 "Found file listed in more than one file list"
3804 # Optional files must be present in one other category
3805 assert all_files_set.issuperset(files_opt), \
3806 "Optional file not in a different required list"
3808 return (files_all, files_opt, files_mc, files_vm)
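# Sketch of how callers are expected to use the tuple returned above,
# mirroring _RedistributeAncillaryFiles below (a hedged example, not extra
# functionality):
#   (files_all, _, files_mc, files_vm) = _ComputeAncillaryFiles(cluster, True)
#   for (node_list, files) in [(online_nodes, files_all), (vm_nodes, files_vm)]:
#     for fname in files:
#       _UploadHelper(lu, node_list, fname)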
3811 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3812 """Distribute additional files which are part of the cluster configuration.
3814 ConfigWriter takes care of distributing the config and ssconf files, but
3815 there are more files which should be distributed to all nodes. This function
3816 makes sure those are copied.
3818 @param lu: calling logical unit
3819 @param additional_nodes: list of nodes not in the config to distribute to
3820 @type additional_vm: boolean
3821 @param additional_vm: whether the additional nodes are vm-capable or not
3824 # Gather target nodes
3825 cluster = lu.cfg.GetClusterInfo()
3826 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3828 online_nodes = lu.cfg.GetOnlineNodeList()
3829 vm_nodes = lu.cfg.GetVmCapableNodeList()
3831 if additional_nodes is not None:
3832 online_nodes.extend(additional_nodes)
3834 vm_nodes.extend(additional_nodes)
3836 # Never distribute to master node
3837 for nodelist in [online_nodes, vm_nodes]:
3838 if master_info.name in nodelist:
3839 nodelist.remove(master_info.name)
3842 (files_all, _, files_mc, files_vm) = \
3843 _ComputeAncillaryFiles(cluster, True)
3845 # Never re-distribute configuration file from here
3846 assert not (constants.CLUSTER_CONF_FILE in files_all or
3847 constants.CLUSTER_CONF_FILE in files_vm)
3848 assert not files_mc, "Master candidates not handled in this function"
3851 (online_nodes, files_all),
3852 (vm_nodes, files_vm),
3856 for (node_list, files) in filemap:
3858 _UploadHelper(lu, node_list, fname)
3861 class LUClusterRedistConf(NoHooksLU):
3862 """Force the redistribution of cluster configuration.
3864 This is a very simple LU.
3869 def ExpandNames(self):
3870 self.needed_locks = {
3871 locking.LEVEL_NODE: locking.ALL_SET,
3873 self.share_locks[locking.LEVEL_NODE] = 1
3875 def Exec(self, feedback_fn):
3876 """Redistribute the configuration.
3879 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3880 _RedistributeAncillaryFiles(self)
3883 class LUClusterActivateMasterIp(NoHooksLU):
3884 """Activate the master IP on the master node.
3887 def Exec(self, feedback_fn):
3888 """Activate the master IP.
3891 master = self.cfg.GetMasterNode()
3892 result = self.rpc.call_node_activate_master_ip(master)
3893 result.Raise("Could not activate the master IP")
3896 class LUClusterDeactivateMasterIp(NoHooksLU):
3897 """Deactivate the master IP on the master node.
3900 def Exec(self, feedback_fn):
3901 """Deactivate the master IP.
3904 master = self.cfg.GetMasterNode()
3905 result = self.rpc.call_node_deactivate_master_ip(master)
3906 result.Raise("Could not deactivate the master IP")
3909 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3910 """Sleep and poll for an instance's disk to sync.
3913 if not instance.disks or disks is not None and not disks:
3916 disks = _ExpandCheckDisks(instance, disks)
3919 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3921 node = instance.primary_node
3924 lu.cfg.SetDiskID(dev, node)
3926 # TODO: Convert to utils.Retry
3929 degr_retries = 10 # number of retries, roughly seconds, as we sleep 1 second each time
3933 cumul_degraded = False
3934 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3935 msg = rstats.fail_msg
3937 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3940 raise errors.RemoteError("Can't contact node %s for mirror data,"
3941 " aborting." % node)
3944 rstats = rstats.payload
3946 for i, mstat in enumerate(rstats):
3948 lu.LogWarning("Can't compute data for node %s/%s",
3949 node, disks[i].iv_name)
3952 cumul_degraded = (cumul_degraded or
3953 (mstat.is_degraded and mstat.sync_percent is None))
3954 if mstat.sync_percent is not None:
3956 if mstat.estimated_time is not None:
3957 rem_time = ("%s remaining (estimated)" %
3958 utils.FormatSeconds(mstat.estimated_time))
3959 max_time = mstat.estimated_time
3961 rem_time = "no time estimate"
3962 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3963 (disks[i].iv_name, mstat.sync_percent, rem_time))
3965 # if we're done but degraded, let's do a few small retries, to
3966 # make sure we see a stable and not transient situation; therefore
3967 # we force restart of the loop
3968 if (done or oneshot) and cumul_degraded and degr_retries > 0:
3969 logging.info("Degraded disks found, %d retries left", degr_retries)
3977 time.sleep(min(60, max_time))
3980 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3981 return not cumul_degraded
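# Summary of the polling behaviour above (timings taken from this function,
# not a specification): mirror status is requested from the primary node,
# per-disk progress is logged, the loop sleeps up to min(60, estimated_time)
# seconds between polls, and a sync that finishes while still degraded is
# retried up to degr_retries times (one second apart) before the degraded
# state is reported back to the caller.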
3984 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3985 """Check that mirrors are not degraded.
3987 The ldisk parameter, if True, will change the test from the
3988 is_degraded attribute (which represents overall non-ok status for
3989 the device(s)) to the ldisk (representing the local storage status).
3992 lu.cfg.SetDiskID(dev, node)
3996 if on_primary or dev.AssembleOnSecondary():
3997 rstats = lu.rpc.call_blockdev_find(node, dev)
3998 msg = rstats.fail_msg
4000 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4002 elif not rstats.payload:
4003 lu.LogWarning("Can't find disk on node %s", node)
4007 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4009 result = result and not rstats.payload.is_degraded
4012 for child in dev.children:
4013 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
4018 class LUOobCommand(NoHooksLU):
4019 """Logical unit for OOB handling.
4023 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4025 def ExpandNames(self):
4026 """Gather locks we need.
4029 if self.op.node_names:
4030 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4031 lock_names = self.op.node_names
4033 lock_names = locking.ALL_SET
4035 self.needed_locks = {
4036 locking.LEVEL_NODE: lock_names,
4039 def CheckPrereq(self):
4040 """Check prerequisites.
4043 - the node exists in the configuration
4046 Any errors are signaled by raising errors.OpPrereqError.
4050 self.master_node = self.cfg.GetMasterNode()
4052 assert self.op.power_delay >= 0.0
4054 if self.op.node_names:
4055 if (self.op.command in self._SKIP_MASTER and
4056 self.master_node in self.op.node_names):
4057 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4058 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4060 if master_oob_handler:
4061 additional_text = ("run '%s %s %s' if you want to operate on the"
4062 " master regardless") % (master_oob_handler,
4066 additional_text = "it does not support out-of-band operations"
4068 raise errors.OpPrereqError(("Operating on the master node %s is not"
4069 " allowed for %s; %s") %
4070 (self.master_node, self.op.command,
4071 additional_text), errors.ECODE_INVAL)
4073 self.op.node_names = self.cfg.GetNodeList()
4074 if self.op.command in self._SKIP_MASTER:
4075 self.op.node_names.remove(self.master_node)
4077 if self.op.command in self._SKIP_MASTER:
4078 assert self.master_node not in self.op.node_names
4080 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4082 raise errors.OpPrereqError("Node %s not found" % node_name,
4085 self.nodes.append(node)
4087 if (not self.op.ignore_status and
4088 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4089 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4090 " not marked offline") % node_name,
4093 def Exec(self, feedback_fn):
4094 """Execute OOB and return result if we expect any.
4097 master_node = self.master_node
4100 for idx, node in enumerate(utils.NiceSort(self.nodes,
4101 key=lambda node: node.name)):
4102 node_entry = [(constants.RS_NORMAL, node.name)]
4103 ret.append(node_entry)
4105 oob_program = _SupportsOob(self.cfg, node)
4108 node_entry.append((constants.RS_UNAVAIL, None))
4111 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4112 self.op.command, oob_program, node.name)
4113 result = self.rpc.call_run_oob(master_node, oob_program,
4114 self.op.command, node.name,
4118 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4119 node.name, result.fail_msg)
4120 node_entry.append((constants.RS_NODATA, None))
4123 self._CheckPayload(result)
4124 except errors.OpExecError, err:
4125 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4127 node_entry.append((constants.RS_NODATA, None))
4129 if self.op.command == constants.OOB_HEALTH:
4130 # For health we should log important events
4131 for item, status in result.payload:
4132 if status in [constants.OOB_STATUS_WARNING,
4133 constants.OOB_STATUS_CRITICAL]:
4134 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4135 item, node.name, status)
4137 if self.op.command == constants.OOB_POWER_ON:
4139 elif self.op.command == constants.OOB_POWER_OFF:
4140 node.powered = False
4141 elif self.op.command == constants.OOB_POWER_STATUS:
4142 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4143 if powered != node.powered:
4144 logging.warning(("Recorded power state (%s) of node '%s' does not"
4145 " match actual power state (%s)"), node.powered,
4148 # For configuration changing commands we should update the node
4149 if self.op.command in (constants.OOB_POWER_ON,
4150 constants.OOB_POWER_OFF):
4151 self.cfg.Update(node, feedback_fn)
4153 node_entry.append((constants.RS_NORMAL, result.payload))
4155 if (self.op.command == constants.OOB_POWER_ON and
4156 idx < len(self.nodes) - 1):
4157 time.sleep(self.op.power_delay)
4161 def _CheckPayload(self, result):
4162 """Checks if the payload is valid.
4164 @param result: RPC result
4165 @raises errors.OpExecError: If payload is not valid
4169 if self.op.command == constants.OOB_HEALTH:
4170 if not isinstance(result.payload, list):
4171 errs.append("command 'health' is expected to return a list but got %s" %
4172 type(result.payload))
4174 for item, status in result.payload:
4175 if status not in constants.OOB_STATUSES:
4176 errs.append("health item '%s' has invalid status '%s'" %
4179 if self.op.command == constants.OOB_POWER_STATUS:
4180 if not isinstance(result.payload, dict):
4181 errs.append("power-status is expected to return a dict but got %s" %
4182 type(result.payload))
4184 if self.op.command in [
4185 constants.OOB_POWER_ON,
4186 constants.OOB_POWER_OFF,
4187 constants.OOB_POWER_CYCLE,
4189 if result.payload is not None:
4190 errs.append("%s is expected to not return payload but got '%s'" %
4191 (self.op.command, result.payload))
4194 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4195 utils.CommaJoin(errs))
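# The payload shapes enforced above, as a hedged illustration (example values
# only):
#   OOB_HEALTH             -> list of (item, status) pairs, e.g.
#                             [("PSU0", constants.OOB_STATUS_OK)]
#   OOB_POWER_STATUS       -> dict, e.g.
#                             {constants.OOB_POWER_STATUS_POWERED: True}
#   OOB_POWER_ON/OFF/CYCLE -> no payload (None)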
4198 class _OsQuery(_QueryBase):
4199 FIELDS = query.OS_FIELDS
4201 def ExpandNames(self, lu):
4202 # Lock all nodes in shared mode
4203 # Temporary removal of locks, should be reverted later
4204 # TODO: reintroduce locks when they are lighter-weight
4205 lu.needed_locks = {}
4206 #self.share_locks[locking.LEVEL_NODE] = 1
4207 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4209 # The following variables interact with _QueryBase._GetNames
4211 self.wanted = self.names
4213 self.wanted = locking.ALL_SET
4215 self.do_locking = self.use_locking
4217 def DeclareLocks(self, lu, level):
4221 def _DiagnoseByOS(rlist):
4222 """Remaps a per-node return list into a per-os per-node dictionary
4224 @param rlist: a map with node names as keys and OS objects as values
4227 @return: a dictionary with osnames as keys and as value another
4228 map, with nodes as keys and tuples of (path, status, diagnose,
4229 variants, parameters, api_versions) as values, eg::
4231 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4232 (/srv/..., False, "invalid api")],
4233 "node2": [(/srv/..., True, "", [], [])]}
4238 # we build here the list of nodes that didn't fail the RPC (at RPC
4239 # level), so that nodes with a non-responding node daemon don't
4240 # make all OSes invalid
4241 good_nodes = [node_name for node_name in rlist
4242 if not rlist[node_name].fail_msg]
4243 for node_name, nr in rlist.items():
4244 if nr.fail_msg or not nr.payload:
4246 for (name, path, status, diagnose, variants,
4247 params, api_versions) in nr.payload:
4248 if name not in all_os:
4249 # build a list of nodes for this os containing empty lists
4250 # for each node in node_list
4252 for nname in good_nodes:
4253 all_os[name][nname] = []
4254 # convert params from [name, help] to (name, help)
4255 params = [tuple(v) for v in params]
4256 all_os[name][node_name].append((path, status, diagnose,
4257 variants, params, api_versions))
4260 def _GetQueryData(self, lu):
4261 """Computes the list of nodes and their attributes.
4264 # Locking is not used
4265 assert not (compat.any(lu.glm.is_owned(level)
4266 for level in locking.LEVELS
4267 if level != locking.LEVEL_CLUSTER) or
4268 self.do_locking or self.use_locking)
4270 valid_nodes = [node.name
4271 for node in lu.cfg.GetAllNodesInfo().values()
4272 if not node.offline and node.vm_capable]
4273 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4274 cluster = lu.cfg.GetClusterInfo()
4278 for (os_name, os_data) in pol.items():
4279 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4280 hidden=(os_name in cluster.hidden_os),
4281 blacklisted=(os_name in cluster.blacklisted_os))
4285 api_versions = set()
4287 for idx, osl in enumerate(os_data.values()):
4288 info.valid = bool(info.valid and osl and osl[0][1])
4292 (node_variants, node_params, node_api) = osl[0][3:6]
4295 variants.update(node_variants)
4296 parameters.update(node_params)
4297 api_versions.update(node_api)
4299 # Filter out inconsistent values
4300 variants.intersection_update(node_variants)
4301 parameters.intersection_update(node_params)
4302 api_versions.intersection_update(node_api)
4304 info.variants = list(variants)
4305 info.parameters = list(parameters)
4306 info.api_versions = list(api_versions)
4308 data[os_name] = info
4310 # Prepare data in requested order
4311 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4315 class LUOsDiagnose(NoHooksLU):
4316 """Logical unit for OS diagnose/query.
4322 def _BuildFilter(fields, names):
4323 """Builds a filter for querying OSes.
4326 name_filter = qlang.MakeSimpleFilter("name", names)
4328 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4329 # respective field is not requested
4330 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4331 for fname in ["hidden", "blacklisted"]
4332 if fname not in fields]
4333 if "valid" not in fields:
4334 status_filter.append([qlang.OP_TRUE, "valid"])
4337 status_filter.insert(0, qlang.OP_AND)
4339 status_filter = None
4341 if name_filter and status_filter:
4342 return [qlang.OP_AND, name_filter, status_filter]
4346 return status_filter
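# Hedged example of the filter built above: asking only for the "name" field
# of the OS "debian-8" would yield roughly
#   [qlang.OP_AND,
#    [qlang.OP_OR, [qlang.OP_EQUAL, "name", "debian-8"]],
#    [qlang.OP_AND,
#     [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
#     [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
#     [qlang.OP_TRUE, "valid"]]]
# The inner shape of the name filter is assumed from qlang.MakeSimpleFilter
# and is illustrative only.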
4348 def CheckArguments(self):
4349 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4350 self.op.output_fields, False)
4352 def ExpandNames(self):
4353 self.oq.ExpandNames(self)
4355 def Exec(self, feedback_fn):
4356 return self.oq.OldStyleQuery(self)
4359 class LUNodeRemove(LogicalUnit):
4360 """Logical unit for removing a node.
4363 HPATH = "node-remove"
4364 HTYPE = constants.HTYPE_NODE
4366 def BuildHooksEnv(self):
4369 This doesn't run on the target node in the pre phase as a failed
4370 node would then be impossible to remove.
4374 "OP_TARGET": self.op.node_name,
4375 "NODE_NAME": self.op.node_name,
4378 def BuildHooksNodes(self):
4379 """Build hooks nodes.
4382 all_nodes = self.cfg.GetNodeList()
4384 all_nodes.remove(self.op.node_name)
4386 logging.warning("Node '%s', which is about to be removed, was not found"
4387 " in the list of all nodes", self.op.node_name)
4388 return (all_nodes, all_nodes)
4390 def CheckPrereq(self):
4391 """Check prerequisites.
4394 - the node exists in the configuration
4395 - it does not have primary or secondary instances
4396 - it's not the master
4398 Any errors are signaled by raising errors.OpPrereqError.
4401 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4402 node = self.cfg.GetNodeInfo(self.op.node_name)
4403 assert node is not None
4405 masternode = self.cfg.GetMasterNode()
4406 if node.name == masternode:
4407 raise errors.OpPrereqError("Node is the master node, failover to another"
4408 " node is required", errors.ECODE_INVAL)
4410 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4411 if node.name in instance.all_nodes:
4412 raise errors.OpPrereqError("Instance %s is still running on the node,"
4413 " please remove first" % instance_name,
4415 self.op.node_name = node.name
4418 def Exec(self, feedback_fn):
4419 """Removes the node from the cluster.
4423 logging.info("Stopping the node daemon and removing configs from node %s",
4426 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4428 # Promote nodes to master candidate as needed
4429 _AdjustCandidatePool(self, exceptions=[node.name])
4430 self.context.RemoveNode(node.name)
4432 # Run post hooks on the node before it's removed
4433 _RunPostHook(self, node.name)
4435 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4436 msg = result.fail_msg
4438 self.LogWarning("Errors encountered on the remote node while leaving"
4439 " the cluster: %s", msg)
4441 # Remove node from our /etc/hosts
4442 if self.cfg.GetClusterInfo().modify_etc_hosts:
4443 master_node = self.cfg.GetMasterNode()
4444 result = self.rpc.call_etc_hosts_modify(master_node,
4445 constants.ETC_HOSTS_REMOVE,
4447 result.Raise("Can't update hosts file with new host data")
4448 _RedistributeAncillaryFiles(self)
4451 class _NodeQuery(_QueryBase):
4452 FIELDS = query.NODE_FIELDS
4454 def ExpandNames(self, lu):
4455 lu.needed_locks = {}
4456 lu.share_locks = _ShareAll()
4459 self.wanted = _GetWantedNodes(lu, self.names)
4461 self.wanted = locking.ALL_SET
4463 self.do_locking = (self.use_locking and
4464 query.NQ_LIVE in self.requested_data)
4467 # If any non-static field is requested we need to lock the nodes
4468 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4470 def DeclareLocks(self, lu, level):
4473 def _GetQueryData(self, lu):
4474 """Computes the list of nodes and their attributes.
4477 all_info = lu.cfg.GetAllNodesInfo()
4479 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4481 # Gather data as requested
4482 if query.NQ_LIVE in self.requested_data:
4483 # filter out non-vm_capable nodes
4484 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4486 node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
4487 lu.cfg.GetHypervisorType())
4488 live_data = dict((name, nresult.payload)
4489 for (name, nresult) in node_data.items()
4490 if not nresult.fail_msg and nresult.payload)
4494 if query.NQ_INST in self.requested_data:
4495 node_to_primary = dict([(name, set()) for name in nodenames])
4496 node_to_secondary = dict([(name, set()) for name in nodenames])
4498 inst_data = lu.cfg.GetAllInstancesInfo()
4500 for inst in inst_data.values():
4501 if inst.primary_node in node_to_primary:
4502 node_to_primary[inst.primary_node].add(inst.name)
4503 for secnode in inst.secondary_nodes:
4504 if secnode in node_to_secondary:
4505 node_to_secondary[secnode].add(inst.name)
4507 node_to_primary = None
4508 node_to_secondary = None
4510 if query.NQ_OOB in self.requested_data:
4511 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4512 for name, node in all_info.iteritems())
4516 if query.NQ_GROUP in self.requested_data:
4517 groups = lu.cfg.GetAllNodeGroupsInfo()
4521 return query.NodeQueryData([all_info[name] for name in nodenames],
4522 live_data, lu.cfg.GetMasterNode(),
4523 node_to_primary, node_to_secondary, groups,
4524 oob_support, lu.cfg.GetClusterInfo())
4527 class LUNodeQuery(NoHooksLU):
4528 """Logical unit for querying nodes.
4531 # pylint: disable=W0142
4534 def CheckArguments(self):
4535 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4536 self.op.output_fields, self.op.use_locking)
4538 def ExpandNames(self):
4539 self.nq.ExpandNames(self)
4541 def Exec(self, feedback_fn):
4542 return self.nq.OldStyleQuery(self)
4545 class LUNodeQueryvols(NoHooksLU):
4546 """Logical unit for getting volumes on node(s).
4550 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4551 _FIELDS_STATIC = utils.FieldSet("node")
4553 def CheckArguments(self):
4554 _CheckOutputFields(static=self._FIELDS_STATIC,
4555 dynamic=self._FIELDS_DYNAMIC,
4556 selected=self.op.output_fields)
4558 def ExpandNames(self):
4559 self.needed_locks = {}
4560 self.share_locks[locking.LEVEL_NODE] = 1
4561 if not self.op.nodes:
4562 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4564 self.needed_locks[locking.LEVEL_NODE] = \
4565 _GetWantedNodes(self, self.op.nodes)
4567 def Exec(self, feedback_fn):
4568 """Computes the list of nodes and their attributes.
4571 nodenames = self.owned_locks(locking.LEVEL_NODE)
4572 volumes = self.rpc.call_node_volumes(nodenames)
4574 ilist = self.cfg.GetAllInstancesInfo()
4575 vol2inst = _MapInstanceDisksToNodes(ilist.values())
4578 for node in nodenames:
4579 nresult = volumes[node]
4582 msg = nresult.fail_msg
4584 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
4587 node_vols = sorted(nresult.payload,
4588 key=operator.itemgetter("dev"))
4590 for vol in node_vols:
4592 for field in self.op.output_fields:
4595 elif field == "phys":
4599 elif field == "name":
4601 elif field == "size":
4602 val = int(float(vol["size"]))
4603 elif field == "instance":
4604 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
4606 raise errors.ParameterError(field)
4607 node_output.append(str(val))
4609 output.append(node_output)
4614 class LUNodeQueryStorage(NoHooksLU):
4615 """Logical unit for getting information on storage units on node(s).
4618 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4621 def CheckArguments(self):
4622 _CheckOutputFields(static=self._FIELDS_STATIC,
4623 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4624 selected=self.op.output_fields)
4626 def ExpandNames(self):
4627 self.needed_locks = {}
4628 self.share_locks[locking.LEVEL_NODE] = 1
4631 self.needed_locks[locking.LEVEL_NODE] = \
4632 _GetWantedNodes(self, self.op.nodes)
4634 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4636 def Exec(self, feedback_fn):
4637 """Computes the list of nodes and their attributes.
4640 self.nodes = self.owned_locks(locking.LEVEL_NODE)
4642 # Always get name to sort by
4643 if constants.SF_NAME in self.op.output_fields:
4644 fields = self.op.output_fields[:]
4646 fields = [constants.SF_NAME] + self.op.output_fields
4648 # Never ask for node or type as it's only known to the LU
4649 for extra in [constants.SF_NODE, constants.SF_TYPE]:
4650 while extra in fields:
4651 fields.remove(extra)
4653 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4654 name_idx = field_idx[constants.SF_NAME]
4656 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4657 data = self.rpc.call_storage_list(self.nodes,
4658 self.op.storage_type, st_args,
4659 self.op.name, fields)
4663 for node in utils.NiceSort(self.nodes):
4664 nresult = data[node]
4668 msg = nresult.fail_msg
4670 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4673 rows = dict([(row[name_idx], row) for row in nresult.payload])
4675 for name in utils.NiceSort(rows.keys()):
4680 for field in self.op.output_fields:
4681 if field == constants.SF_NODE:
4683 elif field == constants.SF_TYPE:
4684 val = self.op.storage_type
4685 elif field in field_idx:
4686 val = row[field_idx[field]]
4688 raise errors.ParameterError(field)
4697 class _InstanceQuery(_QueryBase):
4698 FIELDS = query.INSTANCE_FIELDS
4700 def ExpandNames(self, lu):
4701 lu.needed_locks = {}
4702 lu.share_locks = _ShareAll()
4705 self.wanted = _GetWantedInstances(lu, self.names)
4707 self.wanted = locking.ALL_SET
4709 self.do_locking = (self.use_locking and
4710 query.IQ_LIVE in self.requested_data)
4712 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4713 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
4714 lu.needed_locks[locking.LEVEL_NODE] = []
4715 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4717 self.do_grouplocks = (self.do_locking and
4718 query.IQ_NODES in self.requested_data)
4720 def DeclareLocks(self, lu, level):
4722 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
4723 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
4725 # Lock all groups used by instances optimistically; this requires going
4726 # via the node before it's locked, requiring verification later on
4727 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
4729 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
4730 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
4731 elif level == locking.LEVEL_NODE:
4732 lu._LockInstancesNodes() # pylint: disable=W0212
4735 def _CheckGroupLocks(lu):
4736 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
4737 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
4739 # Check if node groups for locked instances are still correct
4740 for instance_name in owned_instances:
4741 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
4743 def _GetQueryData(self, lu):
4744 """Computes the list of instances and their attributes.
4747 if self.do_grouplocks:
4748 self._CheckGroupLocks(lu)
4750 cluster = lu.cfg.GetClusterInfo()
4751 all_info = lu.cfg.GetAllInstancesInfo()
4753 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4755 instance_list = [all_info[name] for name in instance_names]
4756 nodes = frozenset(itertools.chain(*(inst.all_nodes
4757 for inst in instance_list)))
4758 hv_list = list(set([inst.hypervisor for inst in instance_list]))
4761 wrongnode_inst = set()
4763 # Gather data as requested
4764 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4766 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4768 result = node_data[name]
4770 # offline nodes will be in both lists
4771 assert result.fail_msg
4772 offline_nodes.append(name)
4774 bad_nodes.append(name)
4775 elif result.payload:
4776 for inst in result.payload:
4777 if inst in all_info:
4778 if all_info[inst].primary_node == name:
4779 live_data.update(result.payload)
4781 wrongnode_inst.add(inst)
4783 # orphan instance; we don't list it here as we don't
4784 # handle this case yet in the output of instance listing
4785 logging.warning("Orphan instance '%s' found on node %s",
4787 # else no instance is alive
4791 if query.IQ_DISKUSAGE in self.requested_data:
4792 disk_usage = dict((inst.name,
4793 _ComputeDiskSize(inst.disk_template,
4794 [{constants.IDISK_SIZE: disk.size}
4795 for disk in inst.disks]))
4796 for inst in instance_list)
4800 if query.IQ_CONSOLE in self.requested_data:
4802 for inst in instance_list:
4803 if inst.name in live_data:
4804 # Instance is running
4805 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4807 consinfo[inst.name] = None
4808 assert set(consinfo.keys()) == set(instance_names)
4812 if query.IQ_NODES in self.requested_data:
4813 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
4815 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
4816 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
4817 for uuid in set(map(operator.attrgetter("group"),
4823 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4824 disk_usage, offline_nodes, bad_nodes,
4825 live_data, wrongnode_inst, consinfo,
4829 class LUQuery(NoHooksLU):
4830 """Query for resources/items of a certain kind.
4833 # pylint: disable=W0142
4836 def CheckArguments(self):
4837 qcls = _GetQueryImplementation(self.op.what)
4839 self.impl = qcls(self.op.filter, self.op.fields, self.op.use_locking)
4841 def ExpandNames(self):
4842 self.impl.ExpandNames(self)
4844 def DeclareLocks(self, level):
4845 self.impl.DeclareLocks(self, level)
4847 def Exec(self, feedback_fn):
4848 return self.impl.NewStyleQuery(self)
4851 class LUQueryFields(NoHooksLU):
4852 """Query for resources/items of a certain kind.
4855 # pylint: disable=W0142
4858 def CheckArguments(self):
4859 self.qcls = _GetQueryImplementation(self.op.what)
4861 def ExpandNames(self):
4862 self.needed_locks = {}
4864 def Exec(self, feedback_fn):
4865 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4868 class LUNodeModifyStorage(NoHooksLU):
4869 """Logical unit for modifying a storage volume on a node.
4874 def CheckArguments(self):
4875 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4877 storage_type = self.op.storage_type
4880 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4882 raise errors.OpPrereqError("Storage units of type '%s' can not be"
4883 " modified" % storage_type,
4886 diff = set(self.op.changes.keys()) - modifiable
4888 raise errors.OpPrereqError("The following fields can not be modified for"
4889 " storage units of type '%s': %r" %
4890 (storage_type, list(diff)),
4893 def ExpandNames(self):
4894 self.needed_locks = {
4895 locking.LEVEL_NODE: self.op.node_name,
4898 def Exec(self, feedback_fn):
4899 """Computes the list of nodes and their attributes.
4902 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4903 result = self.rpc.call_storage_modify(self.op.node_name,
4904 self.op.storage_type, st_args,
4905 self.op.name, self.op.changes)
4906 result.Raise("Failed to modify storage unit '%s' on %s" %
4907 (self.op.name, self.op.node_name))
4910 class LUNodeAdd(LogicalUnit):
4911 """Logical unit for adding node to the cluster.
4915 HTYPE = constants.HTYPE_NODE
4916 _NFLAGS = ["master_capable", "vm_capable"]
4918 def CheckArguments(self):
4919 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4920 # validate/normalize the node name
4921 self.hostname = netutils.GetHostname(name=self.op.node_name,
4922 family=self.primary_ip_family)
4923 self.op.node_name = self.hostname.name
4925 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
4926 raise errors.OpPrereqError("Cannot readd the master node",
4929 if self.op.readd and self.op.group:
4930 raise errors.OpPrereqError("Cannot pass a node group when a node is"
4931 " being readded", errors.ECODE_INVAL)
4933 def BuildHooksEnv(self):
4936 This will run on all nodes before, and on all nodes + the new node after.
4940 "OP_TARGET": self.op.node_name,
4941 "NODE_NAME": self.op.node_name,
4942 "NODE_PIP": self.op.primary_ip,
4943 "NODE_SIP": self.op.secondary_ip,
4944 "MASTER_CAPABLE": str(self.op.master_capable),
4945 "VM_CAPABLE": str(self.op.vm_capable),
4948 def BuildHooksNodes(self):
4949 """Build hooks nodes.
4952 # Exclude added node
4953 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
4954 post_nodes = pre_nodes + [self.op.node_name, ]
4956 return (pre_nodes, post_nodes)
4958 def CheckPrereq(self):
4959 """Check prerequisites.
4962 - the new node is not already in the config
4964 - its parameters (single/dual homed) match the cluster
4966 Any errors are signaled by raising errors.OpPrereqError.
4970 hostname = self.hostname
4971 node = hostname.name
4972 primary_ip = self.op.primary_ip = hostname.ip
4973 if self.op.secondary_ip is None:
4974 if self.primary_ip_family == netutils.IP6Address.family:
4975 raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
4976 " IPv4 address must be given as secondary",
4978 self.op.secondary_ip = primary_ip
4980 secondary_ip = self.op.secondary_ip
4981 if not netutils.IP4Address.IsValid(secondary_ip):
4982 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4983 " address" % secondary_ip, errors.ECODE_INVAL)
4985 node_list = cfg.GetNodeList()
4986 if not self.op.readd and node in node_list:
4987 raise errors.OpPrereqError("Node %s is already in the configuration" %
4988 node, errors.ECODE_EXISTS)
4989 elif self.op.readd and node not in node_list:
4990 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4993 self.changed_primary_ip = False
4995 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
4996 if self.op.readd and node == existing_node_name:
4997 if existing_node.secondary_ip != secondary_ip:
4998 raise errors.OpPrereqError("Readded node doesn't have the same IP"
4999 " address configuration as before",
5001 if existing_node.primary_ip != primary_ip:
5002 self.changed_primary_ip = True
5006 if (existing_node.primary_ip == primary_ip or
5007 existing_node.secondary_ip == primary_ip or
5008 existing_node.primary_ip == secondary_ip or
5009 existing_node.secondary_ip == secondary_ip):
5010 raise errors.OpPrereqError("New node ip address(es) conflict with"
5011 " existing node %s" % existing_node.name,
5012 errors.ECODE_NOTUNIQUE)
5014 # After this 'if' block, None is no longer a valid value for the
5015 # _capable op attributes
5017 old_node = self.cfg.GetNodeInfo(node)
5018 assert old_node is not None, "Can't retrieve locked node %s" % node
5019 for attr in self._NFLAGS:
5020 if getattr(self.op, attr) is None:
5021 setattr(self.op, attr, getattr(old_node, attr))
5023 for attr in self._NFLAGS:
5024 if getattr(self.op, attr) is None:
5025 setattr(self.op, attr, True)
5027 if self.op.readd and not self.op.vm_capable:
5028 pri, sec = cfg.GetNodeInstances(node)
5030 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5031 " flag set to false, but it already holds"
5032 " instances" % node,
5035 # check that the type of the node (single versus dual homed) is the
5036 # same as for the master
5037 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5038 master_singlehomed = myself.secondary_ip == myself.primary_ip
5039 newbie_singlehomed = secondary_ip == primary_ip
5040 if master_singlehomed != newbie_singlehomed:
5041 if master_singlehomed:
5042 raise errors.OpPrereqError("The master has no secondary ip but the"
5043 " new node has one",
5046 raise errors.OpPrereqError("The master has a secondary ip but the"
5047 " new node doesn't have one",
5050 # checks reachability
5051 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5052 raise errors.OpPrereqError("Node not reachable by ping",
5053 errors.ECODE_ENVIRON)
5055 if not newbie_singlehomed:
5056 # check reachability from my secondary ip to newbie's secondary ip
5057 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5058 source=myself.secondary_ip):
5059 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5060 " based ping to node daemon port",
5061 errors.ECODE_ENVIRON)
5068 if self.op.master_capable:
5069 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5071 self.master_candidate = False
5074 self.new_node = old_node
5076 node_group = cfg.LookupNodeGroup(self.op.group)
5077 self.new_node = objects.Node(name=node,
5078 primary_ip=primary_ip,
5079 secondary_ip=secondary_ip,
5080 master_candidate=self.master_candidate,
5081 offline=False, drained=False,
5084 if self.op.ndparams:
5085 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5087 # check connectivity
5088 result = self.rpc.call_version([self.new_node.name])[self.new_node.name]
5089 result.Raise("Can't get version information from node %s" % node)
5090 if constants.PROTOCOL_VERSION == result.payload:
5091 logging.info("Communication to node %s fine, sw version %s match",
5092 node, result.payload)
5094 raise errors.OpPrereqError("Version mismatch master version %s,"
5095 " node version %s" %
5096 (constants.PROTOCOL_VERSION, result.payload),
5097 errors.ECODE_ENVIRON)
5099 def Exec(self, feedback_fn):
5100 """Adds the new node to the cluster.
5103 new_node = self.new_node
5104 node = new_node.name
5106 # We are adding a new node, so we assume it's powered
5107 new_node.powered = True
5109 # for re-adds, reset the offline/drained/master-candidate flags;
5110 # we need to reset here, otherwise offline would prevent RPC calls
5111 # later in the procedure; this also means that if the re-add
5112 # fails, we are left with a non-offlined, broken node
5114 new_node.drained = new_node.offline = False # pylint: disable=W0201
5115 self.LogInfo("Readding a node, the offline/drained flags were reset")
5116 # if we demote the node, we do cleanup later in the procedure
5117 new_node.master_candidate = self.master_candidate
5118 if self.changed_primary_ip:
5119 new_node.primary_ip = self.op.primary_ip
5121 # copy the master/vm_capable flags
5122 for attr in self._NFLAGS:
5123 setattr(new_node, attr, getattr(self.op, attr))
5125 # notify the user about any possible mc promotion
5126 if new_node.master_candidate:
5127 self.LogInfo("Node will be a master candidate")
5129 if self.op.ndparams:
5130 new_node.ndparams = self.op.ndparams
5132 new_node.ndparams = {}
5134 # Add node to our /etc/hosts, and add key to known_hosts
5135 if self.cfg.GetClusterInfo().modify_etc_hosts:
5136 master_node = self.cfg.GetMasterNode()
5137 result = self.rpc.call_etc_hosts_modify(master_node,
5138 constants.ETC_HOSTS_ADD,
5141 result.Raise("Can't update hosts file with new host data")
5143 if new_node.secondary_ip != new_node.primary_ip:
5144 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5147 node_verify_list = [self.cfg.GetMasterNode()]
5148 node_verify_param = {
5149 constants.NV_NODELIST: ([node], {}),
5150 # TODO: do a node-net-test as well?
5153 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5154 self.cfg.GetClusterName())
5155 for verifier in node_verify_list:
5156 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5157 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5159 for failed in nl_payload:
5160 feedback_fn("ssh/hostname verification failed"
5161 " (checking from %s): %s" %
5162 (verifier, nl_payload[failed]))
5163 raise errors.OpExecError("ssh/hostname verification failed")
5166 _RedistributeAncillaryFiles(self)
5167 self.context.ReaddNode(new_node)
5168 # make sure we redistribute the config
5169 self.cfg.Update(new_node, feedback_fn)
5170 # and make sure the new node will not have old files around
5171 if not new_node.master_candidate:
5172 result = self.rpc.call_node_demote_from_mc(new_node.name)
5173 msg = result.fail_msg
5175 self.LogWarning("Node failed to demote itself from master"
5176 " candidate status: %s" % msg)
5178 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5179 additional_vm=self.op.vm_capable)
5180 self.context.AddNode(new_node, self.proc.GetECId())
5183 class LUNodeSetParams(LogicalUnit):
5184 """Modifies the parameters of a node.
5186 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5187 to the node role (as _ROLE_*)
5188 @cvar _R2F: a dictionary from node role to tuples of flags
5189 @cvar _FLAGS: a list of attribute names corresponding to the flags
5192 HPATH = "node-modify"
5193 HTYPE = constants.HTYPE_NODE
5195 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5197 (True, False, False): _ROLE_CANDIDATE,
5198 (False, True, False): _ROLE_DRAINED,
5199 (False, False, True): _ROLE_OFFLINE,
5200 (False, False, False): _ROLE_REGULAR,
5202 _R2F = dict((v, k) for k, v in _F2R.items())
5203 _FLAGS = ["master_candidate", "drained", "offline"]
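# Illustration of the mapping above (no new behaviour): a node whose flags
# are (master_candidate=True, drained=False, offline=False) has role
# _ROLE_CANDIDATE, and (False, False, False) is _ROLE_REGULAR; _R2F inverts
# _F2R so that a newly computed role can be written back to the three node
# flags in Exec below.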
5205 def CheckArguments(self):
5206 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5207 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5208 self.op.master_capable, self.op.vm_capable,
5209 self.op.secondary_ip, self.op.ndparams]
5210 if all_mods.count(None) == len(all_mods):
5211 raise errors.OpPrereqError("Please pass at least one modification",
5213 if all_mods.count(True) > 1:
5214 raise errors.OpPrereqError("Can't set the node into more than one"
5215 " state at the same time",
5218 # Boolean value that tells us whether we might be demoting from MC
5219 self.might_demote = (self.op.master_candidate == False or
5220 self.op.offline == True or
5221 self.op.drained == True or
5222 self.op.master_capable == False)
5224 if self.op.secondary_ip:
5225 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5226 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5227 " address" % self.op.secondary_ip,
5230 self.lock_all = self.op.auto_promote and self.might_demote
5231 self.lock_instances = self.op.secondary_ip is not None
5233 def ExpandNames(self):
5235 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5237 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5239 if self.lock_instances:
5240 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
5242 def DeclareLocks(self, level):
5243 # If we have locked all instances, before waiting to lock nodes, release
5244 # all the ones living on nodes unrelated to the current operation.
5245 if level == locking.LEVEL_NODE and self.lock_instances:
5246 self.affected_instances = []
5247 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
5250 # Build list of instances to release
5251 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
5252 for instance_name, instance in self.cfg.GetMultiInstanceInfo(locked_i):
5253 if (instance.disk_template in constants.DTS_INT_MIRROR and
5254 self.op.node_name in instance.all_nodes):
5255 instances_keep.append(instance_name)
5256 self.affected_instances.append(instance)
5258 _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)
5260 assert (set(self.owned_locks(locking.LEVEL_INSTANCE)) ==
5261 set(instances_keep))
5263 def BuildHooksEnv(self):
5266 This runs on the master node.
5270 "OP_TARGET": self.op.node_name,
5271 "MASTER_CANDIDATE": str(self.op.master_candidate),
5272 "OFFLINE": str(self.op.offline),
5273 "DRAINED": str(self.op.drained),
5274 "MASTER_CAPABLE": str(self.op.master_capable),
5275 "VM_CAPABLE": str(self.op.vm_capable),
5278 def BuildHooksNodes(self):
5279 """Build hooks nodes.
5282 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5285 def CheckPrereq(self):
5286 """Check prerequisites.
5288 This only checks the instance list against the existing names.
5291 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5293 if (self.op.master_candidate is not None or
5294 self.op.drained is not None or
5295 self.op.offline is not None):
5296 # we can't change the master's node flags
5297 if self.op.node_name == self.cfg.GetMasterNode():
5298 raise errors.OpPrereqError("The master role can be changed"
5299 " only via master-failover",
5302 if self.op.master_candidate and not node.master_capable:
5303 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5304 " it a master candidate" % node.name,
5307 if self.op.vm_capable == False:
5308 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5310 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5311 " the vm_capable flag" % node.name,
5314 if node.master_candidate and self.might_demote and not self.lock_all:
5315 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5316 # check if after removing the current node, we're missing master
5318 (mc_remaining, mc_should, _) = \
5319 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5320 if mc_remaining < mc_should:
5321 raise errors.OpPrereqError("Not enough master candidates, please"
5322 " pass auto promote option to allow"
5323 " promotion", errors.ECODE_STATE)
5325 self.old_flags = old_flags = (node.master_candidate,
5326 node.drained, node.offline)
5327 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5328 self.old_role = old_role = self._F2R[old_flags]
5330 # Check for ineffective changes
5331 for attr in self._FLAGS:
5332 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5333 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5334 setattr(self.op, attr, None)
5336 # Past this point, any flag change to False means a transition
5337 # away from the respective state, as only real changes are kept
5339 # TODO: We might query the real power state if it supports OOB
5340 if _SupportsOob(self.cfg, node):
5341 if self.op.offline is False and not (node.powered or
5342 self.op.powered == True):
5343 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5344 " offline status can be reset") %
5346 elif self.op.powered is not None:
5347 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5348 " as it does not support out-of-band"
5349 " handling") % self.op.node_name)
5351 # If we're being de-offlined/drained, we'll promote ourselves to master candidate if needed
5352 if (self.op.drained == False or self.op.offline == False or
5353 (self.op.master_capable and not node.master_capable)):
5354 if _DecideSelfPromotion(self):
5355 self.op.master_candidate = True
5356 self.LogInfo("Auto-promoting node to master candidate")
5358 # If we're no longer master capable, we'll demote ourselves from MC
5359 if self.op.master_capable == False and node.master_candidate:
5360 self.LogInfo("Demoting from master candidate")
5361 self.op.master_candidate = False
5364 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5365 if self.op.master_candidate:
5366 new_role = self._ROLE_CANDIDATE
5367 elif self.op.drained:
5368 new_role = self._ROLE_DRAINED
5369 elif self.op.offline:
5370 new_role = self._ROLE_OFFLINE
5371 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5372 # False is still in new flags, which means we're un-setting (the
5373 # offline/drained/master-candidate flag)
5374 new_role = self._ROLE_REGULAR
5375 else: # no new flags, nothing, keep old role
5378 self.new_role = new_role
5380 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5381 # Trying to transition out of offline status
5382 result = self.rpc.call_version([node.name])[node.name]
5384 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5385 " to report its version: %s" %
5386 (node.name, result.fail_msg),
5389 self.LogWarning("Transitioning node from offline to online state"
5390 " without using re-add. Please make sure the node"
5393 if self.op.secondary_ip:
5394 # Ok even without locking, because this can't be changed by any LU
5395 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5396 master_singlehomed = master.secondary_ip == master.primary_ip
5397 if master_singlehomed and self.op.secondary_ip:
5398 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5399 " homed cluster", errors.ECODE_INVAL)
5402 if self.affected_instances:
5403 raise errors.OpPrereqError("Cannot change secondary ip: offline"
5404 " node has instances (%s) configured"
5405 " to use it" % self.affected_instances)
5407 # On online nodes, check that no instances are running, and that
5408 # the node has the new ip and we can reach it.
5409 for instance in self.affected_instances:
5410 _CheckInstanceDown(self, instance, "cannot change secondary ip")
5412 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5413 if master.name != node.name:
5414 # check reachability from master secondary ip to new secondary ip
5415 if not netutils.TcpPing(self.op.secondary_ip,
5416 constants.DEFAULT_NODED_PORT,
5417 source=master.secondary_ip):
5418 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5419 " based ping to node daemon port",
5420 errors.ECODE_ENVIRON)
5422 if self.op.ndparams:
5423 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5424 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5425 self.new_ndparams = new_ndparams
5427 def Exec(self, feedback_fn):
5432 old_role = self.old_role
5433 new_role = self.new_role
5437 if self.op.ndparams:
5438 node.ndparams = self.new_ndparams
5440 if self.op.powered is not None:
5441 node.powered = self.op.powered
5443 for attr in ["master_capable", "vm_capable"]:
5444 val = getattr(self.op, attr)
5446 setattr(node, attr, val)
5447 result.append((attr, str(val)))
5449 if new_role != old_role:
5450 # Tell the node to demote itself, if no longer MC and not offline
5451 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5452 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5454 self.LogWarning("Node failed to demote itself: %s", msg)
5456 new_flags = self._R2F[new_role]
5457 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5459 result.append((desc, str(nf)))
5460 (node.master_candidate, node.drained, node.offline) = new_flags
5462 # we locked all nodes, so we adjust the candidate pool before updating this node
5464 _AdjustCandidatePool(self, [node.name])
5466 if self.op.secondary_ip:
5467 node.secondary_ip = self.op.secondary_ip
5468 result.append(("secondary_ip", self.op.secondary_ip))
5470 # this will trigger configuration file update, if needed
5471 self.cfg.Update(node, feedback_fn)
5473 # this will trigger job queue propagation or cleanup if the mc
5475 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5476 self.context.ReaddNode(node)
5481 class LUNodePowercycle(NoHooksLU):
5482 """Powercycles a node.
5487 def CheckArguments(self):
5488 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5489 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5490 raise errors.OpPrereqError("The node is the master and the force"
5491 " parameter was not set",
5494 def ExpandNames(self):
5495 """Locking for PowercycleNode.
5497 This is a last-resort option and shouldn't block on other
5498 jobs. Therefore, we grab no locks.
5501 self.needed_locks = {}
5503 def Exec(self, feedback_fn):
5507 result = self.rpc.call_node_powercycle(self.op.node_name,
5508 self.cfg.GetHypervisorType())
5509 result.Raise("Failed to schedule the reboot")
5510 return result.payload
5513 class LUClusterQuery(NoHooksLU):
5514 """Query cluster configuration.
5519 def ExpandNames(self):
5520 self.needed_locks = {}
5522 def Exec(self, feedback_fn):
5523 """Return cluster config.
5526 cluster = self.cfg.GetClusterInfo()
5529 # Filter just for enabled hypervisors
5530 for os_name, hv_dict in cluster.os_hvp.items():
5531 os_hvp[os_name] = {}
5532 for hv_name, hv_params in hv_dict.items():
5533 if hv_name in cluster.enabled_hypervisors:
5534 os_hvp[os_name][hv_name] = hv_params
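# Hedged example of the filtering above: with enabled_hypervisors=["kvm"],
# an os_hvp entry like {"debian-8": {"kvm": {...}, "xen-pvm": {...}}} is
# reduced to {"debian-8": {"kvm": {...}}} in the returned data.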
5536 # Convert ip_family to ip_version
5537 primary_ip_version = constants.IP4_VERSION
5538 if cluster.primary_ip_family == netutils.IP6Address.family:
5539 primary_ip_version = constants.IP6_VERSION
5542 "software_version": constants.RELEASE_VERSION,
5543 "protocol_version": constants.PROTOCOL_VERSION,
5544 "config_version": constants.CONFIG_VERSION,
5545 "os_api_version": max(constants.OS_API_VERSIONS),
5546 "export_version": constants.EXPORT_VERSION,
5547 "architecture": runtime.GetArchInfo(),
5548 "name": cluster.cluster_name,
5549 "master": cluster.master_node,
5550 "default_hypervisor": cluster.enabled_hypervisors[0],
5551 "enabled_hypervisors": cluster.enabled_hypervisors,
5552 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
5553 for hypervisor_name in cluster.enabled_hypervisors]),
5555 "beparams": cluster.beparams,
5556 "osparams": cluster.osparams,
5557 "nicparams": cluster.nicparams,
5558 "ndparams": cluster.ndparams,
5559 "candidate_pool_size": cluster.candidate_pool_size,
5560 "master_netdev": cluster.master_netdev,
5561 "volume_group_name": cluster.volume_group_name,
5562 "drbd_usermode_helper": cluster.drbd_usermode_helper,
5563 "file_storage_dir": cluster.file_storage_dir,
5564 "shared_file_storage_dir": cluster.shared_file_storage_dir,
5565 "maintain_node_health": cluster.maintain_node_health,
5566 "ctime": cluster.ctime,
5567 "mtime": cluster.mtime,
5568 "uuid": cluster.uuid,
5569 "tags": list(cluster.GetTags()),
5570 "uid_pool": cluster.uid_pool,
5571 "default_iallocator": cluster.default_iallocator,
5572 "reserved_lvs": cluster.reserved_lvs,
5573 "primary_ip_version": primary_ip_version,
5574 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
5575 "hidden_os": cluster.hidden_os,
5576 "blacklisted_os": cluster.blacklisted_os,
5582 class LUClusterConfigQuery(NoHooksLU):
5583 """Return configuration values.
5587 _FIELDS_DYNAMIC = utils.FieldSet()
5588 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
5589 "watcher_pause", "volume_group_name")
5591 def CheckArguments(self):
5592 _CheckOutputFields(static=self._FIELDS_STATIC,
5593 dynamic=self._FIELDS_DYNAMIC,
5594 selected=self.op.output_fields)
5596 def ExpandNames(self):
5597 self.needed_locks = {}
5599 def Exec(self, feedback_fn):
5600 """Dump a representation of the cluster config to the standard output.
5604 for field in self.op.output_fields:
5605 if field == "cluster_name":
5606 entry = self.cfg.GetClusterName()
5607 elif field == "master_node":
5608 entry = self.cfg.GetMasterNode()
5609 elif field == "drain_flag":
5610 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
5611 elif field == "watcher_pause":
5612 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
5613 elif field == "volume_group_name":
5614 entry = self.cfg.GetVGName()
5616 raise errors.ParameterError(field)
5617 values.append(entry)
5621 class LUInstanceActivateDisks(NoHooksLU):
5622 """Bring up an instance's disks.
5627 def ExpandNames(self):
5628 self._ExpandAndLockInstance()
5629 self.needed_locks[locking.LEVEL_NODE] = []
5630 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5632 def DeclareLocks(self, level):
5633 if level == locking.LEVEL_NODE:
5634 self._LockInstancesNodes()
5636 def CheckPrereq(self):
5637 """Check prerequisites.
5639 This checks that the instance is in the cluster.
5642 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5643 assert self.instance is not None, \
5644 "Cannot retrieve locked instance %s" % self.op.instance_name
5645 _CheckNodeOnline(self, self.instance.primary_node)
5647 def Exec(self, feedback_fn):
5648 """Activate the disks.
5651 disks_ok, disks_info = \
5652 _AssembleInstanceDisks(self, self.instance,
5653 ignore_size=self.op.ignore_size)
5655 raise errors.OpExecError("Cannot activate block devices")
5660 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5662 """Prepare the block devices for an instance.
5664 This sets up the block devices on all nodes.
5666 @type lu: L{LogicalUnit}
5667 @param lu: the logical unit on whose behalf we execute
5668 @type instance: L{objects.Instance}
5669 @param instance: the instance for whose disks we assemble
5670 @type disks: list of L{objects.Disk} or None
5671 @param disks: which disks to assemble (or all, if None)
5672 @type ignore_secondaries: boolean
5673 @param ignore_secondaries: if true, errors on secondary nodes
5674 won't result in an error return from the function
5675 @type ignore_size: boolean
5676 @param ignore_size: if true, the current known size of the disk
5677 will not be used during the disk activation, useful for cases
5678 when the size is wrong
5679 @return: False if the operation failed, otherwise a list of
5680 (host, instance_visible_name, node_visible_name)
5681 with the mapping from node devices to instance devices
5686 iname = instance.name
5687 disks = _ExpandCheckDisks(instance, disks)
5689 # With the two-pass mechanism we try to reduce the window of
5690 # opportunity for the race condition of switching DRBD to primary
5691 # before the handshake has occurred, but we do not eliminate it
5693 # The proper fix would be to wait (with some limits) until the
5694 # connection has been made and drbd transitions from WFConnection
5695 # into any other network-connected state (Connected, SyncTarget,
5698 # 1st pass, assemble on all nodes in secondary mode
5699 for idx, inst_disk in enumerate(disks):
5700 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
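# when ignore_size is requested, assemble a copy of the disk object with the
# recorded size cleared, so a stale size in the configuration is not enforced
# during activation (see the ignore_size parameter above)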
5702 node_disk = node_disk.Copy()
5703 node_disk.UnsetSize()
5704 lu.cfg.SetDiskID(node_disk, node)
5705 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5706 msg = result.fail_msg
5708 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5709 " (is_primary=False, pass=1): %s",
5710 inst_disk.iv_name, node, msg)
5711 if not ignore_secondaries:
5714 # FIXME: race condition on drbd migration to primary
5716 # 2nd pass, do only the primary node
5717 for idx, inst_disk in enumerate(disks):
5720 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5721 if node != instance.primary_node:
5724 node_disk = node_disk.Copy()
5725 node_disk.UnsetSize()
5726 lu.cfg.SetDiskID(node_disk, node)
5727 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5728 msg = result.fail_msg
5730 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5731 " (is_primary=True, pass=2): %s",
5732 inst_disk.iv_name, node, msg)
5735 dev_path = result.payload
5737 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5739 # leave the disks configured for the primary node
5740 # this is a workaround that would be fixed better by
5741 # improving the logical/physical id handling
5743 lu.cfg.SetDiskID(disk, instance.primary_node)
5745 return disks_ok, device_info
5748 def _StartInstanceDisks(lu, instance, force):
5749 """Start the disks of an instance.
5752 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
5753 ignore_secondaries=force)
5755 _ShutdownInstanceDisks(lu, instance)
5756 if force is not None and not force:
5757 lu.proc.LogWarning("", hint="If the message above refers to a"
5758 " secondary node,"
5759 " you can retry the operation using '--force'.")
5760 raise errors.OpExecError("Disk consistency error")
5763 class LUInstanceDeactivateDisks(NoHooksLU):
5764 """Shutdown an instance's disks.
5769 def ExpandNames(self):
5770 self._ExpandAndLockInstance()
5771 self.needed_locks[locking.LEVEL_NODE] = []
5772 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5774 def DeclareLocks(self, level):
5775 if level == locking.LEVEL_NODE:
5776 self._LockInstancesNodes()
5778 def CheckPrereq(self):
5779 """Check prerequisites.
5781 This checks that the instance is in the cluster.
5784 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5785 assert self.instance is not None, \
5786 "Cannot retrieve locked instance %s" % self.op.instance_name
5788 def Exec(self, feedback_fn):
5789 """Deactivate the disks
5792 instance = self.instance
5794 _ShutdownInstanceDisks(self, instance)
5796 _SafeShutdownInstanceDisks(self, instance)
5799 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
5800 """Shutdown block devices of an instance.
5802 This function checks if an instance is running, before calling
5803 _ShutdownInstanceDisks.
5806 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
5807 _ShutdownInstanceDisks(lu, instance, disks=disks)
5810 def _ExpandCheckDisks(instance, disks):
5811 """Return the instance disks selected by the disks list
5813 @type disks: list of L{objects.Disk} or None
5814 @param disks: selected disks
5815 @rtype: list of L{objects.Disk}
5816 @return: selected instance disks to act on
5820 return instance.disks
5822 if not set(disks).issubset(instance.disks):
5823 raise errors.ProgrammerError("Can only act on disks belonging to the"
5828 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
5829 """Shutdown block devices of an instance.
5831 This does the shutdown on all nodes of the instance.
5833 If ignore_primary is false, errors on the primary node are
5838 disks = _ExpandCheckDisks(instance, disks)
5841 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
5842 lu.cfg.SetDiskID(top_disk, node)
5843 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
5844 msg = result.fail_msg
5846 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
5847 disk.iv_name, node, msg)
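# count the failure unless it happened on the primary node with
# ignore_primary set, or on a secondary node that is marked offline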
5848 if ((node == instance.primary_node and not ignore_primary) or
5849 (node != instance.primary_node and not result.offline)):
5854 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5855 """Checks if a node has enough free memory.
5857 This function checks if a given node has the needed amount of free
5858 memory. In case the node has less memory or we cannot get the
5859 information from the node, this function raises an OpPrereqError
5862 @type lu: C{LogicalUnit}
5863 @param lu: a logical unit from which we get configuration data
5865 @param node: the node to check
5866 @type reason: C{str}
5867 @param reason: string to use in the error message
5868 @type requested: C{int}
5869 @param requested: the amount of memory in MiB to check for
5870 @type hypervisor_name: C{str}
5871 @param hypervisor_name: the hypervisor to ask for memory stats
5872 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
5873 we cannot check the node
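# Illustration (mirrors the call made by LUInstanceStartup further below):
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MEMORY], instance.hypervisor)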
5876 nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
5877 nodeinfo[node].Raise("Can't get data from node %s" % node,
5878 prereq=True, ecode=errors.ECODE_ENVIRON)
5879 free_mem = nodeinfo[node].payload.get("memory_free", None)
5880 if not isinstance(free_mem, int):
5881 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
5882 " was '%s'" % (node, free_mem),
5883 errors.ECODE_ENVIRON)
5884 if requested > free_mem:
5885 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
5886 " needed %s MiB, available %s MiB" %
5887 (node, reason, requested, free_mem),
5891 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
5892 """Checks if nodes have enough free disk space in the all VGs.
5894 This function check if all given nodes have the needed amount of
5895 free disk. In case any node has less disk or we cannot get the
5896 information from the node, this function raise an OpPrereqError
5899 @type lu: C{LogicalUnit}
5900 @param lu: a logical unit from which we get configuration data
5901 @type nodenames: C{list}
5902 @param nodenames: the list of node names to check
5903 @type req_sizes: C{dict}
5904 @param req_sizes: the hash of vg and corresponding amount of disk in
5906 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5907 or we cannot check the node
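# req_sizes is a mapping such as {"xenvg": 10240} (VG name -> required MiB);
# an empty mapping simply means nothing gets checked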
5910 for vg, req_size in req_sizes.items():
5911 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
5914 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5915 """Checks if nodes have enough free disk space in the specified VG.
5917 This function checks if all given nodes have the needed amount of
5918 free disk. In case any node has less disk or we cannot get the
5919 information from the node, this function raises an OpPrereqError
5922 @type lu: C{LogicalUnit}
5923 @param lu: a logical unit from which we get configuration data
5924 @type nodenames: C{list}
5925 @param nodenames: the list of node names to check
5927 @param vg: the volume group to check
5928 @type requested: C{int}
5929 @param requested: the amount of disk in MiB to check for
5930 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5931 or we cannot check the node
5934 nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5935 for node in nodenames:
5936 info = nodeinfo[node]
5937 info.Raise("Cannot get current information from node %s" % node,
5938 prereq=True, ecode=errors.ECODE_ENVIRON)
5939 vg_free = info.payload.get("vg_free", None)
5940 if not isinstance(vg_free, int):
5941 raise errors.OpPrereqError("Can't compute free disk space on node"
5942 " %s for vg %s, result was '%s'" %
5943 (node, vg, vg_free), errors.ECODE_ENVIRON)
5944 if requested > vg_free:
5945 raise errors.OpPrereqError("Not enough disk space on target node %s"
5946 " vg %s: required %d MiB, available %d MiB" %
5947 (node, vg, requested, vg_free),
5951 class LUInstanceStartup(LogicalUnit):
5952 """Starts an instance.
5955 HPATH = "instance-start"
5956 HTYPE = constants.HTYPE_INSTANCE
5959 def CheckArguments(self):
5961 if self.op.beparams:
5962 # fill the beparams dict
5963 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5965 def ExpandNames(self):
5966 self._ExpandAndLockInstance()
5968 def BuildHooksEnv(self):
5971 This runs on master, primary and secondary nodes of the instance.
5975 "FORCE": self.op.force,
5978 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5982 def BuildHooksNodes(self):
5983 """Build hooks nodes.
5986 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5989 def CheckPrereq(self):
5990 """Check prerequisites.
5992 This checks that the instance is in the cluster.
5995 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5996 assert self.instance is not None, \
5997 "Cannot retrieve locked instance %s" % self.op.instance_name
6000 if self.op.hvparams:
6001 # check hypervisor parameter syntax (locally)
6002 cluster = self.cfg.GetClusterInfo()
6003 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6004 filled_hvp = cluster.FillHV(instance)
6005 filled_hvp.update(self.op.hvparams)
6006 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6007 hv_type.CheckParameterSyntax(filled_hvp)
6008 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6010 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6012 if self.primary_offline and self.op.ignore_offline_nodes:
6013 self.proc.LogWarning("Ignoring offline primary node")
6015 if self.op.hvparams or self.op.beparams:
6016 self.proc.LogWarning("Overridden parameters are ignored")
6018 _CheckNodeOnline(self, instance.primary_node)
6020 bep = self.cfg.GetClusterInfo().FillBE(instance)
6022 # check bridges existence
6023 _CheckInstanceBridgesExist(self, instance)
6025 remote_info = self.rpc.call_instance_info(instance.primary_node,
6027 instance.hypervisor)
6028 remote_info.Raise("Error checking node %s" % instance.primary_node,
6029 prereq=True, ecode=errors.ECODE_ENVIRON)
6030 if not remote_info.payload: # not running already
6031 _CheckNodeFreeMemory(self, instance.primary_node,
6032 "starting instance %s" % instance.name,
6033 bep[constants.BE_MEMORY], instance.hypervisor)
6035 def Exec(self, feedback_fn):
6036 """Start the instance.
6039 instance = self.instance
6040 force = self.op.force
6042 if not self.op.no_remember:
6043 self.cfg.MarkInstanceUp(instance.name)
6045 if self.primary_offline:
6046 assert self.op.ignore_offline_nodes
6047 self.proc.LogInfo("Primary node offline, marked instance as started")
6049 node_current = instance.primary_node
6051 _StartInstanceDisks(self, instance, force)
6053 result = self.rpc.call_instance_start(node_current, instance,
6054 self.op.hvparams, self.op.beparams,
6055 self.op.startup_paused)
6056 msg = result.fail_msg
6058 _ShutdownInstanceDisks(self, instance)
6059 raise errors.OpExecError("Could not start instance: %s" % msg)
6062 class LUInstanceReboot(LogicalUnit):
6063 """Reboot an instance.
6066 HPATH = "instance-reboot"
6067 HTYPE = constants.HTYPE_INSTANCE
6070 def ExpandNames(self):
6071 self._ExpandAndLockInstance()
6073 def BuildHooksEnv(self):
6076 This runs on master, primary and secondary nodes of the instance.
6080 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6081 "REBOOT_TYPE": self.op.reboot_type,
6082 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6085 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6089 def BuildHooksNodes(self):
6090 """Build hooks nodes.
6093 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6096 def CheckPrereq(self):
6097 """Check prerequisites.
6099 This checks that the instance is in the cluster.
6102 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6103 assert self.instance is not None, \
6104 "Cannot retrieve locked instance %s" % self.op.instance_name
6106 _CheckNodeOnline(self, instance.primary_node)
6108 # check bridges existence
6109 _CheckInstanceBridgesExist(self, instance)
6111 def Exec(self, feedback_fn):
6112 """Reboot the instance.
6115 instance = self.instance
6116 ignore_secondaries = self.op.ignore_secondaries
6117 reboot_type = self.op.reboot_type
6119 remote_info = self.rpc.call_instance_info(instance.primary_node,
6121 instance.hypervisor)
6122 remote_info.Raise("Error checking node %s" % instance.primary_node)
6123 instance_running = bool(remote_info.payload)
6125 node_current = instance.primary_node
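# soft and hard reboots are delegated to the hypervisor on the primary node;
# a full reboot is emulated below as a shutdown followed by a fresh start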
6127 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6128 constants.INSTANCE_REBOOT_HARD]:
6129 for disk in instance.disks:
6130 self.cfg.SetDiskID(disk, node_current)
6131 result = self.rpc.call_instance_reboot(node_current, instance,
6133 self.op.shutdown_timeout)
6134 result.Raise("Could not reboot instance")
6136 if instance_running:
6137 result = self.rpc.call_instance_shutdown(node_current, instance,
6138 self.op.shutdown_timeout)
6139 result.Raise("Could not shutdown instance for full reboot")
6140 _ShutdownInstanceDisks(self, instance)
6142 self.LogInfo("Instance %s was already stopped, starting now",
6144 _StartInstanceDisks(self, instance, ignore_secondaries)
6145 result = self.rpc.call_instance_start(node_current, instance,
6147 msg = result.fail_msg
6149 _ShutdownInstanceDisks(self, instance)
6150 raise errors.OpExecError("Could not start instance for"
6151 " full reboot: %s" % msg)
6153 self.cfg.MarkInstanceUp(instance.name)
6156 class LUInstanceShutdown(LogicalUnit):
6157 """Shutdown an instance.
6160 HPATH = "instance-stop"
6161 HTYPE = constants.HTYPE_INSTANCE
6164 def ExpandNames(self):
6165 self._ExpandAndLockInstance()
6167 def BuildHooksEnv(self):
6170 This runs on master, primary and secondary nodes of the instance.
6173 env = _BuildInstanceHookEnvByObject(self, self.instance)
6174 env["TIMEOUT"] = self.op.timeout
6177 def BuildHooksNodes(self):
6178 """Build hooks nodes.
6181 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6184 def CheckPrereq(self):
6185 """Check prerequisites.
6187 This checks that the instance is in the cluster.
6190 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6191 assert self.instance is not None, \
6192 "Cannot retrieve locked instance %s" % self.op.instance_name
6194 self.primary_offline = \
6195 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6197 if self.primary_offline and self.op.ignore_offline_nodes:
6198 self.proc.LogWarning("Ignoring offline primary node")
6200 _CheckNodeOnline(self, self.instance.primary_node)
6202 def Exec(self, feedback_fn):
6203 """Shutdown the instance.
6206 instance = self.instance
6207 node_current = instance.primary_node
6208 timeout = self.op.timeout
6210 if not self.op.no_remember:
6211 self.cfg.MarkInstanceDown(instance.name)
6213 if self.primary_offline:
6214 assert self.op.ignore_offline_nodes
6215 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6217 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6218 msg = result.fail_msg
6220 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6222 _ShutdownInstanceDisks(self, instance)
6225 class LUInstanceReinstall(LogicalUnit):
6226 """Reinstall an instance.
6229 HPATH = "instance-reinstall"
6230 HTYPE = constants.HTYPE_INSTANCE
6233 def ExpandNames(self):
6234 self._ExpandAndLockInstance()
6236 def BuildHooksEnv(self):
6239 This runs on master, primary and secondary nodes of the instance.
6242 return _BuildInstanceHookEnvByObject(self, self.instance)
6244 def BuildHooksNodes(self):
6245 """Build hooks nodes.
6248 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6251 def CheckPrereq(self):
6252 """Check prerequisites.
6254 This checks that the instance is in the cluster and is not running.
6257 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6258 assert instance is not None, \
6259 "Cannot retrieve locked instance %s" % self.op.instance_name
6260 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6261 " offline, cannot reinstall")
6262 for node in instance.secondary_nodes:
6263 _CheckNodeOnline(self, node, "Instance secondary node offline,"
6264 " cannot reinstall")
6266 if instance.disk_template == constants.DT_DISKLESS:
6267 raise errors.OpPrereqError("Instance '%s' has no disks" %
6268 self.op.instance_name,
6270 _CheckInstanceDown(self, instance, "cannot reinstall")
6272 if self.op.os_type is not None:
6274 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6275 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6276 instance_os = self.op.os_type
6278 instance_os = instance.os
6280 nodelist = list(instance.all_nodes)
6282 if self.op.osparams:
6283 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6284 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6285 self.os_inst = i_osdict # the new dict (without defaults)
6289 self.instance = instance
6291 def Exec(self, feedback_fn):
6292 """Reinstall the instance.
6295 inst = self.instance
6297 if self.op.os_type is not None:
6298 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6299 inst.os = self.op.os_type
6300 # Write to configuration
6301 self.cfg.Update(inst, feedback_fn)
6303 _StartInstanceDisks(self, inst, None)
6305 feedback_fn("Running the instance OS create scripts...")
6306 # FIXME: pass debug option from opcode to backend
6307 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
6308 self.op.debug_level,
6309 osparams=self.os_inst)
6310 result.Raise("Could not install OS for instance %s on node %s" %
6311 (inst.name, inst.primary_node))
6313 _ShutdownInstanceDisks(self, inst)
6316 class LUInstanceRecreateDisks(LogicalUnit):
6317 """Recreate an instance's missing disks.
6320 HPATH = "instance-recreate-disks"
6321 HTYPE = constants.HTYPE_INSTANCE
6324 def CheckArguments(self):
6325 # normalise the disk list
6326 self.op.disks = sorted(frozenset(self.op.disks))
6328 def ExpandNames(self):
6329 self._ExpandAndLockInstance()
6330 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6332 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6333 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6335 self.needed_locks[locking.LEVEL_NODE] = []
6337 def DeclareLocks(self, level):
6338 if level == locking.LEVEL_NODE:
6339 # if we replace the nodes, we only need to lock the old primary,
6340 # otherwise we need to lock all nodes for disk re-creation
6341 primary_only = bool(self.op.nodes)
6342 self._LockInstancesNodes(primary_only=primary_only)
6344 def BuildHooksEnv(self):
6347 This runs on master, primary and secondary nodes of the instance.
6350 return _BuildInstanceHookEnvByObject(self, self.instance)
6352 def BuildHooksNodes(self):
6353 """Build hooks nodes.
6356 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6359 def CheckPrereq(self):
6360 """Check prerequisites.
6362 This checks that the instance is in the cluster and is not running.
6365 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6366 assert instance is not None, \
6367 "Cannot retrieve locked instance %s" % self.op.instance_name
6369 if len(self.op.nodes) != len(instance.all_nodes):
6370 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6371 " %d replacement nodes were specified" %
6372 (instance.name, len(instance.all_nodes),
6373 len(self.op.nodes)),
6375 assert instance.disk_template != constants.DT_DRBD8 or \
6376 len(self.op.nodes) == 2
6377 assert instance.disk_template != constants.DT_PLAIN or \
6378 len(self.op.nodes) == 1
6379 primary_node = self.op.nodes[0]
6381 primary_node = instance.primary_node
6382 _CheckNodeOnline(self, primary_node)
6384 if instance.disk_template == constants.DT_DISKLESS:
6385 raise errors.OpPrereqError("Instance '%s' has no disks" %
6386 self.op.instance_name, errors.ECODE_INVAL)
6387 # if we replace nodes *and* the old primary is offline, we don't
6389 assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
6390 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6391 if not (self.op.nodes and old_pnode.offline):
6392 _CheckInstanceDown(self, instance, "cannot recreate disks")
6394 if not self.op.disks:
6395 self.op.disks = range(len(instance.disks))
6397 for idx in self.op.disks:
6398 if idx >= len(instance.disks):
6399 raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6401 if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6402 raise errors.OpPrereqError("Can't recreate disks partially and"
6403 " change the nodes at the same time",
6405 self.instance = instance
6407 def Exec(self, feedback_fn):
6408 """Recreate the disks.
6411 instance = self.instance
6414 mods = [] # keeps track of needed logical_id changes
6416 for idx, disk in enumerate(instance.disks):
6417 if idx not in self.op.disks: # disk idx has not been passed in
6420 # update secondaries for disks, if needed
6422 if disk.dev_type == constants.LD_DRBD8:
6423 # need to update the nodes and minors
6424 assert len(self.op.nodes) == 2
6425 assert len(disk.logical_id) == 6 # otherwise disk internals
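# the DRBD8 logical_id is (node_a, node_b, port, minor_a, minor_b, secret);
# keep the port and secret, swap in the new nodes and freshly allocated minors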
6427 (_, _, old_port, _, _, old_secret) = disk.logical_id
6428 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6429 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6430 new_minors[0], new_minors[1], old_secret)
6431 assert len(disk.logical_id) == len(new_id)
6432 mods.append((idx, new_id))
6434 # now that we have passed all asserts above, we can apply the mods
6435 # in a single run (to avoid partial changes)
6436 for idx, new_id in mods:
6437 instance.disks[idx].logical_id = new_id
6439 # change primary node, if needed
6441 instance.primary_node = self.op.nodes[0]
6442 self.LogWarning("Changing the instance's nodes, you will have to"
6443 " remove any disks left on the older nodes manually")
6446 self.cfg.Update(instance, feedback_fn)
6448 _CreateDisks(self, instance, to_skip=to_skip)
6451 class LUInstanceRename(LogicalUnit):
6452 """Rename an instance.
6455 HPATH = "instance-rename"
6456 HTYPE = constants.HTYPE_INSTANCE
6458 def CheckArguments(self):
6462 if self.op.ip_check and not self.op.name_check:
6463 # TODO: make the ip check more flexible and not depend on the name check
6464 raise errors.OpPrereqError("IP address check requires a name check",
6467 def BuildHooksEnv(self):
6470 This runs on master, primary and secondary nodes of the instance.
6473 env = _BuildInstanceHookEnvByObject(self, self.instance)
6474 env["INSTANCE_NEW_NAME"] = self.op.new_name
6477 def BuildHooksNodes(self):
6478 """Build hooks nodes.
6481 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6484 def CheckPrereq(self):
6485 """Check prerequisites.
6487 This checks that the instance is in the cluster and is not running.
6490 self.op.instance_name = _ExpandInstanceName(self.cfg,
6491 self.op.instance_name)
6492 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6493 assert instance is not None
6494 _CheckNodeOnline(self, instance.primary_node)
6495 _CheckInstanceDown(self, instance, "cannot rename")
6496 self.instance = instance
6498 new_name = self.op.new_name
6499 if self.op.name_check:
6500 hostname = netutils.GetHostname(name=new_name)
6501 if hostname.name != new_name:
6502 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6504 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6505 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6506 " same as given hostname '%s'") %
6507 (hostname.name, self.op.new_name),
6509 new_name = self.op.new_name = hostname.name
6510 if (self.op.ip_check and
6511 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6512 raise errors.OpPrereqError("IP %s of instance %s already in use" %
6513 (hostname.ip, new_name),
6514 errors.ECODE_NOTUNIQUE)
6516 instance_list = self.cfg.GetInstanceList()
6517 if new_name in instance_list and new_name != instance.name:
6518 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6519 new_name, errors.ECODE_EXISTS)
6521 def Exec(self, feedback_fn):
6522 """Rename the instance.
6525 inst = self.instance
6526 old_name = inst.name
6528 rename_file_storage = False
6529 if (inst.disk_template in constants.DTS_FILEBASED and
6530 self.op.new_name != inst.name):
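# for file-based disks the logical_id is (file_driver, file_path), so the
# dirname of the first disk's path is the instance's file storage directory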
6531 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6532 rename_file_storage = True
6534 self.cfg.RenameInstance(inst.name, self.op.new_name)
6535 # Change the instance lock. This is definitely safe while we hold the BGL.
6536 # Otherwise the new lock would have to be added in acquired mode.
6538 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6539 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6541 # re-read the instance from the configuration after rename
6542 inst = self.cfg.GetInstanceInfo(self.op.new_name)
6544 if rename_file_storage:
6545 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6546 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6547 old_file_storage_dir,
6548 new_file_storage_dir)
6549 result.Raise("Could not rename on node %s directory '%s' to '%s'"
6550 " (but the instance has been renamed in Ganeti)" %
6551 (inst.primary_node, old_file_storage_dir,
6552 new_file_storage_dir))
6554 _StartInstanceDisks(self, inst, None)
6556 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6557 old_name, self.op.debug_level)
6558 msg = result.fail_msg
6560 msg = ("Could not run OS rename script for instance %s on node %s"
6561 " (but the instance has been renamed in Ganeti): %s" %
6562 (inst.name, inst.primary_node, msg))
6563 self.proc.LogWarning(msg)
6565 _ShutdownInstanceDisks(self, inst)
6570 class LUInstanceRemove(LogicalUnit):
6571 """Remove an instance.
6574 HPATH = "instance-remove"
6575 HTYPE = constants.HTYPE_INSTANCE
6578 def ExpandNames(self):
6579 self._ExpandAndLockInstance()
6580 self.needed_locks[locking.LEVEL_NODE] = []
6581 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6583 def DeclareLocks(self, level):
6584 if level == locking.LEVEL_NODE:
6585 self._LockInstancesNodes()
6587 def BuildHooksEnv(self):
6590 This runs on master, primary and secondary nodes of the instance.
6593 env = _BuildInstanceHookEnvByObject(self, self.instance)
6594 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6597 def BuildHooksNodes(self):
6598 """Build hooks nodes.
6601 nl = [self.cfg.GetMasterNode()]
6602 nl_post = list(self.instance.all_nodes) + nl
6603 return (nl, nl_post)
6605 def CheckPrereq(self):
6606 """Check prerequisites.
6608 This checks that the instance is in the cluster.
6611 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6612 assert self.instance is not None, \
6613 "Cannot retrieve locked instance %s" % self.op.instance_name
6615 def Exec(self, feedback_fn):
6616 """Remove the instance.
6619 instance = self.instance
6620 logging.info("Shutting down instance %s on node %s",
6621 instance.name, instance.primary_node)
6623 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6624 self.op.shutdown_timeout)
6625 msg = result.fail_msg
6627 if self.op.ignore_failures:
6628 feedback_fn("Warning: can't shutdown instance: %s" % msg)
6630 raise errors.OpExecError("Could not shutdown instance %s on"
6632 (instance.name, instance.primary_node, msg))
6634 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6637 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6638 """Utility function to remove an instance.
6641 logging.info("Removing block devices for instance %s", instance.name)
6643 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
6644 if not ignore_failures:
6645 raise errors.OpExecError("Can't remove instance's disks")
6646 feedback_fn("Warning: can't remove instance's disks")
6648 logging.info("Removing instance %s out of cluster config", instance.name)
6650 lu.cfg.RemoveInstance(instance.name)
6652 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6653 "Instance lock removal conflict"
6655 # Remove lock for the instance
6656 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6659 class LUInstanceQuery(NoHooksLU):
6660 """Logical unit for querying instances.
6663 # pylint: disable=W0142
6666 def CheckArguments(self):
6667 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6668 self.op.output_fields, self.op.use_locking)
6670 def ExpandNames(self):
6671 self.iq.ExpandNames(self)
6673 def DeclareLocks(self, level):
6674 self.iq.DeclareLocks(self, level)
6676 def Exec(self, feedback_fn):
6677 return self.iq.OldStyleQuery(self)
6680 class LUInstanceFailover(LogicalUnit):
6681 """Failover an instance.
6684 HPATH = "instance-failover"
6685 HTYPE = constants.HTYPE_INSTANCE
6688 def CheckArguments(self):
6689 """Check the arguments.
6692 self.iallocator = getattr(self.op, "iallocator", None)
6693 self.target_node = getattr(self.op, "target_node", None)
6695 def ExpandNames(self):
6696 self._ExpandAndLockInstance()
6698 if self.op.target_node is not None:
6699 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6701 self.needed_locks[locking.LEVEL_NODE] = []
6702 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6704 ignore_consistency = self.op.ignore_consistency
6705 shutdown_timeout = self.op.shutdown_timeout
6706 self._migrater = TLMigrateInstance(self, self.op.instance_name,
6709 ignore_consistency=ignore_consistency,
6710 shutdown_timeout=shutdown_timeout)
6711 self.tasklets = [self._migrater]
6713 def DeclareLocks(self, level):
6714 if level == locking.LEVEL_NODE:
6715 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6716 if instance.disk_template in constants.DTS_EXT_MIRROR:
6717 if self.op.target_node is None:
6718 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6720 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6721 self.op.target_node]
6722 del self.recalculate_locks[locking.LEVEL_NODE]
6724 self._LockInstancesNodes()
6726 def BuildHooksEnv(self):
6729 This runs on master, primary and secondary nodes of the instance.
6732 instance = self._migrater.instance
6733 source_node = instance.primary_node
6734 target_node = self.op.target_node
6736 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6737 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6738 "OLD_PRIMARY": source_node,
6739 "NEW_PRIMARY": target_node,
6742 if instance.disk_template in constants.DTS_INT_MIRROR:
6743 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6744 env["NEW_SECONDARY"] = source_node
6746 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6748 env.update(_BuildInstanceHookEnvByObject(self, instance))
6752 def BuildHooksNodes(self):
6753 """Build hooks nodes.
6756 instance = self._migrater.instance
6757 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6758 return (nl, nl + [instance.primary_node])
6761 class LUInstanceMigrate(LogicalUnit):
6762 """Migrate an instance.
6764 This is migration without shutting down the instance, as opposed to
6765 failover, which is done with a shutdown.
6768 HPATH = "instance-migrate"
6769 HTYPE = constants.HTYPE_INSTANCE
6772 def ExpandNames(self):
6773 self._ExpandAndLockInstance()
6775 if self.op.target_node is not None:
6776 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6778 self.needed_locks[locking.LEVEL_NODE] = []
6779 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6781 self._migrater = TLMigrateInstance(self, self.op.instance_name,
6782 cleanup=self.op.cleanup,
6784 fallback=self.op.allow_failover)
6785 self.tasklets = [self._migrater]
6787 def DeclareLocks(self, level):
6788 if level == locking.LEVEL_NODE:
6789 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6790 if instance.disk_template in constants.DTS_EXT_MIRROR:
6791 if self.op.target_node is None:
6792 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6794 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6795 self.op.target_node]
6796 del self.recalculate_locks[locking.LEVEL_NODE]
6798 self._LockInstancesNodes()
6800 def BuildHooksEnv(self):
6803 This runs on master, primary and secondary nodes of the instance.
6806 instance = self._migrater.instance
6807 source_node = instance.primary_node
6808 target_node = self.op.target_node
6809 env = _BuildInstanceHookEnvByObject(self, instance)
6811 "MIGRATE_LIVE": self._migrater.live,
6812 "MIGRATE_CLEANUP": self.op.cleanup,
6813 "OLD_PRIMARY": source_node,
6814 "NEW_PRIMARY": target_node,
6817 if instance.disk_template in constants.DTS_INT_MIRROR:
6818 env["OLD_SECONDARY"] = target_node
6819 env["NEW_SECONDARY"] = source_node
6821 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6825 def BuildHooksNodes(self):
6826 """Build hooks nodes.
6829 instance = self._migrater.instance
6830 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6831 return (nl, nl + [instance.primary_node])
6834 class LUInstanceMove(LogicalUnit):
6835 """Move an instance by data-copying.
6838 HPATH = "instance-move"
6839 HTYPE = constants.HTYPE_INSTANCE
6842 def ExpandNames(self):
6843 self._ExpandAndLockInstance()
6844 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6845 self.op.target_node = target_node
6846 self.needed_locks[locking.LEVEL_NODE] = [target_node]
6847 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6849 def DeclareLocks(self, level):
6850 if level == locking.LEVEL_NODE:
6851 self._LockInstancesNodes(primary_only=True)
6853 def BuildHooksEnv(self):
6856 This runs on master, primary and secondary nodes of the instance.
6860 "TARGET_NODE": self.op.target_node,
6861 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6863 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6866 def BuildHooksNodes(self):
6867 """Build hooks nodes.
6871 self.cfg.GetMasterNode(),
6872 self.instance.primary_node,
6873 self.op.target_node,
6877 def CheckPrereq(self):
6878 """Check prerequisites.
6880 This checks that the instance is in the cluster.
6883 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6884 assert self.instance is not None, \
6885 "Cannot retrieve locked instance %s" % self.op.instance_name
6887 node = self.cfg.GetNodeInfo(self.op.target_node)
6888 assert node is not None, \
6889 "Cannot retrieve locked node %s" % self.op.target_node
6891 self.target_node = target_node = node.name
6893 if target_node == instance.primary_node:
6894 raise errors.OpPrereqError("Instance %s is already on the node %s" %
6895 (instance.name, target_node),
6898 bep = self.cfg.GetClusterInfo().FillBE(instance)
6900 for idx, dsk in enumerate(instance.disks):
6901 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6902 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6903 " cannot copy" % idx, errors.ECODE_STATE)
6905 _CheckNodeOnline(self, target_node)
6906 _CheckNodeNotDrained(self, target_node)
6907 _CheckNodeVmCapable(self, target_node)
6909 if instance.admin_up:
6910 # check memory requirements on the secondary node
6911 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6912 instance.name, bep[constants.BE_MEMORY],
6913 instance.hypervisor)
6915 self.LogInfo("Not checking memory on the secondary node as"
6916 " instance will not be started")
6918 # check bridge existence
6919 _CheckInstanceBridgesExist(self, instance, node=target_node)
6921 def Exec(self, feedback_fn):
6922 """Move an instance.
6924 The move is done by shutting it down on its present node, copying
6925 the data over (slow) and starting it on the new node.
6928 instance = self.instance
6930 source_node = instance.primary_node
6931 target_node = self.target_node
6933 self.LogInfo("Shutting down instance %s on source node %s",
6934 instance.name, source_node)
6936 result = self.rpc.call_instance_shutdown(source_node, instance,
6937 self.op.shutdown_timeout)
6938 msg = result.fail_msg
6940 if self.op.ignore_consistency:
6941 self.proc.LogWarning("Could not shutdown instance %s on node %s."
6942 " Proceeding anyway. Please make sure node"
6943 " %s is down. Error details: %s",
6944 instance.name, source_node, source_node, msg)
6946 raise errors.OpExecError("Could not shutdown instance %s on"
6948 (instance.name, source_node, msg))
6950 # create the target disks
6952 _CreateDisks(self, instance, target_node=target_node)
6953 except errors.OpExecError:
6954 self.LogWarning("Device creation failed, reverting...")
6956 _RemoveDisks(self, instance, target_node=target_node)
6958 self.cfg.ReleaseDRBDMinors(instance.name)
6961 cluster_name = self.cfg.GetClusterInfo().cluster_name
6964 # activate, get path, copy the data over
6965 for idx, disk in enumerate(instance.disks):
6966 self.LogInfo("Copying data for disk %d", idx)
6967 result = self.rpc.call_blockdev_assemble(target_node, disk,
6968 instance.name, True, idx)
6970 self.LogWarning("Can't assemble newly created disk %d: %s",
6971 idx, result.fail_msg)
6972 errs.append(result.fail_msg)
6974 dev_path = result.payload
6975 result = self.rpc.call_blockdev_export(source_node, disk,
6976 target_node, dev_path,
6979 self.LogWarning("Can't copy data over for disk %d: %s",
6980 idx, result.fail_msg)
6981 errs.append(result.fail_msg)
6985 self.LogWarning("Some disks failed to copy, aborting")
6987 _RemoveDisks(self, instance, target_node=target_node)
6989 self.cfg.ReleaseDRBDMinors(instance.name)
6990 raise errors.OpExecError("Errors during disk copy: %s" %
6993 instance.primary_node = target_node
6994 self.cfg.Update(instance, feedback_fn)
6996 self.LogInfo("Removing the disks on the original node")
6997 _RemoveDisks(self, instance, target_node=source_node)
6999 # Only start the instance if it's marked as up
7000 if instance.admin_up:
7001 self.LogInfo("Starting instance %s on node %s",
7002 instance.name, target_node)
7004 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7005 ignore_secondaries=True)
7007 _ShutdownInstanceDisks(self, instance)
7008 raise errors.OpExecError("Can't activate the instance's disks")
7010 result = self.rpc.call_instance_start(target_node, instance,
7012 msg = result.fail_msg
7014 _ShutdownInstanceDisks(self, instance)
7015 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7016 (instance.name, target_node, msg))
7019 class LUNodeMigrate(LogicalUnit):
7020 """Migrate all instances from a node.
7023 HPATH = "node-migrate"
7024 HTYPE = constants.HTYPE_NODE
7027 def CheckArguments(self):
7030 def ExpandNames(self):
7031 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7033 self.share_locks = _ShareAll()
7034 self.needed_locks = {
7035 locking.LEVEL_NODE: [self.op.node_name],
7038 def BuildHooksEnv(self):
7041 This runs on the master, the primary and all the secondaries.
7045 "NODE_NAME": self.op.node_name,
7048 def BuildHooksNodes(self):
7049 """Build hooks nodes.
7052 nl = [self.cfg.GetMasterNode()]
7055 def CheckPrereq(self):
7058 def Exec(self, feedback_fn):
7059 # Prepare jobs to migrate the node's primary instances
7061 [opcodes.OpInstanceMigrate(instance_name=inst.name,
7064 iallocator=self.op.iallocator,
7065 target_node=self.op.target_node)]
7066 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7069 # TODO: Run iallocator in this opcode and pass correct placement options to
7070 # OpInstanceMigrate. Since other jobs can modify the cluster between
7071 # running the iallocator and the actual migration, a good consistency model
7072 # will have to be found.
7074 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7075 frozenset([self.op.node_name]))
7077 return ResultWithJobs(jobs)
7080 class TLMigrateInstance(Tasklet):
7081 """Tasklet class for instance migration.
7084 @ivar live: whether the migration will be done live or non-live;
7085 this variable is initialized only after CheckPrereq has run
7086 @type cleanup: boolean
7087 @ivar cleanup: Whether we are cleaning up from a failed migration
7088 @type iallocator: string
7089 @ivar iallocator: The iallocator used to determine target_node
7090 @type target_node: string
7091 @ivar target_node: If given, the target_node to reallocate the instance to
7092 @type failover: boolean
7093 @ivar failover: Whether operation results in failover or migration
7094 @type fallback: boolean
7095 @ivar fallback: Whether fallback to failover is allowed if migration not
7097 @type ignore_consistency: boolean
7098 @ivar ignore_consistency: Whether we should ignore consistency between source
7100 @type shutdown_timeout: int
7101 @ivar shutdown_timeout: the shutdown timeout to use in case of failover
7104 def __init__(self, lu, instance_name, cleanup=False,
7105 failover=False, fallback=False,
7106 ignore_consistency=False,
7107 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
7108 """Initializes this class.
7111 Tasklet.__init__(self, lu)
7114 self.instance_name = instance_name
7115 self.cleanup = cleanup
7116 self.live = False # will be overridden later
7117 self.failover = failover
7118 self.fallback = fallback
7119 self.ignore_consistency = ignore_consistency
7120 self.shutdown_timeout = shutdown_timeout
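# For reference, LUInstanceFailover/LUInstanceMigrate above construct this
# tasklet in their ExpandNames, roughly:
#   TLMigrateInstance(self, self.op.instance_name, cleanup=self.op.cleanup,
#                     fallback=self.op.allow_failover)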
7122 def CheckPrereq(self):
7123 """Check prerequisites.
7125 This checks that the instance is in the cluster.
7128 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7129 instance = self.cfg.GetInstanceInfo(instance_name)
7130 assert instance is not None
7131 self.instance = instance
7133 if (not self.cleanup and not instance.admin_up and not self.failover and
7135 self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
7137 self.failover = True
7139 if instance.disk_template not in constants.DTS_MIRRORED:
7144 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7145 " %s" % (instance.disk_template, text),
7148 if instance.disk_template in constants.DTS_EXT_MIRROR:
7149 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7151 if self.lu.op.iallocator:
7152 self._RunAllocator()
7154 # We set self.target_node as it is required by
7156 self.target_node = self.lu.op.target_node
7158 # self.target_node is already populated, either directly or by the
7160 target_node = self.target_node
7161 if self.target_node == instance.primary_node:
7162 raise errors.OpPrereqError("Cannot migrate instance %s"
7163 " to its primary (%s)" %
7164 (instance.name, instance.primary_node))
7166 if len(self.lu.tasklets) == 1:
7167 # It is safe to release locks only when we're the only tasklet
7169 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7170 keep=[instance.primary_node, self.target_node])
7173 secondary_nodes = instance.secondary_nodes
7174 if not secondary_nodes:
7175 raise errors.ConfigurationError("No secondary node but using"
7176 " %s disk template" %
7177 instance.disk_template)
7178 target_node = secondary_nodes[0]
7179 if self.lu.op.iallocator or (self.lu.op.target_node and
7180 self.lu.op.target_node != target_node):
7182 text = "failed over"
7185 raise errors.OpPrereqError("Instances with disk template %s cannot"
7186 " be %s to arbitrary nodes"
7187 " (neither an iallocator nor a target"
7188 " node can be passed)" %
7189 (instance.disk_template, text),
7192 i_be = self.cfg.GetClusterInfo().FillBE(instance)
7194 # check memory requirements on the secondary node
7195 if not self.cleanup and (not self.failover or instance.admin_up):
7196 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7197 instance.name, i_be[constants.BE_MEMORY],
7198 instance.hypervisor)
7200 self.lu.LogInfo("Not checking memory on the secondary node as"
7201 " instance will not be started")
7203 # check bridge existence
7204 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7206 if not self.cleanup:
7207 _CheckNodeNotDrained(self.lu, target_node)
7208 if not self.failover:
7209 result = self.rpc.call_instance_migratable(instance.primary_node,
7211 if result.fail_msg and self.fallback:
7212 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7214 self.failover = True
7216 result.Raise("Can't migrate, please use failover",
7217 prereq=True, ecode=errors.ECODE_STATE)
7219 assert not (self.failover and self.cleanup)
7221 if not self.failover:
7222 if self.lu.op.live is not None and self.lu.op.mode is not None:
7223 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7224 " parameters are accepted",
7226 if self.lu.op.live is not None:
7228 self.lu.op.mode = constants.HT_MIGRATION_LIVE
7230 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7231 # reset the 'live' parameter to None so that repeated
7232 # invocations of CheckPrereq do not raise an exception
7233 self.lu.op.live = None
7234 elif self.lu.op.mode is None:
7235 # read the default value from the hypervisor
7236 i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7238 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7240 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7242 # Failover is never live
7245 def _RunAllocator(self):
7246 """Run the allocator based on input opcode.
7249 ial = IAllocator(self.cfg, self.rpc,
7250 mode=constants.IALLOCATOR_MODE_RELOC,
7251 name=self.instance_name,
7252 # TODO See why hail breaks with a single node below
7253 relocate_from=[self.instance.primary_node,
7254 self.instance.primary_node],
7257 ial.Run(self.lu.op.iallocator)
7260 raise errors.OpPrereqError("Can't compute nodes using"
7261 " iallocator '%s': %s" %
7262 (self.lu.op.iallocator, ial.info),
7264 if len(ial.result) != ial.required_nodes:
7265 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7266 " of nodes (%s), required %s" %
7267 (self.lu.op.iallocator, len(ial.result),
7268 ial.required_nodes), errors.ECODE_FAULT)
7269 self.target_node = ial.result[0]
7270 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7271 self.instance_name, self.lu.op.iallocator,
7272 utils.CommaJoin(ial.result))
7274 def _WaitUntilSync(self):
7275 """Poll with custom rpc for disk sync.
7277 This uses our own step-based rpc call.
7280 self.feedback_fn("* wait until resync is done")
7284 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7286 self.instance.disks)
7288 for node, nres in result.items():
7289 nres.Raise("Cannot resync disks on node %s" % node)
7290 node_done, node_percent = nres.payload
7291 all_done = all_done and node_done
7292 if node_percent is not None:
7293 min_percent = min(min_percent, node_percent)
7295 if min_percent < 100:
7296 self.feedback_fn(" - progress: %.1f%%" % min_percent)
7299 def _EnsureSecondary(self, node):
7300 """Demote a node to secondary.
7303 self.feedback_fn("* switching node %s to secondary mode" % node)
7305 for dev in self.instance.disks:
7306 self.cfg.SetDiskID(dev, node)
7308 result = self.rpc.call_blockdev_close(node, self.instance.name,
7309 self.instance.disks)
7310 result.Raise("Cannot change disk to secondary on node %s" % node)
7312 def _GoStandalone(self):
7313 """Disconnect from the network.
7316 self.feedback_fn("* changing into standalone mode")
7317 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7318 self.instance.disks)
7319 for node, nres in result.items():
7320 nres.Raise("Cannot disconnect disks node %s" % node)
7322 def _GoReconnect(self, multimaster):
7323 """Reconnect to the network.
7329 msg = "single-master"
7330 self.feedback_fn("* changing disks into %s mode" % msg)
7331 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7332 self.instance.disks,
7333 self.instance.name, multimaster)
7334 for node, nres in result.items():
7335 nres.Raise("Cannot change disks config on node %s" % node)
7337 def _ExecCleanup(self):
7338 """Try to cleanup after a failed migration.
7340 The cleanup is done by:
7341 - check that the instance is running only on one node
7342 (and update the config if needed)
7343 - change disks on its secondary node to secondary
7344 - wait until disks are fully synchronized
7345 - disconnect from the network
7346 - change disks into single-master mode
7347 - wait again until disks are fully synchronized
7350 instance = self.instance
7351 target_node = self.target_node
7352 source_node = self.source_node
7354 # check running on only one node
7355 self.feedback_fn("* checking where the instance actually runs"
7356 " (if this hangs, the hypervisor might be in"
7358 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7359 for node, result in ins_l.items():
7360 result.Raise("Can't contact node %s" % node)
7362 runningon_source = instance.name in ins_l[source_node].payload
7363 runningon_target = instance.name in ins_l[target_node].payload
7365 if runningon_source and runningon_target:
7366 raise errors.OpExecError("Instance seems to be running on two nodes,"
7367 " or the hypervisor is confused; you will have"
7368 " to ensure manually that it runs only on one"
7369 " and restart this operation")
7371 if not (runningon_source or runningon_target):
7372 raise errors.OpExecError("Instance does not seem to be running at all;"
7373 " in this case it's safer to repair by"
7374 " running 'gnt-instance stop' to ensure disk"
7375 " shutdown, and then restarting it")
7377 if runningon_target:
7378 # the migration has actually succeeded, we need to update the config
7379 self.feedback_fn("* instance running on secondary node (%s),"
7380 " updating config" % target_node)
7381 instance.primary_node = target_node
7382 self.cfg.Update(instance, self.feedback_fn)
7383 demoted_node = source_node
7385 self.feedback_fn("* instance confirmed to be running on its"
7386 " primary node (%s)" % source_node)
7387 demoted_node = target_node
7389 if instance.disk_template in constants.DTS_INT_MIRROR:
7390 self._EnsureSecondary(demoted_node)
7392 self._WaitUntilSync()
7393 except errors.OpExecError:
7394 # we ignore errors here, since if the device is standalone, it
7395 # won't be able to sync
7397 self._GoStandalone()
7398 self._GoReconnect(False)
7399 self._WaitUntilSync()
7401 self.feedback_fn("* done")
7403 def _RevertDiskStatus(self):
7404 """Try to revert the disk status after a failed migration.
7407 target_node = self.target_node
7408 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7412 self._EnsureSecondary(target_node)
7413 self._GoStandalone()
7414 self._GoReconnect(False)
7415 self._WaitUntilSync()
7416 except errors.OpExecError, err:
7417 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7418 " please try to recover the instance manually;"
7419 " error '%s'" % str(err))
7421 def _AbortMigration(self):
7422 """Call the hypervisor code to abort a started migration.
7425 instance = self.instance
7426 target_node = self.target_node
7427 migration_info = self.migration_info
7429 abort_result = self.rpc.call_finalize_migration(target_node,
7433 abort_msg = abort_result.fail_msg
7435 logging.error("Aborting migration failed on target node %s: %s",
7436 target_node, abort_msg)
7437 # Don't raise an exception here, as we still have to try to revert the
7438 # disk status, even if this step failed.
7440 def _ExecMigration(self):
7441 """Migrate an instance.
7443 The migration is done by:
7444 - change the disks into dual-master mode
7445 - wait until disks are fully synchronized again
7446 - migrate the instance
7447 - change disks on the new secondary node (the old primary) to secondary
7448 - wait until disks are fully synchronized
7449 - change disks into single-master mode
7452 instance = self.instance
7453 target_node = self.target_node
7454 source_node = self.source_node
7456 # Check for hypervisor version mismatch and warn the user.
7457 nodeinfo = self.rpc.call_node_info([source_node, target_node],
7458 None, self.instance.hypervisor)
7459 src_info = nodeinfo[source_node]
7460 dst_info = nodeinfo[target_node]
7462 if ((constants.HV_NODEINFO_KEY_VERSION in src_info.payload) and
7463 (constants.HV_NODEINFO_KEY_VERSION in dst_info.payload)):
7464 src_version = src_info.payload[constants.HV_NODEINFO_KEY_VERSION]
7465 dst_version = dst_info.payload[constants.HV_NODEINFO_KEY_VERSION]
7466 if src_version != dst_version:
7467 self.feedback_fn("* warning: hypervisor version mismatch between"
7468 " source (%s) and target (%s) node" %
7469 (src_version, dst_version))
7471 self.feedback_fn("* checking disk consistency between source and target")
7472 for dev in instance.disks:
7473 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7474 raise errors.OpExecError("Disk %s is degraded or not fully"
7475 " synchronized on target node,"
7476 " aborting migration" % dev.iv_name)
7478 # First get the migration information from the remote node
7479 result = self.rpc.call_migration_info(source_node, instance)
7480 msg = result.fail_msg
7482 log_err = ("Failed fetching source migration information from %s: %s" %
7484 logging.error(log_err)
7485 raise errors.OpExecError(log_err)
7487 self.migration_info = migration_info = result.payload
7489 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7490 # Then switch the disks to master/master mode
7491 self._EnsureSecondary(target_node)
7492 self._GoStandalone()
7493 self._GoReconnect(True)
7494 self._WaitUntilSync()
7496 self.feedback_fn("* preparing %s to accept the instance" % target_node)
7497 result = self.rpc.call_accept_instance(target_node,
7500 self.nodes_ip[target_node])
7502 msg = result.fail_msg
7504 logging.error("Instance pre-migration failed, trying to revert"
7505 " disk status: %s", msg)
7506 self.feedback_fn("Pre-migration failed, aborting")
7507 self._AbortMigration()
7508 self._RevertDiskStatus()
7509 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7510 (instance.name, msg))
7512 self.feedback_fn("* migrating instance to %s" % target_node)
7513 result = self.rpc.call_instance_migrate(source_node, instance,
7514 self.nodes_ip[target_node],
7516 msg = result.fail_msg
7518 logging.error("Instance migration failed, trying to revert"
7519 " disk status: %s", msg)
7520 self.feedback_fn("Migration failed, aborting")
7521 self._AbortMigration()
7522 self._RevertDiskStatus()
7523 raise errors.OpExecError("Could not migrate instance %s: %s" %
7524 (instance.name, msg))
7526 instance.primary_node = target_node
7527 # distribute new instance config to the other nodes
7528 self.cfg.Update(instance, self.feedback_fn)
7530 result = self.rpc.call_finalize_migration(target_node,
7534 msg = result.fail_msg
7536 logging.error("Instance migration succeeded, but finalization failed:"
7538 raise errors.OpExecError("Could not finalize instance migration: %s" %
7541 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7542 self._EnsureSecondary(source_node)
7543 self._WaitUntilSync()
7544 self._GoStandalone()
7545 self._GoReconnect(False)
7546 self._WaitUntilSync()
7548 self.feedback_fn("* done")
7550 def _ExecFailover(self):
7551 """Failover an instance.
7553 The failover is done by shutting it down on its present node and
7554 starting it on the secondary.
7557 instance = self.instance
7558 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7560 source_node = instance.primary_node
7561 target_node = self.target_node
7563 if instance.admin_up:
7564 self.feedback_fn("* checking disk consistency between source and target")
7565 for dev in instance.disks:
7566 # for drbd, these are drbd over lvm
7567 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7568 if primary_node.offline:
7569 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7571 (primary_node.name, dev.iv_name, target_node))
7572 elif not self.ignore_consistency:
7573 raise errors.OpExecError("Disk %s is degraded on target node,"
7574 " aborting failover" % dev.iv_name)
7575 else:
7576 self.feedback_fn("* not checking disk consistency as instance is not"
7577 " running")
7579 self.feedback_fn("* shutting down instance on source node")
7580 logging.info("Shutting down instance %s on node %s",
7581 instance.name, source_node)
7583 result = self.rpc.call_instance_shutdown(source_node, instance,
7584 self.shutdown_timeout)
7585 msg = result.fail_msg
7586 if msg:
7587 if self.ignore_consistency or primary_node.offline:
7588 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7589 " proceeding anyway; please make sure node"
7590 " %s is down; error details: %s",
7591 instance.name, source_node, source_node, msg)
7592 else:
7593 raise errors.OpExecError("Could not shutdown instance %s on"
7594 " node %s: %s" %
7595 (instance.name, source_node, msg))
7597 self.feedback_fn("* deactivating the instance's disks on source node")
7598 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
7599 raise errors.OpExecError("Can't shut down the instance's disks")
7601 instance.primary_node = target_node
7602 # distribute new instance config to the other nodes
7603 self.cfg.Update(instance, self.feedback_fn)
7605 # Only start the instance if it's marked as up
7606 if instance.admin_up:
7607 self.feedback_fn("* activating the instance's disks on target node %s" %
7608 target_node)
7609 logging.info("Starting instance %s on node %s",
7610 instance.name, target_node)
7612 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
7613 ignore_secondaries=True)
7614 if not disks_ok:
7615 _ShutdownInstanceDisks(self.lu, instance)
7616 raise errors.OpExecError("Can't activate the instance's disks")
7618 self.feedback_fn("* starting the instance on the target node %s" %
7619 target_node)
7620 result = self.rpc.call_instance_start(target_node, instance, None, None,
7621 False)
7622 msg = result.fail_msg
7623 if msg:
7624 _ShutdownInstanceDisks(self.lu, instance)
7625 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7626 (instance.name, target_node, msg))
7628 def Exec(self, feedback_fn):
7629 """Perform the migration.
7632 self.feedback_fn = feedback_fn
7633 self.source_node = self.instance.primary_node
7635 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7636 if self.instance.disk_template in constants.DTS_INT_MIRROR:
7637 self.target_node = self.instance.secondary_nodes[0]
7638 # Otherwise self.target_node has been populated either
7639 # directly, or through an iallocator.
7641 self.all_nodes = [self.source_node, self.target_node]
7642 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
7643 in self.cfg.GetMultiNodeInfo(self.all_nodes))
7645 if self.failover:
7646 feedback_fn("Failover instance %s" % self.instance.name)
7647 self._ExecFailover()
7648 else:
7649 feedback_fn("Migrating instance %s" % self.instance.name)
7651 if self.cleanup:
7652 return self._ExecCleanup()
7653 else:
7654 return self._ExecMigration()
7657 def _CreateBlockDev(lu, node, instance, device, force_create,
7658 info, force_open):
7659 """Create a tree of block devices on a given node.
7661 If this device type has to be created on secondaries, create it and
7662 all its children.
7664 If not, just recurse to children keeping the same 'force' value.
7666 @param lu: the lu on whose behalf we execute
7667 @param node: the node on which to create the device
7668 @type instance: L{objects.Instance}
7669 @param instance: the instance which owns the device
7670 @type device: L{objects.Disk}
7671 @param device: the device to create
7672 @type force_create: boolean
7673 @param force_create: whether to force creation of this device; this
7674 will be changed to True whenever we find a device which has
7675 CreateOnSecondary() attribute
7676 @param info: the extra 'metadata' we should attach to the device
7677 (this will be represented as a LVM tag)
7678 @type force_open: boolean
7679 @param force_open: this parameter will be passed to the
7680 L{backend.BlockdevCreate} function where it specifies
7681 whether we run on primary or not, and it affects both
7682 the child assembly and the device's own Open() execution
7685 if device.CreateOnSecondary():
7686 force_create = True
7688 if device.children:
7689 for child in device.children:
7690 _CreateBlockDev(lu, node, instance, child, force_create,
7691 info, force_open)
7693 if not force_create:
7694 return
7696 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
7699 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
7700 """Create a single block device on a given node.
7702 This will not recurse over children of the device, so they must be
7703 created in advance.
7705 @param lu: the lu on whose behalf we execute
7706 @param node: the node on which to create the device
7707 @type instance: L{objects.Instance}
7708 @param instance: the instance which owns the device
7709 @type device: L{objects.Disk}
7710 @param device: the device to create
7711 @param info: the extra 'metadata' we should attach to the device
7712 (this will be represented as a LVM tag)
7713 @type force_open: boolean
7714 @param force_open: this parameter will be passed to the
7715 L{backend.BlockdevCreate} function where it specifies
7716 whether we run on primary or not, and it affects both
7717 the child assembly and the device's own Open() execution
7720 lu.cfg.SetDiskID(device, node)
7721 result = lu.rpc.call_blockdev_create(node, device, device.size,
7722 instance.name, force_open, info)
7723 result.Raise("Can't create block device %s on"
7724 " node %s for instance %s" % (device, node, instance.name))
7725 if device.physical_id is None:
7726 device.physical_id = result.payload
7729 def _GenerateUniqueNames(lu, exts):
7730 """Generate a suitable LV name.
7732 This will generate a logical volume name for the given instance.
7735 results = []
7736 for val in exts:
7737 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
7738 results.append("%s%s" % (new_id, val))
7739 return results
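# Illustrative note: with exts=[".disk0_data", ".disk0_meta"] the helper above
# would return names of the form "<unique-id>.disk0_data" and
# "<unique-id>.disk0_meta", where each <unique-id> is produced by
# GenerateUniqueID for the current execution context.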
7742 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
7743 iv_name, p_minor, s_minor):
7744 """Generate a drbd8 device complete with its children.
7747 assert len(vgnames) == len(names) == 2
7748 port = lu.cfg.AllocatePort()
7749 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
7750 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
7751 logical_id=(vgnames[0], names[0]))
7752 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7753 logical_id=(vgnames[1], names[1]))
7754 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
7755 logical_id=(primary, secondary, port,
7756 p_minor, s_minor, shared_secret),
7758 children=[dev_data, dev_meta],
7759 iv_name=iv_name)
7760 return drbd_dev
7763 def _GenerateDiskTemplate(lu, template_name,
7764 instance_name, primary_node,
7765 secondary_nodes, disk_info,
7766 file_storage_dir, file_driver,
7767 base_index, feedback_fn):
7768 """Generate the entire disk layout for a given template type.
7771 #TODO: compute space requirements
7773 vgname = lu.cfg.GetVGName()
7774 disk_count = len(disk_info)
7775 disks = []
7776 if template_name == constants.DT_DISKLESS:
7777 pass
7778 elif template_name == constants.DT_PLAIN:
7779 if len(secondary_nodes) != 0:
7780 raise errors.ProgrammerError("Wrong template configuration")
7782 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7783 for i in range(disk_count)])
7784 for idx, disk in enumerate(disk_info):
7785 disk_index = idx + base_index
7786 vg = disk.get(constants.IDISK_VG, vgname)
7787 feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7788 disk_dev = objects.Disk(dev_type=constants.LD_LV,
7789 size=disk[constants.IDISK_SIZE],
7790 logical_id=(vg, names[idx]),
7791 iv_name="disk/%d" % disk_index,
7792 mode=disk[constants.IDISK_MODE])
7793 disks.append(disk_dev)
7794 elif template_name == constants.DT_DRBD8:
7795 if len(secondary_nodes) != 1:
7796 raise errors.ProgrammerError("Wrong template configuration")
7797 remote_node = secondary_nodes[0]
7798 minors = lu.cfg.AllocateDRBDMinor(
7799 [primary_node, remote_node] * len(disk_info), instance_name)
7801 names = []
7802 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7803 for i in range(disk_count)]):
7804 names.append(lv_prefix + "_data")
7805 names.append(lv_prefix + "_meta")
7806 for idx, disk in enumerate(disk_info):
7807 disk_index = idx + base_index
7808 data_vg = disk.get(constants.IDISK_VG, vgname)
7809 meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
7810 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7811 disk[constants.IDISK_SIZE],
7812 [data_vg, meta_vg],
7813 names[idx * 2:idx * 2 + 2],
7814 "disk/%d" % disk_index,
7815 minors[idx * 2], minors[idx * 2 + 1])
7816 disk_dev.mode = disk[constants.IDISK_MODE]
7817 disks.append(disk_dev)
7818 elif template_name == constants.DT_FILE:
7819 if len(secondary_nodes) != 0:
7820 raise errors.ProgrammerError("Wrong template configuration")
7822 opcodes.RequireFileStorage()
7824 for idx, disk in enumerate(disk_info):
7825 disk_index = idx + base_index
7826 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7827 size=disk[constants.IDISK_SIZE],
7828 iv_name="disk/%d" % disk_index,
7829 logical_id=(file_driver,
7830 "%s/disk%d" % (file_storage_dir,
7832 mode=disk[constants.IDISK_MODE])
7833 disks.append(disk_dev)
7834 elif template_name == constants.DT_SHARED_FILE:
7835 if len(secondary_nodes) != 0:
7836 raise errors.ProgrammerError("Wrong template configuration")
7838 opcodes.RequireSharedFileStorage()
7840 for idx, disk in enumerate(disk_info):
7841 disk_index = idx + base_index
7842 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7843 size=disk[constants.IDISK_SIZE],
7844 iv_name="disk/%d" % disk_index,
7845 logical_id=(file_driver,
7846 "%s/disk%d" % (file_storage_dir,
7848 mode=disk[constants.IDISK_MODE])
7849 disks.append(disk_dev)
7850 elif template_name == constants.DT_BLOCK:
7851 if len(secondary_nodes) != 0:
7852 raise errors.ProgrammerError("Wrong template configuration")
7854 for idx, disk in enumerate(disk_info):
7855 disk_index = idx + base_index
7856 disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7857 size=disk[constants.IDISK_SIZE],
7858 logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7859 disk[constants.IDISK_ADOPT]),
7860 iv_name="disk/%d" % disk_index,
7861 mode=disk[constants.IDISK_MODE])
7862 disks.append(disk_dev)
7864 else:
7865 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
7867 return disks
7869 def _GetInstanceInfoText(instance):
7870 """Compute that text that should be added to the disk's metadata.
7873 return "originstname+%s" % instance.name
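# Example: for an instance named "inst1.example.com" the text computed above
# is the LVM tag "originstname+inst1.example.com".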
7876 def _CalcEta(time_taken, written, total_size):
7877 """Calculates the ETA based on size written and total size.
7879 @param time_taken: The time taken so far
7880 @param written: amount written so far
7881 @param total_size: The total size of data to be written
7882 @return: The remaining time in seconds
7885 avg_time = time_taken / float(written)
7886 return (total_size - written) * avg_time
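# Worked example: if 512 MiB out of a 2048 MiB total have been written in 60
# seconds, avg_time is 60 / 512.0 seconds per MiB and the estimated remaining
# time is (2048 - 512) * (60 / 512.0) = 180 seconds.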
7889 def _WipeDisks(lu, instance):
7890 """Wipes instance disks.
7892 @type lu: L{LogicalUnit}
7893 @param lu: the logical unit on whose behalf we execute
7894 @type instance: L{objects.Instance}
7895 @param instance: the instance whose disks we should wipe
7896 @return: the success of the wipe
7899 node = instance.primary_node
7901 for device in instance.disks:
7902 lu.cfg.SetDiskID(device, node)
7904 logging.info("Pause sync of instance %s disks", instance.name)
7905 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
7907 for idx, success in enumerate(result.payload):
7908 if not success:
7909 logging.warn("pause-sync of instance %s for disks %d failed",
7910 instance.name, idx)
7913 for idx, device in enumerate(instance.disks):
7914 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
7915 # MAX_WIPE_CHUNK at max
7916 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
7917 constants.MIN_WIPE_CHUNK_PERCENT)
7918 # we _must_ make this an int, otherwise rounding errors will
7920 wipe_chunk_size = int(wipe_chunk_size)
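# Illustrative sizing (the exact values of MIN_WIPE_CHUNK_PERCENT and
# MAX_WIPE_CHUNK live in constants; 10% and 1024 MiB are assumed here): a
# 4096 MiB disk is wiped in int(4096 / 100.0 * 10) = 409 MiB chunks, while a
# very large disk is capped at the 1024 MiB maximum.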
7922 lu.LogInfo("* Wiping disk %d", idx)
7923 logging.info("Wiping disk %d for instance %s, node %s using"
7924 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
7929 start_time = time.time()
7931 while offset < size:
7932 wipe_size = min(wipe_chunk_size, size - offset)
7933 logging.debug("Wiping disk %d, offset %s, chunk %s",
7934 idx, offset, wipe_size)
7935 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
7936 result.Raise("Could not wipe disk %d at offset %d for size %d" %
7937 (idx, offset, wipe_size))
7938 now = time.time()
7939 offset += wipe_size
7940 if now - last_output >= 60:
7941 eta = _CalcEta(now - start_time, offset, size)
7942 lu.LogInfo(" - done: %.1f%% ETA: %s" %
7943 (offset / float(size) * 100, utils.FormatSeconds(eta)))
7944 last_output = now
7946 logging.info("Resume sync of instance %s disks", instance.name)
7948 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
7950 for idx, success in enumerate(result.payload):
7951 if not success:
7952 lu.LogWarning("Resume sync of disk %d failed, please have a"
7953 " look at the status and troubleshoot the issue", idx)
7954 logging.warn("resume-sync of instance %s for disks %d failed",
7955 instance.name, idx)
7958 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
7959 """Create all disks for an instance.
7961 This abstracts away some work from AddInstance.
7963 @type lu: L{LogicalUnit}
7964 @param lu: the logical unit on whose behalf we execute
7965 @type instance: L{objects.Instance}
7966 @param instance: the instance whose disks we should create
7968 @param to_skip: list of indices to skip
7969 @type target_node: string
7970 @param target_node: if passed, overrides the target node for creation
7972 @return: the success of the creation
7975 info = _GetInstanceInfoText(instance)
7976 if target_node is None:
7977 pnode = instance.primary_node
7978 all_nodes = instance.all_nodes
7979 else:
7980 pnode = target_node
7981 all_nodes = [pnode]
7983 if instance.disk_template in constants.DTS_FILEBASED:
7984 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7985 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
7987 result.Raise("Failed to create directory '%s' on"
7988 " node %s" % (file_storage_dir, pnode))
7990 # Note: this needs to be kept in sync with adding of disks in
7991 # LUInstanceSetParams
7992 for idx, device in enumerate(instance.disks):
7993 if to_skip and idx in to_skip:
7994 continue
7995 logging.info("Creating volume %s for instance %s",
7996 device.iv_name, instance.name)
7998 for node in all_nodes:
7999 f_create = node == pnode
8000 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
8003 def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
8004 """Remove all disks for an instance.
8006 This abstracts away some work from `AddInstance()` and
8007 `RemoveInstance()`. Note that in case some of the devices couldn't
8008 be removed, the removal will continue with the other ones (compare
8009 with `_CreateDisks()`).
8011 @type lu: L{LogicalUnit}
8012 @param lu: the logical unit on whose behalf we execute
8013 @type instance: L{objects.Instance}
8014 @param instance: the instance whose disks we should remove
8015 @type target_node: string
8016 @param target_node: used to override the node on which to remove the disks
8018 @return: the success of the removal
8021 logging.info("Removing block devices for instance %s", instance.name)
8023 all_result = True
8024 ports_to_release = set()
8025 for device in instance.disks:
8026 if target_node:
8027 edata = [(target_node, device)]
8028 else:
8029 edata = device.ComputeNodeTree(instance.primary_node)
8030 for node, disk in edata:
8031 lu.cfg.SetDiskID(disk, node)
8032 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
8033 if msg:
8034 lu.LogWarning("Could not remove block device %s on node %s,"
8035 " continuing anyway: %s", device.iv_name, node, msg)
8036 all_result = False
8038 # if this is a DRBD disk, return its port to the pool
8039 if device.dev_type in constants.LDS_DRBD:
8040 ports_to_release.add(device.logical_id[2])
8042 if all_result or ignore_failures:
8043 for port in ports_to_release:
8044 lu.cfg.AddTcpUdpPort(port)
8046 if instance.disk_template == constants.DT_FILE:
8047 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8051 tgt = instance.primary_node
8052 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
8053 if result.fail_msg:
8054 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
8055 file_storage_dir, instance.primary_node, result.fail_msg)
8056 all_result = False
8058 return all_result
8061 def _ComputeDiskSizePerVG(disk_template, disks):
8062 """Compute disk size requirements in the volume group
8065 def _compute(disks, payload):
8066 """Universal algorithm.
8071 vgs[disk[constants.IDISK_VG]] = \
8072 vgs.get(constants.IDISK_VG, 0) + disk[constants.IDISK_SIZE] + payload
8076 # Required free disk space as a function of disk and swap space
8077 req_size_dict = {
8078 constants.DT_DISKLESS: {},
8079 constants.DT_PLAIN: _compute(disks, 0),
8080 # 128 MB are added for drbd metadata for each disk
8081 constants.DT_DRBD8: _compute(disks, 128),
8082 constants.DT_FILE: {},
8083 constants.DT_SHARED_FILE: {},
8084 }
8086 if disk_template not in req_size_dict:
8087 raise errors.ProgrammerError("Disk template '%s' size requirement"
8088 " is unknown" % disk_template)
8090 return req_size_dict[disk_template]
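# Illustrative result: for two DRBD8 disks of 1024 MiB and 512 MiB, both in
# volume group "xenvg", the dictionary returned above would be
# {"xenvg": (1024 + 128) + (512 + 128)} = {"xenvg": 1792}, i.e. the per-VG sum
# of disk sizes plus the per-disk DRBD metadata overhead.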
8093 def _ComputeDiskSize(disk_template, disks):
8094 """Compute disk size requirements in the volume group
8097 # Required free disk space as a function of disk and swap space
8098 req_size_dict = {
8099 constants.DT_DISKLESS: None,
8100 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
8101 # 128 MB are added for drbd metadata for each disk
8102 constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
8103 constants.DT_FILE: None,
8104 constants.DT_SHARED_FILE: 0,
8105 constants.DT_BLOCK: 0,
8106 }
8108 if disk_template not in req_size_dict:
8109 raise errors.ProgrammerError("Disk template '%s' size requirement"
8110 " is unknown" % disk_template)
8112 return req_size_dict[disk_template]
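# Worked example: two disks of 1024 MiB each require 2048 MiB with DT_PLAIN
# and (1024 + 128) * 2 = 2304 MiB with DT_DRBD8, while diskless and file-based
# templates impose no volume group requirement.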
8115 def _FilterVmNodes(lu, nodenames):
8116 """Filters out non-vm_capable nodes from a list.
8118 @type lu: L{LogicalUnit}
8119 @param lu: the logical unit for which we check
8120 @type nodenames: list
8121 @param nodenames: the list of nodes on which we should check
8123 @return: the list of vm-capable nodes
8126 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
8127 return [name for name in nodenames if name not in vm_nodes]
8130 def _CheckHVParams(lu, nodenames, hvname, hvparams):
8131 """Hypervisor parameter validation.
8133 This function abstracts the hypervisor parameter validation to be
8134 used in both instance create and instance modify.
8136 @type lu: L{LogicalUnit}
8137 @param lu: the logical unit for which we check
8138 @type nodenames: list
8139 @param nodenames: the list of nodes on which we should check
8140 @type hvname: string
8141 @param hvname: the name of the hypervisor we should use
8142 @type hvparams: dict
8143 @param hvparams: the parameters which we need to check
8144 @raise errors.OpPrereqError: if the parameters are not valid
8147 nodenames = _FilterVmNodes(lu, nodenames)
8148 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
8149 hvname,
8150 hvparams)
8151 for node in nodenames:
8152 info = hvinfo[node]
8155 info.Raise("Hypervisor parameter validation failed on node %s" % node)
8158 def _CheckOSParams(lu, required, nodenames, osname, osparams):
8159 """OS parameters validation.
8161 @type lu: L{LogicalUnit}
8162 @param lu: the logical unit for which we check
8163 @type required: boolean
8164 @param required: whether the validation should fail if the OS is not
8166 @type nodenames: list
8167 @param nodenames: the list of nodes on which we should check
8168 @type osname: string
8169 @param osname: the name of the OS we should use
8170 @type osparams: dict
8171 @param osparams: the parameters which we need to check
8172 @raise errors.OpPrereqError: if the parameters are not valid
8175 nodenames = _FilterVmNodes(lu, nodenames)
8176 result = lu.rpc.call_os_validate(required, nodenames, osname,
8177 [constants.OS_VALIDATE_PARAMETERS],
8178 osparams)
8179 for node, nres in result.items():
8180 # we don't check for offline cases since this should be run only
8181 # against the master node and/or an instance's nodes
8182 nres.Raise("OS Parameters validation failed on node %s" % node)
8183 if not nres.payload:
8184 lu.LogInfo("OS %s not found on node %s, validation skipped",
8185 osname, node)
8188 class LUInstanceCreate(LogicalUnit):
8189 """Create an instance.
8192 HPATH = "instance-add"
8193 HTYPE = constants.HTYPE_INSTANCE
8196 def CheckArguments(self):
8200 # do not require name_check to ease forward/backward compatibility
8202 if self.op.no_install and self.op.start:
8203 self.LogInfo("No-installation mode selected, disabling startup")
8204 self.op.start = False
8205 # validate/normalize the instance name
8206 self.op.instance_name = \
8207 netutils.Hostname.GetNormalizedName(self.op.instance_name)
8209 if self.op.ip_check and not self.op.name_check:
8210 # TODO: make the ip check more flexible and not depend on the name check
8211 raise errors.OpPrereqError("Cannot do IP address check without a name"
8212 " check", errors.ECODE_INVAL)
8214 # check nics' parameter names
8215 for nic in self.op.nics:
8216 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8218 # check disks. parameter names and consistent adopt/no-adopt strategy
8219 has_adopt = has_no_adopt = False
8220 for disk in self.op.disks:
8221 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8222 if constants.IDISK_ADOPT in disk:
8223 has_adopt = True
8224 else:
8225 has_no_adopt = True
8226 if has_adopt and has_no_adopt:
8227 raise errors.OpPrereqError("Either all disks are adopted or none is",
8228 errors.ECODE_INVAL)
8229 if has_adopt:
8230 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8231 raise errors.OpPrereqError("Disk adoption is not supported for the"
8232 " '%s' disk template" %
8233 self.op.disk_template,
8235 if self.op.iallocator is not None:
8236 raise errors.OpPrereqError("Disk adoption not allowed with an"
8237 " iallocator script", errors.ECODE_INVAL)
8238 if self.op.mode == constants.INSTANCE_IMPORT:
8239 raise errors.OpPrereqError("Disk adoption not allowed for"
8240 " instance import", errors.ECODE_INVAL)
8242 if self.op.disk_template in constants.DTS_MUST_ADOPT:
8243 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8244 " but no 'adopt' parameter given" %
8245 self.op.disk_template,
8248 self.adopt_disks = has_adopt
8250 # instance name verification
8251 if self.op.name_check:
8252 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8253 self.op.instance_name = self.hostname1.name
8254 # used in CheckPrereq for ip ping check
8255 self.check_ip = self.hostname1.ip
8257 self.check_ip = None
8259 # file storage checks
8260 if (self.op.file_driver and
8261 not self.op.file_driver in constants.FILE_DRIVER):
8262 raise errors.OpPrereqError("Invalid file driver name '%s'" %
8263 self.op.file_driver, errors.ECODE_INVAL)
8265 if self.op.disk_template == constants.DT_FILE:
8266 opcodes.RequireFileStorage()
8267 elif self.op.disk_template == constants.DT_SHARED_FILE:
8268 opcodes.RequireSharedFileStorage()
8270 ### Node/iallocator related checks
8271 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8273 if self.op.pnode is not None:
8274 if self.op.disk_template in constants.DTS_INT_MIRROR:
8275 if self.op.snode is None:
8276 raise errors.OpPrereqError("The networked disk templates need"
8277 " a mirror node", errors.ECODE_INVAL)
8279 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8281 self.op.snode = None
8283 self._cds = _GetClusterDomainSecret()
8285 if self.op.mode == constants.INSTANCE_IMPORT:
8286 # On import force_variant must be True, because if we forced it at
8287 # initial install, our only chance when importing it back is that it
8289 self.op.force_variant = True
8291 if self.op.no_install:
8292 self.LogInfo("No-installation mode has no effect during import")
8294 elif self.op.mode == constants.INSTANCE_CREATE:
8295 if self.op.os_type is None:
8296 raise errors.OpPrereqError("No guest OS specified",
8298 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
8299 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
8300 " installation" % self.op.os_type,
8302 if self.op.disk_template is None:
8303 raise errors.OpPrereqError("No disk template specified",
8306 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8307 # Check handshake to ensure both clusters have the same domain secret
8308 src_handshake = self.op.source_handshake
8309 if not src_handshake:
8310 raise errors.OpPrereqError("Missing source handshake",
8313 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
8316 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
8319 # Load and check source CA
8320 self.source_x509_ca_pem = self.op.source_x509_ca
8321 if not self.source_x509_ca_pem:
8322 raise errors.OpPrereqError("Missing source X509 CA",
8326 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
8328 except OpenSSL.crypto.Error, err:
8329 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
8330 (err, ), errors.ECODE_INVAL)
8332 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
8333 if errcode is not None:
8334 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
8337 self.source_x509_ca = cert
8339 src_instance_name = self.op.source_instance_name
8340 if not src_instance_name:
8341 raise errors.OpPrereqError("Missing source instance name",
8344 self.source_instance_name = \
8345 netutils.GetHostname(name=src_instance_name).name
8348 raise errors.OpPrereqError("Invalid instance creation mode %r" %
8349 self.op.mode, errors.ECODE_INVAL)
8351 def ExpandNames(self):
8352 """ExpandNames for CreateInstance.
8354 Figure out the right locks for instance creation.
8357 self.needed_locks = {}
8359 instance_name = self.op.instance_name
8360 # this is just a preventive check, but someone might still add this
8361 # instance in the meantime, and creation will fail at lock-add time
8362 if instance_name in self.cfg.GetInstanceList():
8363 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8364 instance_name, errors.ECODE_EXISTS)
8366 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
8368 if self.op.iallocator:
8369 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8371 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8372 nodelist = [self.op.pnode]
8373 if self.op.snode is not None:
8374 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8375 nodelist.append(self.op.snode)
8376 self.needed_locks[locking.LEVEL_NODE] = nodelist
8378 # in case of import lock the source node too
8379 if self.op.mode == constants.INSTANCE_IMPORT:
8380 src_node = self.op.src_node
8381 src_path = self.op.src_path
8383 if src_path is None:
8384 self.op.src_path = src_path = self.op.instance_name
8386 if src_node is None:
8387 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8388 self.op.src_node = None
8389 if os.path.isabs(src_path):
8390 raise errors.OpPrereqError("Importing an instance from a path"
8391 " requires a source node option",
8394 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8395 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8396 self.needed_locks[locking.LEVEL_NODE].append(src_node)
8397 if not os.path.isabs(src_path):
8398 self.op.src_path = src_path = \
8399 utils.PathJoin(constants.EXPORT_DIR, src_path)
8401 def _RunAllocator(self):
8402 """Run the allocator based on input opcode.
8405 nics = [n.ToDict() for n in self.nics]
8406 ial = IAllocator(self.cfg, self.rpc,
8407 mode=constants.IALLOCATOR_MODE_ALLOC,
8408 name=self.op.instance_name,
8409 disk_template=self.op.disk_template,
8412 vcpus=self.be_full[constants.BE_VCPUS],
8413 memory=self.be_full[constants.BE_MEMORY],
8416 hypervisor=self.op.hypervisor,
8417 )
8419 ial.Run(self.op.iallocator)
8421 if not ial.success:
8422 raise errors.OpPrereqError("Can't compute nodes using"
8423 " iallocator '%s': %s" %
8424 (self.op.iallocator, ial.info),
8426 if len(ial.result) != ial.required_nodes:
8427 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8428 " of nodes (%s), required %s" %
8429 (self.op.iallocator, len(ial.result),
8430 ial.required_nodes), errors.ECODE_FAULT)
8431 self.op.pnode = ial.result[0]
8432 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8433 self.op.instance_name, self.op.iallocator,
8434 utils.CommaJoin(ial.result))
8435 if ial.required_nodes == 2:
8436 self.op.snode = ial.result[1]
8438 def BuildHooksEnv(self):
8441 This runs on master, primary and secondary nodes of the instance.
8445 "ADD_MODE": self.op.mode,
8447 if self.op.mode == constants.INSTANCE_IMPORT:
8448 env["SRC_NODE"] = self.op.src_node
8449 env["SRC_PATH"] = self.op.src_path
8450 env["SRC_IMAGES"] = self.src_images
8452 env.update(_BuildInstanceHookEnv(
8453 name=self.op.instance_name,
8454 primary_node=self.op.pnode,
8455 secondary_nodes=self.secondaries,
8456 status=self.op.start,
8457 os_type=self.op.os_type,
8458 memory=self.be_full[constants.BE_MEMORY],
8459 vcpus=self.be_full[constants.BE_VCPUS],
8460 nics=_NICListToTuple(self, self.nics),
8461 disk_template=self.op.disk_template,
8462 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8463 for d in self.disks],
8466 hypervisor_name=self.op.hypervisor,
8472 def BuildHooksNodes(self):
8473 """Build hooks nodes.
8476 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8479 def _ReadExportInfo(self):
8480 """Reads the export information from disk.
8482 It will override the opcode source node and path with the actual
8483 information, if these two were not specified before.
8485 @return: the export information
8488 assert self.op.mode == constants.INSTANCE_IMPORT
8490 src_node = self.op.src_node
8491 src_path = self.op.src_path
8493 if src_node is None:
8494 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
8495 exp_list = self.rpc.call_export_list(locked_nodes)
8496 found = False
8497 for node in exp_list:
8498 if exp_list[node].fail_msg:
8499 continue
8500 if src_path in exp_list[node].payload:
8501 found = True
8502 self.op.src_node = src_node = node
8503 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
8504 src_path)
8505 break
8506 if not found:
8507 raise errors.OpPrereqError("No export found for relative path %s" %
8508 src_path, errors.ECODE_INVAL)
8510 _CheckNodeOnline(self, src_node)
8511 result = self.rpc.call_export_info(src_node, src_path)
8512 result.Raise("No export or invalid export found in dir %s" % src_path)
8514 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8515 if not export_info.has_section(constants.INISECT_EXP):
8516 raise errors.ProgrammerError("Corrupted export config",
8517 errors.ECODE_ENVIRON)
8519 ei_version = export_info.get(constants.INISECT_EXP, "version")
8520 if (int(ei_version) != constants.EXPORT_VERSION):
8521 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8522 (ei_version, constants.EXPORT_VERSION),
8523 errors.ECODE_ENVIRON)
8525 return export_info
8526 def _ReadExportParams(self, einfo):
8527 """Use export parameters as defaults.
8529 In case the opcode doesn't specify (as in override) some instance
8530 parameters, then try to use them from the export information, if
8534 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
8536 if self.op.disk_template is None:
8537 if einfo.has_option(constants.INISECT_INS, "disk_template"):
8538 self.op.disk_template = einfo.get(constants.INISECT_INS,
8541 raise errors.OpPrereqError("No disk template specified and the export"
8542 " is missing the disk_template information",
8545 if not self.op.disks:
8546 if einfo.has_option(constants.INISECT_INS, "disk_count"):
8547 disks = []
8548 # TODO: import the disk iv_name too
8549 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
8550 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
8551 disks.append({constants.IDISK_SIZE: disk_sz})
8552 self.op.disks = disks
8554 raise errors.OpPrereqError("No disk info specified and the export"
8555 " is missing the disk information",
8558 if (not self.op.nics and
8559 einfo.has_option(constants.INISECT_INS, "nic_count")):
8561 for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
8563 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
8564 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
8569 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
8570 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
8572 if (self.op.hypervisor is None and
8573 einfo.has_option(constants.INISECT_INS, "hypervisor")):
8574 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
8576 if einfo.has_section(constants.INISECT_HYP):
8577 # use the export parameters but do not override the ones
8578 # specified by the user
8579 for name, value in einfo.items(constants.INISECT_HYP):
8580 if name not in self.op.hvparams:
8581 self.op.hvparams[name] = value
8583 if einfo.has_section(constants.INISECT_BEP):
8584 # use the parameters, without overriding
8585 for name, value in einfo.items(constants.INISECT_BEP):
8586 if name not in self.op.beparams:
8587 self.op.beparams[name] = value
8589 # try to read the parameters old style, from the main section
8590 for name in constants.BES_PARAMETERS:
8591 if (name not in self.op.beparams and
8592 einfo.has_option(constants.INISECT_INS, name)):
8593 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
8595 if einfo.has_section(constants.INISECT_OSP):
8596 # use the parameters, without overriding
8597 for name, value in einfo.items(constants.INISECT_OSP):
8598 if name not in self.op.osparams:
8599 self.op.osparams[name] = value
8601 def _RevertToDefaults(self, cluster):
8602 """Revert the instance parameters to the default values.
8606 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
8607 for name in self.op.hvparams.keys():
8608 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
8609 del self.op.hvparams[name]
8611 be_defs = cluster.SimpleFillBE({})
8612 for name in self.op.beparams.keys():
8613 if name in be_defs and be_defs[name] == self.op.beparams[name]:
8614 del self.op.beparams[name]
8616 nic_defs = cluster.SimpleFillNIC({})
8617 for nic in self.op.nics:
8618 for name in constants.NICS_PARAMETERS:
8619 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
8622 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
8623 for name in self.op.osparams.keys():
8624 if name in os_defs and os_defs[name] == self.op.osparams[name]:
8625 del self.op.osparams[name]
8627 def _CalculateFileStorageDir(self):
8628 """Calculate final instance file storage dir.
8631 # file storage dir calculation/check
8632 self.instance_file_storage_dir = None
8633 if self.op.disk_template in constants.DTS_FILEBASED:
8634 # build the full file storage dir path
8635 joinargs = []
8637 if self.op.disk_template == constants.DT_SHARED_FILE:
8638 get_fsd_fn = self.cfg.GetSharedFileStorageDir
8640 get_fsd_fn = self.cfg.GetFileStorageDir
8642 cfg_storagedir = get_fsd_fn()
8643 if not cfg_storagedir:
8644 raise errors.OpPrereqError("Cluster file storage dir not defined")
8645 joinargs.append(cfg_storagedir)
8647 if self.op.file_storage_dir is not None:
8648 joinargs.append(self.op.file_storage_dir)
8650 joinargs.append(self.op.instance_name)
8652 # pylint: disable=W0142
8653 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
8655 def CheckPrereq(self):
8656 """Check prerequisites.
8659 self._CalculateFileStorageDir()
8661 if self.op.mode == constants.INSTANCE_IMPORT:
8662 export_info = self._ReadExportInfo()
8663 self._ReadExportParams(export_info)
8665 if (not self.cfg.GetVGName() and
8666 self.op.disk_template not in constants.DTS_NOT_LVM):
8667 raise errors.OpPrereqError("Cluster does not support lvm-based"
8668 " instances", errors.ECODE_STATE)
8670 if self.op.hypervisor is None:
8671 self.op.hypervisor = self.cfg.GetHypervisorType()
8673 cluster = self.cfg.GetClusterInfo()
8674 enabled_hvs = cluster.enabled_hypervisors
8675 if self.op.hypervisor not in enabled_hvs:
8676 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
8677 " cluster (%s)" % (self.op.hypervisor,
8678 ",".join(enabled_hvs)),
8681 # Check tag validity
8682 for tag in self.op.tags:
8683 objects.TaggableObject.ValidateTag(tag)
8685 # check hypervisor parameter syntax (locally)
8686 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
8687 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
8688 self.op.hvparams)
8689 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
8690 hv_type.CheckParameterSyntax(filled_hvp)
8691 self.hv_full = filled_hvp
8692 # check that we don't specify global parameters on an instance
8693 _CheckGlobalHvParams(self.op.hvparams)
8695 # fill and remember the beparams dict
8696 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
8697 self.be_full = cluster.SimpleFillBE(self.op.beparams)
8699 # build os parameters
8700 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
8702 # now that hvp/bep are in final format, let's reset to defaults,
8704 if self.op.identify_defaults:
8705 self._RevertToDefaults(cluster)
8708 self.nics = []
8709 for idx, nic in enumerate(self.op.nics):
8710 nic_mode_req = nic.get(constants.INIC_MODE, None)
8711 nic_mode = nic_mode_req
8712 if nic_mode is None:
8713 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
8715 # in routed mode, for the first nic, the default ip is 'auto'
8716 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
8717 default_ip_mode = constants.VALUE_AUTO
8719 default_ip_mode = constants.VALUE_NONE
8721 # ip validity checks
8722 ip = nic.get(constants.INIC_IP, default_ip_mode)
8723 if ip is None or ip.lower() == constants.VALUE_NONE:
8725 elif ip.lower() == constants.VALUE_AUTO:
8726 if not self.op.name_check:
8727 raise errors.OpPrereqError("IP address set to auto but name checks"
8728 " have been skipped",
8730 nic_ip = self.hostname1.ip
8732 if not netutils.IPAddress.IsValid(ip):
8733 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
8737 # TODO: check the ip address for uniqueness
8738 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
8739 raise errors.OpPrereqError("Routed nic mode requires an ip address",
8742 # MAC address verification
8743 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
8744 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8745 mac = utils.NormalizeAndValidateMac(mac)
8748 self.cfg.ReserveMAC(mac, self.proc.GetECId())
8749 except errors.ReservationError:
8750 raise errors.OpPrereqError("MAC address %s already in use"
8751 " in cluster" % mac,
8752 errors.ECODE_NOTUNIQUE)
8754 # Build nic parameters
8755 link = nic.get(constants.INIC_LINK, None)
8756 nicparams = {}
8757 if nic_mode_req:
8758 nicparams[constants.NIC_MODE] = nic_mode_req
8759 if link:
8760 nicparams[constants.NIC_LINK] = link
8762 check_params = cluster.SimpleFillNIC(nicparams)
8763 objects.NIC.CheckParameterSyntax(check_params)
8764 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
8766 # disk checks/pre-build
8767 default_vg = self.cfg.GetVGName()
8768 self.disks = []
8769 for disk in self.op.disks:
8770 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
8771 if mode not in constants.DISK_ACCESS_SET:
8772 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
8773 mode, errors.ECODE_INVAL)
8774 size = disk.get(constants.IDISK_SIZE, None)
8775 if size is None:
8776 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
8777 try:
8778 size = int(size)
8779 except (TypeError, ValueError):
8780 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
8783 data_vg = disk.get(constants.IDISK_VG, default_vg)
8784 new_disk = {
8785 constants.IDISK_SIZE: size,
8786 constants.IDISK_MODE: mode,
8787 constants.IDISK_VG: data_vg,
8788 constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
8789 }
8790 if constants.IDISK_ADOPT in disk:
8791 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
8792 self.disks.append(new_disk)
8794 if self.op.mode == constants.INSTANCE_IMPORT:
8796 # Check that the new instance doesn't have less disks than the export
8797 instance_disks = len(self.disks)
8798 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
8799 if instance_disks < export_disks:
8800 raise errors.OpPrereqError("Not enough disks to import."
8801 " (instance: %d, export: %d)" %
8802 (instance_disks, export_disks),
8805 disk_images = []
8806 for idx in range(export_disks):
8807 option = "disk%d_dump" % idx
8808 if export_info.has_option(constants.INISECT_INS, option):
8809 # FIXME: are the old os-es, disk sizes, etc. useful?
8810 export_name = export_info.get(constants.INISECT_INS, option)
8811 image = utils.PathJoin(self.op.src_path, export_name)
8812 disk_images.append(image)
8814 disk_images.append(False)
8816 self.src_images = disk_images
8818 old_name = export_info.get(constants.INISECT_INS, "name")
8819 try:
8820 exp_nic_count = export_info.getint(constants.INISECT_INS, "nic_count")
8821 except (TypeError, ValueError), err:
8822 raise errors.OpPrereqError("Invalid export file, nic_count is not"
8823 " an integer: %s" % str(err),
8825 if self.op.instance_name == old_name:
8826 for idx, nic in enumerate(self.nics):
8827 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
8828 nic_mac_ini = "nic%d_mac" % idx
8829 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8831 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8833 # ip ping checks (we use the same ip that was resolved in ExpandNames)
8834 if self.op.ip_check:
8835 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8836 raise errors.OpPrereqError("IP %s of instance %s already in use" %
8837 (self.check_ip, self.op.instance_name),
8838 errors.ECODE_NOTUNIQUE)
8840 #### mac address generation
8841 # By generating here the mac address both the allocator and the hooks get
8842 # the real final mac address rather than the 'auto' or 'generate' value.
8843 # There is a race condition between the generation and the instance object
8844 # creation, which means that we know the mac is valid now, but we're not
8845 # sure it will be when we actually add the instance. If things go bad
8846 # adding the instance will abort because of a duplicate mac, and the
8847 # creation job will fail.
8848 for nic in self.nics:
8849 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8850 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
8854 if self.op.iallocator is not None:
8855 self._RunAllocator()
8857 # Release all unneeded node locks
8858 _ReleaseLocks(self, locking.LEVEL_NODE,
8859 keep=filter(None, [self.op.pnode, self.op.snode,
8860 self.op.src_node]))
8862 #### node related checks
8864 # check primary node
8865 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8866 assert self.pnode is not None, \
8867 "Cannot retrieve locked node %s" % self.op.pnode
8868 if pnode.offline:
8869 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8870 pnode.name, errors.ECODE_STATE)
8871 if pnode.drained:
8872 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8873 pnode.name, errors.ECODE_STATE)
8874 if not pnode.vm_capable:
8875 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8876 " '%s'" % pnode.name, errors.ECODE_STATE)
8878 self.secondaries = []
8880 # mirror node verification
8881 if self.op.disk_template in constants.DTS_INT_MIRROR:
8882 if self.op.snode == pnode.name:
8883 raise errors.OpPrereqError("The secondary node cannot be the"
8884 " primary node", errors.ECODE_INVAL)
8885 _CheckNodeOnline(self, self.op.snode)
8886 _CheckNodeNotDrained(self, self.op.snode)
8887 _CheckNodeVmCapable(self, self.op.snode)
8888 self.secondaries.append(self.op.snode)
8890 nodenames = [pnode.name] + self.secondaries
8892 if not self.adopt_disks:
8893 # Check lv size requirements, if not adopting
8894 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
8895 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
8897 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
8898 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
8899 disk[constants.IDISK_ADOPT])
8900 for disk in self.disks])
8901 if len(all_lvs) != len(self.disks):
8902 raise errors.OpPrereqError("Duplicate volume names given for adoption",
8904 for lv_name in all_lvs:
8905 try:
8906 # FIXME: lv_name here is "vg/lv" need to ensure that other calls
8907 # to ReserveLV uses the same syntax
8908 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
8909 except errors.ReservationError:
8910 raise errors.OpPrereqError("LV named %s used by another instance" %
8911 lv_name, errors.ECODE_NOTUNIQUE)
8913 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
8914 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
8916 node_lvs = self.rpc.call_lv_list([pnode.name],
8917 vg_names.payload.keys())[pnode.name]
8918 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
8919 node_lvs = node_lvs.payload
8921 delta = all_lvs.difference(node_lvs.keys())
8923 raise errors.OpPrereqError("Missing logical volume(s): %s" %
8924 utils.CommaJoin(delta),
8926 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
8928 raise errors.OpPrereqError("Online logical volumes found, cannot"
8929 " adopt: %s" % utils.CommaJoin(online_lvs),
8931 # update the size of disk based on what is found
8932 for dsk in self.disks:
8933 dsk[constants.IDISK_SIZE] = \
8934 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
8935 dsk[constants.IDISK_ADOPT])][0]))
8937 elif self.op.disk_template == constants.DT_BLOCK:
8938 # Normalize and de-duplicate device paths
8939 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
8940 for disk in self.disks])
8941 if len(all_disks) != len(self.disks):
8942 raise errors.OpPrereqError("Duplicate disk names given for adoption",
8944 baddisks = [d for d in all_disks
8945 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
8947 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
8948 " cannot be adopted" %
8949 (", ".join(baddisks),
8950 constants.ADOPTABLE_BLOCKDEV_ROOT),
8953 node_disks = self.rpc.call_bdev_sizes([pnode.name],
8954 list(all_disks))[pnode.name]
8955 node_disks.Raise("Cannot get block device information from node %s" %
8957 node_disks = node_disks.payload
8958 delta = all_disks.difference(node_disks.keys())
8960 raise errors.OpPrereqError("Missing block device(s): %s" %
8961 utils.CommaJoin(delta),
8963 for dsk in self.disks:
8964 dsk[constants.IDISK_SIZE] = \
8965 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
8967 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
8969 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
8970 # check OS parameters (remotely)
8971 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
8973 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
8975 # memory check on primary node
8976 if self.op.start:
8977 _CheckNodeFreeMemory(self, self.pnode.name,
8978 "creating instance %s" % self.op.instance_name,
8979 self.be_full[constants.BE_MEMORY],
8980 self.op.hypervisor)
8982 self.dry_run_result = list(nodenames)
8984 def Exec(self, feedback_fn):
8985 """Create and add the instance to the cluster.
8988 instance = self.op.instance_name
8989 pnode_name = self.pnode.name
8991 ht_kind = self.op.hypervisor
8992 if ht_kind in constants.HTS_REQ_PORT:
8993 network_port = self.cfg.AllocatePort()
8994 else:
8995 network_port = None
8997 disks = _GenerateDiskTemplate(self,
8998 self.op.disk_template,
8999 instance, pnode_name,
9000 self.secondaries,
9001 self.disks,
9002 self.instance_file_storage_dir,
9003 self.op.file_driver,
9004 0, feedback_fn)
9007 iobj = objects.Instance(name=instance, os=self.op.os_type,
9008 primary_node=pnode_name,
9009 nics=self.nics, disks=disks,
9010 disk_template=self.op.disk_template,
9012 network_port=network_port,
9013 beparams=self.op.beparams,
9014 hvparams=self.op.hvparams,
9015 hypervisor=self.op.hypervisor,
9016 osparams=self.op.osparams,
9017 )
9020 for tag in self.op.tags:
9021 iobj.AddTag(tag)
9023 if self.adopt_disks:
9024 if self.op.disk_template == constants.DT_PLAIN:
9025 # rename LVs to the newly-generated names; we need to construct
9026 # 'fake' LV disks with the old data, plus the new unique_id
9027 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
9028 rename_to = []
9029 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
9030 rename_to.append(t_dsk.logical_id)
9031 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
9032 self.cfg.SetDiskID(t_dsk, pnode_name)
9033 result = self.rpc.call_blockdev_rename(pnode_name,
9034 zip(tmp_disks, rename_to))
9035 result.Raise("Failed to rename adopted LVs")
9037 feedback_fn("* creating instance disks...")
9038 try:
9039 _CreateDisks(self, iobj)
9040 except errors.OpExecError:
9041 self.LogWarning("Device creation failed, reverting...")
9042 try:
9043 _RemoveDisks(self, iobj)
9044 finally:
9045 self.cfg.ReleaseDRBDMinors(instance)
9046 raise
9048 feedback_fn("adding instance %s to cluster config" % instance)
9050 self.cfg.AddInstance(iobj, self.proc.GetECId())
9052 # Declare that we don't want to remove the instance lock anymore, as we've
9053 # added the instance to the config
9054 del self.remove_locks[locking.LEVEL_INSTANCE]
9056 if self.op.mode == constants.INSTANCE_IMPORT:
9057 # Release unused nodes
9058 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
9061 _ReleaseLocks(self, locking.LEVEL_NODE)
9063 disk_abort = False
9064 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
9065 feedback_fn("* wiping instance disks...")
9066 try:
9067 _WipeDisks(self, iobj)
9068 except errors.OpExecError, err:
9069 logging.exception("Wiping disks failed")
9070 self.LogWarning("Wiping instance disks failed (%s)", err)
9071 disk_abort = True
9073 if disk_abort:
9074 # Something is already wrong with the disks, don't do anything else
9075 pass
9076 elif self.op.wait_for_sync:
9077 disk_abort = not _WaitForSync(self, iobj)
9078 elif iobj.disk_template in constants.DTS_INT_MIRROR:
9079 # make sure the disks are not degraded (still sync-ing is ok)
9080 feedback_fn("* checking mirrors status")
9081 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
9085 if disk_abort:
9086 _RemoveDisks(self, iobj)
9087 self.cfg.RemoveInstance(iobj.name)
9088 # Make sure the instance lock gets removed
9089 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
9090 raise errors.OpExecError("There are some degraded disks for"
9091 " this instance")
9093 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
9094 if self.op.mode == constants.INSTANCE_CREATE:
9095 if not self.op.no_install:
9096 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
9097 not self.op.wait_for_sync)
9098 if pause_sync:
9099 feedback_fn("* pausing disk sync to install instance OS")
9100 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9101 iobj.disks, True)
9102 for idx, success in enumerate(result.payload):
9103 if not success:
9104 logging.warn("pause-sync of instance %s for disk %d failed",
9105 instance, idx)
9107 feedback_fn("* running the instance OS create scripts...")
9108 # FIXME: pass debug option from opcode to backend
9109 os_add_result = \
9110 self.rpc.call_instance_os_add(pnode_name, iobj, False,
9111 self.op.debug_level)
9113 feedback_fn("* resuming disk sync")
9114 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9116 for idx, success in enumerate(result.payload):
9118 logging.warn("resume-sync of instance %s for disk %d failed",
9121 os_add_result.Raise("Could not add os for instance %s"
9122 " on node %s" % (instance, pnode_name))
9124 elif self.op.mode == constants.INSTANCE_IMPORT:
9125 feedback_fn("* running the instance OS import scripts...")
9128 transfers = []
9129 for idx, image in enumerate(self.src_images):
9133 # FIXME: pass debug option from opcode to backend
9134 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
9135 constants.IEIO_FILE, (image, ),
9136 constants.IEIO_SCRIPT,
9137 (iobj.disks[idx], idx),
9138 None)
9139 transfers.append(dt)
9141 import_result = \
9142 masterd.instance.TransferInstanceData(self, feedback_fn,
9143 self.op.src_node, pnode_name,
9144 self.pnode.secondary_ip,
9146 if not compat.all(import_result):
9147 self.LogWarning("Some disks for instance %s on node %s were not"
9148 " imported successfully" % (instance, pnode_name))
9150 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9151 feedback_fn("* preparing remote import...")
9152 # The source cluster will stop the instance before attempting to make a
9153 # connection. In some cases stopping an instance can take a long time,
9154 # hence the shutdown timeout is added to the connection timeout.
9155 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
9156 self.op.source_shutdown_timeout)
9157 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9159 assert iobj.primary_node == self.pnode.name
9161 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
9162 self.source_x509_ca,
9163 self._cds, timeouts)
9164 if not compat.all(disk_results):
9165 # TODO: Should the instance still be started, even if some disks
9166 # failed to import (valid for local imports, too)?
9167 self.LogWarning("Some disks for instance %s on node %s were not"
9168 " imported successfully" % (instance, pnode_name))
9170 # Run rename script on newly imported instance
9171 assert iobj.name == instance
9172 feedback_fn("Running rename script for %s" % instance)
9173 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
9174 self.source_instance_name,
9175 self.op.debug_level)
9177 self.LogWarning("Failed to run rename script for %s on node"
9178 " %s: %s" % (instance, pnode_name, result.fail_msg))
9180 else:
9181 # also checked in the prereq part
9182 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
9183 % self.op.mode)
9185 if self.op.start:
9186 iobj.admin_up = True
9187 self.cfg.Update(iobj, feedback_fn)
9188 logging.info("Starting instance %s on node %s", instance, pnode_name)
9189 feedback_fn("* starting instance...")
9190 result = self.rpc.call_instance_start(pnode_name, iobj,
9191 None, None, False)
9192 result.Raise("Could not start instance")
9194 return list(iobj.all_nodes)
9197 class LUInstanceConsole(NoHooksLU):
9198 """Connect to an instance's console.
9200 This is somewhat special in that it returns the command line that
9201 you need to run on the master node in order to connect to the
9207 def ExpandNames(self):
9208 self._ExpandAndLockInstance()
9210 def CheckPrereq(self):
9211 """Check prerequisites.
9213 This checks that the instance is in the cluster.
9216 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9217 assert self.instance is not None, \
9218 "Cannot retrieve locked instance %s" % self.op.instance_name
9219 _CheckNodeOnline(self, self.instance.primary_node)
9221 def Exec(self, feedback_fn):
9222 """Connect to the console of an instance
9225 instance = self.instance
9226 node = instance.primary_node
9228 node_insts = self.rpc.call_instance_list([node],
9229 [instance.hypervisor])[node]
9230 node_insts.Raise("Can't get node information from %s" % node)
9232 if instance.name not in node_insts.payload:
9233 if instance.admin_up:
9234 state = constants.INSTST_ERRORDOWN
9236 state = constants.INSTST_ADMINDOWN
9237 raise errors.OpExecError("Instance %s is not running (state %s)" %
9238 (instance.name, state))
9240 logging.debug("Connecting to console of %s on %s", instance.name, node)
9242 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
9245 def _GetInstanceConsole(cluster, instance):
9246 """Returns console information for an instance.
9248 @type cluster: L{objects.Cluster}
9249 @type instance: L{objects.Instance}
9253 hyper = hypervisor.GetHypervisor(instance.hypervisor)
9254 # beparams and hvparams are passed separately, to avoid editing the
9255 # instance and then saving the defaults in the instance itself.
9256 hvparams = cluster.FillHV(instance)
9257 beparams = cluster.FillBE(instance)
9258 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
9260 assert console.instance == instance.name
9261 assert console.Validate()
9263 return console.ToDict()
9266 class LUInstanceReplaceDisks(LogicalUnit):
9267 """Replace the disks of an instance.
9270 HPATH = "mirrors-replace"
9271 HTYPE = constants.HTYPE_INSTANCE
9274 def CheckArguments(self):
9275 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
9276 self.op.iallocator)
9278 def ExpandNames(self):
9279 self._ExpandAndLockInstance()
9281 assert locking.LEVEL_NODE not in self.needed_locks
9282 assert locking.LEVEL_NODEGROUP not in self.needed_locks
9284 assert self.op.iallocator is None or self.op.remote_node is None, \
9285 "Conflicting options"
9287 if self.op.remote_node is not None:
9288 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9290 # Warning: do not remove the locking of the new secondary here
9291 # unless DRBD8.AddChildren is changed to work in parallel;
9292 # currently it doesn't since parallel invocations of
9293 # FindUnusedMinor will conflict
9294 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
9295 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
9297 self.needed_locks[locking.LEVEL_NODE] = []
9298 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9300 if self.op.iallocator is not None:
9301 # iallocator will select a new node in the same group
9302 self.needed_locks[locking.LEVEL_NODEGROUP] = []
9304 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
9305 self.op.iallocator, self.op.remote_node,
9306 self.op.disks, False, self.op.early_release)
9308 self.tasklets = [self.replacer]
9310 def DeclareLocks(self, level):
9311 if level == locking.LEVEL_NODEGROUP:
9312 assert self.op.remote_node is None
9313 assert self.op.iallocator is not None
9314 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
9316 self.share_locks[locking.LEVEL_NODEGROUP] = 1
9317 self.needed_locks[locking.LEVEL_NODEGROUP] = \
9318 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9320 elif level == locking.LEVEL_NODE:
9321 if self.op.iallocator is not None:
9322 assert self.op.remote_node is None
9323 assert not self.needed_locks[locking.LEVEL_NODE]
9325 # Lock member nodes of all locked groups
9326 self.needed_locks[locking.LEVEL_NODE] = [node_name
9327 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
9328 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
9330 self._LockInstancesNodes()
9332 def BuildHooksEnv(self):
9335 This runs on the master, the primary and all the secondaries.
9338 instance = self.replacer.instance
9340 "MODE": self.op.mode,
9341 "NEW_SECONDARY": self.op.remote_node,
9342 "OLD_SECONDARY": instance.secondary_nodes[0],
9344 env.update(_BuildInstanceHookEnvByObject(self, instance))
9347 def BuildHooksNodes(self):
9348 """Build hooks nodes.
9351 instance = self.replacer.instance
9353 self.cfg.GetMasterNode(),
9354 instance.primary_node,
9356 if self.op.remote_node is not None:
9357 nl.append(self.op.remote_node)
9360 def CheckPrereq(self):
9361 """Check prerequisites.
9364 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
9365 self.op.iallocator is None)
9367 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
9369 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
9371 return LogicalUnit.CheckPrereq(self)
9374 class TLReplaceDisks(Tasklet):
9375 """Replaces disks for an instance.
9377 Note: Locking is not within the scope of this class.
9380 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9381 disks, delay_iallocator, early_release):
9382 """Initializes this class.
9385 Tasklet.__init__(self, lu)
9388 self.instance_name = instance_name
9390 self.iallocator_name = iallocator_name
9391 self.remote_node = remote_node
9393 self.delay_iallocator = delay_iallocator
9394 self.early_release = early_release
9397 self.instance = None
9398 self.new_node = None
9399 self.target_node = None
9400 self.other_node = None
9401 self.remote_node_info = None
9402 self.node_secondary_ip = None
9405 def CheckArguments(mode, remote_node, iallocator):
9406 """Helper function for users of this class.
9409 # check for valid parameter combination
9410 if mode == constants.REPLACE_DISK_CHG:
9411 if remote_node is None and iallocator is None:
9412 raise errors.OpPrereqError("When changing the secondary either an"
9413 " iallocator script must be used or the"
9414 " new node given", errors.ECODE_INVAL)
9416 if remote_node is not None and iallocator is not None:
9417 raise errors.OpPrereqError("Give either the iallocator or the new"
9418 " secondary, not both", errors.ECODE_INVAL)
9420 elif remote_node is not None or iallocator is not None:
9421 # Not replacing the secondary
9422 raise errors.OpPrereqError("The iallocator and new node options can"
9423 " only be used when changing the"
9424 " secondary node", errors.ECODE_INVAL)
9427 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9428 """Compute a new secondary node using an IAllocator.
9431 ial = IAllocator(lu.cfg, lu.rpc,
9432 mode=constants.IALLOCATOR_MODE_RELOC,
9434 relocate_from=list(relocate_from))
9436 ial.Run(iallocator_name)
9439 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9440 " %s" % (iallocator_name, ial.info),
9443 if len(ial.result) != ial.required_nodes:
9444 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9445 " of nodes (%s), required %s" %
9447 len(ial.result), ial.required_nodes),
9450 remote_node_name = ial.result[0]
9452 lu.LogInfo("Selected new secondary for instance '%s': %s",
9453 instance_name, remote_node_name)
9455 return remote_node_name
9457 def _FindFaultyDisks(self, node_name):
9458 """Wrapper for L{_FindFaultyInstanceDisks}.
9461 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
9464 def _CheckDisksActivated(self, instance):
9465 """Checks if the instance disks are activated.
9467 @param instance: The instance to check disks
9468 @return: True if they are activated, False otherwise
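# Each disk is probed on every node of the instance via blockdev_find; an
# RPC failure or an empty payload is treated as "not activated".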
9471 nodes = instance.all_nodes
9473 for idx, dev in enumerate(instance.disks):
9475 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
9476 self.cfg.SetDiskID(dev, node)
9478 result = self.rpc.call_blockdev_find(node, dev)
9482 elif result.fail_msg or not result.payload:
9487 def CheckPrereq(self):
9488 """Check prerequisites.
9490 This checks that the instance is in the cluster.
9493 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9494 assert instance is not None, \
9495 "Cannot retrieve locked instance %s" % self.instance_name
9497 if instance.disk_template != constants.DT_DRBD8:
9498 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9499 " instances", errors.ECODE_INVAL)
9501 if len(instance.secondary_nodes) != 1:
9502 raise errors.OpPrereqError("The instance has a strange layout,"
9503 " expected one secondary but found %d" %
9504 len(instance.secondary_nodes),
9507 if not self.delay_iallocator:
9508 self._CheckPrereq2()
9510 def _CheckPrereq2(self):
9511 """Check prerequisites, second part.
9513 This function should always be part of CheckPrereq. It was separated and is
9514 now called from Exec because during node evacuation iallocator was only
9515 called with an unmodified cluster model, not taking planned changes into account.
9519 instance = self.instance
9520 secondary_node = instance.secondary_nodes[0]
9522 if self.iallocator_name is None:
9523 remote_node = self.remote_node
9525 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
9526 instance.name, instance.secondary_nodes)
9528 if remote_node is None:
9529 self.remote_node_info = None
9531 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
9532 "Remote node '%s' is not locked" % remote_node
9534 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
9535 assert self.remote_node_info is not None, \
9536 "Cannot retrieve locked node %s" % remote_node
9538 if remote_node == self.instance.primary_node:
9539 raise errors.OpPrereqError("The specified node is the primary node of"
9540 " the instance", errors.ECODE_INVAL)
9542 if remote_node == secondary_node:
9543 raise errors.OpPrereqError("The specified node is already the"
9544 " secondary node of the instance",
9547 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
9548 constants.REPLACE_DISK_CHG):
9549 raise errors.OpPrereqError("Cannot specify disks to be replaced",
9552 if self.mode == constants.REPLACE_DISK_AUTO:
9553 if not self._CheckDisksActivated(instance):
9554 raise errors.OpPrereqError("Please run activate-disks on instance %s"
9555 " first" % self.instance_name,
9557 faulty_primary = self._FindFaultyDisks(instance.primary_node)
9558 faulty_secondary = self._FindFaultyDisks(secondary_node)
9560 if faulty_primary and faulty_secondary:
9561 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
9562 " one node and can not be repaired"
9563 " automatically" % self.instance_name,
9567 self.disks = faulty_primary
9568 self.target_node = instance.primary_node
9569 self.other_node = secondary_node
9570 check_nodes = [self.target_node, self.other_node]
9571 elif faulty_secondary:
9572 self.disks = faulty_secondary
9573 self.target_node = secondary_node
9574 self.other_node = instance.primary_node
9575 check_nodes = [self.target_node, self.other_node]
9581 # Non-automatic modes
9582 if self.mode == constants.REPLACE_DISK_PRI:
9583 self.target_node = instance.primary_node
9584 self.other_node = secondary_node
9585 check_nodes = [self.target_node, self.other_node]
9587 elif self.mode == constants.REPLACE_DISK_SEC:
9588 self.target_node = secondary_node
9589 self.other_node = instance.primary_node
9590 check_nodes = [self.target_node, self.other_node]
9592 elif self.mode == constants.REPLACE_DISK_CHG:
9593 self.new_node = remote_node
9594 self.other_node = instance.primary_node
9595 self.target_node = secondary_node
9596 check_nodes = [self.new_node, self.other_node]
9598 _CheckNodeNotDrained(self.lu, remote_node)
9599 _CheckNodeVmCapable(self.lu, remote_node)
9601 old_node_info = self.cfg.GetNodeInfo(secondary_node)
9602 assert old_node_info is not None
9603 if old_node_info.offline and not self.early_release:
9604 # doesn't make sense to delay the release
9605 self.early_release = True
9606 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
9607 " early-release mode", secondary_node)
9610 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
9613 # If not specified all disks should be replaced
9615 self.disks = range(len(self.instance.disks))
9617 for node in check_nodes:
9618 _CheckNodeOnline(self.lu, node)
9620 touched_nodes = frozenset(node_name for node_name in [self.new_node,
9623 if node_name is not None)
9625 # Release unneeded node locks
9626 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
9628 # Release any owned node group
9629 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
9630 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
9632 # Check whether disks are valid
9633 for disk_idx in self.disks:
9634 instance.FindDisk(disk_idx)
9636 # Get secondary node IP addresses
9637 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
9638 in self.cfg.GetMultiNodeInfo(touched_nodes))
9640 def Exec(self, feedback_fn):
9641 """Execute disk replacement.
9643 This dispatches the disk replacement to the appropriate handler.
9646 if self.delay_iallocator:
9647 self._CheckPrereq2()
9650 # Verify owned locks before starting operation
9651 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9652 assert set(owned_nodes) == set(self.node_secondary_ip), \
9653 ("Incorrect node locks, owning %s, expected %s" %
9654 (owned_nodes, self.node_secondary_ip.keys()))
9656 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
9657 assert list(owned_instances) == [self.instance_name], \
9658 "Instance '%s' not locked" % self.instance_name
9660 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
9661 "Should not own any node group lock at this point"
9664 feedback_fn("No disks need replacement")
9667 feedback_fn("Replacing disk(s) %s for %s" %
9668 (utils.CommaJoin(self.disks), self.instance.name))
9670 activate_disks = (not self.instance.admin_up)
9672 # Activate the instance disks if we're replacing them on a down instance
9674 _StartInstanceDisks(self.lu, self.instance, True)
9677 # Should we replace the secondary node?
9678 if self.new_node is not None:
9679 fn = self._ExecDrbd8Secondary
9681 fn = self._ExecDrbd8DiskOnly
9683 result = fn(feedback_fn)
9685 # Deactivate the instance disks if we're replacing them on a down instance
9688 _SafeShutdownInstanceDisks(self.lu, self.instance)
9691 # Verify owned locks
9692 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9693 nodes = frozenset(self.node_secondary_ip)
9694 assert ((self.early_release and not owned_nodes) or
9695 (not self.early_release and not (set(owned_nodes) - nodes))), \
9696 ("Not owning the correct locks, early_release=%s, owned=%r,"
9697 " nodes=%r" % (self.early_release, owned_nodes, nodes))
9701 def _CheckVolumeGroup(self, nodes):
9702 self.lu.LogInfo("Checking volume groups")
9704 vgname = self.cfg.GetVGName()
9706 # Make sure volume group exists on all involved nodes
9707 results = self.rpc.call_vg_list(nodes)
9709 raise errors.OpExecError("Can't list volume groups on the nodes")
9713 res.Raise("Error checking node %s" % node)
9714 if vgname not in res.payload:
9715 raise errors.OpExecError("Volume group '%s' not found on node %s" %
9718 def _CheckDisksExistence(self, nodes):
9719 # Check disk existence
9720 for idx, dev in enumerate(self.instance.disks):
9721 if idx not in self.disks:
9725 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
9726 self.cfg.SetDiskID(dev, node)
9728 result = self.rpc.call_blockdev_find(node, dev)
9730 msg = result.fail_msg
9731 if msg or not result.payload:
9733 msg = "disk not found"
9734 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
9737 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
9738 for idx, dev in enumerate(self.instance.disks):
9739 if idx not in self.disks:
9742 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
9745 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
9747 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
9748 " replace disks for instance %s" %
9749 (node_name, self.instance.name))
9751 def _CreateNewStorage(self, node_name):
9752 """Create new storage on the primary or secondary node.
9754 This is only used for same-node replaces, not for changing the
9755 secondary node, hence we don't want to modify the existing disk.
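# Builds and returns a dict mapping each DRBD device's iv_name to a tuple of
# (drbd_dev, old_lvs, new_lvs); the caller relies on this shape for the
# detach/rename/attach sequence and for the final removal of old storage.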
9760 for idx, dev in enumerate(self.instance.disks):
9761 if idx not in self.disks:
9764 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
9766 self.cfg.SetDiskID(dev, node_name)
9768 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
9769 names = _GenerateUniqueNames(self.lu, lv_names)
9771 vg_data = dev.children[0].logical_id[0]
9772 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
9773 logical_id=(vg_data, names[0]))
9774 vg_meta = dev.children[1].logical_id[0]
9775 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
9776 logical_id=(vg_meta, names[1]))
9778 new_lvs = [lv_data, lv_meta]
9779 old_lvs = [child.Copy() for child in dev.children]
9780 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
9782 # we pass force_create=True to force the LVM creation
9783 for new_lv in new_lvs:
9784 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
9785 _GetInstanceInfoText(self.instance), False)
9789 def _CheckDevices(self, node_name, iv_names):
9790 for name, (dev, _, _) in iv_names.iteritems():
9791 self.cfg.SetDiskID(dev, node_name)
9793 result = self.rpc.call_blockdev_find(node_name, dev)
9795 msg = result.fail_msg
9796 if msg or not result.payload:
9798 msg = "disk not found"
9799 raise errors.OpExecError("Can't find DRBD device %s: %s" %
9802 if result.payload.is_degraded:
9803 raise errors.OpExecError("DRBD device %s is degraded!" % name)
9805 def _RemoveOldStorage(self, node_name, iv_names):
9806 for name, (_, old_lvs, _) in iv_names.iteritems():
9807 self.lu.LogInfo("Remove logical volumes for %s" % name)
9810 self.cfg.SetDiskID(lv, node_name)
9812 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
9814 self.lu.LogWarning("Can't remove old LV: %s" % msg,
9815 hint="remove unused LVs manually")
9817 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
9818 """Replace a disk on the primary or secondary for DRBD 8.
9820 The algorithm for replace is quite complicated:
9822 1. for each disk to be replaced:
9824 1. create new LVs on the target node with unique names
9825 1. detach old LVs from the drbd device
9826 1. rename old LVs to name_replaced.<time_t>
9827 1. rename new LVs to old LVs
9828 1. attach the new LVs (with the old names now) to the drbd device
9830 1. wait for sync across all devices
9832 1. for each modified disk:
9834 1. remove old LVs (which have the name name_replaced.<time_t>)
9836 Failures are not very well handled.
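# In outline, the per-disk work below maps to the RPCs
# blockdev_removechildren (detach the old LVs from the DRBD device),
# blockdev_rename twice (first move the old LVs aside, then give the new
# LVs the old names) and blockdev_addchildren (re-attach the renamed LVs),
# followed by a sync wait and the removal of the old storage.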
9841 # Step: check device activation
9842 self.lu.LogStep(1, steps_total, "Check device existence")
9843 self._CheckDisksExistence([self.other_node, self.target_node])
9844 self._CheckVolumeGroup([self.target_node, self.other_node])
9846 # Step: check other node consistency
9847 self.lu.LogStep(2, steps_total, "Check peer consistency")
9848 self._CheckDisksConsistency(self.other_node,
9849 self.other_node == self.instance.primary_node,
9852 # Step: create new storage
9853 self.lu.LogStep(3, steps_total, "Allocate new storage")
9854 iv_names = self._CreateNewStorage(self.target_node)
9856 # Step: for each lv, detach+rename*2+attach
9857 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9858 for dev, old_lvs, new_lvs in iv_names.itervalues():
9859 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
9861 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
9863 result.Raise("Can't detach drbd from local storage on node"
9864 " %s for device %s" % (self.target_node, dev.iv_name))
9866 #cfg.Update(instance)
9868 # ok, we created the new LVs, so now we know we have the needed
9869 # storage; as such, we proceed on the target node to rename
9870 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
9871 # using the assumption that logical_id == physical_id (which in
9872 # turn is the unique_id on that node)
9874 # FIXME(iustin): use a better name for the replaced LVs
9875 temp_suffix = int(time.time())
9876 ren_fn = lambda d, suff: (d.physical_id[0],
9877 d.physical_id[1] + "_replaced-%s" % suff)
9879 # Build the rename list based on what LVs exist on the node
9880 rename_old_to_new = []
9881 for to_ren in old_lvs:
9882 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
9883 if not result.fail_msg and result.payload:
9885 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
9887 self.lu.LogInfo("Renaming the old LVs on the target node")
9888 result = self.rpc.call_blockdev_rename(self.target_node,
9890 result.Raise("Can't rename old LVs on node %s" % self.target_node)
9892 # Now we rename the new LVs to the old LVs
9893 self.lu.LogInfo("Renaming the new LVs on the target node")
9894 rename_new_to_old = [(new, old.physical_id)
9895 for old, new in zip(old_lvs, new_lvs)]
9896 result = self.rpc.call_blockdev_rename(self.target_node,
9898 result.Raise("Can't rename new LVs on node %s" % self.target_node)
9900 # Intermediate steps of in memory modifications
9901 for old, new in zip(old_lvs, new_lvs):
9902 new.logical_id = old.logical_id
9903 self.cfg.SetDiskID(new, self.target_node)
9905 # We need to modify old_lvs so that removal later removes the
9906 # right LVs, not the newly added ones; note that old_lvs is a copy
9908 for disk in old_lvs:
9909 disk.logical_id = ren_fn(disk, temp_suffix)
9910 self.cfg.SetDiskID(disk, self.target_node)
9912 # Now that the new lvs have the old name, we can add them to the device
9913 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
9914 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
9916 msg = result.fail_msg
9918 for new_lv in new_lvs:
9919 msg2 = self.rpc.call_blockdev_remove(self.target_node,
9922 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
9923 hint=("cleanup manually the unused logical"
9925 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
9928 if self.early_release:
9929 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9931 self._RemoveOldStorage(self.target_node, iv_names)
9932 # WARNING: we release both node locks here, do not do other RPCs
9933 # than WaitForSync to the primary node
9934 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9935 names=[self.target_node, self.other_node])
9938 # This can fail as the old devices are degraded and _WaitForSync
9939 # returns a combined result over all disks, so we don't check its return value
9940 self.lu.LogStep(cstep, steps_total, "Sync devices")
9942 _WaitForSync(self.lu, self.instance)
9944 # Check all devices manually
9945 self._CheckDevices(self.instance.primary_node, iv_names)
9947 # Step: remove old storage
9948 if not self.early_release:
9949 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9951 self._RemoveOldStorage(self.target_node, iv_names)
9953 def _ExecDrbd8Secondary(self, feedback_fn):
9954 """Replace the secondary node for DRBD 8.
9956 The algorithm for replace is quite complicated:
9957 - for all disks of the instance:
9958 - create new LVs on the new node with same names
9959 - shutdown the drbd device on the old secondary
9960 - disconnect the drbd network on the primary
9961 - create the drbd device on the new secondary
9962 - network attach the drbd on the primary, using an artifice:
9963 the drbd code for Attach() will connect to the network if it
9964 finds a device which is connected to the good local disks but
9966 - wait for sync across all devices
9967 - remove all disks from the old secondary
9969 Failures are not very well handled.
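# In outline, the steps below are: create the LVs on the new node, allocate
# DRBD minors and create the DRBD devices there (initially without network
# information), shut down the DRBD devices on the old secondary, disconnect
# the primary's DRBD from the network, point the instance configuration at
# the new secondary, re-attach the primary's DRBD to the new secondary and
# finally wait for sync and remove the old storage.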
9974 pnode = self.instance.primary_node
9976 # Step: check device activation
9977 self.lu.LogStep(1, steps_total, "Check device existence")
9978 self._CheckDisksExistence([self.instance.primary_node])
9979 self._CheckVolumeGroup([self.instance.primary_node])
9981 # Step: check other node consistency
9982 self.lu.LogStep(2, steps_total, "Check peer consistency")
9983 self._CheckDisksConsistency(self.instance.primary_node, True, True)
9985 # Step: create new storage
9986 self.lu.LogStep(3, steps_total, "Allocate new storage")
9987 for idx, dev in enumerate(self.instance.disks):
9988 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
9989 (self.new_node, idx))
9990 # we pass force_create=True to force LVM creation
9991 for new_lv in dev.children:
9992 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
9993 _GetInstanceInfoText(self.instance), False)
9995 # Step 4: drbd minors and drbd setup changes
9996 # after this, we must manually remove the drbd minors on both the
9997 # error and the success paths
9998 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9999 minors = self.cfg.AllocateDRBDMinor([self.new_node
10000 for dev in self.instance.disks],
10001 self.instance.name)
10002 logging.debug("Allocated minors %r", minors)
10005 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
10006 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
10007 (self.new_node, idx))
10008 # create new devices on new_node; note that we create two IDs:
10009 # one without port, so the drbd will be activated without
10010 # networking information on the new node at this stage, and one
10011 # with network, for the latter activation in step 4
10012 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
10013 if self.instance.primary_node == o_node1:
10016 assert self.instance.primary_node == o_node2, "Three-node instance?"
10019 new_alone_id = (self.instance.primary_node, self.new_node, None,
10020 p_minor, new_minor, o_secret)
10021 new_net_id = (self.instance.primary_node, self.new_node, o_port,
10022 p_minor, new_minor, o_secret)
10024 iv_names[idx] = (dev, dev.children, new_net_id)
10025 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
10027 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
10028 logical_id=new_alone_id,
10029 children=dev.children,
10032 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
10033 _GetInstanceInfoText(self.instance), False)
10034 except errors.GenericError:
10035 self.cfg.ReleaseDRBDMinors(self.instance.name)
10038 # We have new devices, shutdown the drbd on the old secondary
10039 for idx, dev in enumerate(self.instance.disks):
10040 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
10041 self.cfg.SetDiskID(dev, self.target_node)
10042 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
10044 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
10045 "node: %s" % (idx, msg),
10046 hint=("Please cleanup this device manually as"
10047 " soon as possible"))
10049 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
10050 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
10051 self.instance.disks)[pnode]
10053 msg = result.fail_msg
10055 # detaches didn't succeed (unlikely)
10056 self.cfg.ReleaseDRBDMinors(self.instance.name)
10057 raise errors.OpExecError("Can't detach the disks from the network on"
10058 " old node: %s" % (msg,))
10060 # if we managed to detach at least one, we update all the disks of
10061 # the instance to point to the new secondary
10062 self.lu.LogInfo("Updating instance configuration")
10063 for dev, _, new_logical_id in iv_names.itervalues():
10064 dev.logical_id = new_logical_id
10065 self.cfg.SetDiskID(dev, self.instance.primary_node)
10067 self.cfg.Update(self.instance, feedback_fn)
10069 # and now perform the drbd attach
10070 self.lu.LogInfo("Attaching primary drbds to new secondary"
10071 " (standalone => connected)")
10072 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
10074 self.node_secondary_ip,
10075 self.instance.disks,
10076 self.instance.name,
10078 for to_node, to_result in result.items():
10079 msg = to_result.fail_msg
10081 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
10083 hint=("please do a gnt-instance info to see the"
10084 " status of disks"))
10086 if self.early_release:
10087 self.lu.LogStep(cstep, steps_total, "Removing old storage")
10089 self._RemoveOldStorage(self.target_node, iv_names)
10090 # WARNING: we release all node locks here, do not do other RPCs
10091 # than WaitForSync to the primary node
10092 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
10093 names=[self.instance.primary_node,
10098 # This can fail as the old devices are degraded and _WaitForSync
10099 # returns a combined result over all disks, so we don't check its return value
10100 self.lu.LogStep(cstep, steps_total, "Sync devices")
10102 _WaitForSync(self.lu, self.instance)
10104 # Check all devices manually
10105 self._CheckDevices(self.instance.primary_node, iv_names)
10107 # Step: remove old storage
10108 if not self.early_release:
10109 self.lu.LogStep(cstep, steps_total, "Removing old storage")
10110 self._RemoveOldStorage(self.target_node, iv_names)
10113 class LURepairNodeStorage(NoHooksLU):
10114 """Repairs the volume group on a node.
10119 def CheckArguments(self):
10120 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10122 storage_type = self.op.storage_type
10124 if (constants.SO_FIX_CONSISTENCY not in
10125 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
10126 raise errors.OpPrereqError("Storage units of type '%s' can not be"
10127 " repaired" % storage_type,
10128 errors.ECODE_INVAL)
10130 def ExpandNames(self):
10131 self.needed_locks = {
10132 locking.LEVEL_NODE: [self.op.node_name],
10135 def _CheckFaultyDisks(self, instance, node_name):
10136 """Ensure faulty disks abort the opcode or at least warn."""
10138 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
10140 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
10141 " node '%s'" % (instance.name, node_name),
10142 errors.ECODE_STATE)
10143 except errors.OpPrereqError, err:
10144 if self.op.ignore_consistency:
10145 self.proc.LogWarning(str(err.args[0]))
10149 def CheckPrereq(self):
10150 """Check prerequisites.
10153 # Check whether any instance on this node has faulty disks
10154 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
10155 if not inst.admin_up:
10157 check_nodes = set(inst.all_nodes)
10158 check_nodes.discard(self.op.node_name)
10159 for inst_node_name in check_nodes:
10160 self._CheckFaultyDisks(inst, inst_node_name)
10162 def Exec(self, feedback_fn):
10163 feedback_fn("Repairing storage unit '%s' on %s ..." %
10164 (self.op.name, self.op.node_name))
10166 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
10167 result = self.rpc.call_storage_execute(self.op.node_name,
10168 self.op.storage_type, st_args,
10170 constants.SO_FIX_CONSISTENCY)
10171 result.Raise("Failed to repair storage unit '%s' on %s" %
10172 (self.op.name, self.op.node_name))
10175 class LUNodeEvacuate(NoHooksLU):
10176 """Evacuates instances off a list of nodes.
10181 _MODE2IALLOCATOR = {
10182 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
10183 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
10184 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
10186 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
10187 assert (frozenset(_MODE2IALLOCATOR.values()) ==
10188 constants.IALLOCATOR_NEVAC_MODES)
10190 def CheckArguments(self):
10191 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10193 def ExpandNames(self):
10194 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10196 if self.op.remote_node is not None:
10197 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10198 assert self.op.remote_node
10200 if self.op.remote_node == self.op.node_name:
10201 raise errors.OpPrereqError("Can not use evacuated node as a new"
10202 " secondary node", errors.ECODE_INVAL)
10204 if self.op.mode != constants.NODE_EVAC_SEC:
10205 raise errors.OpPrereqError("Without the use of an iallocator only"
10206 " secondary instances can be evacuated",
10207 errors.ECODE_INVAL)
10210 self.share_locks = _ShareAll()
10211 self.needed_locks = {
10212 locking.LEVEL_INSTANCE: [],
10213 locking.LEVEL_NODEGROUP: [],
10214 locking.LEVEL_NODE: [],
10217 # Determine nodes (via group) optimistically, needs verification once locks
10218 # have been acquired
10219 self.lock_nodes = self._DetermineNodes()
10221 def _DetermineNodes(self):
10222 """Gets the list of nodes to operate on.
10225 if self.op.remote_node is None:
10226 # Iallocator will choose any node(s) in the same group
10227 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
10229 group_nodes = frozenset([self.op.remote_node])
10231 # Determine nodes to be locked
10232 return set([self.op.node_name]) | group_nodes
10234 def _DetermineInstances(self):
10235 """Builds list of instances to operate on.
10238 assert self.op.mode in constants.NODE_EVAC_MODES
10240 if self.op.mode == constants.NODE_EVAC_PRI:
10241 # Primary instances only
10242 inst_fn = _GetNodePrimaryInstances
10243 assert self.op.remote_node is None, \
10244 "Evacuating primary instances requires iallocator"
10245 elif self.op.mode == constants.NODE_EVAC_SEC:
10246 # Secondary instances only
10247 inst_fn = _GetNodeSecondaryInstances
10250 assert self.op.mode == constants.NODE_EVAC_ALL
10251 inst_fn = _GetNodeInstances
10252 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
10254 raise errors.OpPrereqError("Due to an issue with the iallocator"
10255 " interface it is not possible to evacuate"
10256 " all instances at once; specify explicitly"
10257 " whether to evacuate primary or secondary"
10259 errors.ECODE_INVAL)
10261 return inst_fn(self.cfg, self.op.node_name)
10263 def DeclareLocks(self, level):
10264 if level == locking.LEVEL_INSTANCE:
10265 # Lock instances optimistically, needs verification once node and group
10266 # locks have been acquired
10267 self.needed_locks[locking.LEVEL_INSTANCE] = \
10268 set(i.name for i in self._DetermineInstances())
10270 elif level == locking.LEVEL_NODEGROUP:
10271 # Lock node groups for all potential target nodes optimistically, needs
10272 # verification once nodes have been acquired
10273 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10274 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
10276 elif level == locking.LEVEL_NODE:
10277 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
10279 def CheckPrereq(self):
10281 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
10282 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
10283 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10285 need_nodes = self._DetermineNodes()
10287 if not owned_nodes.issuperset(need_nodes):
10288 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
10289 " locks were acquired, current nodes are"
10290 " are '%s', used to be '%s'; retry the"
10292 (self.op.node_name,
10293 utils.CommaJoin(need_nodes),
10294 utils.CommaJoin(owned_nodes)),
10295 errors.ECODE_STATE)
10297 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
10298 if owned_groups != wanted_groups:
10299 raise errors.OpExecError("Node groups changed since locks were acquired,"
10300 " current groups are '%s', used to be '%s';"
10301 " retry the operation" %
10302 (utils.CommaJoin(wanted_groups),
10303 utils.CommaJoin(owned_groups)))
10305 # Determine affected instances
10306 self.instances = self._DetermineInstances()
10307 self.instance_names = [i.name for i in self.instances]
10309 if set(self.instance_names) != owned_instances:
10310 raise errors.OpExecError("Instances on node '%s' changed since locks"
10311 " were acquired, current instances are '%s',"
10312 " used to be '%s'; retry the operation" %
10313 (self.op.node_name,
10314 utils.CommaJoin(self.instance_names),
10315 utils.CommaJoin(owned_instances)))
10317 if self.instance_names:
10318 self.LogInfo("Evacuating instances from node '%s': %s",
10320 utils.CommaJoin(utils.NiceSort(self.instance_names)))
10322 self.LogInfo("No instances to evacuate from node '%s'",
10325 if self.op.remote_node is not None:
10326 for i in self.instances:
10327 if i.primary_node == self.op.remote_node:
10328 raise errors.OpPrereqError("Node %s is the primary node of"
10329 " instance %s, cannot use it as"
10331 (self.op.remote_node, i.name),
10332 errors.ECODE_INVAL)
10334 def Exec(self, feedback_fn):
10335 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10337 if not self.instance_names:
10338 # No instances to evacuate
10341 elif self.op.iallocator is not None:
10342 # TODO: Implement relocation to other group
10343 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10344 evac_mode=self._MODE2IALLOCATOR[self.op.mode],
10345 instances=list(self.instance_names))
10347 ial.Run(self.op.iallocator)
10349 if not ial.success:
10350 raise errors.OpPrereqError("Can't compute node evacuation using"
10351 " iallocator '%s': %s" %
10352 (self.op.iallocator, ial.info),
10353 errors.ECODE_NORES)
10355 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10357 elif self.op.remote_node is not None:
10358 assert self.op.mode == constants.NODE_EVAC_SEC
10360 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
10361 remote_node=self.op.remote_node,
10363 mode=constants.REPLACE_DISK_CHG,
10364 early_release=self.op.early_release)]
10365 for instance_name in self.instance_names
10369 raise errors.ProgrammerError("No iallocator or remote node")
10371 return ResultWithJobs(jobs)
10374 def _SetOpEarlyRelease(early_release, op):
10375 """Sets C{early_release} flag on opcodes if available.
10379 op.early_release = early_release
10380 except AttributeError:
10381 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
10386 def _NodeEvacDest(use_nodes, group, nodes):
10387 """Returns group or nodes depending on caller's choice.
10391 return utils.CommaJoin(nodes)
10396 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
10397 """Unpacks the result of change-group and node-evacuate iallocator requests.
10399 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
10400 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
10402 @type lu: L{LogicalUnit}
10403 @param lu: Logical unit instance
10404 @type alloc_result: tuple/list
10405 @param alloc_result: Result from iallocator
10406 @type early_release: bool
10407 @param early_release: Whether to release locks early if possible
10408 @type use_nodes: bool
10409 @param use_nodes: Whether to display node names instead of groups
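# alloc_result is expected to unpack into (moved, failed, jobs): "failed"
# holds (instance name, reason) pairs, "moved" holds (instance name, target
# group, nodes) tuples used only for logging, and "jobs" holds lists of
# serialized opcodes which are re-loaded and patched with early_release.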
10412 (moved, failed, jobs) = alloc_result
10415 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
10416 for (name, reason) in failed)
10417 lu.LogWarning("Unable to evacuate instances %s", failreason)
10418 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
10421 lu.LogInfo("Instances to be moved: %s",
10422 utils.CommaJoin("%s (to %s)" %
10423 (name, _NodeEvacDest(use_nodes, group, nodes))
10424 for (name, group, nodes) in moved))
10426 return [map(compat.partial(_SetOpEarlyRelease, early_release),
10427 map(opcodes.OpCode.LoadOpCode, ops))
10431 class LUInstanceGrowDisk(LogicalUnit):
10432 """Grow a disk of an instance.
10435 HPATH = "disk-grow"
10436 HTYPE = constants.HTYPE_INSTANCE
10439 def ExpandNames(self):
10440 self._ExpandAndLockInstance()
10441 self.needed_locks[locking.LEVEL_NODE] = []
10442 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10444 def DeclareLocks(self, level):
10445 if level == locking.LEVEL_NODE:
10446 self._LockInstancesNodes()
10448 def BuildHooksEnv(self):
10449 """Build hooks env.
10451 This runs on the master, the primary and all the secondaries.
10455 "DISK": self.op.disk,
10456 "AMOUNT": self.op.amount,
10458 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10461 def BuildHooksNodes(self):
10462 """Build hooks nodes.
10465 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10468 def CheckPrereq(self):
10469 """Check prerequisites.
10471 This checks that the instance is in the cluster.
10474 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10475 assert instance is not None, \
10476 "Cannot retrieve locked instance %s" % self.op.instance_name
10477 nodenames = list(instance.all_nodes)
10478 for node in nodenames:
10479 _CheckNodeOnline(self, node)
10481 self.instance = instance
10483 if instance.disk_template not in constants.DTS_GROWABLE:
10484 raise errors.OpPrereqError("Instance's disk layout does not support"
10485 " growing", errors.ECODE_INVAL)
10487 self.disk = instance.FindDisk(self.op.disk)
10489 if instance.disk_template not in (constants.DT_FILE,
10490 constants.DT_SHARED_FILE):
10491 # TODO: check the free disk space for file, when that feature will be
10493 _CheckNodesFreeDiskPerVG(self, nodenames,
10494 self.disk.ComputeGrowth(self.op.amount))
10496 def Exec(self, feedback_fn):
10497 """Execute disk grow.
10500 instance = self.instance
10503 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
10505 raise errors.OpExecError("Cannot activate block device to grow")
10507 # First run all grow ops in dry-run mode
10508 for node in instance.all_nodes:
10509 self.cfg.SetDiskID(disk, node)
10510 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
10511 result.Raise("Grow request failed to node %s" % node)
10513 # We know that (as far as we can test) operations across different
10514 # nodes will succeed, time to run it for real
10515 for node in instance.all_nodes:
10516 self.cfg.SetDiskID(disk, node)
10517 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
10518 result.Raise("Grow request failed to node %s" % node)
10520 # TODO: Rewrite code to work properly
10521 # DRBD goes into sync mode for a short amount of time after executing the
10522 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
10523 # calling "resize" in sync mode fails. Sleeping for a short amount of
10524 # time is a work-around.
10527 disk.RecordGrow(self.op.amount)
10528 self.cfg.Update(instance, feedback_fn)
10529 if self.op.wait_for_sync:
10530 disk_abort = not _WaitForSync(self, instance, disks=[disk])
10532 self.proc.LogWarning("Disk sync-ing has not returned a good"
10533 " status; please check the instance")
10534 if not instance.admin_up:
10535 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
10536 elif not instance.admin_up:
10537 self.proc.LogWarning("Not shutting down the disk even if the instance is"
10538 " not supposed to be running because no wait for"
10539 " sync mode was requested")
10542 class LUInstanceQueryData(NoHooksLU):
10543 """Query runtime instance data.
10548 def ExpandNames(self):
10549 self.needed_locks = {}
10551 # Use locking if requested or when non-static information is wanted
10552 if not (self.op.static or self.op.use_locking):
10553 self.LogWarning("Non-static data requested, locks need to be acquired")
10554 self.op.use_locking = True
10556 if self.op.instances or not self.op.use_locking:
10557 # Expand instance names right here
10558 self.wanted_names = _GetWantedInstances(self, self.op.instances)
10560 # Will use acquired locks
10561 self.wanted_names = None
10563 if self.op.use_locking:
10564 self.share_locks = _ShareAll()
10566 if self.wanted_names is None:
10567 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
10569 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
10571 self.needed_locks[locking.LEVEL_NODEGROUP] = []
10572 self.needed_locks[locking.LEVEL_NODE] = []
10573 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10575 def DeclareLocks(self, level):
10576 if self.op.use_locking:
10577 if level == locking.LEVEL_NODEGROUP:
10578 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
10580 # Lock all groups used by instances optimistically; this requires going
10581 # via the node before it's locked, requiring verification later on
10582 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10583 frozenset(group_uuid
10584 for instance_name in owned_instances
10586 self.cfg.GetInstanceNodeGroups(instance_name))
10588 elif level == locking.LEVEL_NODE:
10589 self._LockInstancesNodes()
10591 def CheckPrereq(self):
10592 """Check prerequisites.
10594 This only checks the optional instance list against the existing names.
10597 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
10598 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
10599 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
10601 if self.wanted_names is None:
10602 assert self.op.use_locking, "Locking was not used"
10603 self.wanted_names = owned_instances
10605 instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
10607 if self.op.use_locking:
10608 _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
10611 assert not (owned_instances or owned_groups or owned_nodes)
10613 self.wanted_instances = instances.values()
10615 def _ComputeBlockdevStatus(self, node, instance_name, dev):
10616 """Returns the status of a block device
10619 if self.op.static or not node:
10622 self.cfg.SetDiskID(dev, node)
10624 result = self.rpc.call_blockdev_find(node, dev)
10628 result.Raise("Can't compute disk status for %s" % instance_name)
10630 status = result.payload
10634 return (status.dev_path, status.major, status.minor,
10635 status.sync_percent, status.estimated_time,
10636 status.is_degraded, status.ldisk_status)
10638 def _ComputeDiskStatus(self, instance, snode, dev):
10639 """Compute block device status.
10642 if dev.dev_type in constants.LDS_DRBD:
10643 # we change the snode then (otherwise we use the one passed in)
10644 if dev.logical_id[0] == instance.primary_node:
10645 snode = dev.logical_id[1]
10647 snode = dev.logical_id[0]
10649 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
10650 instance.name, dev)
10651 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
10654 dev_children = map(compat.partial(self._ComputeDiskStatus,
10661 "iv_name": dev.iv_name,
10662 "dev_type": dev.dev_type,
10663 "logical_id": dev.logical_id,
10664 "physical_id": dev.physical_id,
10665 "pstatus": dev_pstatus,
10666 "sstatus": dev_sstatus,
10667 "children": dev_children,
10672 def Exec(self, feedback_fn):
10673 """Gather and return data"""
10676 cluster = self.cfg.GetClusterInfo()
10678 node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
10679 nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
10681 groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
10682 for node in nodes.values()))
10684 group2name_fn = lambda uuid: groups[uuid].name
10686 for instance in self.wanted_instances:
10687 pnode = nodes[instance.primary_node]
10689 if self.op.static or pnode.offline:
10690 remote_state = None
10692 self.LogWarning("Primary node %s is marked offline, returning static"
10693 " information only for instance %s" %
10694 (pnode.name, instance.name))
10696 remote_info = self.rpc.call_instance_info(instance.primary_node,
10698 instance.hypervisor)
10699 remote_info.Raise("Error checking node %s" % instance.primary_node)
10700 remote_info = remote_info.payload
10701 if remote_info and "state" in remote_info:
10702 remote_state = "up"
10704 remote_state = "down"
10706 if instance.admin_up:
10707 config_state = "up"
10709 config_state = "down"
10711 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
10714 snodes_group_uuids = [nodes[snode_name].group
10715 for snode_name in instance.secondary_nodes]
10717 result[instance.name] = {
10718 "name": instance.name,
10719 "config_state": config_state,
10720 "run_state": remote_state,
10721 "pnode": instance.primary_node,
10722 "pnode_group_uuid": pnode.group,
10723 "pnode_group_name": group2name_fn(pnode.group),
10724 "snodes": instance.secondary_nodes,
10725 "snodes_group_uuids": snodes_group_uuids,
10726 "snodes_group_names": map(group2name_fn, snodes_group_uuids),
10728 # this happens to be the same format used for hooks
10729 "nics": _NICListToTuple(self, instance.nics),
10730 "disk_template": instance.disk_template,
10732 "hypervisor": instance.hypervisor,
10733 "network_port": instance.network_port,
10734 "hv_instance": instance.hvparams,
10735 "hv_actual": cluster.FillHV(instance, skip_globals=True),
10736 "be_instance": instance.beparams,
10737 "be_actual": cluster.FillBE(instance),
10738 "os_instance": instance.osparams,
10739 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
10740 "serial_no": instance.serial_no,
10741 "mtime": instance.mtime,
10742 "ctime": instance.ctime,
10743 "uuid": instance.uuid,
10749 class LUInstanceSetParams(LogicalUnit):
10750 """Modifies an instances's parameters.
10753 HPATH = "instance-modify"
10754 HTYPE = constants.HTYPE_INSTANCE
10757 def CheckArguments(self):
10758 if not (self.op.nics or self.op.disks or self.op.disk_template or
10759 self.op.hvparams or self.op.beparams or self.op.os_name):
10760 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
10762 if self.op.hvparams:
10763 _CheckGlobalHvParams(self.op.hvparams)
10767 for disk_op, disk_dict in self.op.disks:
10768 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
10769 if disk_op == constants.DDM_REMOVE:
10770 disk_addremove += 1
10772 elif disk_op == constants.DDM_ADD:
10773 disk_addremove += 1
10775 if not isinstance(disk_op, int):
10776 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
10777 if not isinstance(disk_dict, dict):
10778 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
10779 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10781 if disk_op == constants.DDM_ADD:
10782 mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
10783 if mode not in constants.DISK_ACCESS_SET:
10784 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
10785 errors.ECODE_INVAL)
10786 size = disk_dict.get(constants.IDISK_SIZE, None)
10788 raise errors.OpPrereqError("Required disk parameter size missing",
10789 errors.ECODE_INVAL)
10792 except (TypeError, ValueError), err:
10793 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
10794 str(err), errors.ECODE_INVAL)
10795 disk_dict[constants.IDISK_SIZE] = size
10797 # modification of disk
10798 if constants.IDISK_SIZE in disk_dict:
10799 raise errors.OpPrereqError("Disk size change not possible, use"
10800 " grow-disk", errors.ECODE_INVAL)
10802 if disk_addremove > 1:
10803 raise errors.OpPrereqError("Only one disk add or remove operation"
10804 " supported at a time", errors.ECODE_INVAL)
10806 if self.op.disks and self.op.disk_template is not None:
10807 raise errors.OpPrereqError("Disk template conversion and other disk"
10808 " changes not supported at the same time",
10809 errors.ECODE_INVAL)
10811 if (self.op.disk_template and
10812 self.op.disk_template in constants.DTS_INT_MIRROR and
10813 self.op.remote_node is None):
10814 raise errors.OpPrereqError("Changing the disk template to a mirrored"
10815 " one requires specifying a secondary node",
10816 errors.ECODE_INVAL)
10820 for nic_op, nic_dict in self.op.nics:
10821 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
10822 if nic_op == constants.DDM_REMOVE:
10825 elif nic_op == constants.DDM_ADD:
10828 if not isinstance(nic_op, int):
10829 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
10830 if not isinstance(nic_dict, dict):
10831 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
10832 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10834 # nic_dict should be a dict
10835 nic_ip = nic_dict.get(constants.INIC_IP, None)
10836 if nic_ip is not None:
10837 if nic_ip.lower() == constants.VALUE_NONE:
10838 nic_dict[constants.INIC_IP] = None
10840 if not netutils.IPAddress.IsValid(nic_ip):
10841 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
10842 errors.ECODE_INVAL)
10844 nic_bridge = nic_dict.get("bridge", None)
10845 nic_link = nic_dict.get(constants.INIC_LINK, None)
10846 if nic_bridge and nic_link:
10847 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
10848 " at the same time", errors.ECODE_INVAL)
10849 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
10850 nic_dict["bridge"] = None
10851 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
10852 nic_dict[constants.INIC_LINK] = None
10854 if nic_op == constants.DDM_ADD:
10855 nic_mac = nic_dict.get(constants.INIC_MAC, None)
10856 if nic_mac is None:
10857 nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
10859 if constants.INIC_MAC in nic_dict:
10860 nic_mac = nic_dict[constants.INIC_MAC]
10861 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10862 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
10864 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
10865 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
10866 " modifying an existing nic",
10867 errors.ECODE_INVAL)
10869 if nic_addremove > 1:
10870 raise errors.OpPrereqError("Only one NIC add or remove operation"
10871 " supported at a time", errors.ECODE_INVAL)
10873 def ExpandNames(self):
10874 self._ExpandAndLockInstance()
10875 self.needed_locks[locking.LEVEL_NODE] = []
10876 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10878 def DeclareLocks(self, level):
10879 if level == locking.LEVEL_NODE:
10880 self._LockInstancesNodes()
10881 if self.op.disk_template and self.op.remote_node:
10882 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10883 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
10885 def BuildHooksEnv(self):
10886 """Build hooks env.
10888 This runs on the master, primary and secondaries.
10892 if constants.BE_MEMORY in self.be_new:
10893 args["memory"] = self.be_new[constants.BE_MEMORY]
10894 if constants.BE_VCPUS in self.be_new:
10895 args["vcpus"] = self.be_new[constants.BE_VCPUS]
10896 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
10897 # information at all.
10900 nic_override = dict(self.op.nics)
10901 for idx, nic in enumerate(self.instance.nics):
10902 if idx in nic_override:
10903 this_nic_override = nic_override[idx]
10905 this_nic_override = {}
10906 if constants.INIC_IP in this_nic_override:
10907 ip = this_nic_override[constants.INIC_IP]
10910 if constants.INIC_MAC in this_nic_override:
10911 mac = this_nic_override[constants.INIC_MAC]
10914 if idx in self.nic_pnew:
10915 nicparams = self.nic_pnew[idx]
10917 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
10918 mode = nicparams[constants.NIC_MODE]
10919 link = nicparams[constants.NIC_LINK]
10920 args["nics"].append((ip, mac, mode, link))
10921 if constants.DDM_ADD in nic_override:
10922 ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
10923 mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
10924 nicparams = self.nic_pnew[constants.DDM_ADD]
10925 mode = nicparams[constants.NIC_MODE]
10926 link = nicparams[constants.NIC_LINK]
10927 args["nics"].append((ip, mac, mode, link))
10928 elif constants.DDM_REMOVE in nic_override:
10929 del args["nics"][-1]
10931 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
10932 if self.op.disk_template:
10933 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
10937 def BuildHooksNodes(self):
10938 """Build hooks nodes.
10941 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10944 def CheckPrereq(self):
10945 """Check prerequisites.
10947 This only checks the instance list against the existing names.
10950 # checking the new params on the primary/secondary nodes
10952 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10953 cluster = self.cluster = self.cfg.GetClusterInfo()
10954 assert self.instance is not None, \
10955 "Cannot retrieve locked instance %s" % self.op.instance_name
10956 pnode = instance.primary_node
10957 nodelist = list(instance.all_nodes)
10960 if self.op.os_name and not self.op.force:
10961 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
10962 self.op.force_variant)
10963 instance_os = self.op.os_name
10965 instance_os = instance.os
10967 if self.op.disk_template:
10968 if instance.disk_template == self.op.disk_template:
10969 raise errors.OpPrereqError("Instance already has disk template %s" %
10970 instance.disk_template, errors.ECODE_INVAL)
10972 if (instance.disk_template,
10973 self.op.disk_template) not in self._DISK_CONVERSIONS:
10974 raise errors.OpPrereqError("Unsupported disk template conversion from"
10975 " %s to %s" % (instance.disk_template,
10976 self.op.disk_template),
10977 errors.ECODE_INVAL)
10978 _CheckInstanceDown(self, instance, "cannot change disk template")
10979 if self.op.disk_template in constants.DTS_INT_MIRROR:
10980 if self.op.remote_node == pnode:
10981 raise errors.OpPrereqError("Given new secondary node %s is the same"
10982 " as the primary node of the instance" %
10983 self.op.remote_node, errors.ECODE_STATE)
10984 _CheckNodeOnline(self, self.op.remote_node)
10985 _CheckNodeNotDrained(self, self.op.remote_node)
10986 # FIXME: here we assume that the old instance type is DT_PLAIN
10987 assert instance.disk_template == constants.DT_PLAIN
10988 disks = [{constants.IDISK_SIZE: d.size,
10989 constants.IDISK_VG: d.logical_id[0]}
10990 for d in instance.disks]
10991 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
10992 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
10994 # hvparams processing
10995 if self.op.hvparams:
10996 hv_type = instance.hypervisor
10997 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
10998 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
10999 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
11002 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
11003 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
11004 self.hv_new = hv_new # the new actual values
11005 self.hv_inst = i_hvdict # the new dict (without defaults)
11007 self.hv_new = self.hv_inst = {}
11009 # beparams processing
11010 if self.op.beparams:
11011 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
11013 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
11014 be_new = cluster.SimpleFillBE(i_bedict)
11015 self.be_new = be_new # the new actual values
11016 self.be_inst = i_bedict # the new dict (without defaults)
11018 self.be_new = self.be_inst = {}
11019 be_old = cluster.FillBE(instance)
11021 # osparams processing
11022 if self.op.osparams:
11023 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
11024 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
11025 self.os_inst = i_osdict # the new dict (without defaults)
11031 if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
11032 be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
11033 mem_check_list = [pnode]
11034 if be_new[constants.BE_AUTO_BALANCE]:
11035 # either we changed auto_balance to yes or it was from before
11036 mem_check_list.extend(instance.secondary_nodes)
11037 instance_info = self.rpc.call_instance_info(pnode, instance.name,
11038 instance.hypervisor)
11039 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
11040 instance.hypervisor)
11041 pninfo = nodeinfo[pnode]
11042 msg = pninfo.fail_msg
11044 # Assume the primary node is unreachable and go ahead
11045 self.warn.append("Can't get info from primary node %s: %s" %
11047 elif not isinstance(pninfo.payload.get("memory_free", None), int):
11048 self.warn.append("Node data from primary node %s doesn't contain"
11049 " free memory information" % pnode)
11050 elif instance_info.fail_msg:
11051 self.warn.append("Can't get instance runtime information: %s" %
11052 instance_info.fail_msg)
11054 if instance_info.payload:
11055 current_mem = int(instance_info.payload["memory"])
11057 # Assume instance not running
11058 # (there is a slight race condition here, but it's not very probable,
11059 # and we have no other way to check)
11061 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
11062 pninfo.payload["memory_free"])
11064 raise errors.OpPrereqError("This change will prevent the instance"
11065 " from starting, due to %d MB of memory"
11066 " missing on its primary node" % miss_mem,
11067 errors.ECODE_NORES)
11069 if be_new[constants.BE_AUTO_BALANCE]:
11070 for node, nres in nodeinfo.items():
11071 if node not in instance.secondary_nodes:
11073 nres.Raise("Can't get info from secondary node %s" % node,
11074 prereq=True, ecode=errors.ECODE_STATE)
11075 if not isinstance(nres.payload.get("memory_free", None), int):
11076 raise errors.OpPrereqError("Secondary node %s didn't return free"
11077 " memory information" % node,
11078 errors.ECODE_STATE)
11079 elif be_new[constants.BE_MEMORY] > nres.payload["memory_free"]:
11080 raise errors.OpPrereqError("This change will prevent the instance"
11081 " from failover to its secondary node"
11082 " %s, due to not enough memory" % node,
11083 errors.ECODE_STATE)
11087 self.nic_pinst = {}
11088 for nic_op, nic_dict in self.op.nics:
11089 if nic_op == constants.DDM_REMOVE:
11090 if not instance.nics:
11091 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
11092 errors.ECODE_INVAL)
11094 if nic_op != constants.DDM_ADD:
11096 if not instance.nics:
11097 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
11098 " no NICs" % nic_op,
11099 errors.ECODE_INVAL)
11100 if nic_op < 0 or nic_op >= len(instance.nics):
11101 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
11103 (nic_op, len(instance.nics) - 1),
11104 errors.ECODE_INVAL)
11105 old_nic_params = instance.nics[nic_op].nicparams
11106 old_nic_ip = instance.nics[nic_op].ip
11107 else:
11108 old_nic_params = {}
11109 old_nic_ip = None
11111 update_params_dict = dict([(key, nic_dict[key])
11112 for key in constants.NICS_PARAMETERS
11113 if key in nic_dict])
11115 if "bridge" in nic_dict:
11116 update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
11118 new_nic_params = _GetUpdatedParams(old_nic_params,
11119 update_params_dict)
11120 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
11121 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
11122 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
11123 self.nic_pinst[nic_op] = new_nic_params
11124 self.nic_pnew[nic_op] = new_filled_nic_params
11125 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
11127 if new_nic_mode == constants.NIC_MODE_BRIDGED:
11128 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
11129 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
11130 if msg:
11131 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
11132 if self.op.force:
11133 self.warn.append(msg)
11134 else:
11135 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
11136 if new_nic_mode == constants.NIC_MODE_ROUTED:
11137 if constants.INIC_IP in nic_dict:
11138 nic_ip = nic_dict[constants.INIC_IP]
11139 else:
11140 nic_ip = old_nic_ip
11141 if nic_ip is None:
11142 raise errors.OpPrereqError("Cannot set the nic ip to None"
11143 " on a routed nic", errors.ECODE_INVAL)
11144 if constants.INIC_MAC in nic_dict:
11145 nic_mac = nic_dict[constants.INIC_MAC]
11146 if nic_mac is None:
11147 raise errors.OpPrereqError("Cannot set the nic mac to None",
11148 errors.ECODE_INVAL)
11149 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11150 # otherwise generate the mac
11151 nic_dict[constants.INIC_MAC] = \
11152 self.cfg.GenerateMAC(self.proc.GetECId())
11153 else:
11154 # or validate/reserve the current one
11155 try:
11156 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
11157 except errors.ReservationError:
11158 raise errors.OpPrereqError("MAC address %s already in use"
11159 " in cluster" % nic_mac,
11160 errors.ECODE_NOTUNIQUE)
11163 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
11164 raise errors.OpPrereqError("Disk operations not supported for"
11165 " diskless instances",
11166 errors.ECODE_INVAL)
11167 for disk_op, _ in self.op.disks:
11168 if disk_op == constants.DDM_REMOVE:
11169 if len(instance.disks) == 1:
11170 raise errors.OpPrereqError("Cannot remove the last disk of"
11171 " an instance", errors.ECODE_INVAL)
11172 _CheckInstanceDown(self, instance, "cannot remove disks")
11174 if (disk_op == constants.DDM_ADD and
11175 len(instance.disks) >= constants.MAX_DISKS):
11176 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
11177 " add more" % constants.MAX_DISKS,
11178 errors.ECODE_STATE)
11179 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
11181 if disk_op < 0 or disk_op >= len(instance.disks):
11182 raise errors.OpPrereqError("Invalid disk index %s, valid values"
11184 (disk_op, len(instance.disks)),
11185 errors.ECODE_INVAL)
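# The helpers below implement the disk template conversions registered in
# the _DISK_CONVERSIONS map at the end of this LU. The plain->drbd path
# creates the missing data/meta LVs, renames the original volumes so they
# become the DRBD data devices, layers DRBD on top and waits for sync; the
# drbd->plain path keeps the data children, returns the DRBD TCP ports to
# the pool and removes the now-unneeded volumes.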
11189 def _ConvertPlainToDrbd(self, feedback_fn):
11190 """Converts an instance from plain to drbd.
11193 feedback_fn("Converting template to drbd")
11194 instance = self.instance
11195 pnode = instance.primary_node
11196 snode = self.op.remote_node
11198 # create a fake disk info for _GenerateDiskTemplate
11199 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
11200 constants.IDISK_VG: d.logical_id[0]}
11201 for d in instance.disks]
11202 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
11203 instance.name, pnode, [snode],
11204 disk_info, None, None, 0, feedback_fn)
11205 info = _GetInstanceInfoText(instance)
11206 feedback_fn("Creating additional volumes...")
11207 # first, create the missing data and meta devices
11208 for disk in new_disks:
11209 # unfortunately this is... not too nice
11210 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
11212 for child in disk.children:
11213 _CreateSingleBlockDev(self, snode, instance, child, info, True)
11214 # at this stage, all new LVs have been created, we can rename the old ones
11216 feedback_fn("Renaming original volumes...")
11217 rename_list = [(o, n.children[0].logical_id)
11218 for (o, n) in zip(instance.disks, new_disks)]
11219 result = self.rpc.call_blockdev_rename(pnode, rename_list)
11220 result.Raise("Failed to rename original LVs")
11222 feedback_fn("Initializing DRBD devices...")
11223 # all child devices are in place, we can now create the DRBD devices
11224 for disk in new_disks:
11225 for node in [pnode, snode]:
11226 f_create = node == pnode
11227 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
11229 # at this point, the instance has been modified
11230 instance.disk_template = constants.DT_DRBD8
11231 instance.disks = new_disks
11232 self.cfg.Update(instance, feedback_fn)
11234 # disks are created, waiting for sync
11235 disk_abort = not _WaitForSync(self, instance,
11236 oneshot=not self.op.wait_for_sync)
11237 if disk_abort:
11238 raise errors.OpExecError("There are some degraded disks for"
11239 " this instance, please cleanup manually")
11241 def _ConvertDrbdToPlain(self, feedback_fn):
11242 """Converts an instance from drbd to plain.
11245 instance = self.instance
11246 assert len(instance.secondary_nodes) == 1
11247 pnode = instance.primary_node
11248 snode = instance.secondary_nodes[0]
11249 feedback_fn("Converting template to plain")
11251 old_disks = instance.disks
11252 new_disks = [d.children[0] for d in old_disks]
11254 # copy over size and mode
11255 for parent, child in zip(old_disks, new_disks):
11256 child.size = parent.size
11257 child.mode = parent.mode
11259 # this is a DRBD disk, return its port to the pool
11260 # NOTE: this must be done right before the call to cfg.Update!
11261 for disk in old_disks:
11262 tcp_port = disk.logical_id[2]
11263 self.cfg.AddTcpUdpPort(tcp_port)
11265 # update instance structure
11266 instance.disks = new_disks
11267 instance.disk_template = constants.DT_PLAIN
11268 self.cfg.Update(instance, feedback_fn)
11270 feedback_fn("Removing volumes on the secondary node...")
11271 for disk in old_disks:
11272 self.cfg.SetDiskID(disk, snode)
11273 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
11275 self.LogWarning("Could not remove block device %s on node %s,"
11276 " continuing anyway: %s", disk.iv_name, snode, msg)
11278 feedback_fn("Removing unneeded volumes on the primary node...")
11279 for idx, disk in enumerate(old_disks):
11280 meta = disk.children[1]
11281 self.cfg.SetDiskID(meta, pnode)
11282 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
11284 self.LogWarning("Could not remove metadata for disk %d on node %s,"
11285 " continuing anyway: %s", idx, pnode, msg)
11287 def Exec(self, feedback_fn):
11288 """Modifies an instance.
11290 All parameters take effect only at the next restart of the instance.
11293 # Process here the warnings from CheckPrereq, as we don't have a
11294 # feedback_fn there.
11295 for warn in self.warn:
11296 feedback_fn("WARNING: %s" % warn)
11299 instance = self.instance
11301 for disk_op, disk_dict in self.op.disks:
11302 if disk_op == constants.DDM_REMOVE:
11303 # remove the last disk
11304 device = instance.disks.pop()
11305 device_idx = len(instance.disks)
11306 for node, disk in device.ComputeNodeTree(instance.primary_node):
11307 self.cfg.SetDiskID(disk, node)
11308 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
11310 self.LogWarning("Could not remove disk/%d on node %s: %s,"
11311 " continuing anyway", device_idx, node, msg)
11312 result.append(("disk/%d" % device_idx, "remove"))
11314 # if this is a DRBD disk, return its port to the pool
11315 if device.dev_type in constants.LDS_DRBD:
11316 tcp_port = device.logical_id[2]
11317 self.cfg.AddTcpUdpPort(tcp_port)
11318 elif disk_op == constants.DDM_ADD:
11320 if instance.disk_template in (constants.DT_FILE,
11321 constants.DT_SHARED_FILE):
11322 file_driver, file_path = instance.disks[0].logical_id
11323 file_path = os.path.dirname(file_path)
11325 file_driver = file_path = None
11326 disk_idx_base = len(instance.disks)
11327 new_disk = _GenerateDiskTemplate(self,
11328 instance.disk_template,
11329 instance.name, instance.primary_node,
11330 instance.secondary_nodes,
11334 disk_idx_base, feedback_fn)[0]
11335 instance.disks.append(new_disk)
11336 info = _GetInstanceInfoText(instance)
11338 logging.info("Creating volume %s for instance %s",
11339 new_disk.iv_name, instance.name)
11340 # Note: this needs to be kept in sync with _CreateDisks
11342 for node in instance.all_nodes:
11343 f_create = node == instance.primary_node
11345 _CreateBlockDev(self, node, instance, new_disk,
11346 f_create, info, f_create)
11347 except errors.OpExecError, err:
11348 self.LogWarning("Failed to create volume %s (%s) on"
11350 new_disk.iv_name, new_disk, node, err)
11351 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
11352 (new_disk.size, new_disk.mode)))
11354 # change a given disk
11355 instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
11356 result.append(("disk.mode/%d" % disk_op,
11357 disk_dict[constants.IDISK_MODE]))
11359 if self.op.disk_template:
11360 r_shut = _ShutdownInstanceDisks(self, instance)
11361 if not r_shut:
11362 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
11363 " proceed with disk template conversion")
11364 mode = (instance.disk_template, self.op.disk_template)
11366 self._DISK_CONVERSIONS[mode](self, feedback_fn)
11368 self.cfg.ReleaseDRBDMinors(instance.name)
11370 result.append(("disk_template", self.op.disk_template))
11373 for nic_op, nic_dict in self.op.nics:
11374 if nic_op == constants.DDM_REMOVE:
11375 # remove the last nic
11376 del instance.nics[-1]
11377 result.append(("nic.%d" % len(instance.nics), "remove"))
11378 elif nic_op == constants.DDM_ADD:
11379 # mac and bridge should be set by now
11380 mac = nic_dict[constants.INIC_MAC]
11381 ip = nic_dict.get(constants.INIC_IP, None)
11382 nicparams = self.nic_pinst[constants.DDM_ADD]
11383 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
11384 instance.nics.append(new_nic)
11385 result.append(("nic.%d" % (len(instance.nics) - 1),
11386 "add:mac=%s,ip=%s,mode=%s,link=%s" %
11387 (new_nic.mac, new_nic.ip,
11388 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
11389 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
11392 for key in (constants.INIC_MAC, constants.INIC_IP):
11393 if key in nic_dict:
11394 setattr(instance.nics[nic_op], key, nic_dict[key])
11395 if nic_op in self.nic_pinst:
11396 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
11397 for key, val in nic_dict.iteritems():
11398 result.append(("nic.%s/%d" % (key, nic_op), val))
11401 if self.op.hvparams:
11402 instance.hvparams = self.hv_inst
11403 for key, val in self.op.hvparams.iteritems():
11404 result.append(("hv/%s" % key, val))
11407 if self.op.beparams:
11408 instance.beparams = self.be_inst
11409 for key, val in self.op.beparams.iteritems():
11410 result.append(("be/%s" % key, val))
11413 if self.op.os_name:
11414 instance.os = self.op.os_name
11417 if self.op.osparams:
11418 instance.osparams = self.os_inst
11419 for key, val in self.op.osparams.iteritems():
11420 result.append(("os/%s" % key, val))
11422 self.cfg.Update(instance, feedback_fn)
11426 _DISK_CONVERSIONS = {
11427 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
11428 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
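# Illustrative note (not part of the original source): Exec dispatches a
# template conversion by looking up the (old, new) pair in this map, e.g.
#   mode = (constants.DT_PLAIN, constants.DT_DRBD8)
#   self._DISK_CONVERSIONS[mode](self, feedback_fn)
# Only the two conversions listed above are supported.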
11432 class LUInstanceChangeGroup(LogicalUnit):
11433 HPATH = "instance-change-group"
11434 HTYPE = constants.HTYPE_INSTANCE
11437 def ExpandNames(self):
11438 self.share_locks = _ShareAll()
11439 self.needed_locks = {
11440 locking.LEVEL_NODEGROUP: [],
11441 locking.LEVEL_NODE: [],
11444 self._ExpandAndLockInstance()
11446 if self.op.target_groups:
11447 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
11448 self.op.target_groups)
11450 self.req_target_uuids = None
11452 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
11454 def DeclareLocks(self, level):
11455 if level == locking.LEVEL_NODEGROUP:
11456 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11458 if self.req_target_uuids:
11459 lock_groups = set(self.req_target_uuids)
11461 # Lock all groups used by instance optimistically; this requires going
11462 # via the node before it's locked, requiring verification later on
11463 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11464 lock_groups.update(instance_groups)
11466 # No target groups, need to lock all of them
11467 lock_groups = locking.ALL_SET
11469 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
11471 elif level == locking.LEVEL_NODE:
11472 if self.req_target_uuids:
11473 # Lock all nodes used by instances
11474 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11475 self._LockInstancesNodes()
11477 # Lock all nodes in all potential target groups
11478 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
11479 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
11480 member_nodes = [node_name
11481 for group in lock_groups
11482 for node_name in self.cfg.GetNodeGroup(group).members]
11483 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
11485 # Lock all nodes as all groups are potential targets
11486 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11488 def CheckPrereq(self):
11489 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11490 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11491 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11493 assert (self.req_target_uuids is None or
11494 owned_groups.issuperset(self.req_target_uuids))
11495 assert owned_instances == set([self.op.instance_name])
11497 # Get instance information
11498 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11500 # Check if node groups for locked instance are still correct
11501 assert owned_nodes.issuperset(self.instance.all_nodes), \
11502 ("Instance %s's nodes changed while we kept the lock" %
11503 self.op.instance_name)
11505 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
11508 if self.req_target_uuids:
11509 # User requested specific target groups
11510 self.target_uuids = frozenset(self.req_target_uuids)
11512 # All groups except those used by the instance are potential targets
11513 self.target_uuids = owned_groups - inst_groups
11515 conflicting_groups = self.target_uuids & inst_groups
11516 if conflicting_groups:
11517 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
11518 " used by the instance '%s'" %
11519 (utils.CommaJoin(conflicting_groups),
11520 self.op.instance_name),
11521 errors.ECODE_INVAL)
11523 if not self.target_uuids:
11524 raise errors.OpPrereqError("There are no possible target groups",
11525 errors.ECODE_INVAL)
11527 def BuildHooksEnv(self):
11528 """Build hooks env.
11531 assert self.target_uuids
11534 "TARGET_GROUPS": " ".join(self.target_uuids),
11537 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11541 def BuildHooksNodes(self):
11542 """Build hooks nodes.
11545 mn = self.cfg.GetMasterNode()
11546 return ([mn], [mn])
11548 def Exec(self, feedback_fn):
11549 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
11551 assert instances == [self.op.instance_name], "Instance not locked"
11553 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
11554 instances=instances, target_groups=list(self.target_uuids))
11556 ial.Run(self.op.iallocator)
11558 if not ial.success:
11559 raise errors.OpPrereqError("Can't compute solution for changing group of"
11560 " instance '%s' using iallocator '%s': %s" %
11561 (self.op.instance_name, self.op.iallocator,
11563 errors.ECODE_NORES)
11565 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
11567 self.LogInfo("Iallocator returned %s job(s) for changing group of"
11568 " instance '%s'", len(jobs), self.op.instance_name)
11570 return ResultWithJobs(jobs)
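# Note: this LU does not move the instance itself; it asks the iallocator
# (in CHG_GROUP mode) for a plan and wraps the computed jobs in
# ResultWithJobs for later execution.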
11573 class LUBackupQuery(NoHooksLU):
11574 """Query the exports list
11579 def ExpandNames(self):
11580 self.needed_locks = {}
11581 self.share_locks[locking.LEVEL_NODE] = 1
11582 if not self.op.nodes:
11583 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11585 self.needed_locks[locking.LEVEL_NODE] = \
11586 _GetWantedNodes(self, self.op.nodes)
11588 def Exec(self, feedback_fn):
11589 """Compute the list of all the exported system images.
11592 @return: a dictionary with the structure node->(export-list)
11593 where export-list is a list of the instances exported on
11597 self.nodes = self.owned_locks(locking.LEVEL_NODE)
11598 rpcresult = self.rpc.call_export_list(self.nodes)
11600 for node in rpcresult:
11601 if rpcresult[node].fail_msg:
11602 result[node] = False
11604 result[node] = rpcresult[node].payload
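# The returned mapping uses False as the value for nodes whose export list
# could not be fetched, and the RPC payload (the list of exports) otherwise.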
11609 class LUBackupPrepare(NoHooksLU):
11610 """Prepares an instance for an export and returns useful information.
11615 def ExpandNames(self):
11616 self._ExpandAndLockInstance()
11618 def CheckPrereq(self):
11619 """Check prerequisites.
11622 instance_name = self.op.instance_name
11624 self.instance = self.cfg.GetInstanceInfo(instance_name)
11625 assert self.instance is not None, \
11626 "Cannot retrieve locked instance %s" % self.op.instance_name
11627 _CheckNodeOnline(self, self.instance.primary_node)
11629 self._cds = _GetClusterDomainSecret()
11631 def Exec(self, feedback_fn):
11632 """Prepares an instance for an export.
11635 instance = self.instance
11637 if self.op.mode == constants.EXPORT_MODE_REMOTE:
11638 salt = utils.GenerateSecret(8)
11640 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
11641 result = self.rpc.call_x509_cert_create(instance.primary_node,
11642 constants.RIE_CERT_VALIDITY)
11643 result.Raise("Can't create X509 key and certificate on %s" % result.node)
11645 (name, cert_pem) = result.payload
11647 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
11651 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
11652 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
11654 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
11660 class LUBackupExport(LogicalUnit):
11661 """Export an instance to an image in the cluster.
11664 HPATH = "instance-export"
11665 HTYPE = constants.HTYPE_INSTANCE
11668 def CheckArguments(self):
11669 """Check the arguments.
11672 self.x509_key_name = self.op.x509_key_name
11673 self.dest_x509_ca_pem = self.op.destination_x509_ca
11675 if self.op.mode == constants.EXPORT_MODE_REMOTE:
11676 if not self.x509_key_name:
11677 raise errors.OpPrereqError("Missing X509 key name for encryption",
11678 errors.ECODE_INVAL)
11680 if not self.dest_x509_ca_pem:
11681 raise errors.OpPrereqError("Missing destination X509 CA",
11682 errors.ECODE_INVAL)
11684 def ExpandNames(self):
11685 self._ExpandAndLockInstance()
11687 # Lock all nodes for local exports
11688 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11689 # FIXME: lock only instance primary and destination node
11691 # Sad but true, for now we have to lock all nodes, as we don't know where
11692 # the previous export might be, and in this LU we search for it and
11693 # remove it from its current node. In the future we could fix this by:
11694 # - making a tasklet to search (share-lock all), then create the
11695 # new one, then one to remove, after
11696 # - removing the removal operation altogether
11697 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11699 def DeclareLocks(self, level):
11700 """Last minute lock declaration."""
11701 # All nodes are locked anyway, so nothing to do here.
11703 def BuildHooksEnv(self):
11704 """Build hooks env.
11706 This will run on the master, primary node and target node.
11710 "EXPORT_MODE": self.op.mode,
11711 "EXPORT_NODE": self.op.target_node,
11712 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
11713 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
11714 # TODO: Generic function for boolean env variables
11715 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
11718 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11722 def BuildHooksNodes(self):
11723 """Build hooks nodes.
11726 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
11728 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11729 nl.append(self.op.target_node)
11733 def CheckPrereq(self):
11734 """Check prerequisites.
11736 This checks that the instance and node names are valid.
11739 instance_name = self.op.instance_name
11741 self.instance = self.cfg.GetInstanceInfo(instance_name)
11742 assert self.instance is not None, \
11743 "Cannot retrieve locked instance %s" % self.op.instance_name
11744 _CheckNodeOnline(self, self.instance.primary_node)
11746 if (self.op.remove_instance and self.instance.admin_up and
11747 not self.op.shutdown):
11748 raise errors.OpPrereqError("Can not remove instance without shutting it"
11751 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11752 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
11753 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
11754 assert self.dst_node is not None
11756 _CheckNodeOnline(self, self.dst_node.name)
11757 _CheckNodeNotDrained(self, self.dst_node.name)
11760 self.dest_disk_info = None
11761 self.dest_x509_ca = None
11763 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11764 self.dst_node = None
11766 if len(self.op.target_node) != len(self.instance.disks):
11767 raise errors.OpPrereqError(("Received destination information for %s"
11768 " disks, but instance %s has %s disks") %
11769 (len(self.op.target_node), instance_name,
11770 len(self.instance.disks)),
11771 errors.ECODE_INVAL)
11773 cds = _GetClusterDomainSecret()
11775 # Check X509 key name
11777 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
11778 except (TypeError, ValueError), err:
11779 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
11781 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
11782 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
11783 errors.ECODE_INVAL)
11785 # Load and verify CA
11787 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
11788 except OpenSSL.crypto.Error, err:
11789 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
11790 (err, ), errors.ECODE_INVAL)
11792 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
11793 if errcode is not None:
11794 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
11795 (msg, ), errors.ECODE_INVAL)
11797 self.dest_x509_ca = cert
11799 # Verify target information
11801 for idx, disk_data in enumerate(self.op.target_node):
11803 (host, port, magic) = \
11804 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
11805 except errors.GenericError, err:
11806 raise errors.OpPrereqError("Target info for disk %s: %s" %
11807 (idx, err), errors.ECODE_INVAL)
11809 disk_info.append((host, port, magic))
11811 assert len(disk_info) == len(self.op.target_node)
11812 self.dest_disk_info = disk_info
11815 raise errors.ProgrammerError("Unhandled export mode %r" %
11818 # instance disk type verification
11819 # TODO: Implement export support for file-based disks
11820 for disk in self.instance.disks:
11821 if disk.dev_type == constants.LD_FILE:
11822 raise errors.OpPrereqError("Export not supported for instances with"
11823 " file-based disks", errors.ECODE_INVAL)
11825 def _CleanupExports(self, feedback_fn):
11826 """Removes exports of current instance from all other nodes.
11828 If an instance in a cluster with nodes A..D was exported to node C, its
11829 exports will be removed from the nodes A, B and D.
11832 assert self.op.mode != constants.EXPORT_MODE_REMOTE
11834 nodelist = self.cfg.GetNodeList()
11835 nodelist.remove(self.dst_node.name)
11837 # on one-node clusters nodelist will be empty after the removal
11838 # if we proceed the backup would be removed because OpBackupQuery
11839 # substitutes an empty list with the full cluster node list.
11840 iname = self.instance.name
11842 feedback_fn("Removing old exports for instance %s" % iname)
11843 exportlist = self.rpc.call_export_list(nodelist)
11844 for node in exportlist:
11845 if exportlist[node].fail_msg:
11847 if iname in exportlist[node].payload:
11848 msg = self.rpc.call_export_remove(node, iname).fail_msg
11850 self.LogWarning("Could not remove older export for instance %s"
11851 " on node %s: %s", iname, node, msg)
11853 def Exec(self, feedback_fn):
11854 """Export an instance to an image in the cluster.
11857 assert self.op.mode in constants.EXPORT_MODES
11859 instance = self.instance
11860 src_node = instance.primary_node
11862 if self.op.shutdown:
11863 # shutdown the instance, but not the disks
11864 feedback_fn("Shutting down instance %s" % instance.name)
11865 result = self.rpc.call_instance_shutdown(src_node, instance,
11866 self.op.shutdown_timeout)
11867 # TODO: Maybe ignore failures if ignore_remove_failures is set
11868 result.Raise("Could not shutdown instance %s on"
11869 " node %s" % (instance.name, src_node))
11871 # set the disks ID correctly since call_instance_start needs the
11872 # correct drbd minor to create the symlinks
11873 for disk in instance.disks:
11874 self.cfg.SetDiskID(disk, src_node)
11876 activate_disks = (not instance.admin_up)
11879 # Activate the instance disks if we're exporting a stopped instance
11880 feedback_fn("Activating disks for %s" % instance.name)
11881 _StartInstanceDisks(self, instance, None)
11884 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
11887 helper.CreateSnapshots()
11889 if (self.op.shutdown and instance.admin_up and
11890 not self.op.remove_instance):
11891 assert not activate_disks
11892 feedback_fn("Starting instance %s" % instance.name)
11893 result = self.rpc.call_instance_start(src_node, instance,
11895 msg = result.fail_msg
11897 feedback_fn("Failed to start instance: %s" % msg)
11898 _ShutdownInstanceDisks(self, instance)
11899 raise errors.OpExecError("Could not start instance: %s" % msg)
11901 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11902 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
11903 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11904 connect_timeout = constants.RIE_CONNECT_TIMEOUT
11905 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11907 (key_name, _, _) = self.x509_key_name
11910 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
11913 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
11914 key_name, dest_ca_pem,
11919 # Check for backwards compatibility
11920 assert len(dresults) == len(instance.disks)
11921 assert compat.all(isinstance(i, bool) for i in dresults), \
11922 "Not all results are boolean: %r" % dresults
11926 feedback_fn("Deactivating disks for %s" % instance.name)
11927 _ShutdownInstanceDisks(self, instance)
11929 if not (compat.all(dresults) and fin_resu):
11930 failures = []
11931 if not fin_resu:
11932 failures.append("export finalization")
11933 if not compat.all(dresults):
11934 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
11935 if not dsk)
11936 failures.append("disk export: disk(s) %s" % fdsk)
11938 raise errors.OpExecError("Export failed, errors in %s" %
11939 utils.CommaJoin(failures))
11941 # At this point, the export was successful, we can cleanup/finish
11943 # Remove instance if requested
11944 if self.op.remove_instance:
11945 feedback_fn("Removing instance %s" % instance.name)
11946 _RemoveInstance(self, feedback_fn, instance,
11947 self.op.ignore_remove_failures)
11949 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11950 self._CleanupExports(feedback_fn)
11952 return fin_resu, dresults
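# Exec returns a (fin_resu, dresults) pair: fin_resu is the overall
# finalization status and dresults holds one boolean per instance disk, as
# asserted above before the results are checked for failures.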
11955 class LUBackupRemove(NoHooksLU):
11956 """Remove exports related to the named instance.
11961 def ExpandNames(self):
11962 self.needed_locks = {}
11963 # We need all nodes to be locked in order for RemoveExport to work, but we
11964 # don't need to lock the instance itself, as nothing will happen to it (and
11965 # we can remove exports also for a removed instance)
11966 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11968 def Exec(self, feedback_fn):
11969 """Remove any export.
11972 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
11973 # If the instance was not found we'll try with the name that was passed in.
11974 # This will only work if it was an FQDN, though.
11975 fqdn_warn = False
11976 if not instance_name:
11977 fqdn_warn = True
11978 instance_name = self.op.instance_name
11980 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
11981 exportlist = self.rpc.call_export_list(locked_nodes)
11983 for node in exportlist:
11984 msg = exportlist[node].fail_msg
11986 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
11988 if instance_name in exportlist[node].payload:
11990 result = self.rpc.call_export_remove(node, instance_name)
11991 msg = result.fail_msg
11993 logging.error("Could not remove export for instance %s"
11994 " on node %s: %s", instance_name, node, msg)
11996 if fqdn_warn and not found:
11997 feedback_fn("Export not found. If trying to remove an export belonging"
11998 " to a deleted instance please use its Fully Qualified"
12002 class LUGroupAdd(LogicalUnit):
12003 """Logical unit for creating node groups.
12006 HPATH = "group-add"
12007 HTYPE = constants.HTYPE_GROUP
12010 def ExpandNames(self):
12011 # We need the new group's UUID here so that we can create and acquire the
12012 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
12013 # that it should not check whether the UUID exists in the configuration.
12014 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
12015 self.needed_locks = {}
12016 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12018 def CheckPrereq(self):
12019 """Check prerequisites.
12021 This checks that the given group name is not an existing node group
12026 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12027 except errors.OpPrereqError:
12030 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
12031 " node group (UUID: %s)" %
12032 (self.op.group_name, existing_uuid),
12033 errors.ECODE_EXISTS)
12035 if self.op.ndparams:
12036 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12038 def BuildHooksEnv(self):
12039 """Build hooks env.
12043 "GROUP_NAME": self.op.group_name,
12046 def BuildHooksNodes(self):
12047 """Build hooks nodes.
12050 mn = self.cfg.GetMasterNode()
12051 return ([mn], [mn])
12053 def Exec(self, feedback_fn):
12054 """Add the node group to the cluster.
12057 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
12058 uuid=self.group_uuid,
12059 alloc_policy=self.op.alloc_policy,
12060 ndparams=self.op.ndparams)
12062 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
12063 del self.remove_locks[locking.LEVEL_NODEGROUP]
12066 class LUGroupAssignNodes(NoHooksLU):
12067 """Logical unit for assigning nodes to groups.
12072 def ExpandNames(self):
12073 # These raise errors.OpPrereqError on their own:
12074 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12075 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
12077 # We want to lock all the affected nodes and groups. We have readily
12078 # available the list of nodes, and the *destination* group. To gather the
12079 # list of "source" groups, we need to fetch node information later on.
12080 self.needed_locks = {
12081 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
12082 locking.LEVEL_NODE: self.op.nodes,
12085 def DeclareLocks(self, level):
12086 if level == locking.LEVEL_NODEGROUP:
12087 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
12089 # Try to get all affected nodes' groups without having the group or node
12090 # lock yet. Needs verification later in the code flow.
12091 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
12093 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
12095 def CheckPrereq(self):
12096 """Check prerequisites.
12099 assert self.needed_locks[locking.LEVEL_NODEGROUP]
12100 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
12101 frozenset(self.op.nodes))
12103 expected_locks = (set([self.group_uuid]) |
12104 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
12105 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
12106 if actual_locks != expected_locks:
12107 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
12108 " current groups are '%s', used to be '%s'" %
12109 (utils.CommaJoin(expected_locks),
12110 utils.CommaJoin(actual_locks)))
12112 self.node_data = self.cfg.GetAllNodesInfo()
12113 self.group = self.cfg.GetNodeGroup(self.group_uuid)
12114 instance_data = self.cfg.GetAllInstancesInfo()
12116 if self.group is None:
12117 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12118 (self.op.group_name, self.group_uuid))
12120 (new_splits, previous_splits) = \
12121 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
12122 for node in self.op.nodes],
12123 self.node_data, instance_data)
12126 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
12128 if not self.op.force:
12129 raise errors.OpExecError("The following instances get split by this"
12130 " change and --force was not given: %s" %
12133 self.LogWarning("This operation will split the following instances: %s",
12136 if previous_splits:
12137 self.LogWarning("In addition, these already-split instances continue"
12138 " to be split across groups: %s",
12139 utils.CommaJoin(utils.NiceSort(previous_splits)))
12141 def Exec(self, feedback_fn):
12142 """Assign nodes to a new group.
12145 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
12147 self.cfg.AssignGroupNodes(mods)
12150 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
12151 """Check for split instances after a node assignment.
12153 This method considers a series of node assignments as an atomic operation,
12154 and returns information about split instances after applying the set of
12157 In particular, it returns information about newly split instances, and
12158 instances that were already split, and remain so after the change.
12160 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are considered.
12163 @type changes: list of (node_name, new_group_uuid) pairs.
12164 @param changes: list of node assignments to consider.
12165 @param node_data: a dict with data for all nodes
12166 @param instance_data: a dict with all instances to consider
12167 @rtype: a two-tuple
12168 @return: a list of instances that were previously okay and become split as a
12169 consequence of this change, and a list of instances that were previously
12170 split and that this change does not fix.
12173 changed_nodes = dict((node, group) for node, group in changes
12174 if node_data[node].group != group)
12176 all_split_instances = set()
12177 previously_split_instances = set()
12179 def InstanceNodes(instance):
12180 return [instance.primary_node] + list(instance.secondary_nodes)
12182 for inst in instance_data.values():
12183 if inst.disk_template not in constants.DTS_INT_MIRROR:
12186 instance_nodes = InstanceNodes(inst)
12188 if len(set(node_data[node].group for node in instance_nodes)) > 1:
12189 previously_split_instances.add(inst.name)
12191 if len(set(changed_nodes.get(node, node_data[node].group)
12192 for node in instance_nodes)) > 1:
12193 all_split_instances.add(inst.name)
12195 return (list(all_split_instances - previously_split_instances),
12196 list(previously_split_instances & all_split_instances))
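# Illustrative example (hypothetical names, not from the original source):
# with nodes n1 (group A), n2 (group A), n3 (group B) and a DRBD instance
# using n1 as primary and n2 as secondary, the assignment [("n2", B)] makes
# the instance span groups A and B, so it is reported in the first list
# (newly split). If it already spanned both groups before the change and
# still does afterwards, it is reported in the second list instead.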
12199 class _GroupQuery(_QueryBase):
12200 FIELDS = query.GROUP_FIELDS
12202 def ExpandNames(self, lu):
12203 lu.needed_locks = {}
12205 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
12206 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
12209 self.wanted = [name_to_uuid[name]
12210 for name in utils.NiceSort(name_to_uuid.keys())]
12212 # Accept names to be either names or UUIDs.
12215 all_uuid = frozenset(self._all_groups.keys())
12217 for name in self.names:
12218 if name in all_uuid:
12219 self.wanted.append(name)
12220 elif name in name_to_uuid:
12221 self.wanted.append(name_to_uuid[name])
12223 missing.append(name)
12226 raise errors.OpPrereqError("Some groups do not exist: %s" %
12227 utils.CommaJoin(missing),
12228 errors.ECODE_NOENT)
12230 def DeclareLocks(self, lu, level):
12233 def _GetQueryData(self, lu):
12234 """Computes the list of node groups and their attributes.
12237 do_nodes = query.GQ_NODE in self.requested_data
12238 do_instances = query.GQ_INST in self.requested_data
12240 group_to_nodes = None
12241 group_to_instances = None
12243 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
12244 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
12245 # latter GetAllInstancesInfo() is not enough, for we have to go through
12246 # instance->node. Hence, we will need to process nodes even if we only need
12247 # instance information.
12248 if do_nodes or do_instances:
12249 all_nodes = lu.cfg.GetAllNodesInfo()
12250 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
12253 for node in all_nodes.values():
12254 if node.group in group_to_nodes:
12255 group_to_nodes[node.group].append(node.name)
12256 node_to_group[node.name] = node.group
12259 all_instances = lu.cfg.GetAllInstancesInfo()
12260 group_to_instances = dict((uuid, []) for uuid in self.wanted)
12262 for instance in all_instances.values():
12263 node = instance.primary_node
12264 if node in node_to_group:
12265 group_to_instances[node_to_group[node]].append(instance.name)
12268 # Do not pass on node information if it was not requested.
12269 group_to_nodes = None
12271 return query.GroupQueryData([self._all_groups[uuid]
12272 for uuid in self.wanted],
12273 group_to_nodes, group_to_instances)
12276 class LUGroupQuery(NoHooksLU):
12277 """Logical unit for querying node groups.
12282 def CheckArguments(self):
12283 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
12284 self.op.output_fields, False)
12286 def ExpandNames(self):
12287 self.gq.ExpandNames(self)
12289 def DeclareLocks(self, level):
12290 self.gq.DeclareLocks(self, level)
12292 def Exec(self, feedback_fn):
12293 return self.gq.OldStyleQuery(self)
12296 class LUGroupSetParams(LogicalUnit):
12297 """Modifies the parameters of a node group.
12300 HPATH = "group-modify"
12301 HTYPE = constants.HTYPE_GROUP
12304 def CheckArguments(self):
12307 self.op.alloc_policy,
12310 if all_changes.count(None) == len(all_changes):
12311 raise errors.OpPrereqError("Please pass at least one modification",
12312 errors.ECODE_INVAL)
12314 def ExpandNames(self):
12315 # This raises errors.OpPrereqError on its own:
12316 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12318 self.needed_locks = {
12319 locking.LEVEL_NODEGROUP: [self.group_uuid],
12322 def CheckPrereq(self):
12323 """Check prerequisites.
12326 self.group = self.cfg.GetNodeGroup(self.group_uuid)
12328 if self.group is None:
12329 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12330 (self.op.group_name, self.group_uuid))
12332 if self.op.ndparams:
12333 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
12334 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12335 self.new_ndparams = new_ndparams
12337 def BuildHooksEnv(self):
12338 """Build hooks env.
12342 "GROUP_NAME": self.op.group_name,
12343 "NEW_ALLOC_POLICY": self.op.alloc_policy,
12346 def BuildHooksNodes(self):
12347 """Build hooks nodes.
12350 mn = self.cfg.GetMasterNode()
12351 return ([mn], [mn])
12353 def Exec(self, feedback_fn):
12354 """Modifies the node group.
12359 if self.op.ndparams:
12360 self.group.ndparams = self.new_ndparams
12361 result.append(("ndparams", str(self.group.ndparams)))
12363 if self.op.alloc_policy:
12364 self.group.alloc_policy = self.op.alloc_policy
12366 self.cfg.Update(self.group, feedback_fn)
12370 class LUGroupRemove(LogicalUnit):
12371 HPATH = "group-remove"
12372 HTYPE = constants.HTYPE_GROUP
12375 def ExpandNames(self):
12376 # This raises errors.OpPrereqError on its own:
12377 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12378 self.needed_locks = {
12379 locking.LEVEL_NODEGROUP: [self.group_uuid],
12382 def CheckPrereq(self):
12383 """Check prerequisites.
12385 This checks that the given group name exists as a node group, that it is
12386 empty (i.e., contains no nodes), and that it is not the last group of the cluster.
12390 # Verify that the group is empty.
12391 group_nodes = [node.name
12392 for node in self.cfg.GetAllNodesInfo().values()
12393 if node.group == self.group_uuid]
12396 raise errors.OpPrereqError("Group '%s' not empty, has the following"
12398 (self.op.group_name,
12399 utils.CommaJoin(utils.NiceSort(group_nodes))),
12400 errors.ECODE_STATE)
12402 # Verify the cluster would not be left group-less.
12403 if len(self.cfg.GetNodeGroupList()) == 1:
12404 raise errors.OpPrereqError("Group '%s' is the only group,"
12405 " cannot be removed" %
12406 self.op.group_name,
12407 errors.ECODE_STATE)
12409 def BuildHooksEnv(self):
12410 """Build hooks env.
12414 "GROUP_NAME": self.op.group_name,
12417 def BuildHooksNodes(self):
12418 """Build hooks nodes.
12421 mn = self.cfg.GetMasterNode()
12422 return ([mn], [mn])
12424 def Exec(self, feedback_fn):
12425 """Remove the node group.
12429 self.cfg.RemoveNodeGroup(self.group_uuid)
12430 except errors.ConfigurationError:
12431 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
12432 (self.op.group_name, self.group_uuid))
12434 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12437 class LUGroupRename(LogicalUnit):
12438 HPATH = "group-rename"
12439 HTYPE = constants.HTYPE_GROUP
12442 def ExpandNames(self):
12443 # This raises errors.OpPrereqError on its own:
12444 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12446 self.needed_locks = {
12447 locking.LEVEL_NODEGROUP: [self.group_uuid],
12450 def CheckPrereq(self):
12451 """Check prerequisites.
12453 Ensures requested new name is not yet used.
12457 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
12458 except errors.OpPrereqError:
12461 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
12462 " node group (UUID: %s)" %
12463 (self.op.new_name, new_name_uuid),
12464 errors.ECODE_EXISTS)
12466 def BuildHooksEnv(self):
12467 """Build hooks env.
12471 "OLD_NAME": self.op.group_name,
12472 "NEW_NAME": self.op.new_name,
12475 def BuildHooksNodes(self):
12476 """Build hooks nodes.
12479 mn = self.cfg.GetMasterNode()
12481 all_nodes = self.cfg.GetAllNodesInfo()
12482 all_nodes.pop(mn, None)
12485 run_nodes.extend(node.name for node in all_nodes.values()
12486 if node.group == self.group_uuid)
12488 return (run_nodes, run_nodes)
12490 def Exec(self, feedback_fn):
12491 """Rename the node group.
12494 group = self.cfg.GetNodeGroup(self.group_uuid)
12497 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12498 (self.op.group_name, self.group_uuid))
12500 group.name = self.op.new_name
12501 self.cfg.Update(group, feedback_fn)
12503 return self.op.new_name
12506 class LUGroupEvacuate(LogicalUnit):
12507 HPATH = "group-evacuate"
12508 HTYPE = constants.HTYPE_GROUP
12511 def ExpandNames(self):
12512 # This raises errors.OpPrereqError on its own:
12513 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12515 if self.op.target_groups:
12516 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12517 self.op.target_groups)
12519 self.req_target_uuids = []
12521 if self.group_uuid in self.req_target_uuids:
12522 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
12523 " as a target group (targets are %s)" %
12525 utils.CommaJoin(self.req_target_uuids)),
12526 errors.ECODE_INVAL)
12528 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12530 self.share_locks = _ShareAll()
12531 self.needed_locks = {
12532 locking.LEVEL_INSTANCE: [],
12533 locking.LEVEL_NODEGROUP: [],
12534 locking.LEVEL_NODE: [],
12537 def DeclareLocks(self, level):
12538 if level == locking.LEVEL_INSTANCE:
12539 assert not self.needed_locks[locking.LEVEL_INSTANCE]
12541 # Lock instances optimistically, needs verification once node and group
12542 # locks have been acquired
12543 self.needed_locks[locking.LEVEL_INSTANCE] = \
12544 self.cfg.GetNodeGroupInstances(self.group_uuid)
12546 elif level == locking.LEVEL_NODEGROUP:
12547 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12549 if self.req_target_uuids:
12550 lock_groups = set([self.group_uuid] + self.req_target_uuids)
12552 # Lock all groups used by instances optimistically; this requires going
12553 # via the node before it's locked, requiring verification later on
12554 lock_groups.update(group_uuid
12555 for instance_name in
12556 self.owned_locks(locking.LEVEL_INSTANCE)
12558 self.cfg.GetInstanceNodeGroups(instance_name))
12560 # No target groups, need to lock all of them
12561 lock_groups = locking.ALL_SET
12563 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12565 elif level == locking.LEVEL_NODE:
12566 # This will only lock the nodes in the group to be evacuated which
12567 # contain actual instances
12568 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12569 self._LockInstancesNodes()
12571 # Lock all nodes in group to be evacuated and target groups
12572 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12573 assert self.group_uuid in owned_groups
12574 member_nodes = [node_name
12575 for group in owned_groups
12576 for node_name in self.cfg.GetNodeGroup(group).members]
12577 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12579 def CheckPrereq(self):
12580 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12581 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12582 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12584 assert owned_groups.issuperset(self.req_target_uuids)
12585 assert self.group_uuid in owned_groups
12587 # Check if locked instances are still correct
12588 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
12590 # Get instance information
12591 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
12593 # Check if node groups for locked instances are still correct
12594 _CheckInstancesNodeGroups(self.cfg, self.instances,
12595 owned_groups, owned_nodes, self.group_uuid)
12597 if self.req_target_uuids:
12598 # User requested specific target groups
12599 self.target_uuids = self.req_target_uuids
12601 # All groups except the one to be evacuated are potential targets
12602 self.target_uuids = [group_uuid for group_uuid in owned_groups
12603 if group_uuid != self.group_uuid]
12605 if not self.target_uuids:
12606 raise errors.OpPrereqError("There are no possible target groups",
12607 errors.ECODE_INVAL)
12609 def BuildHooksEnv(self):
12610 """Build hooks env.
12614 "GROUP_NAME": self.op.group_name,
12615 "TARGET_GROUPS": " ".join(self.target_uuids),
12618 def BuildHooksNodes(self):
12619 """Build hooks nodes.
12622 mn = self.cfg.GetMasterNode()
12624 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
12626 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
12628 return (run_nodes, run_nodes)
12630 def Exec(self, feedback_fn):
12631 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12633 assert self.group_uuid not in self.target_uuids
12635 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12636 instances=instances, target_groups=self.target_uuids)
12638 ial.Run(self.op.iallocator)
12640 if not ial.success:
12641 raise errors.OpPrereqError("Can't compute group evacuation using"
12642 " iallocator '%s': %s" %
12643 (self.op.iallocator, ial.info),
12644 errors.ECODE_NORES)
12646 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12648 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
12649 len(jobs), self.op.group_name)
12651 return ResultWithJobs(jobs)
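# Like LUInstanceChangeGroup above, this LU only computes an evacuation plan:
# the iallocator (CHG_GROUP mode) is run over every instance in the group and
# the resulting jobs are handed back via ResultWithJobs.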
12654 class TagsLU(NoHooksLU): # pylint: disable=W0223
12655 """Generic tags LU.
12657 This is an abstract class which is the parent of all the other tags LUs.
12660 def ExpandNames(self):
12661 self.group_uuid = None
12662 self.needed_locks = {}
12663 if self.op.kind == constants.TAG_NODE:
12664 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
12665 self.needed_locks[locking.LEVEL_NODE] = self.op.name
12666 elif self.op.kind == constants.TAG_INSTANCE:
12667 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
12668 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
12669 elif self.op.kind == constants.TAG_NODEGROUP:
12670 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
12672 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
12673 # not possible to acquire the BGL based on opcode parameters)
12675 def CheckPrereq(self):
12676 """Check prerequisites.
12679 if self.op.kind == constants.TAG_CLUSTER:
12680 self.target = self.cfg.GetClusterInfo()
12681 elif self.op.kind == constants.TAG_NODE:
12682 self.target = self.cfg.GetNodeInfo(self.op.name)
12683 elif self.op.kind == constants.TAG_INSTANCE:
12684 self.target = self.cfg.GetInstanceInfo(self.op.name)
12685 elif self.op.kind == constants.TAG_NODEGROUP:
12686 self.target = self.cfg.GetNodeGroup(self.group_uuid)
12688 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
12689 str(self.op.kind), errors.ECODE_INVAL)
12692 class LUTagsGet(TagsLU):
12693 """Returns the tags of a given object.
12698 def ExpandNames(self):
12699 TagsLU.ExpandNames(self)
12701 # Share locks as this is only a read operation
12702 self.share_locks = _ShareAll()
12704 def Exec(self, feedback_fn):
12705 """Returns the tag list.
12708 return list(self.target.GetTags())
12711 class LUTagsSearch(NoHooksLU):
12712 """Searches the tags for a given pattern.
12717 def ExpandNames(self):
12718 self.needed_locks = {}
12720 def CheckPrereq(self):
12721 """Check prerequisites.
12723 This checks the pattern passed for validity by compiling it.
12727 self.re = re.compile(self.op.pattern)
12728 except re.error, err:
12729 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
12730 (self.op.pattern, err), errors.ECODE_INVAL)
12732 def Exec(self, feedback_fn):
12733 """Returns the tag list.
12737 tgts = [("/cluster", cfg.GetClusterInfo())]
12738 ilist = cfg.GetAllInstancesInfo().values()
12739 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
12740 nlist = cfg.GetAllNodesInfo().values()
12741 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
12742 tgts.extend(("/nodegroup/%s" % n.name, n)
12743 for n in cfg.GetAllNodeGroupsInfo().values())
12745 for path, target in tgts:
12746 for tag in target.GetTags():
12747 if self.re.search(tag):
12748 results.append((path, tag))
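# Illustrative result shape (hypothetical names, not from the original
# source): searching for "prod" might yield
#   [("/cluster", "production"), ("/instances/web1.example.com", "prod-web")]
# i.e. (path, tag) pairs for every tag matching the pattern.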
12752 class LUTagsSet(TagsLU):
12753 """Sets a tag on a given object.
12758 def CheckPrereq(self):
12759 """Check prerequisites.
12761 This checks the type and length of the tag name and value.
12764 TagsLU.CheckPrereq(self)
12765 for tag in self.op.tags:
12766 objects.TaggableObject.ValidateTag(tag)
12768 def Exec(self, feedback_fn):
12773 for tag in self.op.tags:
12774 self.target.AddTag(tag)
12775 except errors.TagError, err:
12776 raise errors.OpExecError("Error while setting tag: %s" % str(err))
12777 self.cfg.Update(self.target, feedback_fn)
12780 class LUTagsDel(TagsLU):
12781 """Delete a list of tags from a given object.
12786 def CheckPrereq(self):
12787 """Check prerequisites.
12789 This checks that we have the given tag.
12792 TagsLU.CheckPrereq(self)
12793 for tag in self.op.tags:
12794 objects.TaggableObject.ValidateTag(tag)
12795 del_tags = frozenset(self.op.tags)
12796 cur_tags = self.target.GetTags()
12798 diff_tags = del_tags - cur_tags
12800 diff_names = ("'%s'" % i for i in sorted(diff_tags))
12801 raise errors.OpPrereqError("Tag(s) %s not found" %
12802 (utils.CommaJoin(diff_names), ),
12803 errors.ECODE_NOENT)
12805 def Exec(self, feedback_fn):
12806 """Remove the tag from the object.
12809 for tag in self.op.tags:
12810 self.target.RemoveTag(tag)
12811 self.cfg.Update(self.target, feedback_fn)
12814 class LUTestDelay(NoHooksLU):
12815 """Sleep for a specified amount of time.
12817 This LU sleeps on the master and/or nodes for a specified amount of time.
12823 def ExpandNames(self):
12824 """Expand names and set required locks.
12826 This expands the node list, if any.
12829 self.needed_locks = {}
12830 if self.op.on_nodes:
12831 # _GetWantedNodes can be used here, but is not always appropriate to use
12832 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
12833 # more information.
12834 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
12835 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
12837 def _TestDelay(self):
12838 """Do the actual sleep.
12841 if self.op.on_master:
12842 if not utils.TestDelay(self.op.duration):
12843 raise errors.OpExecError("Error during master delay test")
12844 if self.op.on_nodes:
12845 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
12846 for node, node_result in result.items():
12847 node_result.Raise("Failure during rpc call to node %s" % node)
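# The master-side delay uses utils.TestDelay locally, while node delays are
# requested with a single test_delay RPC to all selected nodes; any per-node
# RPC failure aborts the whole test.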
12849 def Exec(self, feedback_fn):
12850 """Execute the test delay opcode, with the wanted repetitions.
12853 if self.op.repeat == 0:
12854 self._TestDelay()
12855 else:
12856 top_value = self.op.repeat - 1
12857 for i in range(self.op.repeat):
12858 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
12859 self._TestDelay()
12862 class LUTestJqueue(NoHooksLU):
12863 """Utility LU to test some aspects of the job queue.
12868 # Must be lower than default timeout for WaitForJobChange to see whether it
12869 # notices changed jobs
12870 _CLIENT_CONNECT_TIMEOUT = 20.0
12871 _CLIENT_CONFIRM_TIMEOUT = 60.0
12874 def _NotifyUsingSocket(cls, cb, errcls):
12875 """Opens a Unix socket and waits for another program to connect.
12878 @param cb: Callback to send socket name to client
12879 @type errcls: class
12880 @param errcls: Exception class to use for errors
12883 # Using a temporary directory as there's no easy way to create temporary
12884 # sockets without writing a custom loop around tempfile.mktemp and
12886 tmpdir = tempfile.mkdtemp()
12888 tmpsock = utils.PathJoin(tmpdir, "sock")
12890 logging.debug("Creating temporary socket at %s", tmpsock)
12891 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
12896 # Send details to client
12899 # Wait for client to connect before continuing
12900 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
12902 (conn, _) = sock.accept()
12903 except socket.error, err:
12904 raise errcls("Client didn't connect in time (%s)" % err)
12908 # Remove as soon as client is connected
12909 shutil.rmtree(tmpdir)
12911 # Wait for client to close
12914 # pylint: disable=E1101
12915 # Instance of '_socketobject' has no ... member
12916 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
12918 except socket.error, err:
12919 raise errcls("Client failed to confirm notification (%s)" % err)
12923 def _SendNotification(self, test, arg, sockname):
12924 """Sends a notification to the client.
12927 @param test: Test name
12928 @param arg: Test argument (depends on test)
12929 @type sockname: string
12930 @param sockname: Socket path
12933 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
12935 def _Notify(self, prereq, test, arg):
12936 """Notifies the client of a test.
12939 @param prereq: Whether this is a prereq-phase test
12941 @param test: Test name
12942 @param arg: Test argument (depends on test)
12946 errcls = errors.OpPrereqError
12948 errcls = errors.OpExecError
12950 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
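
  # Summary of the handshake implemented above: _Notify() logs an
  # ELOG_JQUEUE_TEST entry carrying the temporary socket path; the test client
  # watching the job is expected to connect to that socket (within
  # _CLIENT_CONNECT_TIMEOUT) and close it again (within
  # _CLIENT_CONFIRM_TIMEOUT) before the LU proceeds with the next phase.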
  def CheckArguments(self):
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
    self.expandnames_calls = 0

  def ExpandNames(self):
    checkargs_calls = getattr(self, "checkargs_calls", 0)
    if checkargs_calls < 1:
      raise errors.ProgrammerError("CheckArguments was not called")

    self.expandnames_calls += 1

    if self.op.notify_waitlock:
      self._Notify(True, constants.JQT_EXPANDNAMES, None)

    self.LogInfo("Expanding names")

    # Get lock on master node (just to get a lock, not for a particular reason)
    self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
      }

  def Exec(self, feedback_fn):
    if self.expandnames_calls < 1:
      raise errors.ProgrammerError("ExpandNames was not called")

    if self.op.notify_exec:
      self._Notify(False, constants.JQT_EXEC, None)

    self.LogInfo("Executing")

    if self.op.log_messages:
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
      # Report how many test messages have been sent
      self._Notify(False, constants.JQT_LOGMSG, idx + 1)

    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")

    return True


class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has the following sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, nodes) for
      easy usage

  """
  # pylint: disable=R0902
  # lots of instance attributes

  def __init__(self, cfg, rpc, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.name = None
    self.memory = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.instances = None
    self.evac_mode = None
    self.target_groups = []
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None

    try:
      (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
    except KeyError:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)

    keyset = [n for (n, _) in keydata]

    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)

    self._BuildInputData(compat.partial(fn, self), keydata)
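
  # Illustrative constructor call (all concrete values are assumed; the
  # required keyword arguments per mode are listed in _MODE_DATA below):
  #   ial = IAllocator(cfg, rpc, constants.IALLOCATOR_MODE_ALLOC,
  #                    name="inst1.example.com", memory=1024, vcpus=1,
  #                    os="debian-image", tags=[], nics=[{}],
  #                    disks=[{constants.IDISK_SIZE: 10240,
  #                            constants.IDISK_MODE: "rw"}],
  #                    disk_template=constants.DT_DRBD8,
  #                    hypervisor="xen-pvm")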
  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    ninfo = cfg.GetAllNodesInfo()
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_list = [n.name for n in ninfo.values() if n.vm_capable]

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    else:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)

    data["nodegroups"] = self._ComputeNodeGroupData(cfg)

    config_ndata = self._ComputeBasicNodeData(ninfo)
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
                                                 i_list, config_ndata)
    assert len(data["nodes"]) == len(ninfo), \
      "Incomplete node data computed"

    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)

    self.in_data = data
  @staticmethod
  def _ComputeNodeGroupData(cfg):
    """Compute node groups data.

    """
    ng = dict((guuid, {
      "name": gdata.name,
      "alloc_policy": gdata.alloc_policy,
      })
      for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())

    return ng
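
  # For illustration (group name, UUID and policy value assumed), the returned
  # mapping is keyed by group UUID:
  #   {"4f77e4a8-...": {"name": "default", "alloc_policy": "preferred"}}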
  @staticmethod
  def _ComputeBasicNodeData(node_cfg):
    """Compute global node data.

    @returns: a dict of name: (node dict, node config)

    """
    # fill in static (config-based) values
    node_results = dict((ninfo.name, {
      "tags": list(ninfo.GetTags()),
      "primary_ip": ninfo.primary_ip,
      "secondary_ip": ninfo.secondary_ip,
      "offline": ninfo.offline,
      "drained": ninfo.drained,
      "master_candidate": ninfo.master_candidate,
      "group": ninfo.group,
      "master_capable": ninfo.master_capable,
      "vm_capable": ninfo.vm_capable,
      })
      for ninfo in node_cfg.values())

    return node_results
  @staticmethod
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
                              node_results):
    """Compute global node data.

    @param node_results: the basic node structures as filled from the config

    """
    # make a copy of the current dict
    node_results = dict(node_results)
    for nname, nresult in node_data.items():
      assert nname in node_results, "Missing basic data for node %s" % nname
      ninfo = node_cfg[nname]

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ["memory_total", "memory_free", "memory_dom0",
                     "vg_size", "vg_free", "cpu_total"]:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info["memory_free"] -= max(0, i_mem_diff)
            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info["memory_total"],
          "reserved_memory": remote_info["memory_dom0"],
          "free_memory": remote_info["memory_free"],
          "total_disk": remote_info["vg_size"],
          "free_disk": remote_info["vg_free"],
          "total_cpus": remote_info["cpu_total"],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr_dyn.update(node_results[nname])
        node_results[nname] = pnr_dyn

    return node_results
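
  # Shape of one entry of the returned dict, for illustration (all numbers
  # assumed); the dynamic keys below are merged with the static keys from
  # _ComputeBasicNodeData:
  #   node_results["node1"] = {"total_memory": 16384, "reserved_memory": 512,
  #                            "free_memory": 8192, "total_disk": 102400,
  #                            "free_disk": 51200, "total_cpus": 8,
  #                            "i_pri_memory": 4096, "i_pri_up_memory": 2048,
  #                            ...}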
  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.

    """
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {
          "mac": nic.mac,
          "ip": nic.ip,
          "mode": filled_params[constants.NIC_MODE],
          "link": filled_params[constants.NIC_LINK],
          }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{constants.IDISK_SIZE: dsk.size,
                   constants.IDISK_MODE: dsk.mode}
                  for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data
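
  # For illustration (all values assumed), one entry of the returned dict:
  #   instance_data["inst1.example.com"] = {
  #     "tags": [], "admin_up": True, "vcpus": 1, "memory": 1024,
  #     "os": "debian-image", "nodes": ["node1", "node2"],
  #     "nics": [{"mac": "aa:00:00:00:00:01", "ip": None,
  #               "mode": "bridged", "link": "xen-br0"}],
  #     "disks": [{"size": 10240, "mode": "rw"}],
  #     "disk_template": "drbd", "hypervisor": "xen-pvm",
  #     "disk_space_total": ...,  # filled in by _ComputeDiskSize
  #   }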
  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_INT_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1

    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.memory,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      "hypervisor": self.hypervisor,
      }
    return request
  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_MIRRORED:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if instance.disk_template in constants.DTS_INT_MIRROR and \
        len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
                                 errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddNodeEvacuate(self):
    """Get data for node-evacuate requests.

    """
    return {
      "instances": self.instances,
      "evac_mode": self.evac_mode,
      }

  def _AddChangeGroup(self):
    """Get data for group change requests.

    """
    return {
      "instances": self.instances,
      "target_groups": self.target_groups,
      }
  def _BuildInputData(self, fn, keydata):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    for keyname, keytype in keydata:
      if keyname not in request:
        raise errors.ProgrammerError("Request parameter %s is missing" %
                                     keyname)
      val = request[keyname]
      if not keytype(val):
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
                                     " validation, value %s, expected"
                                     " type %s" % (keyname, val, keytype))
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)
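
  # For illustration (values assumed): after _BuildInputData a relocation
  # request ends up in self.in_data["request"] roughly as
  #   {"type": "relocate", "name": "inst1.example.com",
  #    "relocate_from": ["node2.example.com"],
  #    "disk_space_total": 10240, "required_nodes": 1}
  # and self.in_text is the serialized form of the whole in_data structure
  # (cluster, node group, node and instance data plus this request).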
  _STRING_LIST = ht.TListOf(ht.TString)
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
     # pylint: disable=E1101
     # Class '...' has no 'OP_ID' member
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
                          opcodes.OpInstanceMigrate.OP_ID,
                          opcodes.OpInstanceReplaceDisks.OP_ID])
     })))

  _NEVAC_MOVED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TNonEmptyString,
                                  ht.TListOf(ht.TNonEmptyString),
                                  ])))
  _NEVAC_FAILED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TMaybeString,
                                  ])))
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))

  _MODE_DATA = {
    constants.IALLOCATOR_MODE_ALLOC:
      (_AddNewInstance, [
        ("name", ht.TString),
        ("memory", ht.TInt),
        ("disks", ht.TListOf(ht.TDict)),
        ("disk_template", ht.TString),
        ("os", ht.TString),
        ("tags", _STRING_LIST),
        ("nics", ht.TListOf(ht.TDict)),
        ("vcpus", ht.TInt),
        ("hypervisor", ht.TString),
        ], ht.TList),
    constants.IALLOCATOR_MODE_RELOC:
      (_AddRelocateInstance,
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
       ht.TList),
    constants.IALLOCATOR_MODE_NODE_EVAC:
      (_AddNodeEvacuate, [
        ("instances", _STRING_LIST),
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
        ], _NEVAC_RESULT),
    constants.IALLOCATOR_MODE_CHG_GROUP:
      (_AddChangeGroup, [
        ("instances", _STRING_LIST),
        ("target_groups", _STRING_LIST),
        ], _NEVAC_RESULT),
    }
  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()
  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not self._result_check(self.result):
      raise errors.OpExecError("Iallocator returned invalid result,"
                               " expected %s, got %s" %
                               (self._result_check, self.result),
                               errors.ECODE_INVAL)

    if self.mode == constants.IALLOCATOR_MODE_RELOC:
      assert self.relocate_from is not None
      assert self.required_nodes == 1

      node2group = dict((name, ndata["group"])
                        for (name, ndata) in self.in_data["nodes"].items())

      fn = compat.partial(self._NodesToGroups, node2group,
                          self.in_data["nodegroups"])

      instance = self.cfg.GetInstanceInfo(self.name)
      request_groups = fn(self.relocate_from + [instance.primary_node])
      result_groups = fn(rdict["result"] + [instance.primary_node])

      if self.success and not set(result_groups).issubset(request_groups):
        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
                                 " differ from original groups (%s)" %
                                 (utils.CommaJoin(result_groups),
                                  utils.CommaJoin(request_groups)))

    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES

    self.out_data = rdict
  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
    """Returns a list of unique group names for a list of nodes.

    @type node2group: dict
    @param node2group: Map from node name to group UUID
    @param groups: Group information
    @param nodes: Node names

    """
    result = set()
    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        pass
      else:
        try:
          group = groups[group_uuid]
        except KeyError:
          # Can't find group, let's use UUID
          group_name = group_uuid
        else:
          group_name = group["name"]
        result.add(group_name)

    return sorted(result)
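
  # Illustrative call (names assumed):
  #   IAllocator._NodesToGroups({"node1": "uuid-1", "node2": "uuid-1"},
  #                             {"uuid-1": {"name": "default"}},
  #                             ["node1", "node2", "unknown"])
  # returns ["default"]: unknown nodes are skipped and duplicates collapse.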


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    of the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
        list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)
  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      raise errors.ProgrammerError("Uncaught mode %s in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result
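
  # Behaviour recap of the code above: with direction IALLOCATOR_DIR_IN the
  # LU only returns the generated allocator input text (ial.in_text); with
  # IALLOCATOR_DIR_OUT it also runs the allocator named in self.op.allocator
  # (without validating the output) and returns the raw output text.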


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)