# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable=W0201,C0302

# W0201 since most LU attributes are defined in CheckPrereq or similar
# functions

# C0302: since we have waaaay too many lines in this module
import copy
import itertools
import logging
import re

import OpenSSL

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import opcodes

import ganeti.masterd.instance # pylint: disable=W0611
67 """Data container for LU results with jobs.
69 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
70 by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
71 contained in the C{jobs} attribute and include the job IDs in the opcode
75 def __init__(self, jobs, **kwargs):
76 """Initializes this class.
78 Additional return values can be specified as keyword arguments.
80 @type jobs: list of lists of L{opcode.OpCode}
81 @param jobs: A list of lists of opcode objects
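
    Example (an illustrative sketch; any opcodes can be submitted, the
    C{OpTestDelay} test opcode and the C{other_result} keyword are just
    placeholders)::

      return ResultWithJobs([[opcodes.OpTestDelay(duration=10)]],
                            other_result="done")

    """

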
class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)
  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.glm = context.glm
    # readability alias
    self.owned_locks = context.glm.list_owned
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    # logging
    self.Log = processor.Log # pylint: disable=C0103
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
    self.LogStep = processor.LogStep # pylint: disable=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    self.CheckArguments()
  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods need not worry about missing parameters.

    """
  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.
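
    For example, a sketch of requesting shared (rather than exclusive) node
    locks::

      self.share_locks[locking.LEVEL_NODE] = 1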
    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    self.needed_locks = {} # Exclusive LUs don't need locks.
    raise NotImplementedError
  def DeclareLocks(self, level):
    """Declare LU locking needs for a level.

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS
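
    A typical implementation mirrors the pattern documented for
    L{_LockInstancesNodes} (sketch)::

      def DeclareLocks(self, level):
        if level == locking.LEVEL_NODE:
          self._LockInstancesNodes()

    """
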
  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      pass # Nothing to do

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError
  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    @rtype: dict
    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
      will extend the environment with additional variables. If no environment
      should be defined, an empty dictionary should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.

    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. No nodes should be returned as an
      empty list (and not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the "unused argument" and
    # "could be a function" warnings
    # pylint: disable=W0613,R0201
    return lu_result
  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]
class NoHooksLU(LogicalUnit): # pylint: disable=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLu.

    This just raises an error.

    """
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")

  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.

    """
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
419 """Tasklet base class.
421 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
422 they can mix legacy code with tasklets. Locking needs to be done in the LU,
423 tasklets know nothing about locks.
425 Subclasses must follow these rules:
426 - Implement CheckPrereq
430 def __init__(self, lu):
437 def CheckPrereq(self):
438 """Check prerequisites for this tasklets.
440 This method should check whether the prerequisites for the execution of
441 this tasklet are fulfilled. It can do internode communication, but it
442 should be idempotent - no cluster or system changes are allowed.
444 The method should raise errors.OpPrereqError in case something is not
445 fulfilled. Its return value is ignored.
447 This method should also update all parameters to their canonical form if it
448 hasn't been done before.
453 def Exec(self, feedback_fn):
454 """Execute the tasklet.
456 This method should implement the actual work. It should raise
457 errors.OpExecError for failures that are somewhat dealt with in code, or
461 raise NotImplementedError
465 """Base for query utility classes.
468 #: Attribute holding field definitions
471 def __init__(self, filter_, fields, use_locking):
472 """Initializes this class.
475 self.use_locking = use_locking
477 self.query = query.Query(self.FIELDS, fields, filter_=filter_,
479 self.requested_data = self.query.RequestedData()
480 self.names = self.query.RequestedNames()
482 # Sort only if no names were requested
483 self.sort_by_name = not self.names
485 self.do_locking = None
488 def _GetNames(self, lu, all_names, lock_level):
489 """Helper function to determine names asked for in the query.
493 names = lu.owned_locks(lock_level)
497 if self.wanted == locking.ALL_SET:
498 assert not self.names
499 # caller didn't specify names, so ordering is not important
500 return utils.NiceSort(names)
502 # caller specified names and we must keep the same order
504 assert not self.do_locking or lu.glm.is_owned(lock_level)
506 missing = set(self.wanted).difference(names)
508 raise errors.OpExecError("Some items were removed before retrieving"
509 " their data: %s" % missing)
511 # Return expanded names
514 def ExpandNames(self, lu):
515 """Expand names for this query.
517 See L{LogicalUnit.ExpandNames}.
520 raise NotImplementedError()
522 def DeclareLocks(self, lu, level):
523 """Declare locks for this query.
525 See L{LogicalUnit.DeclareLocks}.
528 raise NotImplementedError()
530 def _GetQueryData(self, lu):
531 """Collects all data for this query.
533 @return: Query data object
536 raise NotImplementedError()
538 def NewStyleQuery(self, lu):
539 """Collect data and execute query.
542 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
543 sort_by_name=self.sort_by_name)
545 def OldStyleQuery(self, lu):
546 """Collect data and execute query.
549 return self.query.OldStyleQuery(self._GetQueryData(lu),
550 sort_by_name=self.sort_by_name)
554 """Returns a dict declaring all lock levels shared.
557 return dict.fromkeys(locking.LEVELS, 1)
def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
  """Checks if the owned node groups are still correct for an instance.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type instance_name: string
  @param instance_name: Instance name
  @type owned_groups: set or frozenset
  @param owned_groups: List of currently owned node groups

  """
  inst_groups = cfg.GetInstanceNodeGroups(instance_name)

  if not owned_groups.issuperset(inst_groups):
    raise errors.OpPrereqError("Instance %s's node groups changed since"
                               " locks were acquired, current groups"
                               " are '%s', owning groups '%s'; retry the"
                               " operation" %
                               (instance_name,
                                utils.CommaJoin(inst_groups),
                                utils.CommaJoin(owned_groups)),
                               errors.ECODE_STATE)

  return inst_groups
def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
  """Checks if the instances in a node group are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type group_uuid: string
  @param group_uuid: Node group UUID
  @type owned_instances: set or frozenset
  @param owned_instances: List of currently owned instances

  """
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
  if owned_instances != wanted_instances:
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
                               " locks were acquired, wanted '%s', have '%s';"
                               " retry the operation" %
                               (group_uuid,
                                utils.CommaJoin(wanted_instances),
                                utils.CommaJoin(owned_instances)),
                               errors.ECODE_STATE)

  return wanted_instances
def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise

  """
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())
def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted
def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary
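
  A worked example (sketch, with C{use_default} left enabled): a value of
  L{constants.VALUE_DEFAULT} deletes the key, anything else updates it::

    _GetUpdatedParams({"a": 1, "b": 2},
                      {"b": constants.VALUE_DEFAULT, "c": 3})
    # -> {"a": 1, "c": 3}

  """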
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy
def _ReleaseLocks(lu, level, names=None, keep=None):
  """Releases locks owned by an LU.

  @type lu: L{LogicalUnit}
  @param level: Lock level
  @type names: list or None
  @param names: Names of locks to release
  @type keep: list or None
  @param keep: Names of locks to retain
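
  A typical call (sketch; C{pnode} stands for a hypothetical primary node
  name)::

    # Keep the primary node's lock, release all other node locks
    _ReleaseLocks(self, locking.LEVEL_NODE, keep=[pnode])

  """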
  assert not (keep is not None and names is not None), \
    "Only one of the 'names' and the 'keep' parameters can be given"

  if names is not None:
    should_release = names.__contains__
  elif keep:
    should_release = lambda name: name not in keep
  else:
    should_release = None

  if should_release:
    retain = []
    release = []

    # Determine which locks to release
    for name in lu.owned_locks(level):
      if should_release(name):
        release.append(name)
      else:
        retain.append(name)

    assert len(lu.owned_locks(level)) == (len(retain) + len(release))

    # Release just some locks
    lu.glm.release(level, names=release)

    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
  else:
    # Release everything
    lu.glm.release(level)

    assert not lu.glm.is_owned(level), "No locks should be owned"
def _MapInstanceDisksToNodes(instances):
  """Creates a map from (node, volume) to instance name.

  @type instances: list of L{objects.Instance}
  @rtype: dict; tuple of (node name, volume name) as key, instance name as
      value
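
  For example (illustrative names only), a single-disk DRBD instance on two
  nodes maps to::

    {("node1.example.com", "xenvg/disk0"): "inst1.example.com",
     ("node2.example.com", "xenvg/disk0"): "inst1.example.com"}

  """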
  return dict(((node, vol), inst.name)
              for inst in instances
              for (node, vols) in inst.MapLVsByNode().items()
              for vol in vols)
def _RunPostHook(lu, node_name):
  """Runs the post-hook for an opcode on a single node.

  """
  hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
  try:
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
  except:
    # pylint: disable=W0702
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)
def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = static
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)
def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global parameters.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)
def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)
def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)
def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)
def _CheckInstanceDown(lu, instance, reason):
  """Ensure that an instance is not running."""
  if instance.admin_up:
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
                               (instance.name, reason), errors.ECODE_STATE)

  pnode = instance.primary_node
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
              prereq=True, ecode=errors.ECODE_ENVIRON)

  if instance.name in ins_l.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, reason), errors.ECODE_STATE)
def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name, tags):
  """Builds instance related env variables for hooks.

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @type memory: string
  @param memory: the memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @type tags: list
  @param tags: list of instance tags as strings
  @rtype: dict
  @return: the hook environment for this instance
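
  For example (illustrative), a one-NIC, one-disk instance yields
  C{INSTANCE_NIC_COUNT=1}, C{INSTANCE_NIC0_MAC}, C{INSTANCE_DISK_COUNT=1} and
  C{INSTANCE_DISK0_SIZE}, next to the per-instance basics and one
  C{INSTANCE_BE_*}/C{INSTANCE_HV_*} entry per backend/hypervisor parameter,
  as built below.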
981 "INSTANCE_NAME": name,
982 "INSTANCE_PRIMARY": primary_node,
983 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
984 "INSTANCE_OS_TYPE": os_type,
985 "INSTANCE_STATUS": str_status,
986 "INSTANCE_MEMORY": memory,
987 "INSTANCE_VCPUS": vcpus,
988 "INSTANCE_DISK_TEMPLATE": disk_template,
989 "INSTANCE_HYPERVISOR": hypervisor_name,
993 nic_count = len(nics)
994 for idx, (ip, mac, mode, link) in enumerate(nics):
997 env["INSTANCE_NIC%d_IP" % idx] = ip
998 env["INSTANCE_NIC%d_MAC" % idx] = mac
999 env["INSTANCE_NIC%d_MODE" % idx] = mode
1000 env["INSTANCE_NIC%d_LINK" % idx] = link
1001 if mode == constants.NIC_MODE_BRIDGED:
1002 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1006 env["INSTANCE_NIC_COUNT"] = nic_count
1009 disk_count = len(disks)
1010 for idx, (size, mode) in enumerate(disks):
1011 env["INSTANCE_DISK%d_SIZE" % idx] = size
1012 env["INSTANCE_DISK%d_MODE" % idx] = mode
1016 env["INSTANCE_DISK_COUNT"] = disk_count
1021 env["INSTANCE_TAGS"] = " ".join(tags)
1023 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1024 for key, value in source.items():
1025 env["INSTANCE_%s_%s" % (kind, key)] = value
def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    "name": instance.name,
    "primary_node": instance.primary_node,
    "secondary_nodes": instance.secondary_nodes,
    "os_type": instance.os,
    "status": instance.admin_up,
    "memory": bep[constants.BE_MEMORY],
    "vcpus": bep[constants.BE_VCPUS],
    "nics": _NICListToTuple(lu, instance.nics),
    "disk_template": instance.disk_template,
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
    "bep": bep,
    "hvp": hvp,
    "hypervisor_name": instance.hypervisor,
    "tags": instance.tags,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))
def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max with one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should
def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)
def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity
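
  For example (illustrative), C{debootstrap+default} names the C{default}
  variant of the C{debootstrap} OS.

  """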
  variant = objects.OS.GetVariant(name)
  if not os_obj.supported_variants:
    if variant:
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                                 " passed)" % (os_obj.name, variant),
                                 errors.ECODE_INVAL)
    return

  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)
def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

  return []
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot
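
  A typical call from an LU's CheckArguments (sketch; the slot names vary by
  opcode)::

    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")

  """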
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both an iallocator and a node",
                               errors.ECODE_INVAL)
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found;"
                                 " please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator")
def _GetDefaultIAllocator(cfg, iallocator):
  """Decides on which iallocator to use.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration object
  @type iallocator: string or None
  @param iallocator: Iallocator specified in opcode
  @rtype: string
  @return: Iallocator name

  """
  if not iallocator:
    # Use default iallocator
    iallocator = cfg.GetDefaultIAllocator()

  if not iallocator:
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
                               " opcode nor as a cluster-wide default",
                               errors.ECODE_INVAL)

  return iallocator
class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True
class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master)

    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    return master
def _VerifyCertificate(filename):
  """Verifies a certificate for L{LUClusterVerifyConfig}.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable=W0703
    return (LUClusterVerifyConfig.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
def _GetAllHypervisorParameters(cluster, instances):
  """Compute the set of all hypervisor parameters.

  @type cluster: L{objects.Cluster}
  @param cluster: the cluster object
  @type instances: list of L{objects.Instance}
  @param instances: additional instances from which to obtain parameters
  @rtype: list of (origin, hypervisor, parameters)
  @return: a list with all parameters found, indicating the hypervisor they
       apply to, and the origin (can be "cluster", "os X", or "instance Y")

  """
  hvp_data = []

  for hv_name in cluster.enabled_hypervisors:
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))

  for os_name, os_hvp in cluster.os_hvp.items():
    for hv_name, hv_params in os_hvp.items():
      if hv_params:
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
        hvp_data.append(("os %s" % os_name, hv_name, full_params))

  # TODO: collapse identical parameter values in a single one
  for instance in instances:
    if instance.hvparams:
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
                       cluster.FillHV(instance)))

  return hvp_data
class _VerifyErrors(object):
  """Mix-in for cluster/group verify LUs.

  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
  self.op and self._feedback_fn to be available.)

  """
  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
  ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
  ECLUSTERDANGLINGNODES = (TNODE, "ECLUSTERDANGLINGNODES")
  ECLUSTERDANGLINGINST = (TNODE, "ECLUSTERDANGLINGINST")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEOS = (TNODE, "ENODEOS")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")
  ENODEOOBPATH = (TNODE, "ENODEOOBPATH")

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"
  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101

  def _ErrorIf(self, cond, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = (bool(cond)
            or self.op.debug_simulate_errors) # pylint: disable=E1101
    if cond:
      self._Error(*args, **kwargs)
    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond
class LUClusterVerify(NoHooksLU):
  """Submits all jobs necessary to verify the cluster.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    jobs = []

    if self.op.group_name:
      groups = [self.op.group_name]
      depends_fn = lambda: None
    else:
      groups = self.cfg.GetNodeGroupList()

      # Verify global configuration
      jobs.append([opcodes.OpClusterVerifyConfig()])

      # Always depend on global verification
      depends_fn = lambda: [(-len(jobs), [])]

    jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
                                              depends=depends_fn())]
                for group in groups)

    # Fix up all parameters
    for op in itertools.chain(*jobs): # pylint: disable=W0142
      op.debug_simulate_errors = self.op.debug_simulate_errors
      op.verbose = self.op.verbose
      op.error_codes = self.op.error_codes
      try:
        op.skip_checks = self.op.skip_checks
      except AttributeError:
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)

    return ResultWithJobs(jobs)
class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
  """Verifies the cluster config.

  """
  REQ_BGL = True

  def _VerifyHVP(self, hvp_data):
    """Verifies locally the syntax of the hypervisor parameters.

    """
    for item, hv_name, hv_params in hvp_data:
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
             (hv_name, item))
      try:
        hv_class = hypervisor.GetHypervisor(hv_name)
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
        hv_class.CheckParameterSyntax(hv_params)
      except errors.GenericError, err:
        self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))

  def ExpandNames(self):
    # Information can be safely retrieved as the BGL is acquired in exclusive
    # mode
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various test on nodes.

    """
    self.bad = False
    self._feedback_fn = feedback_fn

    feedback_fn("* Verifying cluster config")

    for msg in self.cfg.VerifyConfig():
      self._ErrorIf(True, self.ECLUSTERCFG, None, msg)

    feedback_fn("* Verifying cluster certificate files")

    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      self._ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)

    feedback_fn("* Verifying hypervisor parameters")

    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
                                                self.all_inst_info.values()))

    feedback_fn("* Verifying all nodes belong to an existing group")

    # We do this verification here because, should this bogus circumstance
    # occur, it would never be caught by VerifyGroup, which only acts on
    # nodes/instances reachable from existing node groups.

    dangling_nodes = set(node.name for node in self.all_node_info.values()
                         if node.group not in self.all_group_info)

    dangling_instances = {}
    no_node_instances = []

    for inst in self.all_inst_info.values():
      if inst.primary_node in dangling_nodes:
        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
      elif inst.primary_node not in self.all_node_info:
        no_node_instances.append(inst.name)

    pretty_dangling = [
        "%s (%s)" %
        (node.name,
         utils.CommaJoin(dangling_instances.get(node.name,
                                                ["no instances"])))
        for node in dangling_nodes]

    self._ErrorIf(bool(dangling_nodes), self.ECLUSTERDANGLINGNODES, None,
                  "the following nodes (and their instances) belong to a non"
                  "-existing group: %s", utils.CommaJoin(pretty_dangling))

    self._ErrorIf(bool(no_node_instances), self.ECLUSTERDANGLINGINST, None,
                  "the following instances have a non-existing primary-node:"
                  " %s", utils.CommaJoin(no_node_instances))

    return not self.bad
class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
  """Verifies the status of a node group.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  _HOOKS_INDENT_RE = re.compile("^", re.M)

  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @type name: string
    @ivar name: the node name to which this object refers
    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dictionary of {primary-node: list of instances} for all
        instances for which this node is secondary (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)
    @type os_fail: boolean
    @ivar os_fail: whether the RPC call didn't return valid OS data
    @type oslist: list
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
    @type vm_capable: boolean
    @ivar vm_capable: whether the node can host instances

    """
    def __init__(self, offline=False, name=None, vm_capable=True):
      self.name = name
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.vm_capable = vm_capable
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
      self.os_fail = False
      self.oslist = {}
  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    # Get instances in node group; this is unsafe and needs verification later
    inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: inst_names,
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      locking.LEVEL_NODE: [],
      }

    self.share_locks = _ShareAll()

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      # Get members of node group; this is unsafe and needs verification later
      nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)

      all_inst_info = self.cfg.GetAllInstancesInfo()

      # In Exec(), we warn about mirrored instances that have primary and
      # secondary living in separate node groups. To fully verify that
      # volumes for these instances are healthy, we will need to do an
      # extra call to their secondaries. We ensure here those nodes will
      # be locked.
      for inst in self.owned_locks(locking.LEVEL_INSTANCE):
        # Important: access only the instances whose lock is owned
        if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
          nodes.update(all_inst_info[inst].secondary_nodes)

      self.needed_locks[locking.LEVEL_NODE] = nodes
  def CheckPrereq(self):
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
    self.group_info = self.cfg.GetNodeGroup(self.group_uuid)

    group_nodes = set(self.group_info.members)
    group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)

    unlocked_nodes = \
      group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))

    unlocked_instances = \
      group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))

    if unlocked_nodes:
      raise errors.OpPrereqError("Missing lock for nodes: %s" %
                                 utils.CommaJoin(unlocked_nodes))

    if unlocked_instances:
      raise errors.OpPrereqError("Missing lock for instances: %s" %
                                 utils.CommaJoin(unlocked_instances))

    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()

    self.my_node_names = utils.NiceSort(group_nodes)
    self.my_inst_names = utils.NiceSort(group_instances)

    self.my_node_info = dict((name, self.all_node_info[name])
                             for name in self.my_node_names)

    self.my_inst_info = dict((name, self.all_inst_info[name])
                             for name in self.my_inst_names)

    # We detect here the nodes that will need the extra RPC calls for verifying
    # split LV volumes; they should be locked.
    extra_lv_nodes = set()

    for inst in self.my_inst_info.values():
      if inst.disk_template in constants.DTS_INT_MIRROR:
        group = self.my_node_info[inst.primary_node].group
        for nname in inst.secondary_nodes:
          if self.all_node_info[nname].group != group:
            extra_lv_nodes.add(nname)

    unlocked_lv_nodes = \
      extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))

    if unlocked_lv_nodes:
      raise errors.OpPrereqError("Missing node locks for LV check: %s" %
                                 utils.CommaJoin(unlocked_lv_nodes))
    self.extra_lv_nodes = list(extra_lv_nodes)
  def _VerifyNode(self, ninfo, nresult):
    """Perform some basic validation on data returned from a node.

      - check the result data structure is well formed and has all the
        mandatory fields
      - check ganeti version

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @rtype: boolean
    @return: whether overall this call was successful (and we can expect
         reasonable values in the response)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, self.ENODERPC, node,
             "unable to verify node: no data returned")
    if test:
      return False

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, self.ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return False

    test = local_version != remote_version[0]
    _ErrorIf(test, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return False

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  self.ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if ninfo.vm_capable and isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
    if ninfo.vm_capable and isinstance(hvp_result, list):
      for item, hv_name, hv_result in hvp_result:
        _ErrorIf(True, self.ENODEHV, node,
                 "hypervisor %s parameter verify failure (source %s): %s",
                 hv_name, item, hv_result)

    test = nresult.get(constants.NV_NODESETUP,
                       ["Missing NODESETUP results"])
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True
  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
      return

    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    if vg_name is None:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = not vglist
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    # check pv names
    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
    if not test:
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # be used on this PV)
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
                 " '%s' of VG '%s'", pvname, owner_vg)
  def _VerifyNodeBridges(self, ninfo, nresult, bridges):
    """Check the node bridges.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param bridges: the expected list of bridges

    """
    if not bridges:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    missing = nresult.get(constants.NV_BRIDGES, None)
    test = not isinstance(missing, list)
    _ErrorIf(test, self.ENODENET, node,
             "did not return valid bridge information")
    if not test:
      _ErrorIf(bool(missing), self.ENODENET, node, "missing bridges: %s" %
               utils.CommaJoin(sorted(missing)))
1953 def _VerifyNodeNetwork(self, ninfo, nresult):
1954 """Check the node network connectivity results.
1956 @type ninfo: L{objects.Node}
1957 @param ninfo: the node to check
1958 @param nresult: the remote results for the node
1962 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1964 test = constants.NV_NODELIST not in nresult
1965 _ErrorIf(test, self.ENODESSH, node,
1966 "node hasn't returned node ssh connectivity data")
1967 if not test:
1968 if nresult[constants.NV_NODELIST]:
1969 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1970 _ErrorIf(True, self.ENODESSH, node,
1971 "ssh communication with node '%s': %s", a_node, a_msg)
1973 test = constants.NV_NODENETTEST not in nresult
1974 _ErrorIf(test, self.ENODENET, node,
1975 "node hasn't returned node tcp connectivity data")
1976 if not test:
1977 if nresult[constants.NV_NODENETTEST]:
1978 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1979 for anode in nlist:
1980 _ErrorIf(True, self.ENODENET, node,
1981 "tcp communication with node '%s': %s",
1982 anode, nresult[constants.NV_NODENETTEST][anode])
1984 test = constants.NV_MASTERIP not in nresult
1985 _ErrorIf(test, self.ENODENET, node,
1986 "node hasn't returned node master IP reachability data")
1987 if not test:
1988 if not nresult[constants.NV_MASTERIP]:
1989 if node == self.master_node:
1990 msg = "the master node cannot reach the master IP (not configured?)"
1992 msg = "cannot reach the master IP"
1993 _ErrorIf(True, self.ENODENET, node, msg)
1995 def _VerifyInstance(self, instance, instanceconfig, node_image,
1996 diskstatus):
1997 """Verify an instance.
1999 This function checks to see if the required block devices are
2000 available on the instance's node.
2003 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2004 node_current = instanceconfig.primary_node
2006 node_vol_should = {}
2007 instanceconfig.MapLVsByNode(node_vol_should)
2009 for node in node_vol_should:
2010 n_img = node_image[node]
2011 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2012 # ignore missing volumes on offline or broken nodes
2013 continue
2014 for volume in node_vol_should[node]:
2015 test = volume not in n_img.volumes
2016 _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
2017 "volume %s missing on node %s", volume, node)
2019 if instanceconfig.admin_up:
2020 pri_img = node_image[node_current]
2021 test = instance not in pri_img.instances and not pri_img.offline
2022 _ErrorIf(test, self.EINSTANCEDOWN, instance,
2023 "instance not running on its primary node %s",
2026 diskdata = [(nname, success, status, idx)
2027 for (nname, disks) in diskstatus.items()
2028 for idx, (success, status) in enumerate(disks)]
2030 for nname, success, bdev_status, idx in diskdata:
2031 # the 'ghost node' construction in Exec() ensures that we have a
2032 # node here
2033 snode = node_image[nname]
2034 bad_snode = snode.ghost or snode.offline
2035 _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
2036 self.EINSTANCEFAULTYDISK, instance,
2037 "couldn't retrieve status for disk/%s on %s: %s",
2038 idx, nname, bdev_status)
2039 _ErrorIf((instanceconfig.admin_up and success and
2040 bdev_status.ldisk_status == constants.LDS_FAULTY),
2041 self.EINSTANCEFAULTYDISK, instance,
2042 "disk/%s on %s is faulty", idx, nname)
2044 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2045 """Verify if there are any unknown volumes in the cluster.
2047 The .os, .swap and backup volumes are ignored. All other volumes are
2048 reported as unknown.
2050 @type reserved: L{ganeti.utils.FieldSet}
2051 @param reserved: a FieldSet of reserved volume names
2054 for node, n_img in node_image.items():
2055 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2056 # skip non-healthy nodes
2057 continue
2058 for volume in n_img.volumes:
2059 test = ((node not in node_vol_should or
2060 volume not in node_vol_should[node]) and
2061 not reserved.Matches(volume))
2062 self._ErrorIf(test, self.ENODEORPHANLV, node,
2063 "volume %s is unknown", volume)
2065 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2066 """Verify N+1 Memory Resilience.
2068 Check that if one single node dies we can still start all the
2069 instances it was primary for.
2072 cluster_info = self.cfg.GetClusterInfo()
2073 for node, n_img in node_image.items():
2074 # This code checks that every node which is now listed as
2075 # secondary has enough memory to host all instances it is
2076 # supposed to should a single other node in the cluster fail.
2077 # FIXME: not ready for failover to an arbitrary node
2078 # FIXME: does not support file-backed instances
2079 # WARNING: we currently take into account down instances as well
2080 # as up ones, considering that even if they're down someone
2081 # might want to start them even in the event of a node failure.
2083 # we're skipping offline nodes from the N+1 warning, since
2084 # most likely we don't have good memory information from them;
2085 # we already list instances living on such nodes, and that's
2088 for prinode, instances in n_img.sbp.items():
2089 needed_mem = 0
2090 for instance in instances:
2091 bep = cluster_info.FillBE(instance_cfg[instance])
2092 if bep[constants.BE_AUTO_BALANCE]:
2093 needed_mem += bep[constants.BE_MEMORY]
2094 test = n_img.mfree < needed_mem
2095 self._ErrorIf(test, self.ENODEN1, node,
2096 "not enough memory to accomodate instance failovers"
2097 " should node %s fail (%dMiB needed, %dMiB available)",
2098 prinode, needed_mem, n_img.mfree)
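# --- Illustrative sketch (editor's addition): the core of the N+1 memory
# test above, stand-alone. The real check only sums instances that have
# BE_AUTO_BALANCE set; here every instance counts, and all names are ours.
def _SketchNPlusOneFailures(free_mem, sbp, inst_mem):
  """@param free_mem: free memory on the node being checked (MiB)
  @param sbp: dict of primary node -> instances this node is secondary for
  @param inst_mem: dict of instance -> configured memory (MiB)
  @return: primary nodes whose failure this node could not absorb
  """
  failures = []
  for prinode, instances in sbp.items():
    needed = sum(inst_mem[inst] for inst in instances)
    if free_mem < needed:
      failures.append(prinode)
  return failures

assert _SketchNPlusOneFailures(2048,
                               {"nodeA": ["inst1", "inst2"]},
                               {"inst1": 1024, "inst2": 2048}) == ["nodeA"]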
2101 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2102 (files_all, files_all_opt, files_mc, files_vm)):
2103 """Verifies file checksums collected from all nodes.
2105 @param errorif: Callback for reporting errors
2106 @param nodeinfo: List of L{objects.Node} objects
2107 @param master_node: Name of master node
2108 @param all_nvinfo: RPC results
2111 assert (len(files_all | files_all_opt | files_mc | files_vm) ==
2112 sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
2113 "Found file listed in more than one file list"
2115 # Define functions determining which nodes to consider for a file
2116 files2nodefn = [
2117 (files_all, None),
2118 (files_all_opt, None),
2119 (files_mc, lambda node: (node.master_candidate or
2120 node.name == master_node)),
2121 (files_vm, lambda node: node.vm_capable),
2122 ]
2124 # Build mapping from filename to list of nodes which should have the file
2125 nodefiles = {}
2126 for (files, fn) in files2nodefn:
2127 if fn is None:
2128 filenodes = nodeinfo
2129 else:
2130 filenodes = filter(fn, nodeinfo)
2131 nodefiles.update((filename,
2132 frozenset(map(operator.attrgetter("name"), filenodes)))
2133 for filename in files)
2135 assert set(nodefiles) == (files_all | files_all_opt | files_mc | files_vm)
2137 fileinfo = dict((filename, {}) for filename in nodefiles)
2138 ignore_nodes = set()
2140 for node in nodeinfo:
2141 if node.offline:
2142 ignore_nodes.add(node.name)
2143 continue
2145 nresult = all_nvinfo[node.name]
2147 if nresult.fail_msg or not nresult.payload:
2148 node_files = None
2149 else:
2150 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2152 test = not (node_files and isinstance(node_files, dict))
2153 errorif(test, cls.ENODEFILECHECK, node.name,
2154 "Node did not return file checksum data")
2155 if test:
2156 ignore_nodes.add(node.name)
2157 continue
2159 # Build per-checksum mapping from filename to nodes having it
2160 for (filename, checksum) in node_files.items():
2161 assert filename in nodefiles
2162 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2164 for (filename, checksums) in fileinfo.items():
2165 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2167 # Nodes having the file
2168 with_file = frozenset(node_name
2169 for nodes in fileinfo[filename].values()
2170 for node_name in nodes) - ignore_nodes
2172 expected_nodes = nodefiles[filename] - ignore_nodes
2174 # Nodes missing file
2175 missing_file = expected_nodes - with_file
2177 if filename in files_all_opt:
2179 errorif(missing_file and missing_file != expected_nodes,
2180 cls.ECLUSTERFILECHECK, None,
2181 "File %s is optional, but it must exist on all or no"
2182 " nodes (not found on %s)",
2183 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2184 else:
2185 # Non-optional files
2186 errorif(missing_file, cls.ECLUSTERFILECHECK, None,
2187 "File %s is missing from node(s) %s", filename,
2188 utils.CommaJoin(utils.NiceSort(missing_file)))
2190 # Warn if a node has a file it shouldn't
2191 unexpected = with_file - expected_nodes
2192 errorif(unexpected,
2193 cls.ECLUSTERFILECHECK, None,
2194 "File %s should not exist on node(s) %s",
2195 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2197 # See if there are multiple versions of the file
2198 test = len(checksums) > 1
2199 if test:
2200 variants = ["variant %s on %s" %
2201 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2202 for (idx, (checksum, nodes)) in
2203 enumerate(sorted(checksums.items()))]
2204 else:
2205 variants = []
2207 errorif(test, cls.ECLUSTERFILECHECK, None,
2208 "File %s found with %s different checksums (%s)",
2209 filename, len(checksums), "; ".join(variants))
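# --- Illustrative sketch (editor's addition, sample data made up): spotting
# files with divergent checksums, using the same 'fileinfo' shape as above
# (filename -> checksum -> set of node names holding that variant).
def _SketchFindInconsistent(fileinfo):
  bad = {}
  for filename, checksums in fileinfo.items():
    if len(checksums) > 1:
      bad[filename] = ["variant %s on %s" % (idx + 1, ",".join(sorted(nodes)))
                       for idx, (_, nodes) in
                       enumerate(sorted(checksums.items()))]
  return bad

_sample_fileinfo = {
  "/etc/hosts": {"abc123def456": set(["n1", "n2"]),
                 "fed654cba321": set(["n3"])},
  "/var/lib/x": {"aaa111bbb222": set(["n1", "n2", "n3"])},
}
assert list(_SketchFindInconsistent(_sample_fileinfo)) == ["/etc/hosts"]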
2211 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2212 drbd_map):
2213 """Verifies the node DRBD status.
2215 @type ninfo: L{objects.Node}
2216 @param ninfo: the node to check
2217 @param nresult: the remote results for the node
2218 @param instanceinfo: the dict of instances
2219 @param drbd_helper: the configured DRBD usermode helper
2220 @param drbd_map: the DRBD map as returned by
2221 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2225 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2227 if drbd_helper:
2228 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2229 test = (helper_result is None)
2230 _ErrorIf(test, self.ENODEDRBDHELPER, node,
2231 "no drbd usermode helper returned")
2232 if helper_result:
2233 status, payload = helper_result
2234 test = not status
2235 _ErrorIf(test, self.ENODEDRBDHELPER, node,
2236 "drbd usermode helper check unsuccessful: %s", payload)
2237 test = status and (payload != drbd_helper)
2238 _ErrorIf(test, self.ENODEDRBDHELPER, node,
2239 "wrong drbd usermode helper: %s", payload)
2241 # compute the DRBD minors
2242 node_drbd = {}
2243 for minor, instance in drbd_map[node].items():
2244 test = instance not in instanceinfo
2245 _ErrorIf(test, self.ECLUSTERCFG, None,
2246 "ghost instance '%s' in temporary DRBD map", instance)
2247 # ghost instance should not be running, but otherwise we
2248 # don't give double warnings (both ghost instance and
2249 # unallocated minor in use)
2250 if test:
2251 node_drbd[minor] = (instance, False)
2252 else:
2253 instance = instanceinfo[instance]
2254 node_drbd[minor] = (instance.name, instance.admin_up)
2256 # and now check them
2257 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2258 test = not isinstance(used_minors, (tuple, list))
2259 _ErrorIf(test, self.ENODEDRBD, node,
2260 "cannot parse drbd status file: %s", str(used_minors))
2261 if test:
2262 # we cannot check drbd status
2263 return
2265 for minor, (iname, must_exist) in node_drbd.items():
2266 test = minor not in used_minors and must_exist
2267 _ErrorIf(test, self.ENODEDRBD, node,
2268 "drbd minor %d of instance %s is not active", minor, iname)
2269 for minor in used_minors:
2270 test = minor not in node_drbd
2271 _ErrorIf(test, self.ENODEDRBD, node,
2272 "unallocated drbd minor %d is in use", minor)
2274 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2275 """Builds the node OS structures.
2277 @type ninfo: L{objects.Node}
2278 @param ninfo: the node to check
2279 @param nresult: the remote results for the node
2280 @param nimg: the node image object
2284 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2286 remote_os = nresult.get(constants.NV_OSLIST, None)
2287 test = (not isinstance(remote_os, list) or
2288 not compat.all(isinstance(v, list) and len(v) == 7
2289 for v in remote_os))
2291 _ErrorIf(test, self.ENODEOS, node,
2292 "node hasn't returned valid OS data")
2301 for (name, os_path, status, diagnose,
2302 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2304 if name not in os_dict:
2305 os_dict[name] = []
2307 # parameters is a list of lists instead of list of tuples due to
2308 # JSON lacking a real tuple type, fix it:
2309 parameters = [tuple(v) for v in parameters]
2310 os_dict[name].append((os_path, status, diagnose,
2311 set(variants), set(parameters), set(api_ver)))
2313 nimg.oslist = os_dict
2315 def _VerifyNodeOS(self, ninfo, nimg, base):
2316 """Verifies the node OS list.
2318 @type ninfo: L{objects.Node}
2319 @param ninfo: the node to check
2320 @param nimg: the node image object
2321 @param base: the 'template' node we match against (e.g. from the master)
2325 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2327 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2329 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2330 for os_name, os_data in nimg.oslist.items():
2331 assert os_data, "Empty OS status for OS %s?!" % os_name
2332 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2333 _ErrorIf(not f_status, self.ENODEOS, node,
2334 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2335 _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
2336 "OS '%s' has multiple entries (first one shadows the rest): %s",
2337 os_name, utils.CommaJoin([v[0] for v in os_data]))
2338 # comparisons with the 'base' image
2339 test = os_name not in base.oslist
2340 _ErrorIf(test, self.ENODEOS, node,
2341 "Extra OS %s not present on reference node (%s)",
2345 assert base.oslist[os_name], "Base node has empty OS status?"
2346 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2347 if not b_status:
2348 # base OS is invalid, skipping
2349 continue
2350 for kind, a, b in [("API version", f_api, b_api),
2351 ("variants list", f_var, b_var),
2352 ("parameters", beautify_params(f_param),
2353 beautify_params(b_param))]:
2354 _ErrorIf(a != b, self.ENODEOS, node,
2355 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2356 kind, os_name, base.name,
2357 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2359 # check any missing OSes
2360 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2361 _ErrorIf(missing, self.ENODEOS, node,
2362 "OSes present on reference node %s but missing on this node: %s",
2363 base.name, utils.CommaJoin(missing))
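# --- Illustrative sketch (editor's addition): the reference-node comparison
# done above, stand-alone. Each OS is summarized by sets; any difference in
# API versions, variants or parameters is a diff. The field names are ours.
def _SketchOsDiffs(node_os, ref_os):
  diffs = []
  for kind, a, b in [("API version", node_os["api"], ref_os["api"]),
                     ("variants list", node_os["variants"], ref_os["variants"]),
                     ("parameters", node_os["params"], ref_os["params"])]:
    if a != b:
      diffs.append("%s: [%s] vs. [%s]" %
                   (kind,
                    ",".join(str(v) for v in sorted(a)),
                    ",".join(str(v) for v in sorted(b))))
  return diffs

assert _SketchOsDiffs(
  {"api": set([10, 20]), "variants": set(["default"]), "params": set()},
  {"api": set([20]), "variants": set(["default"]), "params": set()}
  ) == ["API version: [10,20] vs. [20]"]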
2365 def _VerifyOob(self, ninfo, nresult):
2366 """Verifies out of band functionality of a node.
2368 @type ninfo: L{objects.Node}
2369 @param ninfo: the node to check
2370 @param nresult: the remote results for the node
2374 # We just have to verify the paths on master and/or master candidates
2375 # as the oob helper is invoked on the master
2376 if ((ninfo.master_candidate or ninfo.master_capable) and
2377 constants.NV_OOB_PATHS in nresult):
2378 for path_result in nresult[constants.NV_OOB_PATHS]:
2379 self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
2381 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2382 """Verifies and updates the node volume data.
2384 This function will update a L{NodeImage}'s internal structures
2385 with data from the remote call.
2387 @type ninfo: L{objects.Node}
2388 @param ninfo: the node to check
2389 @param nresult: the remote results for the node
2390 @param nimg: the node image object
2391 @param vg_name: the configured VG name
2395 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2397 nimg.lvm_fail = True
2398 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2399 if vg_name is None:
2400 pass
2401 elif isinstance(lvdata, basestring):
2402 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
2403 utils.SafeEncode(lvdata))
2404 elif not isinstance(lvdata, dict):
2405 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
2407 nimg.volumes = lvdata
2408 nimg.lvm_fail = False
2410 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2411 """Verifies and updates the node instance list.
2413 If the listing was successful, then updates this node's instance
2414 list. Otherwise, it marks the RPC call as failed for the instance
2415 list.
2417 @type ninfo: L{objects.Node}
2418 @param ninfo: the node to check
2419 @param nresult: the remote results for the node
2420 @param nimg: the node image object
2423 idata = nresult.get(constants.NV_INSTANCELIST, None)
2424 test = not isinstance(idata, list)
2425 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
2426 " (instancelist): %s", utils.SafeEncode(str(idata)))
2427 if test:
2428 nimg.hyp_fail = True
2429 else:
2430 nimg.instances = idata
2432 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2433 """Verifies and computes a node information map
2435 @type ninfo: L{objects.Node}
2436 @param ninfo: the node to check
2437 @param nresult: the remote results for the node
2438 @param nimg: the node image object
2439 @param vg_name: the configured VG name
2443 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2445 # try to read free memory (from the hypervisor)
2446 hv_info = nresult.get(constants.NV_HVINFO, None)
2447 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2448 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
2449 if not test:
2450 try:
2451 nimg.mfree = int(hv_info["memory_free"])
2452 except (ValueError, TypeError):
2453 _ErrorIf(True, self.ENODERPC, node,
2454 "node returned invalid nodeinfo, check hypervisor")
2456 # FIXME: devise a free space model for file based instances as well
2457 if vg_name is not None:
2458 test = (constants.NV_VGLIST not in nresult or
2459 vg_name not in nresult[constants.NV_VGLIST])
2460 _ErrorIf(test, self.ENODELVM, node,
2461 "node didn't return data for the volume group '%s'"
2462 " - it is either missing or broken", vg_name)
2463 if not test:
2464 try:
2465 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2466 except (ValueError, TypeError):
2467 _ErrorIf(True, self.ENODERPC, node,
2468 "node returned invalid LVM info, check LVM status")
2470 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2471 """Gets per-disk status information for all instances.
2473 @type nodelist: list of strings
2474 @param nodelist: Node names
2475 @type node_image: dict of (name, L{objects.Node})
2476 @param node_image: Node objects
2477 @type instanceinfo: dict of (name, L{objects.Instance})
2478 @param instanceinfo: Instance objects
2479 @rtype: {instance: {node: [(success, payload)]}}
2480 @return: a dictionary of per-instance dictionaries with nodes as
2481 keys and disk information as values; the disk information is a
2482 list of tuples (success, payload)
2485 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2487 node_disks = {}
2488 node_disks_devonly = {}
2489 diskless_instances = set()
2490 diskless = constants.DT_DISKLESS
2492 for nname in nodelist:
2493 node_instances = list(itertools.chain(node_image[nname].pinst,
2494 node_image[nname].sinst))
2495 diskless_instances.update(inst for inst in node_instances
2496 if instanceinfo[inst].disk_template == diskless)
2497 disks = [(inst, disk)
2498 for inst in node_instances
2499 for disk in instanceinfo[inst].disks]
2501 if not disks:
2502 # No need to collect data
2503 continue
2505 node_disks[nname] = disks
2507 # Creating copies as SetDiskID below will modify the objects and that can
2508 # lead to incorrect data returned from nodes
2509 devonly = [dev.Copy() for (_, dev) in disks]
2511 for dev in devonly:
2512 self.cfg.SetDiskID(dev, nname)
2514 node_disks_devonly[nname] = devonly
2516 assert len(node_disks) == len(node_disks_devonly)
2518 # Collect data from all nodes with disks
2519 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2520 node_disks_devonly)
2522 assert len(result) == len(node_disks)
2524 instdisk = {}
2526 for (nname, nres) in result.items():
2527 disks = node_disks[nname]
2529 if nres.offline:
2530 # No data from this node
2531 data = len(disks) * [(False, "node offline")]
2532 else:
2533 msg = nres.fail_msg
2534 _ErrorIf(msg, self.ENODERPC, nname,
2535 "while getting disk information: %s", msg)
2536 if msg:
2537 # No data from this node
2538 data = len(disks) * [(False, msg)]
2539 else:
2540 data = []
2541 for idx, i in enumerate(nres.payload):
2542 if isinstance(i, (tuple, list)) and len(i) == 2:
2543 data.append(i)
2544 else:
2545 logging.warning("Invalid result from node %s, entry %d: %s",
2546 nname, idx, i)
2547 data.append((False, "Invalid result from the remote node"))
2549 for ((inst, _), status) in zip(disks, data):
2550 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2552 # Add empty entries for diskless instances.
2553 for inst in diskless_instances:
2554 assert inst not in instdisk
2555 instdisk[inst] = {}
2557 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2558 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2559 compat.all(isinstance(s, (tuple, list)) and
2560 len(s) == 2 for s in statuses)
2561 for inst, nnames in instdisk.items()
2562 for nname, statuses in nnames.items())
2563 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2565 return instdisk
2568 def _SshNodeSelector(group_uuid, all_nodes):
2569 """Create endless iterators for all potential SSH check hosts.
2572 nodes = [node for node in all_nodes
2573 if (node.group != group_uuid and
2574 not node.offline)]
2575 keyfunc = operator.attrgetter("group")
2577 return map(itertools.cycle,
2578 [sorted(map(operator.attrgetter("name"), names))
2579 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2580 keyfunc)])
2583 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2584 """Choose which nodes should talk to which other nodes.
2586 We will make nodes contact all nodes in their group, and one node from
2589 @warning: This algorithm has a known issue if one node group is much
2590 smaller than others (e.g. just one node). In such a case all other
2591 nodes will talk to the single node.
2594 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2595 sel = cls._SshNodeSelector(group_uuid, all_nodes)
2597 return (online_nodes,
2598 dict((name, sorted([i.next() for i in sel]))
2599 for name in online_nodes))
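# --- Illustrative sketch (editor's addition, sample data made up): the
# round-robin selection built by _SshNodeSelector/_SelectSshCheckNodes
# above. Every foreign group contributes an endless iterator over its sorted
# node names; each of our nodes then takes the "next" name from every group,
# spreading the SSH checks evenly.
import itertools

def _SketchSelectSshTargets(our_nodes, other_groups):
  cycles = [itertools.cycle(sorted(names)) for names in other_groups]
  return dict((name, sorted(next(c) for c in cycles))
              for name in sorted(our_nodes))

_sel = _SketchSelectSshTargets(["a1", "a2", "a3"], [["b1", "b2"], ["c1"]])
# The single-node c group is reused for every check: this is the small-group
# imbalance warned about in the docstring above.
assert _sel == {"a1": ["b1", "c1"], "a2": ["b2", "c1"], "a3": ["b1", "c1"]}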
2601 def BuildHooksEnv(self):
2602 """Build hooks env.
2604 Cluster-Verify hooks are run in the post phase only; their failure is
2605 logged in the verify output and makes the verification fail.
2609 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2612 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2613 for node in self.my_node_info.values())
2615 return env
2617 def BuildHooksNodes(self):
2618 """Build hooks nodes.
2621 return ([], self.my_node_names)
2623 def Exec(self, feedback_fn):
2624 """Verify integrity of the node group, performing various test on nodes.
2627 # This method has too many local variables. pylint: disable=R0914
2628 feedback_fn("* Verifying group '%s'" % self.group_info.name)
2630 if not self.my_node_names:
2632 feedback_fn("* Empty node group, skipping verification")
2636 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2637 verbose = self.op.verbose
2638 self._feedback_fn = feedback_fn
2640 vg_name = self.cfg.GetVGName()
2641 drbd_helper = self.cfg.GetDRBDHelper()
2642 cluster = self.cfg.GetClusterInfo()
2643 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2644 hypervisors = cluster.enabled_hypervisors
2645 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2647 i_non_redundant = [] # Non redundant instances
2648 i_non_a_balanced = [] # Non auto-balanced instances
2649 n_offline = 0 # Count of offline nodes
2650 n_drained = 0 # Count of nodes being drained
2651 node_vol_should = {}
2653 # FIXME: verify OS list
2656 filemap = _ComputeAncillaryFiles(cluster, False)
2658 # do local checksums
2659 master_node = self.master_node = self.cfg.GetMasterNode()
2660 master_ip = self.cfg.GetMasterIP()
2662 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2664 node_verify_param = {
2665 constants.NV_FILELIST:
2666 utils.UniqueSequence(filename
2667 for files in filemap
2668 for filename in files),
2669 constants.NV_NODELIST:
2670 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
2671 self.all_node_info.values()),
2672 constants.NV_HYPERVISOR: hypervisors,
2673 constants.NV_HVPARAMS:
2674 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2675 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2676 for node in node_data_list
2677 if not node.offline],
2678 constants.NV_INSTANCELIST: hypervisors,
2679 constants.NV_VERSION: None,
2680 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2681 constants.NV_NODESETUP: None,
2682 constants.NV_TIME: None,
2683 constants.NV_MASTERIP: (master_node, master_ip),
2684 constants.NV_OSLIST: None,
2685 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2686 }
2688 if vg_name is not None:
2689 node_verify_param[constants.NV_VGLIST] = None
2690 node_verify_param[constants.NV_LVLIST] = vg_name
2691 node_verify_param[constants.NV_PVLIST] = [vg_name]
2692 node_verify_param[constants.NV_DRBDLIST] = None
2694 if drbd_helper:
2695 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2698 # FIXME: this needs to be changed per node-group, not cluster-wide
2699 bridges = set()
2700 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2701 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2702 bridges.add(default_nicpp[constants.NIC_LINK])
2703 for instance in self.my_inst_info.values():
2704 for nic in instance.nics:
2705 full_nic = cluster.SimpleFillNIC(nic.nicparams)
2706 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2707 bridges.add(full_nic[constants.NIC_LINK])
2709 if bridges:
2710 node_verify_param[constants.NV_BRIDGES] = list(bridges)
2712 # Build our expected cluster state
2713 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2714 name=node.name,
2715 vm_capable=node.vm_capable))
2716 for node in node_data_list)
2719 oob_paths = []
2720 for node in self.all_node_info.values():
2721 path = _SupportsOob(self.cfg, node)
2722 if path and path not in oob_paths:
2723 oob_paths.append(path)
2725 if oob_paths:
2726 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2728 for instance in self.my_inst_names:
2729 inst_config = self.my_inst_info[instance]
2731 for nname in inst_config.all_nodes:
2732 if nname not in node_image:
2733 gnode = self.NodeImage(name=nname)
2734 gnode.ghost = (nname not in self.all_node_info)
2735 node_image[nname] = gnode
2737 inst_config.MapLVsByNode(node_vol_should)
2739 pnode = inst_config.primary_node
2740 node_image[pnode].pinst.append(instance)
2742 for snode in inst_config.secondary_nodes:
2743 nimg = node_image[snode]
2744 nimg.sinst.append(instance)
2745 if pnode not in nimg.sbp:
2746 nimg.sbp[pnode] = []
2747 nimg.sbp[pnode].append(instance)
2749 # At this point, we have the in-memory data structures complete,
2750 # except for the runtime information, which we'll gather next
2752 # Due to the way our RPC system works, exact response times cannot be
2753 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2754 # time before and after executing the request, we can at least have a time
2756 nvinfo_starttime = time.time()
2757 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2758 node_verify_param,
2759 self.cfg.GetClusterName())
2760 nvinfo_endtime = time.time()
2762 if self.extra_lv_nodes and vg_name is not None:
2763 extra_lv_nvinfo = \
2764 self.rpc.call_node_verify(self.extra_lv_nodes,
2765 {constants.NV_LVLIST: vg_name},
2766 self.cfg.GetClusterName())
2767 else:
2768 extra_lv_nvinfo = {}
2770 all_drbd_map = self.cfg.ComputeDRBDMap()
2772 feedback_fn("* Gathering disk information (%s nodes)" %
2773 len(self.my_node_names))
2774 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2775 self.my_inst_info)
2777 feedback_fn("* Verifying configuration file consistency")
2779 # If not all nodes are being checked, we need to make sure the master node
2780 # and a non-checked vm_capable node are in the list.
2781 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2782 if absent_nodes:
2783 vf_nvinfo = all_nvinfo.copy()
2784 vf_node_info = list(self.my_node_info.values())
2785 additional_nodes = []
2786 if master_node not in self.my_node_info:
2787 additional_nodes.append(master_node)
2788 vf_node_info.append(self.all_node_info[master_node])
2789 # Add the first vm_capable node we find which is not included
2790 for node in absent_nodes:
2791 nodeinfo = self.all_node_info[node]
2792 if nodeinfo.vm_capable and not nodeinfo.offline:
2793 additional_nodes.append(node)
2794 vf_node_info.append(self.all_node_info[node])
2795 break
2796 key = constants.NV_FILELIST
2797 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2798 {key: node_verify_param[key]},
2799 self.cfg.GetClusterName()))
2800 else:
2801 vf_nvinfo = all_nvinfo
2802 vf_node_info = self.my_node_info.values()
2804 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2806 feedback_fn("* Verifying node status")
2810 for node_i in node_data_list:
2812 nimg = node_image[node]
2816 feedback_fn("* Skipping offline node %s" % (node,))
2820 if node == master_node:
2821 ntype = "master"
2822 elif node_i.master_candidate:
2823 ntype = "master candidate"
2824 elif node_i.drained:
2825 ntype = "drained"
2826 n_drained += 1
2827 else:
2828 ntype = "regular"
2829 if verbose:
2830 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2832 msg = all_nvinfo[node].fail_msg
2833 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2834 if msg:
2835 nimg.rpc_fail = True
2836 continue
2838 nresult = all_nvinfo[node].payload
2840 nimg.call_ok = self._VerifyNode(node_i, nresult)
2841 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2842 self._VerifyNodeNetwork(node_i, nresult)
2843 self._VerifyOob(node_i, nresult)
2845 if nimg.vm_capable:
2846 self._VerifyNodeLVM(node_i, nresult, vg_name)
2847 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2848 all_drbd_map)
2850 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2851 self._UpdateNodeInstances(node_i, nresult, nimg)
2852 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2853 self._UpdateNodeOS(node_i, nresult, nimg)
2855 if not nimg.os_fail:
2856 if refos_img is None:
2857 refos_img = nimg
2858 self._VerifyNodeOS(node_i, nimg, refos_img)
2859 self._VerifyNodeBridges(node_i, nresult, bridges)
2861 # Check whether all running instances are primary for the node. (This
2862 # can no longer be done from _VerifyInstance below, since some of the
2863 # wrong instances could be from other node groups.)
2864 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2866 for inst in non_primary_inst:
2867 test = inst in self.all_inst_info
2868 _ErrorIf(test, self.EINSTANCEWRONGNODE, inst,
2869 "instance should not run on node %s", node_i.name)
2870 _ErrorIf(not test, self.ENODEORPHANINSTANCE, node_i.name,
2871 "node is running unknown instance %s", inst)
2873 for node, result in extra_lv_nvinfo.items():
2874 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
2875 node_image[node], vg_name)
2877 feedback_fn("* Verifying instance status")
2878 for instance in self.my_inst_names:
2879 if verbose:
2880 feedback_fn("* Verifying instance %s" % instance)
2881 inst_config = self.my_inst_info[instance]
2882 self._VerifyInstance(instance, inst_config, node_image,
2883 instdisk[instance])
2884 inst_nodes_offline = []
2886 pnode = inst_config.primary_node
2887 pnode_img = node_image[pnode]
2888 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2889 self.ENODERPC, pnode, "instance %s, connection to"
2890 " primary node failed", instance)
2892 _ErrorIf(inst_config.admin_up and pnode_img.offline,
2893 self.EINSTANCEBADNODE, instance,
2894 "instance is marked as running and lives on offline node %s",
2895 inst_config.primary_node)
2897 # If the instance is non-redundant we cannot survive losing its primary
2898 # node, so we are not N+1 compliant. On the other hand we have no disk
2899 # templates with more than one secondary so that situation is not well
2900 # supported either.
2901 # FIXME: does not support file-backed instances
2902 if not inst_config.secondary_nodes:
2903 i_non_redundant.append(instance)
2905 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2906 instance, "instance has multiple secondary nodes: %s",
2907 utils.CommaJoin(inst_config.secondary_nodes),
2908 code=self.ETYPE_WARNING)
2910 if inst_config.disk_template in constants.DTS_INT_MIRROR:
2911 pnode = inst_config.primary_node
2912 instance_nodes = utils.NiceSort(inst_config.all_nodes)
2913 instance_groups = {}
2915 for node in instance_nodes:
2916 instance_groups.setdefault(self.all_node_info[node].group,
2917 []).append(node)
2919 pretty_list = [
2920 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2921 # Sort so that we always list the primary node first.
2922 for group, nodes in sorted(instance_groups.items(),
2923 key=lambda (_, nodes): pnode in nodes,
2924 reverse=True)]
2926 self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2927 instance, "instance has primary and secondary nodes in"
2928 " different groups: %s", utils.CommaJoin(pretty_list),
2929 code=self.ETYPE_WARNING)
2931 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2932 i_non_a_balanced.append(instance)
2934 for snode in inst_config.secondary_nodes:
2935 s_img = node_image[snode]
2936 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2937 "instance %s, connection to secondary node failed", instance)
2939 if s_img.offline:
2940 inst_nodes_offline.append(snode)
2942 # warn that the instance lives on offline nodes
2943 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2944 "instance has offline secondary node(s) %s",
2945 utils.CommaJoin(inst_nodes_offline))
2946 # ... or ghost/non-vm_capable nodes
2947 for node in inst_config.all_nodes:
2948 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2949 "instance lives on ghost node %s", node)
2950 _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2951 instance, "instance lives on non-vm_capable node %s", node)
2953 feedback_fn("* Verifying orphan volumes")
2954 reserved = utils.FieldSet(*cluster.reserved_lvs)
2956 # We will get spurious "unknown volume" warnings if any node of this group
2957 # is secondary for an instance whose primary is in another group. To avoid
2958 # them, we find these instances and add their volumes to node_vol_should.
2959 for inst in self.all_inst_info.values():
2960 for secondary in inst.secondary_nodes:
2961 if (secondary in self.my_node_info
2962 and inst.name not in self.my_inst_info):
2963 inst.MapLVsByNode(node_vol_should)
2966 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2968 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2969 feedback_fn("* Verifying N+1 Memory redundancy")
2970 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
2972 feedback_fn("* Other Notes")
2974 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
2975 % len(i_non_redundant))
2977 if i_non_a_balanced:
2978 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
2979 % len(i_non_a_balanced))
2982 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
2985 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
2989 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2990 """Analyze the post-hooks' result
2992 This method analyses the hook result, handles it, and sends some
2993 nicely-formatted feedback back to the user.
2995 @param phase: one of L{constants.HOOKS_PHASE_POST} or
2996 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2997 @param hooks_results: the results of the multi-node hooks rpc call
2998 @param feedback_fn: function used to send feedback back to the caller
2999 @param lu_result: previous Exec result
3000 @return: the new Exec result, based on the previous result
3004 # We only really run POST phase hooks, only for non-empty groups,
3005 # and are only interested in their results
3006 if not self.my_node_names:
3007 # empty node group
3008 pass
3009 elif phase == constants.HOOKS_PHASE_POST:
3010 # Used to change hooks' output to proper indentation
3011 feedback_fn("* Hooks Results")
3012 assert hooks_results, "invalid result from hooks"
3014 for node_name in hooks_results:
3015 res = hooks_results[node_name]
3016 msg = res.fail_msg
3017 test = msg and not res.offline
3018 self._ErrorIf(test, self.ENODEHOOKS, node_name,
3019 "Communication failure in hooks execution: %s", msg)
3020 if res.offline or msg:
3021 # No need to investigate payload if node is offline or gave
3022 # an error message
3023 continue
3024 for script, hkr, output in res.payload:
3025 test = hkr == constants.HKR_FAIL
3026 self._ErrorIf(test, self.ENODEHOOKS, node_name,
3027 "Script %s failed, output:", script)
3028 if test:
3029 output = self._HOOKS_INDENT_RE.sub(" ", output)
3030 feedback_fn("%s" % output)
3031 lu_result = False
3033 return lu_result
3036 class LUClusterVerifyDisks(NoHooksLU):
3037 """Verifies the cluster disks status.
3042 def ExpandNames(self):
3043 self.share_locks = _ShareAll()
3044 self.needed_locks = {
3045 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3046 }
3048 def Exec(self, feedback_fn):
3049 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3051 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3052 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3053 for group in group_names])
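# --- Illustrative sketch (editor's addition, no Ganeti runtime needed): the
# fan-out pattern used by LUClusterVerifyDisks.Exec above -- one
# single-opcode job per node group, so each group is verified in parallel
# under its own locks. A "job" here is simply a list of opcodes.
def _SketchPerGroupJobs(group_names, make_opcode):
  return [[make_opcode(group)] for group in group_names]

_jobs = _SketchPerGroupJobs(["default", "rack1"],
                            lambda g: ("OP_GROUP_VERIFY_DISKS", g))
assert _jobs == [[("OP_GROUP_VERIFY_DISKS", "default")],
                 [("OP_GROUP_VERIFY_DISKS", "rack1")]]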
3056 class LUGroupVerifyDisks(NoHooksLU):
3057 """Verifies the status of all disks in a node group.
3062 def ExpandNames(self):
3063 # Raises errors.OpPrereqError on its own if group can't be found
3064 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3066 self.share_locks = _ShareAll()
3067 self.needed_locks = {
3068 locking.LEVEL_INSTANCE: [],
3069 locking.LEVEL_NODEGROUP: [],
3070 locking.LEVEL_NODE: [],
3071 }
3073 def DeclareLocks(self, level):
3074 if level == locking.LEVEL_INSTANCE:
3075 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3077 # Lock instances optimistically, needs verification once node and group
3078 # locks have been acquired
3079 self.needed_locks[locking.LEVEL_INSTANCE] = \
3080 self.cfg.GetNodeGroupInstances(self.group_uuid)
3082 elif level == locking.LEVEL_NODEGROUP:
3083 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3085 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3086 set([self.group_uuid] +
3087 # Lock all groups used by instances optimistically; this requires
3088 # going via the node before it's locked, requiring verification
3089 # later on
3090 [group_uuid
3091 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3092 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3094 elif level == locking.LEVEL_NODE:
3095 # This will only lock the nodes in the group to be verified which contain
3096 # actual instances
3097 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3098 self._LockInstancesNodes()
3100 # Lock all nodes in group to be verified
3101 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3102 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3103 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3105 def CheckPrereq(self):
3106 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3107 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3108 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3110 assert self.group_uuid in owned_groups
3112 # Check if locked instances are still correct
3113 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3115 # Get instance information
3116 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3118 # Check if node groups for locked instances are still correct
3119 for (instance_name, inst) in self.instances.items():
3120 assert owned_nodes.issuperset(inst.all_nodes), \
3121 "Instance %s's nodes changed while we kept the lock" % instance_name
3123 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3124 owned_groups)
3126 assert self.group_uuid in inst_groups, \
3127 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3129 def Exec(self, feedback_fn):
3130 """Verify integrity of cluster disks.
3132 @rtype: tuple of three items
3133 @return: a tuple of (dict of node-to-node_error, list of instances
3134 which need activate-disks, dict of instance: (node, volume) for
3135 missing volumes)
3138 res_nodes = {}
3139 res_instances = set()
3140 res_missing = {}
3142 nv_dict = _MapInstanceDisksToNodes([inst
3143 for inst in self.instances.values()
3144 if inst.admin_up])
3147 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3148 set(self.cfg.GetVmCapableNodeList()))
3150 node_lvs = self.rpc.call_lv_list(nodes, [])
3152 for (node, node_res) in node_lvs.items():
3153 if node_res.offline:
3154 continue
3156 msg = node_res.fail_msg
3157 if msg:
3158 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3159 res_nodes[node] = msg
3160 continue
3162 for lv_name, (_, _, lv_online) in node_res.payload.items():
3163 inst = nv_dict.pop((node, lv_name), None)
3164 if not (lv_online or inst is None):
3165 res_instances.add(inst)
3167 # any leftover items in nv_dict are missing LVs, let's arrange the data
3169 for key, inst in nv_dict.iteritems():
3170 res_missing.setdefault(inst, []).append(list(key))
3172 return (res_nodes, list(res_instances), res_missing)
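# --- Illustrative sketch (editor's addition, sample data made up): the
# pop-and-leftover technique used in Exec above. Every (node, LV name) we
# expect maps to its owning instance; each LV actually reported is popped,
# offline LVs flag their instance for activate-disks, and whatever is left
# in the map afterwards is missing on disk.
def _SketchCheckLvs(expected, reported):
  """@param expected: dict (node, lv_name) -> instance
  @param reported: dict node -> {lv_name: lv_online}
  """
  needs_activation = set()
  leftovers = dict(expected)
  for node, lvs in reported.items():
    for lv_name, lv_online in lvs.items():
      inst = leftovers.pop((node, lv_name), None)
      if inst is not None and not lv_online:
        needs_activation.add(inst)
  return needs_activation, leftovers

_act, _left = _SketchCheckLvs(
  {("n1", "lv0"): "inst1", ("n1", "lv1"): "inst1"},
  {"n1": {"lv0": False}})
assert _act == set(["inst1"]) and list(_left) == [("n1", "lv1")]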
3175 class LUClusterRepairDiskSizes(NoHooksLU):
3176 """Verifies the cluster disks sizes.
3181 def ExpandNames(self):
3182 if self.op.instances:
3183 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3184 self.needed_locks = {
3185 locking.LEVEL_NODE: [],
3186 locking.LEVEL_INSTANCE: self.wanted_names,
3187 }
3188 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3190 self.wanted_names = None
3191 self.needed_locks = {
3192 locking.LEVEL_NODE: locking.ALL_SET,
3193 locking.LEVEL_INSTANCE: locking.ALL_SET,
3194 }
3195 self.share_locks = {
3196 locking.LEVEL_NODE: 1,
3197 locking.LEVEL_INSTANCE: 0,
3198 }
3200 def DeclareLocks(self, level):
3201 if level == locking.LEVEL_NODE and self.wanted_names is not None:
3202 self._LockInstancesNodes(primary_only=True)
3204 def CheckPrereq(self):
3205 """Check prerequisites.
3207 This only checks the optional instance list against the existing names.
3210 if self.wanted_names is None:
3211 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3213 self.wanted_instances = \
3214 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3216 def _EnsureChildSizes(self, disk):
3217 """Ensure children of the disk have the needed disk size.
3219 This is valid mainly for DRBD8 and fixes an issue where the
3220 children have smaller disk size.
3222 @param disk: an L{ganeti.objects.Disk} object
3225 if disk.dev_type == constants.LD_DRBD8:
3226 assert disk.children, "Empty children for DRBD8?"
3227 fchild = disk.children[0]
3228 mismatch = fchild.size < disk.size
3230 self.LogInfo("Child disk has size %d, parent %d, fixing",
3231 fchild.size, disk.size)
3232 fchild.size = disk.size
3234 # and we recurse on this child only, not on the metadev
3235 return self._EnsureChildSizes(fchild) or mismatch
3236 else:
3237 return False
3239 def Exec(self, feedback_fn):
3240 """Verify the size of cluster disks.
3243 # TODO: check child disks too
3244 # TODO: check differences in size between primary/secondary nodes
3245 per_node_disks = {}
3246 for instance in self.wanted_instances:
3247 pnode = instance.primary_node
3248 if pnode not in per_node_disks:
3249 per_node_disks[pnode] = []
3250 for idx, disk in enumerate(instance.disks):
3251 per_node_disks[pnode].append((instance, idx, disk))
3253 changed = []
3254 for node, dskl in per_node_disks.items():
3255 newl = [v[2].Copy() for v in dskl]
3256 for dsk in newl:
3257 self.cfg.SetDiskID(dsk, node)
3258 result = self.rpc.call_blockdev_getsize(node, newl)
3260 self.LogWarning("Failure in blockdev_getsize call to node"
3261 " %s, ignoring", node)
3263 if len(result.payload) != len(dskl):
3264 logging.warning("Invalid result from node %s: len(dksl)=%d,"
3265 " result.payload=%s", node, len(dskl), result.payload)
3266 self.LogWarning("Invalid result from node %s, ignoring node results",
3269 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3270 if size is None:
3271 self.LogWarning("Disk %d of instance %s did not return size"
3272 " information, ignoring", idx, instance.name)
3273 continue
3274 if not isinstance(size, (int, long)):
3275 self.LogWarning("Disk %d of instance %s did not return valid"
3276 " size information, ignoring", idx, instance.name)
3279 if size != disk.size:
3280 self.LogInfo("Disk %d of instance %s has mismatched size,"
3281 " correcting: recorded %d, actual %d", idx,
3282 instance.name, disk.size, size)
3283 disk.size = size
3284 self.cfg.Update(instance, feedback_fn)
3285 changed.append((instance.name, idx, size))
3286 if self._EnsureChildSizes(disk):
3287 self.cfg.Update(instance, feedback_fn)
3288 changed.append((instance.name, idx, disk.size))
3290 return changed
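# --- Illustrative sketch (editor's addition): the record-vs-actual
# reconciliation done in Exec above, on plain data. Sizes come back from the
# node in the order the disks were sent, so zip() pairs them up; only
# well-formed, differing sizes produce a fix. (The int/long distinction and
# the byte-to-MiB shift from the real code are glossed over here.)
def _SketchSizeFixes(recorded, actual):
  """@param recorded: list of (disk_id, size_mib) as stored in the config
  @param actual: list of sizes reported by the node, in the same order
  """
  fixes = []
  for (disk_id, rec_size), size in zip(recorded, actual):
    if not isinstance(size, int):
      continue  # missing or invalid answer: skip, as the LU above does
    if size != rec_size:
      fixes.append((disk_id, rec_size, size))
  return fixes

assert _SketchSizeFixes([("disk0", 1024), ("disk1", 2048)],
                        [1024, 2176]) == [("disk1", 2048, 2176)]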
3292 class LUClusterRename(LogicalUnit):
3293 """Rename the cluster.
3296 HPATH = "cluster-rename"
3297 HTYPE = constants.HTYPE_CLUSTER
3299 def BuildHooksEnv(self):
3304 "OP_TARGET": self.cfg.GetClusterName(),
3305 "NEW_NAME": self.op.name,
3308 def BuildHooksNodes(self):
3309 """Build hooks nodes.
3312 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3314 def CheckPrereq(self):
3315 """Verify that the passed name is a valid one.
3318 hostname = netutils.GetHostname(name=self.op.name,
3319 family=self.cfg.GetPrimaryIPFamily())
3321 new_name = hostname.name
3322 self.ip = new_ip = hostname.ip
3323 old_name = self.cfg.GetClusterName()
3324 old_ip = self.cfg.GetMasterIP()
3325 if new_name == old_name and new_ip == old_ip:
3326 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3327 " cluster has changed",
3329 if new_ip != old_ip:
3330 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3331 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3332 " reachable on the network" %
3333 new_ip, errors.ECODE_NOTUNIQUE)
3335 self.op.name = new_name
3337 def Exec(self, feedback_fn):
3338 """Rename the cluster.
3341 clustername = self.op.name
3342 ip = self.ip
3344 # shutdown the master IP
3345 master = self.cfg.GetMasterNode()
3346 result = self.rpc.call_node_stop_master(master, False)
3347 result.Raise("Could not disable the master role")
3349 try:
3350 cluster = self.cfg.GetClusterInfo()
3351 cluster.cluster_name = clustername
3352 cluster.master_ip = ip
3353 self.cfg.Update(cluster, feedback_fn)
3355 # update the known hosts file
3356 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3357 node_list = self.cfg.GetOnlineNodeList()
3358 try:
3359 node_list.remove(master)
3360 except ValueError:
3361 pass
3362 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3363 finally:
3364 result = self.rpc.call_node_start_master(master, False, False)
3365 msg = result.fail_msg
3366 if msg:
3367 self.LogWarning("Could not re-enable the master role on"
3368 " the master, please restart manually: %s", msg)
3373 class LUClusterSetParams(LogicalUnit):
3374 """Change the parameters of the cluster.
3377 HPATH = "cluster-modify"
3378 HTYPE = constants.HTYPE_CLUSTER
3381 def CheckArguments(self):
3385 if self.op.uid_pool:
3386 uidpool.CheckUidPool(self.op.uid_pool)
3388 if self.op.add_uids:
3389 uidpool.CheckUidPool(self.op.add_uids)
3391 if self.op.remove_uids:
3392 uidpool.CheckUidPool(self.op.remove_uids)
3394 def ExpandNames(self):
3395 # FIXME: in the future maybe other cluster params won't require checking on
3396 # all nodes to be modified.
3397 self.needed_locks = {
3398 locking.LEVEL_NODE: locking.ALL_SET,
3399 }
3400 self.share_locks[locking.LEVEL_NODE] = 1
3402 def BuildHooksEnv(self):
3407 "OP_TARGET": self.cfg.GetClusterName(),
3408 "NEW_VG_NAME": self.op.vg_name,
3411 def BuildHooksNodes(self):
3412 """Build hooks nodes.
3415 mn = self.cfg.GetMasterNode()
3416 return ([mn], [mn])
3418 def CheckPrereq(self):
3419 """Check prerequisites.
3421 This checks whether the given params don't conflict and
3422 if the given volume group is valid.
3425 if self.op.vg_name is not None and not self.op.vg_name:
3426 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3427 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3428 " instances exist", errors.ECODE_INVAL)
3430 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3431 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3432 raise errors.OpPrereqError("Cannot disable drbd helper while"
3433 " drbd-based instances exist",
3436 node_list = self.owned_locks(locking.LEVEL_NODE)
3438 # if vg_name not None, checks given volume group on all nodes
3439 if self.op.vg_name:
3440 vglist = self.rpc.call_vg_list(node_list)
3441 for node in node_list:
3442 msg = vglist[node].fail_msg
3443 if msg:
3444 # ignoring down node
3445 self.LogWarning("Error while gathering data on node %s"
3446 " (ignoring node): %s", node, msg)
3448 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3450 constants.MIN_VG_SIZE)
3452 raise errors.OpPrereqError("Error on node '%s': %s" %
3453 (node, vgstatus), errors.ECODE_ENVIRON)
3455 if self.op.drbd_helper:
3456 # checks given drbd helper on all nodes
3457 helpers = self.rpc.call_drbd_helper(node_list)
3458 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3460 self.LogInfo("Not checking drbd helper on offline node %s", node)
3462 msg = helpers[node].fail_msg
3463 if msg:
3464 raise errors.OpPrereqError("Error checking drbd helper on node"
3465 " '%s': %s" % (node, msg),
3466 errors.ECODE_ENVIRON)
3467 node_helper = helpers[node].payload
3468 if node_helper != self.op.drbd_helper:
3469 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3470 (node, node_helper), errors.ECODE_ENVIRON)
3472 self.cluster = cluster = self.cfg.GetClusterInfo()
3473 # validate params changes
3474 if self.op.beparams:
3475 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3476 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3478 if self.op.ndparams:
3479 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3480 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3482 # TODO: we need a more general way to handle resetting
3483 # cluster-level parameters to default values
3484 if self.new_ndparams["oob_program"] == "":
3485 self.new_ndparams["oob_program"] = \
3486 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3488 if self.op.nicparams:
3489 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3490 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3491 objects.NIC.CheckParameterSyntax(self.new_nicparams)
3492 nic_errors = []
3494 # check all instances for consistency
3495 for instance in self.cfg.GetAllInstancesInfo().values():
3496 for nic_idx, nic in enumerate(instance.nics):
3497 params_copy = copy.deepcopy(nic.nicparams)
3498 params_filled = objects.FillDict(self.new_nicparams, params_copy)
3500 # check parameter syntax
3501 try:
3502 objects.NIC.CheckParameterSyntax(params_filled)
3503 except errors.ConfigurationError, err:
3504 nic_errors.append("Instance %s, nic/%d: %s" %
3505 (instance.name, nic_idx, err))
3506 continue
3507 # if we're moving instances to routed, check that they have an ip
3508 target_mode = params_filled[constants.NIC_MODE]
3509 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3510 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3511 " address" % (instance.name, nic_idx))
3513 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3514 "\n".join(nic_errors))
3516 # hypervisor list/parameters
3517 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3518 if self.op.hvparams:
3519 for hv_name, hv_dict in self.op.hvparams.items():
3520 if hv_name not in self.new_hvparams:
3521 self.new_hvparams[hv_name] = hv_dict
3522 else:
3523 self.new_hvparams[hv_name].update(hv_dict)
3525 # os hypervisor parameters
3526 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3527 if self.op.os_hvp:
3528 for os_name, hvs in self.op.os_hvp.items():
3529 if os_name not in self.new_os_hvp:
3530 self.new_os_hvp[os_name] = hvs
3531 else:
3532 for hv_name, hv_dict in hvs.items():
3533 if hv_name not in self.new_os_hvp[os_name]:
3534 self.new_os_hvp[os_name][hv_name] = hv_dict
3535 else:
3536 self.new_os_hvp[os_name][hv_name].update(hv_dict)
3539 self.new_osp = objects.FillDict(cluster.osparams, {})
3540 if self.op.osparams:
3541 for os_name, osp in self.op.osparams.items():
3542 if os_name not in self.new_osp:
3543 self.new_osp[os_name] = {}
3545 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3546 use_none=True)
3548 if not self.new_osp[os_name]:
3549 # we removed all parameters
3550 del self.new_osp[os_name]
3552 # check the parameter validity (remote check)
3553 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3554 os_name, self.new_osp[os_name])
3556 # changes to the hypervisor list
3557 if self.op.enabled_hypervisors is not None:
3558 self.hv_list = self.op.enabled_hypervisors
3559 for hv in self.hv_list:
3560 # if the hypervisor doesn't already exist in the cluster
3561 # hvparams, we initialize it to empty, and then (in both
3562 # cases) we make sure to fill the defaults, as we might not
3563 # have a complete defaults list if the hypervisor wasn't
3565 if hv not in new_hvp:
3566 new_hvp[hv] = {}
3567 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3568 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3569 else:
3570 self.hv_list = cluster.enabled_hypervisors
3572 if self.op.hvparams or self.op.enabled_hypervisors is not None:
3573 # either the enabled list has changed, or the parameters have, validate
3574 for hv_name, hv_params in self.new_hvparams.items():
3575 if ((self.op.hvparams and hv_name in self.op.hvparams) or
3576 (self.op.enabled_hypervisors and
3577 hv_name in self.op.enabled_hypervisors)):
3578 # either this is a new hypervisor, or its parameters have changed
3579 hv_class = hypervisor.GetHypervisor(hv_name)
3580 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3581 hv_class.CheckParameterSyntax(hv_params)
3582 _CheckHVParams(self, node_list, hv_name, hv_params)
3584 if self.op.os_hvp:
3585 # no need to check any newly-enabled hypervisors, since the
3586 # defaults have already been checked in the above code-block
3587 for os_name, os_hvp in self.new_os_hvp.items():
3588 for hv_name, hv_params in os_hvp.items():
3589 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3590 # we need to fill in the new os_hvp on top of the actual hv_p
3591 cluster_defaults = self.new_hvparams.get(hv_name, {})
3592 new_osp = objects.FillDict(cluster_defaults, hv_params)
3593 hv_class = hypervisor.GetHypervisor(hv_name)
3594 hv_class.CheckParameterSyntax(new_osp)
3595 _CheckHVParams(self, node_list, hv_name, new_osp)
3597 if self.op.default_iallocator:
3598 alloc_script = utils.FindFile(self.op.default_iallocator,
3599 constants.IALLOCATOR_SEARCH_PATH,
3600 os.path.isfile)
3601 if alloc_script is None:
3602 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3603 " specified" % self.op.default_iallocator,
3606 def Exec(self, feedback_fn):
3607 """Change the parameters of the cluster.
3610 if self.op.vg_name is not None:
3611 new_volume = self.op.vg_name
3612 if not new_volume:
3613 new_volume = None
3614 if new_volume != self.cfg.GetVGName():
3615 self.cfg.SetVGName(new_volume)
3616 else:
3617 feedback_fn("Cluster LVM configuration already in desired"
3618 " state, not changing")
3619 if self.op.drbd_helper is not None:
3620 new_helper = self.op.drbd_helper
3621 if not new_helper:
3622 new_helper = None
3623 if new_helper != self.cfg.GetDRBDHelper():
3624 self.cfg.SetDRBDHelper(new_helper)
3625 else:
3626 feedback_fn("Cluster DRBD helper already in desired state,"
3627 " not changing")
3628 if self.op.hvparams:
3629 self.cluster.hvparams = self.new_hvparams
3630 if self.op.os_hvp:
3631 self.cluster.os_hvp = self.new_os_hvp
3632 if self.op.enabled_hypervisors is not None:
3633 self.cluster.hvparams = self.new_hvparams
3634 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3635 if self.op.beparams:
3636 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3637 if self.op.nicparams:
3638 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3639 if self.op.osparams:
3640 self.cluster.osparams = self.new_osp
3641 if self.op.ndparams:
3642 self.cluster.ndparams = self.new_ndparams
3644 if self.op.candidate_pool_size is not None:
3645 self.cluster.candidate_pool_size = self.op.candidate_pool_size
3646 # we need to update the pool size here, otherwise the save will fail
3647 _AdjustCandidatePool(self, [])
3649 if self.op.maintain_node_health is not None:
3650 self.cluster.maintain_node_health = self.op.maintain_node_health
3652 if self.op.prealloc_wipe_disks is not None:
3653 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3655 if self.op.add_uids is not None:
3656 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3658 if self.op.remove_uids is not None:
3659 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3661 if self.op.uid_pool is not None:
3662 self.cluster.uid_pool = self.op.uid_pool
3664 if self.op.default_iallocator is not None:
3665 self.cluster.default_iallocator = self.op.default_iallocator
3667 if self.op.reserved_lvs is not None:
3668 self.cluster.reserved_lvs = self.op.reserved_lvs
3670 def helper_os(aname, mods, desc):
3671 desc += " OS list"
3672 lst = getattr(self.cluster, aname)
3673 for key, val in mods:
3674 if key == constants.DDM_ADD:
3675 if val in lst:
3676 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3677 else:
3678 lst.append(val)
3679 elif key == constants.DDM_REMOVE:
3680 if val in lst:
3681 lst.remove(val)
3682 else:
3683 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3684 else:
3685 raise errors.ProgrammerError("Invalid modification '%s'" % key)
3687 if self.op.hidden_os:
3688 helper_os("hidden_os", self.op.hidden_os, "hidden")
3690 if self.op.blacklisted_os:
3691 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3693 if self.op.master_netdev:
3694 master = self.cfg.GetMasterNode()
3695 feedback_fn("Shutting down master ip on the current netdev (%s)" %
3696 self.cluster.master_netdev)
3697 result = self.rpc.call_node_stop_master(master, False)
3698 result.Raise("Could not disable the master ip")
3699 feedback_fn("Changing master_netdev from %s to %s" %
3700 (self.cluster.master_netdev, self.op.master_netdev))
3701 self.cluster.master_netdev = self.op.master_netdev
3703 self.cfg.Update(self.cluster, feedback_fn)
3705 if self.op.master_netdev:
3706 feedback_fn("Starting the master ip on the new master netdev (%s)" %
3707 self.op.master_netdev)
3708 result = self.rpc.call_node_start_master(master, False, False)
3710 self.LogWarning("Could not re-enable the master ip on"
3711 " the master, please restart manually: %s",
def _UploadHelper(lu, nodes, fname):
  """Helper for uploading a file and showing warnings.

  """
  if os.path.exists(fname):
    result = lu.rpc.call_upload_file(nodes, fname)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        msg = ("Copy of file %s to node %s failed: %s" %
               (fname, to_node, msg))
        lu.proc.LogWarning(msg)
def _ComputeAncillaryFiles(cluster, redist):
  """Compute files external to Ganeti which need to be consistent.

  @type redist: boolean
  @param redist: Whether to include files which need to be redistributed

  """
  # Compute files for all nodes
  files_all = set([
    constants.SSH_KNOWN_HOSTS_FILE,
    constants.CONFD_HMAC_KEY,
    constants.CLUSTER_DOMAIN_SECRET_FILE,
    ])

  if not redist:
    files_all.update(constants.ALL_CERT_FILES)
    files_all.update(ssconf.SimpleStore().GetFileList())
  else:
    # we need to ship at least the RAPI certificate
    files_all.add(constants.RAPI_CERT_FILE)

  if cluster.modify_etc_hosts:
    files_all.add(constants.ETC_HOSTS)

  # Files which must either exist on all nodes or on none
  files_all_opt = set([
    constants.RAPI_USERS_FILE,
    ])

  # Files which should only be on master candidates
  files_mc = set()
  if not redist:
    files_mc.add(constants.CLUSTER_CONF_FILE)

  # Files which should only be on VM-capable nodes
  files_vm = set(filename
    for hv_name in cluster.enabled_hypervisors
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())

  # Filenames must be unique
  assert (len(files_all | files_all_opt | files_mc | files_vm) ==
          sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
    "Found file listed in more than one file list"

  return (files_all, files_all_opt, files_mc, files_vm)
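# Editor's note (illustrative sketch, not part of the original module):
# callers unpack the four computed sets, e.g.:
#
#   (files_all, files_all_opt, files_mc, files_vm) = \
#     _ComputeAncillaryFiles(cluster, True)
#
# With redist=True only files that are safe to re-upload are included
# (certificates and ssconf files are left to ConfigWriter, plus the RAPI
# certificate); with redist=False the full list needed when setting up a
# node is computed.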
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to
  @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
  # Gather target nodes
  cluster = lu.cfg.GetClusterInfo()
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())

  online_nodes = lu.cfg.GetOnlineNodeList()
  vm_nodes = lu.cfg.GetVmCapableNodeList()

  if additional_nodes is not None:
    online_nodes.extend(additional_nodes)
    if additional_vm:
      vm_nodes.extend(additional_nodes)

  # Never distribute to master node
  for nodelist in [online_nodes, vm_nodes]:
    if master_info.name in nodelist:
      nodelist.remove(master_info.name)

  # Gather file lists
  (files_all, files_all_opt, files_mc, files_vm) = \
    _ComputeAncillaryFiles(cluster, True)

  # Never re-distribute configuration file from here
  assert not (constants.CLUSTER_CONF_FILE in files_all or
              constants.CLUSTER_CONF_FILE in files_vm)
  assert not files_mc, "Master candidates not handled in this function"

  filemap = [
    (online_nodes, files_all),
    (online_nodes, files_all_opt),
    (vm_nodes, files_vm),
    ]

  # Upload the files
  for (node_list, files) in filemap:
    for fname in files:
      _UploadHelper(lu, node_list, fname)
class LUClusterRedistConf(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      }
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)
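# Editor's note (illustrative, not part of the original module): this LU
# backs "gnt-cluster redist-conf"; a client would submit something like
#
#   op = opcodes.OpClusterRedistConf()
#
# The Update() call above pushes config.data and the ssconf files via
# ConfigWriter, while _RedistributeAncillaryFiles() copies everything else
# (known_hosts, the HMAC key, hypervisor ancillary files, and so on).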
def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  """
  if not instance.disks or disks is not None and not disks:
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10  # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                      node, disks[i].iv_name)
        continue

      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (disks[i].iv_name, mstat.sync_percent, rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded
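# Editor's note (illustrative, not part of the original module): the loop
# above polls at most every min(60, max_time) seconds while syncing, retries
# failed RPCs up to 10 times with a 6 second pause, and allows up to 10
# one-second re-checks when disks report "done but degraded", so a transient
# DRBD state does not produce a spurious failure. Roughly:
#
#   while True:
#     poll()                      # call_blockdev_getmirrorstatus
#     if rpc_failed: retry (<= 10 times, 6s apart) or raise RemoteError
#     if (done or oneshot) and degraded and retries_left: sleep(1); continue
#     if done or oneshot: break
#     sleep(min(60, estimated_remaining_time))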
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)

  return result
class LUOobCommand(NoHooksLU):
  """Logical unit for OOB handling.

  """
  REQ_BGL = False
  _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)

  def ExpandNames(self):
    """Gather locks we need.

    """
    if self.op.node_names:
      self.op.node_names = _GetWantedNodes(self, self.op.node_names)
      lock_names = self.op.node_names
    else:
      lock_names = locking.ALL_SET

    self.needed_locks = {
      locking.LEVEL_NODE: lock_names,
      }
  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - OOB is supported

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.nodes = []
    self.master_node = self.cfg.GetMasterNode()

    assert self.op.power_delay >= 0.0

    if self.op.node_names:
      if (self.op.command in self._SKIP_MASTER and
          self.master_node in self.op.node_names):
        master_node_obj = self.cfg.GetNodeInfo(self.master_node)
        master_oob_handler = _SupportsOob(self.cfg, master_node_obj)

        if master_oob_handler:
          additional_text = ("run '%s %s %s' if you want to operate on the"
                             " master regardless") % (master_oob_handler,
                                                      self.op.command,
                                                      self.master_node)
        else:
          additional_text = "it does not support out-of-band operations"

        raise errors.OpPrereqError(("Operating on the master node %s is not"
                                    " allowed for %s; %s") %
                                   (self.master_node, self.op.command,
                                    additional_text), errors.ECODE_INVAL)
    else:
      self.op.node_names = self.cfg.GetNodeList()
      if self.op.command in self._SKIP_MASTER:
        self.op.node_names.remove(self.master_node)

    if self.op.command in self._SKIP_MASTER:
      assert self.master_node not in self.op.node_names

    for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
      if node is None:
        raise errors.OpPrereqError("Node %s not found" % node_name,
                                   errors.ECODE_NOENT)
      else:
        self.nodes.append(node)

      if (not self.op.ignore_status and
          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
                                    " not marked offline") % node_name,
                                   errors.ECODE_STATE)
  def Exec(self, feedback_fn):
    """Execute OOB and return result if we expect any.

    """
    master_node = self.master_node
    ret = []

    for idx, node in enumerate(utils.NiceSort(self.nodes,
                                              key=lambda node: node.name)):
      node_entry = [(constants.RS_NORMAL, node.name)]
      ret.append(node_entry)

      oob_program = _SupportsOob(self.cfg, node)

      if not oob_program:
        node_entry.append((constants.RS_UNAVAIL, None))
        continue

      logging.info("Executing out-of-band command '%s' using '%s' on %s",
                   self.op.command, oob_program, node.name)
      result = self.rpc.call_run_oob(master_node, oob_program,
                                     self.op.command, node.name,
                                     self.op.timeout)

      if result.fail_msg:
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
                        node.name, result.fail_msg)
        node_entry.append((constants.RS_NODATA, None))
      else:
        try:
          self._CheckPayload(result)
        except errors.OpExecError, err:
          self.LogWarning("Payload returned by node '%s' is not valid: %s",
                          node.name, err)
          node_entry.append((constants.RS_NODATA, None))
        else:
          if self.op.command == constants.OOB_HEALTH:
            # For health we should log important events
            for item, status in result.payload:
              if status in [constants.OOB_STATUS_WARNING,
                            constants.OOB_STATUS_CRITICAL]:
                self.LogWarning("Item '%s' on node '%s' has status '%s'",
                                item, node.name, status)

          if self.op.command == constants.OOB_POWER_ON:
            node.powered = True
          elif self.op.command == constants.OOB_POWER_OFF:
            node.powered = False
          elif self.op.command == constants.OOB_POWER_STATUS:
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
            if powered != node.powered:
              logging.warning(("Recorded power state (%s) of node '%s' does"
                               " not match actual power state (%s)"),
                              node.powered, node.name, powered)

          # For configuration changing commands we should update the node
          if self.op.command in (constants.OOB_POWER_ON,
                                 constants.OOB_POWER_OFF):
            self.cfg.Update(node, feedback_fn)

          node_entry.append((constants.RS_NORMAL, result.payload))

          if (self.op.command == constants.OOB_POWER_ON and
              idx < len(self.nodes) - 1):
            time.sleep(self.op.power_delay)

    return ret
  def _CheckPayload(self, result):
    """Checks if the payload is valid.

    @param result: RPC result
    @raises errors.OpExecError: If payload is not valid

    """
    errs = []
    if self.op.command == constants.OOB_HEALTH:
      if not isinstance(result.payload, list):
        errs.append("command 'health' is expected to return a list but got %s" %
                    type(result.payload))
      else:
        for item, status in result.payload:
          if status not in constants.OOB_STATUSES:
            errs.append("health item '%s' has invalid status '%s'" %
                        (item, status))

    if self.op.command == constants.OOB_POWER_STATUS:
      if not isinstance(result.payload, dict):
        errs.append("power-status is expected to return a dict but got %s" %
                    type(result.payload))

    if self.op.command in [
        constants.OOB_POWER_ON,
        constants.OOB_POWER_OFF,
        constants.OOB_POWER_CYCLE,
        ]:
      if result.payload is not None:
        errs.append("%s is expected to not return payload but got '%s'" %
                    (self.op.command, result.payload))

    if errs:
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
                               utils.CommaJoin(errs))
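# Editor's note (illustrative, not part of the original module): the payload
# contract checked above is, per OOB command:
#
#   health             -> list of (item, status) with status in OOB_STATUSES,
#                         e.g. [("PSU0", constants.OOB_STATUS_OK)]
#   power-status       -> dict, e.g. {constants.OOB_POWER_STATUS_POWERED: True}
#   power-on/off/cycle -> no payload at all (None)
#
# Any violation is collected into errs and reported as a single OpExecError.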
class _OsQuery(_QueryBase):
  FIELDS = query.OS_FIELDS

  def ExpandNames(self, lu):
    # Lock all nodes in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    lu.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    # The following variables interact with _QueryBase._GetNames
    if self.names:
      self.wanted = self.names
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = self.use_locking
  def DeclareLocks(self, lu, level):
    pass

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-OS per-node dictionary.

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and tuples of (path, status, diagnose,
        variants, parameters, api_versions) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "", [], [])]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for (name, path, status, diagnose, variants,
           params, api_versions) in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        # convert params from [name, help] to (name, help)
        params = [tuple(v) for v in params]
        all_os[name][node_name].append((path, status, diagnose,
                                        variants, params, api_versions))
    return all_os
  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    # Locking is not used
    assert not (compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) or
                self.do_locking or self.use_locking)

    valid_nodes = [node.name
                   for node in lu.cfg.GetAllNodesInfo().values()
                   if not node.offline and node.vm_capable]
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
    cluster = lu.cfg.GetClusterInfo()

    data = {}

    for (os_name, os_data) in pol.items():
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
                          hidden=(os_name in cluster.hidden_os),
                          blacklisted=(os_name in cluster.blacklisted_os))

      variants = set()
      parameters = set()
      api_versions = set()

      for idx, osl in enumerate(os_data.values()):
        info.valid = bool(info.valid and osl and osl[0][1])
        if not info.valid:
          break

        (node_variants, node_params, node_api) = osl[0][3:6]
        if idx == 0:
          # First entry
          variants.update(node_variants)
          parameters.update(node_params)
          api_versions.update(node_api)
        else:
          # Filter out inconsistent values
          variants.intersection_update(node_variants)
          parameters.intersection_update(node_params)
          api_versions.intersection_update(node_api)

      info.variants = list(variants)
      info.parameters = list(parameters)
      info.api_versions = list(api_versions)

      data[os_name] = info

    # Prepare data in requested order
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
            if name in data]
class LUOsDiagnose(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  REQ_BGL = False

  @staticmethod
  def _BuildFilter(fields, names):
    """Builds a filter for querying OSes.

    """
    name_filter = qlang.MakeSimpleFilter("name", names)

    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
    # respective field is not requested
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
                     for fname in ["hidden", "blacklisted"]
                     if fname not in fields]
    if "valid" not in fields:
      status_filter.append([qlang.OP_TRUE, "valid"])

    if status_filter:
      status_filter.insert(0, qlang.OP_AND)
    else:
      status_filter = None

    if name_filter and status_filter:
      return [qlang.OP_AND, name_filter, status_filter]
    elif name_filter:
      return name_filter
    else:
      return status_filter

  def CheckArguments(self):
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
                       self.op.output_fields, False)

  def ExpandNames(self):
    self.oq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.oq.OldStyleQuery(self)
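# Editor's note (illustrative sketch, not part of the original module): for
# a query such as fields=["name"], names=["debian-image"], _BuildFilter()
# above would produce a qlang tree roughly like:
#
#   [qlang.OP_AND,
#    [qlang.OP_OR, [qlang.OP_EQUAL, "name", "debian-image"]],
#    [qlang.OP_AND,
#     [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
#     [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
#     [qlang.OP_TRUE, "valid"]]]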
class LUNodeRemove(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      logging.warning("Node '%s', which is about to be removed, was not found"
                      " in the list of all nodes", self.op.node_name)
    return (all_nodes, all_nodes)
  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node, failover to another"
                                 " node is required", errors.ECODE_INVAL)

    for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first" % instance_name,
                                   errors.ECODE_INVAL)
    self.op.node_name = node.name
    self.node = node
  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    _RunPostHook(self, node.name)

    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)

    # Remove node from our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_REMOVE,
                                              node.name, None)
      result.Raise("Can't update hosts file with new host data")
      _RedistributeAncillaryFiles(self)
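# Editor's note (illustrative, not part of the original module): the removal
# order above matters: the candidate pool is re-filled and the node dropped
# from the cluster context *before* the post hooks and the
# node_leave_cluster RPC run, so a node that fails mid-removal is already
# gone from the cluster's view and only leaves local cleanup behind.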
class _NodeQuery(_QueryBase):
  FIELDS = query.NODE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedNodes(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.NQ_LIVE in self.requested_data)

    if self.do_locking:
      # If any non-static field is requested we need to lock the nodes
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted

  def DeclareLocks(self, lu, level):
    pass
  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    all_info = lu.cfg.GetAllNodesInfo()

    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)

    # Gather data as requested
    if query.NQ_LIVE in self.requested_data:
      # filter out non-vm_capable nodes
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]

      node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
                                        lu.cfg.GetHypervisorType())
      live_data = dict((name, nresult.payload)
                       for (name, nresult) in node_data.items()
                       if not nresult.fail_msg and nresult.payload)
    else:
      live_data = None

    if query.NQ_INST in self.requested_data:
      node_to_primary = dict([(name, set()) for name in nodenames])
      node_to_secondary = dict([(name, set()) for name in nodenames])

      inst_data = lu.cfg.GetAllInstancesInfo()

      for inst in inst_data.values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)
    else:
      node_to_primary = None
      node_to_secondary = None

    if query.NQ_OOB in self.requested_data:
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
                         for name, node in all_info.iteritems())
    else:
      oob_support = None

    if query.NQ_GROUP in self.requested_data:
      groups = lu.cfg.GetAllNodeGroupsInfo()
    else:
      groups = {}

    return query.NodeQueryData([all_info[name] for name in nodenames],
                               live_data, lu.cfg.GetMasterNode(),
                               node_to_primary, node_to_secondary, groups,
                               oob_support, lu.cfg.GetClusterInfo())
class LUNodeQuery(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
                         self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.nq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.nq.OldStyleQuery(self)
class LUNodeQueryvols(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    nodenames = self.owned_locks(locking.LEVEL_NODE)
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = self.cfg.GetAllInstancesInfo()
    vol2inst = _MapInstanceDisksToNodes(ilist.values())

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = sorted(nresult.payload,
                         key=operator.itemgetter("dev"))

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol["dev"]
          elif field == "vg":
            val = vol["vg"]
          elif field == "name":
            val = vol["name"]
          elif field == "size":
            val = int(float(vol["size"]))
          elif field == "instance":
            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output
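# Editor's note (illustrative, not part of the original module): with
# output_fields=["node", "phys", "vg", "name", "size", "instance"], each
# row of the returned list might look like (values hypothetical):
#
#   ["node1.example.com", "/dev/sda5", "xenvg", "inst1-disk0", "10240",
#    "inst1.example.com"]
#
# All values are stringified via str(val) before being appended.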
class LUNodeQueryStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  REQ_BGL = False

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    self.nodes = self.owned_locks(locking.LEVEL_NODE)

    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields

    # Never ask for node or type as it's only known to the LU
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
      while extra in fields:
        fields.remove(extra)

    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]

        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(val)

        result.append(out)

    return result
class _InstanceQuery(_QueryBase):
  FIELDS = query.INSTANCE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedInstances(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.IQ_LIVE in self.requested_data)
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      lu.needed_locks[locking.LEVEL_NODEGROUP] = []
      lu.needed_locks[locking.LEVEL_NODE] = []
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.do_grouplocks = (self.do_locking and
                          query.IQ_NODES in self.requested_data)

  def DeclareLocks(self, lu, level):
    if self.do_locking:
      if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
        assert not lu.needed_locks[locking.LEVEL_NODEGROUP]

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lu.needed_locks[locking.LEVEL_NODEGROUP] = \
          set(group_uuid
              for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
              for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
      elif level == locking.LEVEL_NODE:
        lu._LockInstancesNodes() # pylint: disable=W0212

  @staticmethod
  def _CheckGroupLocks(lu):
    owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))

    # Check if node groups for locked instances are still correct
    for instance_name in owned_instances:
      _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
  def _GetQueryData(self, lu):
    """Computes the list of instances and their attributes.

    """
    if self.do_grouplocks:
      self._CheckGroupLocks(lu)

    cluster = lu.cfg.GetClusterInfo()
    all_info = lu.cfg.GetAllInstancesInfo()

    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)

    instance_list = [all_info[name] for name in instance_names]
    nodes = frozenset(itertools.chain(*(inst.all_nodes
                                        for inst in instance_list)))
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
    bad_nodes = []
    offline_nodes = []
    wrongnode_inst = set()

    # Gather data as requested
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
      live_data = {}
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          assert result.fail_msg
          offline_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        elif result.payload:
          for inst in result.payload:
            if inst in all_info:
              if all_info[inst].primary_node == name:
                live_data.update(result.payload)
              else:
                wrongnode_inst.add(inst)
            else:
              # orphan instance; we don't list it here as we don't
              # handle this case yet in the output of instance listing
              logging.warning("Orphan instance '%s' found on node %s",
                              inst, name)
        # else no instance is alive
    else:
      live_data = {}

    if query.IQ_DISKUSAGE in self.requested_data:
      disk_usage = dict((inst.name,
                         _ComputeDiskSize(inst.disk_template,
                                          [{constants.IDISK_SIZE: disk.size}
                                           for disk in inst.disks]))
                        for inst in instance_list)
    else:
      disk_usage = None

    if query.IQ_CONSOLE in self.requested_data:
      consinfo = {}
      for inst in instance_list:
        if inst.name in live_data:
          # Instance is running
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
        else:
          consinfo[inst.name] = None
      assert set(consinfo.keys()) == set(instance_names)
    else:
      consinfo = None

    if query.IQ_NODES in self.requested_data:
      node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
                                            instance_list)))
      nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
      groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
                    for uuid in set(map(operator.attrgetter("group"),
                                        nodes.values())))
    else:
      nodes = None
      groups = None

    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
                                   disk_usage, offline_nodes, bad_nodes,
                                   live_data, wrongnode_inst, consinfo,
                                   nodes, groups)
class LUQuery(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    qcls = _GetQueryImplementation(self.op.what)

    self.impl = qcls(self.op.filter, self.op.fields, self.op.use_locking)

  def ExpandNames(self):
    self.impl.ExpandNames(self)

  def DeclareLocks(self, level):
    self.impl.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.impl.NewStyleQuery(self)
class LUQueryFields(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.qcls = _GetQueryImplementation(self.op.what)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)
class LUNodeModifyStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(self.op.node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))
class LUNodeAdd(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _NFLAGS = ["master_capable", "vm_capable"]

  def CheckArguments(self):
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
    # validate/normalize the node name
    self.hostname = netutils.GetHostname(name=self.op.node_name,
                                         family=self.primary_ip_family)
    self.op.node_name = self.hostname.name

    if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
      raise errors.OpPrereqError("Cannot readd the master node",
                                 errors.ECODE_STATE)

    if self.op.readd and self.op.group:
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
                                 " being readded", errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # Exclude added node
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
    post_nodes = pre_nodes + [self.op.node_name, ]

    return (pre_nodes, post_nodes)
  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) matches the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    cfg = self.cfg
    hostname = self.hostname
    node = hostname.name
    primary_ip = self.op.primary_ip = hostname.ip
    if self.op.secondary_ip is None:
      if self.primary_ip_family == netutils.IP6Address.family:
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
                                   " IPv4 address must be given as secondary",
                                   errors.ECODE_INVAL)
      self.op.secondary_ip = primary_ip

    secondary_ip = self.op.secondary_ip
    if not netutils.IP4Address.IsValid(secondary_ip):
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                 " address" % secondary_ip, errors.ECODE_INVAL)

    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)

    self.changed_primary_ip = False

    for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
      if self.op.readd and node == existing_node_name:
        if existing_node.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # After this 'if' block, None is no longer a valid value for the
    # _capable op attributes
    if self.op.readd:
      old_node = self.cfg.GetNodeInfo(node)
      assert old_node is not None, "Can't retrieve locked node %s" % node
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, getattr(old_node, attr))
    else:
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, True)

    if self.op.readd and not self.op.vm_capable:
      pri, sec = cfg.GetNodeInstances(node)
      if pri or sec:
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
                                   " flag set to false, but it already holds"
                                   " instances" % node,
                                   errors.ECODE_STATE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no secondary ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a secondary ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)

    # checks reachability
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                              source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to node daemon port",
                                   errors.ECODE_ENVIRON)

    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    if self.op.master_capable:
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
    else:
      self.master_candidate = False

    if self.op.readd:
      self.new_node = old_node
    else:
      node_group = cfg.LookupNodeGroup(self.op.group)
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False,
                                   group=node_group)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    new_node = self.new_node
    node = new_node.name

    # We are adding a new node, so we assume it's powered
    new_node.powered = True

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # copy the master/vm_capable flags
    for attr in self._NFLAGS:
      setattr(new_node, attr, getattr(self.op, attr))

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    if self.op.ndparams:
      new_node.ndparams = self.op.ndparams
    else:
      new_node.ndparams = {}

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result.payload))

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_ADD,
                                              self.hostname.name,
                                              self.hostname.ip)
      result.Raise("Can't update hosts file with new host data")

    if new_node.secondary_ip != new_node.primary_ip:
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
                               False)

    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: ([node], {}),
      # TODO: do a node-net-test as well?
      }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
                                  additional_vm=self.op.vm_capable)
      self.context.AddNode(new_node, self.proc.GetECId())
class LUNodeSetParams(LogicalUnit):
  """Modifies the parameters of a node.

  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
      to the node role (as _ROLE_*)
  @cvar _R2F: a dictionary from node role to tuples of flags
  @cvar _FLAGS: a list of attribute names corresponding to the flags

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
  _F2R = {
    (True, False, False): _ROLE_CANDIDATE,
    (False, True, False): _ROLE_DRAINED,
    (False, False, True): _ROLE_OFFLINE,
    (False, False, False): _ROLE_REGULAR,
    }
  _R2F = dict((v, k) for k, v in _F2R.items())
  _FLAGS = ["master_candidate", "drained", "offline"]
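  # Editor's note (illustrative, not part of the original module): _F2R maps
  # the (master_candidate, drained, offline) flag tuple onto exactly one
  # role, e.g. (True, False, False) -> _ROLE_CANDIDATE, and _R2F inverts
  # it, so flag combinations that would put a node in two states at once
  # simply do not exist in the table.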
  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
                self.op.master_capable, self.op.vm_capable,
                self.op.secondary_ip, self.op.ndparams]
    if all_mods.count(None) == len(all_mods):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we might be demoting from MC
    self.might_demote = (self.op.master_candidate == False or
                         self.op.offline == True or
                         self.op.drained == True or
                         self.op.master_capable == False)

    if self.op.secondary_ip:
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                   " address" % self.op.secondary_ip,
                                   errors.ECODE_INVAL)

    self.lock_all = self.op.auto_promote and self.might_demote
    self.lock_instances = self.op.secondary_ip is not None
  def ExpandNames(self):
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

    if self.lock_instances:
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET

  def DeclareLocks(self, level):
    # If we have locked all instances, before waiting to lock nodes, release
    # all the ones living on nodes unrelated to the current operation.
    if level == locking.LEVEL_NODE and self.lock_instances:
      self.affected_instances = []
      if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
        instances_keep = []

        # Build list of instances to release
        locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
        for instance_name, instance in self.cfg.GetMultiInstanceInfo(locked_i):
          if (instance.disk_template in constants.DTS_INT_MIRROR and
              self.op.node_name in instance.all_nodes):
            instances_keep.append(instance_name)
            self.affected_instances.append(instance)

        _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)

        assert (set(self.owned_locks(locking.LEVEL_INSTANCE)) ==
                set(instances_keep))
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
    return (nl, nl)
  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via master-failover",
                                   errors.ECODE_INVAL)

    if self.op.master_candidate and not node.master_capable:
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
                                 " it a master candidate" % node.name,
                                 errors.ECODE_STATE)

    if self.op.vm_capable == False:
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
      if ipri or isec:
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
                                   " the vm_capable flag" % node.name,
                                   errors.ECODE_STATE)

    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto promote option to allow"
                                   " promotion", errors.ECODE_STATE)

    self.old_flags = old_flags = (node.master_candidate,
                                  node.drained, node.offline)
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
    self.old_role = old_role = self._F2R[old_flags]

    # Check for ineffective changes
    for attr in self._FLAGS:
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
        setattr(self.op, attr, None)

    # Past this point, any flag change to False means a transition
    # away from the respective state, as only real changes are kept

    # TODO: We might query the real power state if it supports OOB
    if _SupportsOob(self.cfg, node):
      if self.op.offline is False and not (node.powered or
                                           self.op.powered == True):
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
                                    " offline status can be reset") %
                                   self.op.node_name)
    elif self.op.powered is not None:
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
                                  " as it does not support out-of-band"
                                  " handling") % self.op.node_name)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.op.drained == False or self.op.offline == False or
        (self.op.master_capable and not node.master_capable)):
      if _DecideSelfPromotion(self):
        self.op.master_candidate = True
        self.LogInfo("Auto-promoting node to master candidate")

    # If we're no longer master capable, we'll demote ourselves from MC
    if self.op.master_capable == False and node.master_candidate:
      self.LogInfo("Demoting from master candidate")
      self.op.master_candidate = False

    # Compute new role
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
    if self.op.master_candidate:
      new_role = self._ROLE_CANDIDATE
    elif self.op.drained:
      new_role = self._ROLE_DRAINED
    elif self.op.offline:
      new_role = self._ROLE_OFFLINE
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
      # False is still in new flags, which means we're un-setting (the
      # only) True flag
      new_role = self._ROLE_REGULAR
    else: # no new flags, nothing, keep old role
      new_role = old_role

    self.new_role = new_role

    if old_role == self._ROLE_OFFLINE and new_role != old_role:
      # Trying to transition out of offline status
      result = self.rpc.call_version([node.name])[node.name]
      if result.fail_msg:
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
                                   " to report its version: %s" %
                                   (node.name, result.fail_msg),
                                   errors.ECODE_STATE)
      else:
        self.LogWarning("Transitioning node from offline to online state"
                        " without using re-add. Please make sure the node"
                        " is healthy!")

    if self.op.secondary_ip:
      # Ok even without locking, because this can't be changed by any LU
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
      master_singlehomed = master.secondary_ip == master.primary_ip
      if master_singlehomed and self.op.secondary_ip:
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
                                   " homed cluster", errors.ECODE_INVAL)

      if node.offline:
        if self.affected_instances:
          raise errors.OpPrereqError("Cannot change secondary ip: offline"
                                     " node has instances (%s) configured"
                                     " to use it" % self.affected_instances)
      else:
        # On online nodes, check that no instances are running, and that
        # the node has the new ip and we can reach it.
        for instance in self.affected_instances:
          _CheckInstanceDown(self, instance, "cannot change secondary ip")

        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
        if master.name != node.name:
          # check reachability from master secondary ip to new secondary ip
          if not netutils.TcpPing(self.op.secondary_ip,
                                  constants.DEFAULT_NODED_PORT,
                                  source=master.secondary_ip):
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                       " based ping to node daemon port",
                                       errors.ECODE_ENVIRON)

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams
  def Exec(self, feedback_fn):
    """Modifies a node.

    """
    node = self.node
    old_role = self.old_role
    new_role = self.new_role

    result = []

    if self.op.ndparams:
      node.ndparams = self.new_ndparams

    if self.op.powered is not None:
      node.powered = self.op.powered

    for attr in ["master_capable", "vm_capable"]:
      val = getattr(self.op, attr)
      if val is not None:
        setattr(node, attr, val)
        result.append((attr, str(val)))

    if new_role != old_role:
      # Tell the node to demote itself, if no longer MC and not offline
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s", msg)

      new_flags = self._R2F[new_role]
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
        if of != nf:
          result.append((desc, str(nf)))
      (node.master_candidate, node.drained, node.offline) = new_flags

      # we locked all nodes, we adjust the CP before updating this node
      if self.lock_all:
        _AdjustCandidatePool(self, [node.name])

    if self.op.secondary_ip:
      node.secondary_ip = self.op.secondary_ip
      result.append(("secondary_ip", self.op.secondary_ip))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup if the mc
    # flag changed
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
      self.context.ReaddNode(node)

    return result
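# Editor's note (illustrative, not part of the original module): the result
# list built above is a change log of (parameter, new_value) string pairs,
# e.g. hypothetically [("master_candidate", "False"), ("drained", "True")],
# which the CLI prints back to the user after the modification.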
class LUNodePowercycle(NoHooksLU):
  """Powercycles a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Reboots a node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload
class LUClusterQuery(NoHooksLU):
  """Query cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    os_hvp = {}

    # Filter just for enabled hypervisors
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = {}
      for hv_name, hv_params in hv_dict.items():
        if hv_name in cluster.enabled_hypervisors:
          os_hvp[os_name][hv_name] = hv_params

    # Convert ip_family to ip_version
    primary_ip_version = constants.IP4_VERSION
    if cluster.primary_ip_family == netutils.IP6Address.family:
      primary_ip_version = constants.IP6_VERSION

    result = {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.enabled_hypervisors[0],
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "nicparams": cluster.nicparams,
      "ndparams": cluster.ndparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      "reserved_lvs": cluster.reserved_lvs,
      "primary_ip_version": primary_ip_version,
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
      "hidden_os": cluster.hidden_os,
      "blacklisted_os": cluster.blacklisted_os,
      }

    return result
class LUClusterConfigQuery(NoHooksLU):
  """Return configuration values.

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause", "volume_group_name")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Dump a representation of the cluster config to the standard output.

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      elif field == "watcher_pause":
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
      elif field == "volume_group_name":
        entry = self.cfg.GetVGName()
      else:
        raise errors.ParameterError(field)
      values.append(entry)

    return values
class LUInstanceActivateDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
      _AssembleInstanceDisks(self, self.instance,
                             ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info
5599 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5601 """Prepare the block devices for an instance.
5603 This sets up the block devices on all nodes.
5605 @type lu: L{LogicalUnit}
5606 @param lu: the logical unit on whose behalf we execute
5607 @type instance: L{objects.Instance}
5608 @param instance: the instance for whose disks we assemble
5609 @type disks: list of L{objects.Disk} or None
5610 @param disks: which disks to assemble (or all, if None)
5611 @type ignore_secondaries: boolean
5612 @param ignore_secondaries: if true, errors on secondary nodes
5613 won't result in an error return from the function
5614 @type ignore_size: boolean
5615 @param ignore_size: if true, the current known size of the disk
5616 will not be used during the disk activation, useful for cases
5617 when the size is wrong
5618 @return: False if the operation failed, otherwise a list of
5619 (host, instance_visible_name, node_visible_name)
5620 with the mapping from node devices to instance devices
5625 iname = instance.name
5626 disks = _ExpandCheckDisks(instance, disks)
5628 # With the two passes mechanism we try to reduce the window of
5629 # opportunity for the race condition of switching DRBD to primary
5630 # before handshaking occured, but we do not eliminate it
5632 # The proper fix would be to wait (with some limits) until the
5633 # connection has been made and drbd transitions from WFConnection
5634 # into any other network-connected state (Connected, SyncTarget,
5637 # 1st pass, assemble on all nodes in secondary mode
5638 for idx, inst_disk in enumerate(disks):
5639 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5641 node_disk = node_disk.Copy()
5642 node_disk.UnsetSize()
5643 lu.cfg.SetDiskID(node_disk, node)
5644 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5645 msg = result.fail_msg
5647 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5648 " (is_primary=False, pass=1): %s",
5649 inst_disk.iv_name, node, msg)
5650 if not ignore_secondaries:
5653 # FIXME: race condition on drbd migration to primary
5655 # 2nd pass, do only the primary node
5656 for idx, inst_disk in enumerate(disks):
dev_path = None
5659 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5660 if node != instance.primary_node:
continue
5663 node_disk = node_disk.Copy()
5664 node_disk.UnsetSize()
5665 lu.cfg.SetDiskID(node_disk, node)
5666 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5667 msg = result.fail_msg
if msg:
5669 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5670 " (is_primary=True, pass=2): %s",
5671 inst_disk.iv_name, node, msg)
disks_ok = False
else:
5674 dev_path = result.payload
5676 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5678 # leave the disks configured for the primary node
5679 # this is a workaround that would be fixed better by
5680 # improving the logical/physical id handling
for disk in disks:
5682 lu.cfg.SetDiskID(disk, instance.primary_node)
5684 return disks_ok, device_info
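# Illustrative return value (hypothetical names): for a one-disk instance
# whose primary is "node1", a successful run returns something like
#   (True, [("node1", "disk/0", "/dev/drbd0")])
# i.e. disks_ok plus one (node, iv_name, device_path) triple per disk.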
5687 def _StartInstanceDisks(lu, instance, force):
5688 """Start the disks of an instance.
5691 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
5692 ignore_secondaries=force)
if not disks_ok:
5694 _ShutdownInstanceDisks(lu, instance)
5695 if force is not None and not force:
5696 lu.proc.LogWarning("", hint="If the message above refers to a"
" secondary node,"
5698 " you can retry the operation using '--force'.")
5699 raise errors.OpExecError("Disk consistency error")
5702 class LUInstanceDeactivateDisks(NoHooksLU):
5703 """Shutdown an instance's disks.
5708 def ExpandNames(self):
5709 self._ExpandAndLockInstance()
5710 self.needed_locks[locking.LEVEL_NODE] = []
5711 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5713 def DeclareLocks(self, level):
5714 if level == locking.LEVEL_NODE:
5715 self._LockInstancesNodes()
5717 def CheckPrereq(self):
5718 """Check prerequisites.
5720 This checks that the instance is in the cluster.
5723 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5724 assert self.instance is not None, \
5725 "Cannot retrieve locked instance %s" % self.op.instance_name
5727 def Exec(self, feedback_fn):
5728 """Deactivate the disks
5731 instance = self.instance
5733 _ShutdownInstanceDisks(self, instance)
5735 _SafeShutdownInstanceDisks(self, instance)
5738 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
5739 """Shutdown block devices of an instance.
5741 This function checks if an instance is running before calling
5742 _ShutdownInstanceDisks.
5745 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
5746 _ShutdownInstanceDisks(lu, instance, disks=disks)
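# Sketch of the intended division of labour (hypothetical "lu"/"inst"):
#   _SafeShutdownInstanceDisks(lu, inst)  # refuses while inst is running
#   _ShutdownInstanceDisks(lu, inst)      # unconditional teardown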
5749 def _ExpandCheckDisks(instance, disks):
5750 """Return the instance disks selected by the disks list
5752 @type disks: list of L{objects.Disk} or None
5753 @param disks: selected disks
5754 @rtype: list of L{objects.Disk}
5755 @return: selected instance disks to act on
if disks is None:
5759 return instance.disks
else:
5761 if not set(disks).issubset(instance.disks):
5762 raise errors.ProgrammerError("Can only act on disks belonging to the"
" given instance")
return disks
5767 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
5768 """Shutdown block devices of an instance.
5770 This does the shutdown on all nodes of the instance.
5772 If ignore_primary is false, errors on the primary node are
ignored.

"""
all_result = True
5777 disks = _ExpandCheckDisks(instance, disks)
for disk in disks:
5780 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
5781 lu.cfg.SetDiskID(top_disk, node)
5782 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
5783 msg = result.fail_msg
if msg:
5785 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
5786 disk.iv_name, node, msg)
5787 if ((node == instance.primary_node and not ignore_primary) or
5788 (node != instance.primary_node and not result.offline)):
all_result = False

return all_result
5793 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5794 """Checks if a node has enough free memory.
5796 This function checks if a given node has the needed amount of free
5797 memory. In case the node has less memory or we cannot get the
5798 information from the node, this function raises an OpPrereqError
exception.
5801 @type lu: C{LogicalUnit}
5802 @param lu: a logical unit from which we get configuration data
@type node: C{str}
5804 @param node: the node to check
5805 @type reason: C{str}
5806 @param reason: string to use in the error message
5807 @type requested: C{int}
5808 @param requested: the amount of memory in MiB to check for
5809 @type hypervisor_name: C{str}
5810 @param hypervisor_name: the hypervisor to ask for memory stats
5811 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
5812 we cannot check the node
5815 nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
5816 nodeinfo[node].Raise("Can't get data from node %s" % node,
5817 prereq=True, ecode=errors.ECODE_ENVIRON)
5818 free_mem = nodeinfo[node].payload.get("memory_free", None)
5819 if not isinstance(free_mem, int):
5820 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
5821 " was '%s'" % (node, free_mem),
5822 errors.ECODE_ENVIRON)
5823 if requested > free_mem:
5824 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
5825 " needed %s MiB, available %s MiB" %
5826 (node, reason, requested, free_mem),
errors.ECODE_NORES)
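# Example call (illustrative values only): require 4096 MiB free on node
# "node2" under the "xen-pvm" hypervisor before starting an instance:
#   _CheckNodeFreeMemory(self, "node2", "starting instance inst1",
#                        4096, "xen-pvm")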
5830 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
5831 """Checks if nodes have enough free disk space in the all VGs.
5833 This function check if all given nodes have the needed amount of
5834 free disk. In case any node has less disk or we cannot get the
5835 information from the node, this function raise an OpPrereqError
5838 @type lu: C{LogicalUnit}
5839 @param lu: a logical unit from which we get configuration data
5840 @type nodenames: C{list}
5841 @param nodenames: the list of node names to check
5842 @type req_sizes: C{dict}
5843 @param req_sizes: the hash of vg and corresponding amount of disk in
MiB to check for
5845 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5846 or we cannot check the node
5849 for vg, req_size in req_sizes.items():
5850 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
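# Example (illustrative): verify both candidate nodes can hold a 10 GiB
# disk in vg "xenvg" and a 2 GiB disk in vg "fastvg":
#   _CheckNodesFreeDiskPerVG(self, ["node1", "node2"],
#                            {"xenvg": 10240, "fastvg": 2048})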
5853 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5854 """Checks if nodes have enough free disk space in the specified VG.
5856 This function checks if all given nodes have the needed amount of
5857 free disk. In case any node has less disk or we cannot get the
5858 information from the node, this function raises an OpPrereqError
exception.
5861 @type lu: C{LogicalUnit}
5862 @param lu: a logical unit from which we get configuration data
5863 @type nodenames: C{list}
5864 @param nodenames: the list of node names to check
@type vg: C{str}
5866 @param vg: the volume group to check
5867 @type requested: C{int}
5868 @param requested: the amount of disk in MiB to check for
5869 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5870 or we cannot check the node
5873 nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5874 for node in nodenames:
5875 info = nodeinfo[node]
5876 info.Raise("Cannot get current information from node %s" % node,
5877 prereq=True, ecode=errors.ECODE_ENVIRON)
5878 vg_free = info.payload.get("vg_free", None)
5879 if not isinstance(vg_free, int):
5880 raise errors.OpPrereqError("Can't compute free disk space on node"
5881 " %s for vg %s, result was '%s'" %
5882 (node, vg, vg_free), errors.ECODE_ENVIRON)
5883 if requested > vg_free:
5884 raise errors.OpPrereqError("Not enough disk space on target node %s"
5885 " vg %s: required %d MiB, available %d MiB" %
5886 (node, vg, requested, vg_free),
errors.ECODE_NORES)
5890 class LUInstanceStartup(LogicalUnit):
5891 """Starts an instance.
5894 HPATH = "instance-start"
5895 HTYPE = constants.HTYPE_INSTANCE
5898 def CheckArguments(self):
5900 if self.op.beparams:
5901 # fill the beparams dict
5902 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5904 def ExpandNames(self):
5905 self._ExpandAndLockInstance()
5907 def BuildHooksEnv(self):
5910 This runs on master, primary and secondary nodes of the instance.

"""
env = {
5914 "FORCE": self.op.force,
}

5917 env.update(_BuildInstanceHookEnvByObject(self, self.instance))

return env
5921 def BuildHooksNodes(self):
5922 """Build hooks nodes.
5925 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
return (nl, nl)
5928 def CheckPrereq(self):
5929 """Check prerequisites.
5931 This checks that the instance is in the cluster.
5934 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5935 assert self.instance is not None, \
5936 "Cannot retrieve locked instance %s" % self.op.instance_name
5939 if self.op.hvparams:
5940 # check hypervisor parameter syntax (locally)
5941 cluster = self.cfg.GetClusterInfo()
5942 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5943 filled_hvp = cluster.FillHV(instance)
5944 filled_hvp.update(self.op.hvparams)
5945 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5946 hv_type.CheckParameterSyntax(filled_hvp)
5947 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
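# Illustrative merge order for filled_hvp above (hypothetical values):
#   cluster-level defaults:  {"kernel_path": "/boot/vmlinuz"}
#   instance hvparams:       {"root_path": "/dev/xvda1"}
#   self.op.hvparams:        {"root_path": "/dev/xvda2"}
# => filled_hvp == {"kernel_path": "/boot/vmlinuz", "root_path": "/dev/xvda2"}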
5949 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5951 if self.primary_offline and self.op.ignore_offline_nodes:
5952 self.proc.LogWarning("Ignoring offline primary node")
5954 if self.op.hvparams or self.op.beparams:
5955 self.proc.LogWarning("Overridden parameters are ignored")
else:
5957 _CheckNodeOnline(self, instance.primary_node)
5959 bep = self.cfg.GetClusterInfo().FillBE(instance)
5961 # check bridges existence
5962 _CheckInstanceBridgesExist(self, instance)
5964 remote_info = self.rpc.call_instance_info(instance.primary_node,
instance.name,
5966 instance.hypervisor)
5967 remote_info.Raise("Error checking node %s" % instance.primary_node,
5968 prereq=True, ecode=errors.ECODE_ENVIRON)
5969 if not remote_info.payload: # not running already
5970 _CheckNodeFreeMemory(self, instance.primary_node,
5971 "starting instance %s" % instance.name,
5972 bep[constants.BE_MEMORY], instance.hypervisor)
5974 def Exec(self, feedback_fn):
5975 """Start the instance.
5978 instance = self.instance
5979 force = self.op.force
5981 if not self.op.no_remember:
5982 self.cfg.MarkInstanceUp(instance.name)
5984 if self.primary_offline:
5985 assert self.op.ignore_offline_nodes
5986 self.proc.LogInfo("Primary node offline, marked instance as started")
else:
5988 node_current = instance.primary_node
5990 _StartInstanceDisks(self, instance, force)
5992 result = self.rpc.call_instance_start(node_current, instance,
5993 self.op.hvparams, self.op.beparams,
5994 self.op.startup_paused)
5995 msg = result.fail_msg
if msg:
5997 _ShutdownInstanceDisks(self, instance)
5998 raise errors.OpExecError("Could not start instance: %s" % msg)
6001 class LUInstanceReboot(LogicalUnit):
6002 """Reboot an instance.
6005 HPATH = "instance-reboot"
6006 HTYPE = constants.HTYPE_INSTANCE
6009 def ExpandNames(self):
6010 self._ExpandAndLockInstance()
6012 def BuildHooksEnv(self):
6015 This runs on master, primary and secondary nodes of the instance.

"""
env = {
6019 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6020 "REBOOT_TYPE": self.op.reboot_type,
6021 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
}

6024 env.update(_BuildInstanceHookEnvByObject(self, self.instance))

return env
6028 def BuildHooksNodes(self):
6029 """Build hooks nodes.
6032 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
return (nl, nl)
6035 def CheckPrereq(self):
6036 """Check prerequisites.
6038 This checks that the instance is in the cluster.
6041 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6042 assert self.instance is not None, \
6043 "Cannot retrieve locked instance %s" % self.op.instance_name
6045 _CheckNodeOnline(self, instance.primary_node)
6047 # check bridges existence
6048 _CheckInstanceBridgesExist(self, instance)
6050 def Exec(self, feedback_fn):
6051 """Reboot the instance.
6054 instance = self.instance
6055 ignore_secondaries = self.op.ignore_secondaries
6056 reboot_type = self.op.reboot_type
6058 remote_info = self.rpc.call_instance_info(instance.primary_node,
instance.name,
6060 instance.hypervisor)
6061 remote_info.Raise("Error checking node %s" % instance.primary_node)
6062 instance_running = bool(remote_info.payload)
6064 node_current = instance.primary_node
6066 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6067 constants.INSTANCE_REBOOT_HARD]:
6068 for disk in instance.disks:
6069 self.cfg.SetDiskID(disk, node_current)
6070 result = self.rpc.call_instance_reboot(node_current, instance,
reboot_type,
6072 self.op.shutdown_timeout)
6073 result.Raise("Could not reboot instance")
else:
6075 if instance_running:
6076 result = self.rpc.call_instance_shutdown(node_current, instance,
6077 self.op.shutdown_timeout)
6078 result.Raise("Could not shutdown instance for full reboot")
6079 _ShutdownInstanceDisks(self, instance)
else:
6081 self.LogInfo("Instance %s was already stopped, starting now",
instance.name)
6083 _StartInstanceDisks(self, instance, ignore_secondaries)
6084 result = self.rpc.call_instance_start(node_current, instance,
None, None, False)
6086 msg = result.fail_msg
if msg:
6088 _ShutdownInstanceDisks(self, instance)
6089 raise errors.OpExecError("Could not start instance for"
6090 " full reboot: %s" % msg)
6092 self.cfg.MarkInstanceUp(instance.name)
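# Summary of the dispatch above: soft/hard reboots of a running instance go
# through the hypervisor's reboot call; a full reboot (or any reboot of a
# stopped instance) is implemented as shutdown + disk teardown + start.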
6095 class LUInstanceShutdown(LogicalUnit):
6096 """Shutdown an instance.
6099 HPATH = "instance-stop"
6100 HTYPE = constants.HTYPE_INSTANCE
6103 def ExpandNames(self):
6104 self._ExpandAndLockInstance()
6106 def BuildHooksEnv(self):
6109 This runs on master, primary and secondary nodes of the instance.
6112 env = _BuildInstanceHookEnvByObject(self, self.instance)
6113 env["TIMEOUT"] = self.op.timeout
return env
6116 def BuildHooksNodes(self):
6117 """Build hooks nodes.
6120 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
return (nl, nl)
6123 def CheckPrereq(self):
6124 """Check prerequisites.
6126 This checks that the instance is in the cluster.
6129 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6130 assert self.instance is not None, \
6131 "Cannot retrieve locked instance %s" % self.op.instance_name
6133 self.primary_offline = \
6134 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6136 if self.primary_offline and self.op.ignore_offline_nodes:
6137 self.proc.LogWarning("Ignoring offline primary node")
6139 _CheckNodeOnline(self, self.instance.primary_node)
6141 def Exec(self, feedback_fn):
6142 """Shutdown the instance.
6145 instance = self.instance
6146 node_current = instance.primary_node
6147 timeout = self.op.timeout
6149 if not self.op.no_remember:
6150 self.cfg.MarkInstanceDown(instance.name)
6152 if self.primary_offline:
6153 assert self.op.ignore_offline_nodes
6154 self.proc.LogInfo("Primary node offline, marked instance as stopped")
else:
6156 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6157 msg = result.fail_msg
if msg:
6159 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6161 _ShutdownInstanceDisks(self, instance)
6164 class LUInstanceReinstall(LogicalUnit):
6165 """Reinstall an instance.
6168 HPATH = "instance-reinstall"
6169 HTYPE = constants.HTYPE_INSTANCE
6172 def ExpandNames(self):
6173 self._ExpandAndLockInstance()
6175 def BuildHooksEnv(self):
6178 This runs on master, primary and secondary nodes of the instance.
6181 return _BuildInstanceHookEnvByObject(self, self.instance)
6183 def BuildHooksNodes(self):
6184 """Build hooks nodes.
6187 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
return (nl, nl)
6190 def CheckPrereq(self):
6191 """Check prerequisites.
6193 This checks that the instance is in the cluster and is not running.
6196 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6197 assert instance is not None, \
6198 "Cannot retrieve locked instance %s" % self.op.instance_name
6199 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6200 " offline, cannot reinstall")
6201 for node in instance.secondary_nodes:
6202 _CheckNodeOnline(self, node, "Instance secondary node offline,"
6203 " cannot reinstall")
6205 if instance.disk_template == constants.DT_DISKLESS:
6206 raise errors.OpPrereqError("Instance '%s' has no disks" %
6207 self.op.instance_name,
errors.ECODE_INVAL)
6209 _CheckInstanceDown(self, instance, "cannot reinstall")
6211 if self.op.os_type is not None:
6213 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6214 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6215 instance_os = self.op.os_type
else:
6217 instance_os = instance.os
6219 nodelist = list(instance.all_nodes)
6221 if self.op.osparams:
6222 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6223 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6224 self.os_inst = i_osdict # the new dict (without defaults)
else:
self.os_inst = {}

6228 self.instance = instance
6230 def Exec(self, feedback_fn):
6231 """Reinstall the instance.
6234 inst = self.instance
6236 if self.op.os_type is not None:
6237 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6238 inst.os = self.op.os_type
6239 # Write to configuration
6240 self.cfg.Update(inst, feedback_fn)
6242 _StartInstanceDisks(self, inst, None)
try:
6244 feedback_fn("Running the instance OS create scripts...")
6245 # FIXME: pass debug option from opcode to backend
6246 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
6247 self.op.debug_level,
6248 osparams=self.os_inst)
6249 result.Raise("Could not install OS for instance %s on node %s" %
6250 (inst.name, inst.primary_node))
finally:
6252 _ShutdownInstanceDisks(self, inst)
6255 class LUInstanceRecreateDisks(LogicalUnit):
6256 """Recreate an instance's missing disks.
6259 HPATH = "instance-recreate-disks"
6260 HTYPE = constants.HTYPE_INSTANCE
6263 def CheckArguments(self):
6264 # normalise the disk list
6265 self.op.disks = sorted(frozenset(self.op.disks))
6267 def ExpandNames(self):
6268 self._ExpandAndLockInstance()
6269 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
if self.op.nodes:
6271 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6272 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
else:
6274 self.needed_locks[locking.LEVEL_NODE] = []
6276 def DeclareLocks(self, level):
6277 if level == locking.LEVEL_NODE:
6278 # if we replace the nodes, we only need to lock the old primary,
6279 # otherwise we need to lock all nodes for disk re-creation
6280 primary_only = bool(self.op.nodes)
6281 self._LockInstancesNodes(primary_only=primary_only)
6283 def BuildHooksEnv(self):
6286 This runs on master, primary and secondary nodes of the instance.
6289 return _BuildInstanceHookEnvByObject(self, self.instance)
6291 def BuildHooksNodes(self):
6292 """Build hooks nodes.
6295 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
return (nl, nl)
6298 def CheckPrereq(self):
6299 """Check prerequisites.
6301 This checks that the instance is in the cluster and is not running.
6304 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6305 assert instance is not None, \
6306 "Cannot retrieve locked instance %s" % self.op.instance_name
if self.op.nodes:
6308 if len(self.op.nodes) != len(instance.all_nodes):
6309 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6310 " %d replacement nodes were specified" %
6311 (instance.name, len(instance.all_nodes),
6312 len(self.op.nodes)),
errors.ECODE_INVAL)
6314 assert instance.disk_template != constants.DT_DRBD8 or \
6315 len(self.op.nodes) == 2
6316 assert instance.disk_template != constants.DT_PLAIN or \
6317 len(self.op.nodes) == 1
6318 primary_node = self.op.nodes[0]
else:
6320 primary_node = instance.primary_node
6321 _CheckNodeOnline(self, primary_node)
6323 if instance.disk_template == constants.DT_DISKLESS:
6324 raise errors.OpPrereqError("Instance '%s' has no disks" %
6325 self.op.instance_name, errors.ECODE_INVAL)
6326 # if we replace nodes *and* the old primary is offline, we don't check
6328 assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
6329 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6330 if not (self.op.nodes and old_pnode.offline):
6331 _CheckInstanceDown(self, instance, "cannot recreate disks")
6333 if not self.op.disks:
6334 self.op.disks = range(len(instance.disks))
6336 for idx in self.op.disks:
6337 if idx >= len(instance.disks):
6338 raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
errors.ECODE_INVAL)
6340 if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6341 raise errors.OpPrereqError("Can't recreate disks partially and"
6342 " change the nodes at the same time",
errors.ECODE_INVAL)
6344 self.instance = instance
6346 def Exec(self, feedback_fn):
6347 """Recreate the disks.
6350 instance = self.instance
to_skip = []
6353 mods = [] # keeps track of needed logical_id changes
6355 for idx, disk in enumerate(instance.disks):
6356 if idx not in self.op.disks: # disk idx has not been passed in
to_skip.append(idx)
continue
6359 # update secondaries for disks, if needed
if self.op.nodes:
6361 if disk.dev_type == constants.LD_DRBD8:
6362 # need to update the nodes and minors
6363 assert len(self.op.nodes) == 2
6364 assert len(disk.logical_id) == 6 # otherwise disk internals have changed
6366 (_, _, old_port, _, _, old_secret) = disk.logical_id
6367 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6368 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6369 new_minors[0], new_minors[1], old_secret)
6370 assert len(disk.logical_id) == len(new_id)
6371 mods.append((idx, new_id))
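# Illustrative logical_id rewrite (hypothetical values): moving a DRBD disk
# from nodes ("nodeA", "nodeB") to ("nodeC", "nodeD") keeps port and secret
# but allocates fresh minors:
#   ("nodeA", "nodeB", 11000, 0, 0, "secret")
#     -> ("nodeC", "nodeD", 11000, 1, 3, "secret")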
6373 # now that we have passed all asserts above, we can apply the mods
6374 # in a single run (to avoid partial changes)
6375 for idx, new_id in mods:
6376 instance.disks[idx].logical_id = new_id
6378 # change primary node, if needed
if self.op.nodes:
6380 instance.primary_node = self.op.nodes[0]
6381 self.LogWarning("Changing the instance's nodes, you will have to"
6382 " remove any disks left on the older nodes manually")
if self.op.nodes:
6385 self.cfg.Update(instance, feedback_fn)
6387 _CreateDisks(self, instance, to_skip=to_skip)
6390 class LUInstanceRename(LogicalUnit):
6391 """Rename an instance.
6394 HPATH = "instance-rename"
6395 HTYPE = constants.HTYPE_INSTANCE
6397 def CheckArguments(self):
6401 if self.op.ip_check and not self.op.name_check:
6402 # TODO: make the ip check more flexible and not depend on the name check
6403 raise errors.OpPrereqError("IP address check requires a name check",
errors.ECODE_INVAL)
6406 def BuildHooksEnv(self):
6409 This runs on master, primary and secondary nodes of the instance.
6412 env = _BuildInstanceHookEnvByObject(self, self.instance)
6413 env["INSTANCE_NEW_NAME"] = self.op.new_name
return env
6416 def BuildHooksNodes(self):
6417 """Build hooks nodes.
6420 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
return (nl, nl)
6423 def CheckPrereq(self):
6424 """Check prerequisites.
6426 This checks that the instance is in the cluster and is not running.
6429 self.op.instance_name = _ExpandInstanceName(self.cfg,
6430 self.op.instance_name)
6431 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6432 assert instance is not None
6433 _CheckNodeOnline(self, instance.primary_node)
6434 _CheckInstanceDown(self, instance, "cannot rename")
6435 self.instance = instance
6437 new_name = self.op.new_name
6438 if self.op.name_check:
6439 hostname = netutils.GetHostname(name=new_name)
6440 if hostname.name != new_name:
6441 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
hostname.name)
6443 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6444 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6445 " same as given hostname '%s'") %
6446 (hostname.name, self.op.new_name),
errors.ECODE_INVAL)
6448 new_name = self.op.new_name = hostname.name
6449 if (self.op.ip_check and
6450 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6451 raise errors.OpPrereqError("IP %s of instance %s already in use" %
6452 (hostname.ip, new_name),
6453 errors.ECODE_NOTUNIQUE)
6455 instance_list = self.cfg.GetInstanceList()
6456 if new_name in instance_list and new_name != instance.name:
6457 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6458 new_name, errors.ECODE_EXISTS)
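# Example (illustrative): with name_check=True a new_name of "web" may
# resolve to "web.example.com"; the resolved FQDN is what gets stored, and
# it must collide neither with an existing instance nor with a live IP.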
6460 def Exec(self, feedback_fn):
6461 """Rename the instance.
6464 inst = self.instance
6465 old_name = inst.name
6467 rename_file_storage = False
6468 if (inst.disk_template in constants.DTS_FILEBASED and
6469 self.op.new_name != inst.name):
6470 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6471 rename_file_storage = True
6473 self.cfg.RenameInstance(inst.name, self.op.new_name)
6474 # Change the instance lock. This is definitely safe while we hold the BGL.
6475 # Otherwise the new lock would have to be added in acquired mode.
6477 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6478 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6480 # re-read the instance from the configuration after rename
6481 inst = self.cfg.GetInstanceInfo(self.op.new_name)
6483 if rename_file_storage:
6484 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6485 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6486 old_file_storage_dir,
6487 new_file_storage_dir)
6488 result.Raise("Could not rename on node %s directory '%s' to '%s'"
6489 " (but the instance has been renamed in Ganeti)" %
6490 (inst.primary_node, old_file_storage_dir,
6491 new_file_storage_dir))
6493 _StartInstanceDisks(self, inst, None)
try:
6495 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6496 old_name, self.op.debug_level)
6497 msg = result.fail_msg
if msg:
6499 msg = ("Could not run OS rename script for instance %s on node %s"
6500 " (but the instance has been renamed in Ganeti): %s" %
6501 (inst.name, inst.primary_node, msg))
6502 self.proc.LogWarning(msg)
finally:
6504 _ShutdownInstanceDisks(self, inst)

return inst.name
6509 class LUInstanceRemove(LogicalUnit):
6510 """Remove an instance.
6513 HPATH = "instance-remove"
6514 HTYPE = constants.HTYPE_INSTANCE
6517 def ExpandNames(self):
6518 self._ExpandAndLockInstance()
6519 self.needed_locks[locking.LEVEL_NODE] = []
6520 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6522 def DeclareLocks(self, level):
6523 if level == locking.LEVEL_NODE:
6524 self._LockInstancesNodes()
6526 def BuildHooksEnv(self):
6529 This runs on master, primary and secondary nodes of the instance.
6532 env = _BuildInstanceHookEnvByObject(self, self.instance)
6533 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
return env
6536 def BuildHooksNodes(self):
6537 """Build hooks nodes.
6540 nl = [self.cfg.GetMasterNode()]
6541 nl_post = list(self.instance.all_nodes) + nl
6542 return (nl, nl_post)
6544 def CheckPrereq(self):
6545 """Check prerequisites.
6547 This checks that the instance is in the cluster.
6550 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6551 assert self.instance is not None, \
6552 "Cannot retrieve locked instance %s" % self.op.instance_name
6554 def Exec(self, feedback_fn):
6555 """Remove the instance.
6558 instance = self.instance
6559 logging.info("Shutting down instance %s on node %s",
6560 instance.name, instance.primary_node)
6562 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6563 self.op.shutdown_timeout)
6564 msg = result.fail_msg
if msg:
6566 if self.op.ignore_failures:
6567 feedback_fn("Warning: can't shutdown instance: %s" % msg)
else:
6569 raise errors.OpExecError("Could not shutdown instance %s on"
" node %s: %s" %
6571 (instance.name, instance.primary_node, msg))
6573 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6576 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6577 """Utility function to remove an instance.
6580 logging.info("Removing block devices for instance %s", instance.name)
6582 if not _RemoveDisks(lu, instance):
6583 if not ignore_failures:
6584 raise errors.OpExecError("Can't remove instance's disks")
6585 feedback_fn("Warning: can't remove instance's disks")
6587 logging.info("Removing instance %s out of cluster config", instance.name)
6589 lu.cfg.RemoveInstance(instance.name)
6591 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6592 "Instance lock removal conflict"
6594 # Remove lock for the instance
6595 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6598 class LUInstanceQuery(NoHooksLU):
6599 """Logical unit for querying instances.
6602 # pylint: disable=W0142
6605 def CheckArguments(self):
6606 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6607 self.op.output_fields, self.op.use_locking)
6609 def ExpandNames(self):
6610 self.iq.ExpandNames(self)
6612 def DeclareLocks(self, level):
6613 self.iq.DeclareLocks(self, level)
6615 def Exec(self, feedback_fn):
6616 return self.iq.OldStyleQuery(self)
6619 class LUInstanceFailover(LogicalUnit):
6620 """Failover an instance.
6623 HPATH = "instance-failover"
6624 HTYPE = constants.HTYPE_INSTANCE
6627 def CheckArguments(self):
6628 """Check the arguments.
6631 self.iallocator = getattr(self.op, "iallocator", None)
6632 self.target_node = getattr(self.op, "target_node", None)
6634 def ExpandNames(self):
6635 self._ExpandAndLockInstance()
6637 if self.op.target_node is not None:
6638 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6640 self.needed_locks[locking.LEVEL_NODE] = []
6641 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6643 ignore_consistency = self.op.ignore_consistency
6644 shutdown_timeout = self.op.shutdown_timeout
6645 self._migrater = TLMigrateInstance(self, self.op.instance_name,
cleanup=False,
failover=True,
6648 ignore_consistency=ignore_consistency,
6649 shutdown_timeout=shutdown_timeout)
6650 self.tasklets = [self._migrater]
6652 def DeclareLocks(self, level):
6653 if level == locking.LEVEL_NODE:
6654 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6655 if instance.disk_template in constants.DTS_EXT_MIRROR:
6656 if self.op.target_node is None:
6657 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6659 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6660 self.op.target_node]
6661 del self.recalculate_locks[locking.LEVEL_NODE]
6663 self._LockInstancesNodes()
6665 def BuildHooksEnv(self):
6668 This runs on master, primary and secondary nodes of the instance.
6671 instance = self._migrater.instance
6672 source_node = instance.primary_node
6673 target_node = self.op.target_node
env = {
6675 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6676 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6677 "OLD_PRIMARY": source_node,
6678 "NEW_PRIMARY": target_node,
}
6681 if instance.disk_template in constants.DTS_INT_MIRROR:
6682 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6683 env["NEW_SECONDARY"] = source_node
6685 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6687 env.update(_BuildInstanceHookEnvByObject(self, instance))

return env
6691 def BuildHooksNodes(self):
6692 """Build hooks nodes.
6695 instance = self._migrater.instance
6696 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6697 return (nl, nl + [instance.primary_node])
6700 class LUInstanceMigrate(LogicalUnit):
6701 """Migrate an instance.
6703 This is migration without shutting down, compared to the failover,
6704 which is done with shutdown.
6707 HPATH = "instance-migrate"
6708 HTYPE = constants.HTYPE_INSTANCE
6711 def ExpandNames(self):
6712 self._ExpandAndLockInstance()
6714 if self.op.target_node is not None:
6715 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6717 self.needed_locks[locking.LEVEL_NODE] = []
6718 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6720 self._migrater = TLMigrateInstance(self, self.op.instance_name,
6721 cleanup=self.op.cleanup,
failover=False,
6723 fallback=self.op.allow_failover)
6724 self.tasklets = [self._migrater]
6726 def DeclareLocks(self, level):
6727 if level == locking.LEVEL_NODE:
6728 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6729 if instance.disk_template in constants.DTS_EXT_MIRROR:
6730 if self.op.target_node is None:
6731 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6733 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6734 self.op.target_node]
6735 del self.recalculate_locks[locking.LEVEL_NODE]
6737 self._LockInstancesNodes()
6739 def BuildHooksEnv(self):
6742 This runs on master, primary and secondary nodes of the instance.
6745 instance = self._migrater.instance
6746 source_node = instance.primary_node
6747 target_node = self.op.target_node
6748 env = _BuildInstanceHookEnvByObject(self, instance)
env.update({
6750 "MIGRATE_LIVE": self._migrater.live,
6751 "MIGRATE_CLEANUP": self.op.cleanup,
6752 "OLD_PRIMARY": source_node,
6753 "NEW_PRIMARY": target_node,
})
6756 if instance.disk_template in constants.DTS_INT_MIRROR:
6757 env["OLD_SECONDARY"] = target_node
6758 env["NEW_SECONDARY"] = source_node
6760 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None

return env
6764 def BuildHooksNodes(self):
6765 """Build hooks nodes.
6768 instance = self._migrater.instance
6769 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6770 return (nl, nl + [instance.primary_node])
6773 class LUInstanceMove(LogicalUnit):
6774 """Move an instance by data-copying.
6777 HPATH = "instance-move"
6778 HTYPE = constants.HTYPE_INSTANCE
6781 def ExpandNames(self):
6782 self._ExpandAndLockInstance()
6783 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6784 self.op.target_node = target_node
6785 self.needed_locks[locking.LEVEL_NODE] = [target_node]
6786 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6788 def DeclareLocks(self, level):
6789 if level == locking.LEVEL_NODE:
6790 self._LockInstancesNodes(primary_only=True)
6792 def BuildHooksEnv(self):
6795 This runs on master, primary and secondary nodes of the instance.

"""
env = {
6799 "TARGET_NODE": self.op.target_node,
6800 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
}
6802 env.update(_BuildInstanceHookEnvByObject(self, self.instance))

return env
6805 def BuildHooksNodes(self):
6806 """Build hooks nodes.
nl = [
6810 self.cfg.GetMasterNode(),
6811 self.instance.primary_node,
6812 self.op.target_node,
]

return (nl, nl)
6816 def CheckPrereq(self):
6817 """Check prerequisites.
6819 This checks that the instance is in the cluster.
6822 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6823 assert self.instance is not None, \
6824 "Cannot retrieve locked instance %s" % self.op.instance_name
6826 node = self.cfg.GetNodeInfo(self.op.target_node)
6827 assert node is not None, \
6828 "Cannot retrieve locked node %s" % self.op.target_node
6830 self.target_node = target_node = node.name
6832 if target_node == instance.primary_node:
6833 raise errors.OpPrereqError("Instance %s is already on the node %s" %
6834 (instance.name, target_node),
errors.ECODE_STATE)
6837 bep = self.cfg.GetClusterInfo().FillBE(instance)
6839 for idx, dsk in enumerate(instance.disks):
6840 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6841 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6842 " cannot copy" % idx, errors.ECODE_STATE)
6844 _CheckNodeOnline(self, target_node)
6845 _CheckNodeNotDrained(self, target_node)
6846 _CheckNodeVmCapable(self, target_node)
6848 if instance.admin_up:
6849 # check memory requirements on the target node
6850 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6851 instance.name, bep[constants.BE_MEMORY],
6852 instance.hypervisor)
else:
6854 self.LogInfo("Not checking memory on the secondary node as"
6855 " instance will not be started")
6857 # check bridge existence
6858 _CheckInstanceBridgesExist(self, instance, node=target_node)
6860 def Exec(self, feedback_fn):
6861 """Move an instance.
6863 The move is done by shutting it down on its present node, copying
6864 the data over (slow) and starting it on the new node.
6867 instance = self.instance
6869 source_node = instance.primary_node
6870 target_node = self.target_node
6872 self.LogInfo("Shutting down instance %s on source node %s",
6873 instance.name, source_node)
6875 result = self.rpc.call_instance_shutdown(source_node, instance,
6876 self.op.shutdown_timeout)
6877 msg = result.fail_msg
6879 if self.op.ignore_consistency:
6880 self.proc.LogWarning("Could not shutdown instance %s on node %s."
6881 " Proceeding anyway. Please make sure node"
6882 " %s is down. Error details: %s",
6883 instance.name, source_node, source_node, msg)
6885 raise errors.OpExecError("Could not shutdown instance %s on"
" node %s: %s" %
6887 (instance.name, source_node, msg))
6889 # create the target disks
try:
6891 _CreateDisks(self, instance, target_node=target_node)
6892 except errors.OpExecError:
6893 self.LogWarning("Device creation failed, reverting...")
try:
6895 _RemoveDisks(self, instance, target_node=target_node)
finally:
6897 self.cfg.ReleaseDRBDMinors(instance.name)
raise
6900 cluster_name = self.cfg.GetClusterInfo().cluster_name
6903 # activate, get path, copy the data over
errs = []
6904 for idx, disk in enumerate(instance.disks):
6905 self.LogInfo("Copying data for disk %d", idx)
6906 result = self.rpc.call_blockdev_assemble(target_node, disk,
6907 instance.name, True, idx)
if result.fail_msg:
6909 self.LogWarning("Can't assemble newly created disk %d: %s",
6910 idx, result.fail_msg)
6911 errs.append(result.fail_msg)
else:
6913 dev_path = result.payload
6914 result = self.rpc.call_blockdev_export(source_node, disk,
6915 target_node, dev_path,
cluster_name)
if result.fail_msg:
6918 self.LogWarning("Can't copy data over for disk %d: %s",
6919 idx, result.fail_msg)
6920 errs.append(result.fail_msg)
if errs:
6924 self.LogWarning("Some disks failed to copy, aborting")
try:
6926 _RemoveDisks(self, instance, target_node=target_node)
finally:
6928 self.cfg.ReleaseDRBDMinors(instance.name)
6929 raise errors.OpExecError("Errors during disk copy: %s" %
(",".join(errs),))
6932 instance.primary_node = target_node
6933 self.cfg.Update(instance, feedback_fn)
6935 self.LogInfo("Removing the disks on the original node")
6936 _RemoveDisks(self, instance, target_node=source_node)
6938 # Only start the instance if it's marked as up
6939 if instance.admin_up:
6940 self.LogInfo("Starting instance %s on node %s",
6941 instance.name, target_node)
6943 disks_ok, _ = _AssembleInstanceDisks(self, instance,
6944 ignore_secondaries=True)
if not disks_ok:
6946 _ShutdownInstanceDisks(self, instance)
6947 raise errors.OpExecError("Can't activate the instance's disks")
6949 result = self.rpc.call_instance_start(target_node, instance,
None, None, False)
6951 msg = result.fail_msg
if msg:
6953 _ShutdownInstanceDisks(self, instance)
6954 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6955 (instance.name, target_node, msg))
6958 class LUNodeMigrate(LogicalUnit):
6959 """Migrate all instances from a node.
6962 HPATH = "node-migrate"
6963 HTYPE = constants.HTYPE_NODE
6966 def CheckArguments(self):
_CheckIAllocatorOrNode(self, "iallocator", "target_node")
6969 def ExpandNames(self):
6970 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6972 self.share_locks = _ShareAll()
6973 self.needed_locks = {
6974 locking.LEVEL_NODE: [self.op.node_name],
6977 def BuildHooksEnv(self):
6980 This runs on the master, the primary and all the secondaries.
6984 "NODE_NAME": self.op.node_name,
6987 def BuildHooksNodes(self):
6988 """Build hooks nodes.
6991 nl = [self.cfg.GetMasterNode()]
return (nl, nl)
6994 def CheckPrereq(self):
pass
6997 def Exec(self, feedback_fn):
6998 # Prepare jobs for migration instances
jobs = [
7000 [opcodes.OpInstanceMigrate(instance_name=inst.name,
7003 iallocator=self.op.iallocator,
7004 target_node=self.op.target_node)]
7005 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
]
7008 # TODO: Run iallocator in this opcode and pass correct placement options to
7009 # OpInstanceMigrate. Since other jobs can modify the cluster between
7010 # running the iallocator and the actual migration, a good consistency model
7011 # will have to be found.
7013 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7014 frozenset([self.op.node_name]))
7016 return ResultWithJobs(jobs)
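# Illustrative shape of the submitted jobs (hypothetical instance names):
#   jobs == [[OpInstanceMigrate(instance_name="inst1", ...)],
#            [OpInstanceMigrate(instance_name="inst2", ...)]]
# i.e. one single-opcode job per primary instance, so each migration is
# scheduled and fails independently.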
7019 class TLMigrateInstance(Tasklet):
7020 """Tasklet class for instance migration.
7023 @ivar live: whether the migration will be done live or non-live;
7024 this variable is initialized only after CheckPrereq has run
7025 @type cleanup: boolean
7026 @ivar cleanup: Whether we should clean up after a failed migration
7027 @type iallocator: string
7028 @ivar iallocator: The iallocator used to determine target_node
7029 @type target_node: string
7030 @ivar target_node: If given, the target_node to reallocate the instance to
7031 @type failover: boolean
7032 @ivar failover: Whether operation results in failover or migration
7033 @type fallback: boolean
7034 @ivar fallback: Whether fallback to failover is allowed if migration is not
possible
7036 @type ignore_consistency: boolean
7037 @ivar ignore_consistency: Whether we should ignore consistency between the
source and the target node
7039 @type shutdown_timeout: int
7040 @ivar shutdown_timeout: the timeout to use for the instance shutdown in
case of failover
7043 def __init__(self, lu, instance_name, cleanup=False,
7044 failover=False, fallback=False,
7045 ignore_consistency=False,
7046 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
7047 """Initializes this class.
7050 Tasklet.__init__(self, lu)
7053 self.instance_name = instance_name
7054 self.cleanup = cleanup
7055 self.live = False # will be overridden later
7056 self.failover = failover
7057 self.fallback = fallback
7058 self.ignore_consistency = ignore_consistency
7059 self.shutdown_timeout = shutdown_timeout
7061 def CheckPrereq(self):
7062 """Check prerequisites.
7064 This checks that the instance is in the cluster.
7067 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7068 instance = self.cfg.GetInstanceInfo(instance_name)
7069 assert instance is not None
7070 self.instance = instance
7072 if (not self.cleanup and not instance.admin_up and not self.failover and
self.fallback):
7074 self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
" to failover")
7076 self.failover = True
7078 if instance.disk_template not in constants.DTS_MIRRORED:
if self.failover:
text = "failovers"
else:
text = "migrations"
7083 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7084 " %s" % (instance.disk_template, text),
errors.ECODE_STATE)
7087 if instance.disk_template in constants.DTS_EXT_MIRROR:
7088 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7090 if self.lu.op.iallocator:
7091 self._RunAllocator()
else:
7093 # We set self.target_node as it is required by
# BuildHooksEnv
7095 self.target_node = self.lu.op.target_node

7097 # self.target_node is already populated, either directly or by the
# iallocator run
7099 target_node = self.target_node
7100 if self.target_node == instance.primary_node:
7101 raise errors.OpPrereqError("Cannot migrate instance %s"
7102 " to its primary (%s)" %
7103 (instance.name, instance.primary_node))
7105 if len(self.lu.tasklets) == 1:
7106 # It is safe to release locks only when we're the only tasklet in the LU
7108 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7109 keep=[instance.primary_node, self.target_node])
else:
7112 secondary_nodes = instance.secondary_nodes
7113 if not secondary_nodes:
7114 raise errors.ConfigurationError("No secondary node but using"
7115 " %s disk template" %
7116 instance.disk_template)
7117 target_node = secondary_nodes[0]
7118 if self.lu.op.iallocator or (self.lu.op.target_node and
7119 self.lu.op.target_node != target_node):
if self.failover:
7121 text = "failed over"
else:
text = "migrated"
7124 raise errors.OpPrereqError("Instances with disk template %s cannot"
7125 " be %s to arbitrary nodes"
7126 " (neither an iallocator nor a target"
7127 " node can be passed)" %
7128 (instance.disk_template, text),
errors.ECODE_INVAL)
7131 i_be = self.cfg.GetClusterInfo().FillBE(instance)
7133 # check memory requirements on the secondary node
7134 if not self.failover or instance.admin_up:
7135 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7136 instance.name, i_be[constants.BE_MEMORY],
7137 instance.hypervisor)
else:
7139 self.lu.LogInfo("Not checking memory on the secondary node as"
7140 " instance will not be started")
7142 # check bridge existance
7143 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7145 if not self.cleanup:
7146 _CheckNodeNotDrained(self.lu, target_node)
7147 if not self.failover:
7148 result = self.rpc.call_instance_migratable(instance.primary_node,
instance)
7150 if result.fail_msg and self.fallback:
7151 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
" failover")
7153 self.failover = True
else:
7155 result.Raise("Can't migrate, please use failover",
7156 prereq=True, ecode=errors.ECODE_STATE)
7158 assert not (self.failover and self.cleanup)
7160 if not self.failover:
7161 if self.lu.op.live is not None and self.lu.op.mode is not None:
7162 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7163 " parameters is accepted",
errors.ECODE_INVAL)
7165 if self.lu.op.live is not None:
if self.lu.op.live:
7167 self.lu.op.mode = constants.HT_MIGRATION_LIVE
else:
7169 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7170 # reset the 'live' parameter to None so that repeated
7171 # invocations of CheckPrereq do not raise an exception
7172 self.lu.op.live = None
7173 elif self.lu.op.mode is None:
7174 # read the default value from the hypervisor
7175 i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
skip_globals=False)
7177 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7179 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
else:
7181 # Failover is never live
self.live = False
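# Decision table for the mode resolution above (migration case only):
#   op.live is True              -> mode = live
#   op.live is False             -> mode = non-live
#   op.live is None, op.mode set -> op.mode used as given
#   both None                    -> hypervisor's migration_mode default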
7184 def _RunAllocator(self):
7185 """Run the allocator based on input opcode.
7188 ial = IAllocator(self.cfg, self.rpc,
7189 mode=constants.IALLOCATOR_MODE_RELOC,
7190 name=self.instance_name,
7191 # TODO See why hail breaks with a single node below
7192 relocate_from=[self.instance.primary_node,
7193 self.instance.primary_node],
)
7196 ial.Run(self.lu.op.iallocator)
if not ial.success:
7199 raise errors.OpPrereqError("Can't compute nodes using"
7200 " iallocator '%s': %s" %
7201 (self.lu.op.iallocator, ial.info),
errors.ECODE_NORES)
7203 if len(ial.result) != ial.required_nodes:
7204 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7205 " of nodes (%s), required %s" %
7206 (self.lu.op.iallocator, len(ial.result),
7207 ial.required_nodes), errors.ECODE_FAULT)
7208 self.target_node = ial.result[0]
7209 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7210 self.instance_name, self.lu.op.iallocator,
7211 utils.CommaJoin(ial.result))
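# Example (illustrative): for a relocation request the allocator returns a
# single node name, e.g. ial.result == ["node3"], which becomes the
# migration target.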
7213 def _WaitUntilSync(self):
7214 """Poll with custom rpc for disk sync.
7216 This uses our own step-based rpc call.
7219 self.feedback_fn("* wait until resync is done")
all_done = False
while not all_done:
all_done = True
7223 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
self.nodes_ip,
7225 self.instance.disks)
min_percent = 100
7227 for node, nres in result.items():
7228 nres.Raise("Cannot resync disks on node %s" % node)
7229 node_done, node_percent = nres.payload
7230 all_done = all_done and node_done
7231 if node_percent is not None:
7232 min_percent = min(min_percent, node_percent)
if not all_done:
7234 if min_percent < 100:
7235 self.feedback_fn(" - progress: %.1f%%" % min_percent)
time.sleep(2)
7238 def _EnsureSecondary(self, node):
7239 """Demote a node to secondary.
7242 self.feedback_fn("* switching node %s to secondary mode" % node)
7244 for dev in self.instance.disks:
7245 self.cfg.SetDiskID(dev, node)
7247 result = self.rpc.call_blockdev_close(node, self.instance.name,
7248 self.instance.disks)
7249 result.Raise("Cannot change disk to secondary on node %s" % node)
7251 def _GoStandalone(self):
7252 """Disconnect from the network.
7255 self.feedback_fn("* changing into standalone mode")
7256 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7257 self.instance.disks)
7258 for node, nres in result.items():
7259 nres.Raise("Cannot disconnect disks node %s" % node)
7261 def _GoReconnect(self, multimaster):
7262 """Reconnect to the network.
"""
if multimaster:
msg = "dual-master"
else:
7268 msg = "single-master"
7269 self.feedback_fn("* changing disks into %s mode" % msg)
7270 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7271 self.instance.disks,
7272 self.instance.name, multimaster)
7273 for node, nres in result.items():
7274 nres.Raise("Cannot change disks config on node %s" % node)
7276 def _ExecCleanup(self):
7277 """Try to cleanup after a failed migration.
7279 The cleanup is done by:
7280 - check that the instance is running only on one node
7281 (and update the config if needed)
7282 - change disks on its secondary node to secondary
7283 - wait until disks are fully synchronized
7284 - disconnect from the network
7285 - change disks into single-master mode
7286 - wait again until disks are fully synchronized
7289 instance = self.instance
7290 target_node = self.target_node
7291 source_node = self.source_node
7293 # check running on only one node
7294 self.feedback_fn("* checking where the instance actually runs"
7295 " (if this hangs, the hypervisor might be in"
7297 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7298 for node, result in ins_l.items():
7299 result.Raise("Can't contact node %s" % node)
7301 runningon_source = instance.name in ins_l[source_node].payload
7302 runningon_target = instance.name in ins_l[target_node].payload
7304 if runningon_source and runningon_target:
7305 raise errors.OpExecError("Instance seems to be running on two nodes,"
7306 " or the hypervisor is confused; you will have"
7307 " to ensure manually that it runs only on one"
7308 " and restart this operation")
7310 if not (runningon_source or runningon_target):
7311 raise errors.OpExecError("Instance does not seem to be running at all;"
7312 " in this case it's safer to repair by"
7313 " running 'gnt-instance stop' to ensure disk"
7314 " shutdown, and then restarting it")
7316 if runningon_target:
7317 # the migration has actually succeeded, we need to update the config
7318 self.feedback_fn("* instance running on secondary node (%s),"
7319 " updating config" % target_node)
7320 instance.primary_node = target_node
7321 self.cfg.Update(instance, self.feedback_fn)
7322 demoted_node = source_node
7324 self.feedback_fn("* instance confirmed to be running on its"
7325 " primary node (%s)" % source_node)
7326 demoted_node = target_node
7328 if instance.disk_template in constants.DTS_INT_MIRROR:
7329 self._EnsureSecondary(demoted_node)
try:
7331 self._WaitUntilSync()
7332 except errors.OpExecError:
7333 # we ignore here errors, since if the device is standalone, it
7334 # won't be able to sync
pass
7336 self._GoStandalone()
7337 self._GoReconnect(False)
7338 self._WaitUntilSync()
7340 self.feedback_fn("* done")
7342 def _RevertDiskStatus(self):
7343 """Try to revert the disk status after a failed migration.
7346 target_node = self.target_node
7347 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
return

try:
7351 self._EnsureSecondary(target_node)
7352 self._GoStandalone()
7353 self._GoReconnect(False)
7354 self._WaitUntilSync()
7355 except errors.OpExecError, err:
7356 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7357 " please try to recover the instance manually;"
7358 " error '%s'" % str(err))
7360 def _AbortMigration(self):
7361 """Call the hypervisor code to abort a started migration.
7364 instance = self.instance
7365 target_node = self.target_node
7366 migration_info = self.migration_info
7368 abort_result = self.rpc.call_finalize_migration(target_node,
instance,
migration_info,
False)
7372 abort_msg = abort_result.fail_msg
if abort_msg:
7374 logging.error("Aborting migration failed on target node %s: %s",
7375 target_node, abort_msg)
7376 # Don't raise an exception here, as we still have to try to revert the
7377 # disk status, even if this step failed.
7379 def _ExecMigration(self):
7380 """Migrate an instance.
7382 The migration is done by:
7383 - change the disks into dual-master mode
7384 - wait until disks are fully synchronized again
7385 - migrate the instance
7386 - change disks on the new secondary node (the old primary) to secondary
7387 - wait until disks are fully synchronized
7388 - change disks into single-master mode
7391 instance = self.instance
7392 target_node = self.target_node
7393 source_node = self.source_node
7395 self.feedback_fn("* checking disk consistency between source and target")
7396 for dev in instance.disks:
7397 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7398 raise errors.OpExecError("Disk %s is degraded or not fully"
7399 " synchronized on target node,"
7400 " aborting migration" % dev.iv_name)
7402 # First get the migration information from the remote node
7403 result = self.rpc.call_migration_info(source_node, instance)
7404 msg = result.fail_msg
if msg:
7406 log_err = ("Failed fetching source migration information from %s: %s" %
(source_node, msg))
7408 logging.error(log_err)
7409 raise errors.OpExecError(log_err)
7411 self.migration_info = migration_info = result.payload
7413 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7414 # Then switch the disks to master/master mode
7415 self._EnsureSecondary(target_node)
7416 self._GoStandalone()
7417 self._GoReconnect(True)
7418 self._WaitUntilSync()
7420 self.feedback_fn("* preparing %s to accept the instance" % target_node)
7421 result = self.rpc.call_accept_instance(target_node,
instance,
migration_info,
7424 self.nodes_ip[target_node])
7426 msg = result.fail_msg
if msg:
7428 logging.error("Instance pre-migration failed, trying to revert"
7429 " disk status: %s", msg)
7430 self.feedback_fn("Pre-migration failed, aborting")
7431 self._AbortMigration()
7432 self._RevertDiskStatus()
7433 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7434 (instance.name, msg))
7436 self.feedback_fn("* migrating instance to %s" % target_node)
7437 result = self.rpc.call_instance_migrate(source_node, instance,
7438 self.nodes_ip[target_node],
self.live)
7440 msg = result.fail_msg
if msg:
7442 logging.error("Instance migration failed, trying to revert"
7443 " disk status: %s", msg)
7444 self.feedback_fn("Migration failed, aborting")
7445 self._AbortMigration()
7446 self._RevertDiskStatus()
7447 raise errors.OpExecError("Could not migrate instance %s: %s" %
7448 (instance.name, msg))
7450 instance.primary_node = target_node
7451 # distribute new instance config to the other nodes
7452 self.cfg.Update(instance, self.feedback_fn)
7454 result = self.rpc.call_finalize_migration(target_node,
instance,
migration_info,
True)
7458 msg = result.fail_msg
if msg:
7460 logging.error("Instance migration succeeded, but finalization failed:"
" %s", msg)
7462 raise errors.OpExecError("Could not finalize instance migration: %s" %
msg)
7465 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7466 self._EnsureSecondary(source_node)
7467 self._WaitUntilSync()
7468 self._GoStandalone()
7469 self._GoReconnect(False)
7470 self._WaitUntilSync()
7472 self.feedback_fn("* done")
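# Rough DRBD state walk used above for internally mirrored templates:
#   demote target to secondary -> standalone -> reconnect dual-primary ->
#   live-migrate -> demote old primary -> standalone -> reconnect
#   single-primary.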
7474 def _ExecFailover(self):
7475 """Failover an instance.
7477 The failover is done by shutting it down on its present node and
7478 starting it on the secondary.
7481 instance = self.instance
7482 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7484 source_node = instance.primary_node
7485 target_node = self.target_node
7487 if instance.admin_up:
7488 self.feedback_fn("* checking disk consistency between source and target")
7489 for dev in instance.disks:
7490 # for drbd, these are drbd over lvm
7491 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7492 if primary_node.offline:
7493 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
" target node %s" %
7495 (primary_node.name, dev.iv_name, target_node))
7496 elif not self.ignore_consistency:
7497 raise errors.OpExecError("Disk %s is degraded on target node,"
7498 " aborting failover" % dev.iv_name)
else:
7500 self.feedback_fn("* not checking disk consistency as instance is not"
" running")
7503 self.feedback_fn("* shutting down instance on source node")
7504 logging.info("Shutting down instance %s on node %s",
7505 instance.name, source_node)
7507 result = self.rpc.call_instance_shutdown(source_node, instance,
7508 self.shutdown_timeout)
7509 msg = result.fail_msg
7511 if self.ignore_consistency or primary_node.offline:
7512 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7513 " proceeding anyway; please make sure node"
7514 " %s is down; error details: %s",
7515 instance.name, source_node, source_node, msg)
else:
7517 raise errors.OpExecError("Could not shutdown instance %s on"
" node %s: %s" %
7519 (instance.name, source_node, msg))
7521 self.feedback_fn("* deactivating the instance's disks on source node")
7522 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
7523 raise errors.OpExecError("Can't shut down the instance's disks")
7525 instance.primary_node = target_node
7526 # distribute new instance config to the other nodes
7527 self.cfg.Update(instance, self.feedback_fn)
7529 # Only start the instance if it's marked as up
7530 if instance.admin_up:
7531 self.feedback_fn("* activating the instance's disks on target node %s" %
target_node)
7533 logging.info("Starting instance %s on node %s",
7534 instance.name, target_node)
7536 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
7537 ignore_secondaries=True)
if not disks_ok:
7539 _ShutdownInstanceDisks(self.lu, instance)
7540 raise errors.OpExecError("Can't activate the instance's disks")
7542 self.feedback_fn("* starting the instance on the target node %s" %
target_node)
7544 result = self.rpc.call_instance_start(target_node, instance, None, None,
False)
7546 msg = result.fail_msg
if msg:
7548 _ShutdownInstanceDisks(self.lu, instance)
7549 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7550 (instance.name, target_node, msg))
7552 def Exec(self, feedback_fn):
7553 """Perform the migration.
7556 self.feedback_fn = feedback_fn
7557 self.source_node = self.instance.primary_node
7559 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7560 if self.instance.disk_template in constants.DTS_INT_MIRROR:
7561 self.target_node = self.instance.secondary_nodes[0]
7562 # Otherwise self.target_node has been populated either
7563 # directly, or through an iallocator.
7565 self.all_nodes = [self.source_node, self.target_node]
7566 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
7567 in self.cfg.GetMultiNodeInfo(self.all_nodes))
7569     if self.failover:
7570       feedback_fn("Failover instance %s" % self.instance.name)
7571       self._ExecFailover()
7572     else:
7573       feedback_fn("Migrating instance %s" % self.instance.name)
7575       if self.cleanup:
7576         return self._ExecCleanup()
7577       else:
7578         return self._ExecMigration()
7581 def _CreateBlockDev(lu, node, instance, device, force_create,
7582                     info, force_open):
7583   """Create a tree of block devices on a given node.
7585   If this device type has to be created on secondaries, create it and
7586   all its children.
7588   If not, just recurse to children keeping the same 'force' value.
7590 @param lu: the lu on whose behalf we execute
7591 @param node: the node on which to create the device
7592 @type instance: L{objects.Instance}
7593 @param instance: the instance which owns the device
7594 @type device: L{objects.Disk}
7595 @param device: the device to create
7596   @type force_create: boolean
7597   @param force_create: whether to force creation of this device; this
7598       will be changed to True whenever we find a device which has
7599       CreateOnSecondary() attribute
7600   @param info: the extra 'metadata' we should attach to the device
7601       (this will be represented as a LVM tag)
7602   @type force_open: boolean
7603   @param force_open: this parameter will be passed to the
7604       L{backend.BlockdevCreate} function where it specifies
7605       whether we run on primary or not, and it affects both
7606       the child assembly and the device's own Open() execution
7608   """
7609   if device.CreateOnSecondary():
7610     force_create = True
7612   if device.children:
7613     for child in device.children:
7614       _CreateBlockDev(lu, node, instance, child, force_create,
7615                       info, force_open)
7617   if not force_create:
7618     return
7620   _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
7623 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
7624   """Create a single block device on a given node.
7626   This will not recurse over children of the device, so they must be
7627   created in advance.
7629   @param lu: the lu on whose behalf we execute
7630   @param node: the node on which to create the device
7631   @type instance: L{objects.Instance}
7632   @param instance: the instance which owns the device
7633   @type device: L{objects.Disk}
7634   @param device: the device to create
7635   @param info: the extra 'metadata' we should attach to the device
7636       (this will be represented as a LVM tag)
7637   @type force_open: boolean
7638   @param force_open: this parameter will be passed to the
7639       L{backend.BlockdevCreate} function where it specifies
7640       whether we run on primary or not, and it affects both
7641       the child assembly and the device's own Open() execution
7643   """
7644   lu.cfg.SetDiskID(device, node)
7645 result = lu.rpc.call_blockdev_create(node, device, device.size,
7646 instance.name, force_open, info)
7647 result.Raise("Can't create block device %s on"
7648 " node %s for instance %s" % (device, node, instance.name))
7649 if device.physical_id is None:
7650 device.physical_id = result.payload
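# Illustrative note (editor's sketch, not part of the original code): for a
# DRBD8 disk, _CreateBlockDev() first recurses into the LVM children; on a
# node where Disk.CreateOnSecondary() holds, force_create flips to True so
# the data and metadata LVs are created before the DRBD device is assembled
# on top of them via _CreateSingleBlockDev().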
7653 def _GenerateUniqueNames(lu, exts):
7654   """Generate a suitable LV name.
7656   This will generate a logical volume name for the given instance.
7658   """
7659   results = []
7660   for val in exts:
7661     new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
7662     results.append("%s%s" % (new_id, val))
7663   return results
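# Example (editor's illustration): _GenerateUniqueNames(lu, [".disk0",
# ".disk1"]) returns names such as ["<uuid>.disk0", "<uuid>.disk1"], each
# <uuid> being a fresh unique ID reserved against the current execution
# context.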
7666 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
7667 iv_name, p_minor, s_minor):
7668   """Generate a drbd8 device complete with its children.
7670   """
7671   assert len(vgnames) == len(names) == 2
7672   port = lu.cfg.AllocatePort()
7673   shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
7674   dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
7675                           logical_id=(vgnames[0], names[0]))
7676   dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7677                           logical_id=(vgnames[1], names[1]))
7678   drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
7679                           logical_id=(primary, secondary, port,
7680                                       p_minor, s_minor,
7681                                       shared_secret),
7682                           children=[dev_data, dev_meta],
7683                           iv_name=iv_name)
7684   return drbd_dev
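# Resulting structure (editor's illustration): the returned Disk object is a
# two-level tree,
#   LD_DRBD8 (logical_id = (primary, secondary, port, p_minor, s_minor,
#             shared_secret))
#     +- LD_LV data volume (size MB, in vgnames[0], name names[0])
#     +- LD_LV meta volume (128 MB, in vgnames[1], name names[1])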
7687 def _GenerateDiskTemplate(lu, template_name,
7688 instance_name, primary_node,
7689 secondary_nodes, disk_info,
7690 file_storage_dir, file_driver,
7691 base_index, feedback_fn):
7692   """Generate the entire disk layout for a given template type.
7694   """
7695   #TODO: compute space requirements
7697   vgname = lu.cfg.GetVGName()
7698   disk_count = len(disk_info)
7699   disks = []
7700   if template_name == constants.DT_DISKLESS:
7701     pass
7702   elif template_name == constants.DT_PLAIN:
7703 if len(secondary_nodes) != 0:
7704 raise errors.ProgrammerError("Wrong template configuration")
7706 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7707 for i in range(disk_count)])
7708 for idx, disk in enumerate(disk_info):
7709 disk_index = idx + base_index
7710 vg = disk.get(constants.IDISK_VG, vgname)
7711 feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7712 disk_dev = objects.Disk(dev_type=constants.LD_LV,
7713 size=disk[constants.IDISK_SIZE],
7714 logical_id=(vg, names[idx]),
7715 iv_name="disk/%d" % disk_index,
7716 mode=disk[constants.IDISK_MODE])
7717 disks.append(disk_dev)
7718 elif template_name == constants.DT_DRBD8:
7719 if len(secondary_nodes) != 1:
7720 raise errors.ProgrammerError("Wrong template configuration")
7721 remote_node = secondary_nodes[0]
7722 minors = lu.cfg.AllocateDRBDMinor(
7723       [primary_node, remote_node] * len(disk_info), instance_name)
7725     names = []
7726     for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7727 for i in range(disk_count)]):
7728 names.append(lv_prefix + "_data")
7729 names.append(lv_prefix + "_meta")
7730 for idx, disk in enumerate(disk_info):
7731 disk_index = idx + base_index
7732 data_vg = disk.get(constants.IDISK_VG, vgname)
7733 meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
7734 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7735                                       disk[constants.IDISK_SIZE],
7736                                       [data_vg, meta_vg],
7737                                       names[idx * 2:idx * 2 + 2],
7738 "disk/%d" % disk_index,
7739 minors[idx * 2], minors[idx * 2 + 1])
7740 disk_dev.mode = disk[constants.IDISK_MODE]
7741 disks.append(disk_dev)
7742 elif template_name == constants.DT_FILE:
7743 if len(secondary_nodes) != 0:
7744 raise errors.ProgrammerError("Wrong template configuration")
7746 opcodes.RequireFileStorage()
7748 for idx, disk in enumerate(disk_info):
7749 disk_index = idx + base_index
7750 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7751 size=disk[constants.IDISK_SIZE],
7752 iv_name="disk/%d" % disk_index,
7753 logical_id=(file_driver,
7754                                           "%s/disk%d" % (file_storage_dir,
7755                                                          disk_index)),
7756                               mode=disk[constants.IDISK_MODE])
7757 disks.append(disk_dev)
7758 elif template_name == constants.DT_SHARED_FILE:
7759 if len(secondary_nodes) != 0:
7760 raise errors.ProgrammerError("Wrong template configuration")
7762 opcodes.RequireSharedFileStorage()
7764 for idx, disk in enumerate(disk_info):
7765 disk_index = idx + base_index
7766 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7767 size=disk[constants.IDISK_SIZE],
7768 iv_name="disk/%d" % disk_index,
7769 logical_id=(file_driver,
7770                                           "%s/disk%d" % (file_storage_dir,
7771                                                          disk_index)),
7772                               mode=disk[constants.IDISK_MODE])
7773 disks.append(disk_dev)
7774 elif template_name == constants.DT_BLOCK:
7775 if len(secondary_nodes) != 0:
7776 raise errors.ProgrammerError("Wrong template configuration")
7778 for idx, disk in enumerate(disk_info):
7779 disk_index = idx + base_index
7780 disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7781 size=disk[constants.IDISK_SIZE],
7782 logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7783 disk[constants.IDISK_ADOPT]),
7784 iv_name="disk/%d" % disk_index,
7785 mode=disk[constants.IDISK_MODE])
7786 disks.append(disk_dev)
7788   else:
7789     raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
7790   return disks
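# Usage sketch (editor's illustration, arguments simplified): for a plain
# LVM instance with one 1024 MB disk,
#   _GenerateDiskTemplate(lu, constants.DT_PLAIN, "inst1.example.com",
#                         "node1", [], [{constants.IDISK_SIZE: 1024,
#                                        constants.IDISK_MODE: "rw"}],
#                         None, None, 0, feedback_fn)
# returns a single LD_LV Disk with iv_name "disk/0" in the cluster's default
# volume group.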
7793 def _GetInstanceInfoText(instance):
7794   """Compute the text that should be added to the disk's metadata.
7796   """
7797   return "originstname+%s" % instance.name
7800 def _CalcEta(time_taken, written, total_size):
7801   """Calculates the ETA based on size written and total size.
7803   @param time_taken: The time taken so far
7804   @param written: amount written so far
7805   @param total_size: The total size of data to be written
7806   @return: The remaining time in seconds
7808   """
7809 avg_time = time_taken / float(written)
7810 return (total_size - written) * avg_time
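# Worked example (editor's illustration): after 60 seconds spent writing
# 1024 MB of a 4096 MB disk, avg_time == 60 / 1024.0 seconds per MB, so the
# remaining (4096 - 1024) MB give an ETA of 3072 * 60 / 1024.0 == 180.0s.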
7813 def _WipeDisks(lu, instance):
7814 """Wipes instance disks.
7816   @type lu: L{LogicalUnit}
7817   @param lu: the logical unit on whose behalf we execute
7818   @type instance: L{objects.Instance}
7819   @param instance: the instance whose disks we should wipe
7820   @return: the success of the wipe
7822   """
7823   node = instance.primary_node
7825   for device in instance.disks:
7826     lu.cfg.SetDiskID(device, node)
7828   logging.info("Pause sync of instance %s disks", instance.name)
7829   result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
7831   for idx, success in enumerate(result.payload):
7832     if not success:
7833       logging.warn("pause-sync of instance %s for disks %d failed",
7834                    instance.name, idx)
7836   try:
7837     for idx, device in enumerate(instance.disks):
7838 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
7839 # MAX_WIPE_CHUNK at max
7840 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
7841 constants.MIN_WIPE_CHUNK_PERCENT)
7842       # we _must_ make this an int, otherwise rounding errors will
7843       # occur
7844       wipe_chunk_size = int(wipe_chunk_size)
7846 lu.LogInfo("* Wiping disk %d", idx)
7847 logging.info("Wiping disk %d for instance %s, node %s using"
7848                    " chunk size %s", idx, instance.name, node, wipe_chunk_size)
7850       offset = 0
7851       size = device.size
7852       last_output = 0
7853       start_time = time.time()
7855 while offset < size:
7856 wipe_size = min(wipe_chunk_size, size - offset)
7857 logging.debug("Wiping disk %d, offset %s, chunk %s",
7858 idx, offset, wipe_size)
7859 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
7860 result.Raise("Could not wipe disk %d at offset %d for size %d" %
7861                      (idx, offset, wipe_size))
7862         now = time.time()
7863         offset += wipe_size
7864         if now - last_output >= 60:
7865 eta = _CalcEta(now - start_time, offset, size)
7866 lu.LogInfo(" - done: %.1f%% ETA: %s" %
7867                      (offset / float(size) * 100, utils.FormatSeconds(eta)))
7868           last_output = now
7869   finally:
7870     logging.info("Resume sync of instance %s disks", instance.name)
7872 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
7874     for idx, success in enumerate(result.payload):
7875       if not success:
7876         lu.LogWarning("Resume sync of disk %d failed, please have a"
7877                       " look at the status and troubleshoot the issue", idx)
7878         logging.warn("resume-sync of instance %s for disks %d failed",
7879                      instance.name, idx)
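# Sizing note (editor's illustration): each wipe chunk is
#   int(min(constants.MAX_WIPE_CHUNK,
#           device.size / 100.0 * constants.MIN_WIPE_CHUNK_PERCENT))
# so a 10240 MB disk with MIN_WIPE_CHUNK_PERCENT == 10 is wiped in chunks of
# min(MAX_WIPE_CHUNK, 1024) MB; the int() truncation avoids accumulating
# float rounding errors in the offset arithmetic.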
7882 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
7883 """Create all disks for an instance.
7885 This abstracts away some work from AddInstance.
7887 @type lu: L{LogicalUnit}
7888 @param lu: the logical unit on whose behalf we execute
7889 @type instance: L{objects.Instance}
7890 @param instance: the instance whose disks we should create
7891   @type to_skip: list
7892   @param to_skip: list of indices to skip
7893   @type target_node: string
7894   @param target_node: if passed, overrides the target node for creation
7896   @return: the success of the creation
7898   """
7899   info = _GetInstanceInfoText(instance)
7900   if target_node is None:
7901     pnode = instance.primary_node
7902     all_nodes = instance.all_nodes
7903   else:
7904     pnode = target_node
7905     all_nodes = [pnode]
7907 if instance.disk_template in constants.DTS_FILEBASED:
7908 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7909 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
7911 result.Raise("Failed to create directory '%s' on"
7912 " node %s" % (file_storage_dir, pnode))
7914 # Note: this needs to be kept in sync with adding of disks in
7915 # LUInstanceSetParams
7916 for idx, device in enumerate(instance.disks):
7917     if to_skip and idx in to_skip:
7918       continue
7919     logging.info("Creating volume %s for instance %s",
7920 device.iv_name, instance.name)
7922 for node in all_nodes:
7923 f_create = node == pnode
7924 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
7927 def _RemoveDisks(lu, instance, target_node=None):
7928 """Remove all disks for an instance.
7930 This abstracts away some work from `AddInstance()` and
7931 `RemoveInstance()`. Note that in case some of the devices couldn't
7932 be removed, the removal will continue with the other ones (compare
7933 with `_CreateDisks()`).
7935 @type lu: L{LogicalUnit}
7936 @param lu: the logical unit on whose behalf we execute
7937 @type instance: L{objects.Instance}
7938 @param instance: the instance whose disks we should remove
7939 @type target_node: string
7940 @param target_node: used to override the node on which to remove the disks
7942   @return: the success of the removal
7944   """
7945   logging.info("Removing block devices for instance %s", instance.name)
7947   all_result = True
7948   for device in instance.disks:
7949     if target_node:
7950       edata = [(target_node, device)]
7951     else:
7952       edata = device.ComputeNodeTree(instance.primary_node)
7953     for node, disk in edata:
7954       lu.cfg.SetDiskID(disk, node)
7955       msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
7956       if msg:
7957         lu.LogWarning("Could not remove block device %s on node %s,"
7958                       " continuing anyway: %s", device.iv_name, node, msg)
7959         all_result = False
7961   if instance.disk_template == constants.DT_FILE:
7962     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7963     if target_node:
7964       tgt = target_node
7965     else:
7966       tgt = instance.primary_node
7967     result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
7968     if result.fail_msg:
7969       lu.LogWarning("Could not remove directory '%s' on node %s: %s",
7970                     file_storage_dir, instance.primary_node, result.fail_msg)
7971       all_result = False
7973   return all_result
7976 def _ComputeDiskSizePerVG(disk_template, disks):
7977   """Compute disk size requirements in the volume group
7979   """
7980   def _compute(disks, payload):
7981     """Universal algorithm.
7983     """
7984     vgs = {}
7985     for disk in disks:
7986       vgs[disk[constants.IDISK_VG]] = \
7987         vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + \
7988         payload
7989     return vgs
7991   # Required free disk space as a function of disk and swap space
7992   req_size_dict = {
7993 constants.DT_DISKLESS: {},
7994 constants.DT_PLAIN: _compute(disks, 0),
7995 # 128 MB are added for drbd metadata for each disk
7996 constants.DT_DRBD8: _compute(disks, 128),
7997 constants.DT_FILE: {},
7998     constants.DT_SHARED_FILE: {},
7999   }
8001   if disk_template not in req_size_dict:
8002 raise errors.ProgrammerError("Disk template '%s' size requirement"
8003 " is unknown" % disk_template)
8005 return req_size_dict[disk_template]
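# Worked example (editor's illustration): two DRBD8 disks of 1024 MB each in
# volume group "xenvg" give _compute(disks, 128) ==
# {"xenvg": (1024 + 128) + (1024 + 128)} == {"xenvg": 2304}, i.e. 128 MB of
# DRBD metadata is budgeted per disk in addition to the data size.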
8008 def _ComputeDiskSize(disk_template, disks):
8009   """Compute disk size requirements in the volume group
8011   """
8012   # Required free disk space as a function of disk and swap space
8013   req_size_dict = {
8014     constants.DT_DISKLESS: None,
8015 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
8016 # 128 MB are added for drbd metadata for each disk
8017 constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
8018 constants.DT_FILE: None,
8019 constants.DT_SHARED_FILE: 0,
8020     constants.DT_BLOCK: 0,
8021   }
8023   if disk_template not in req_size_dict:
8024 raise errors.ProgrammerError("Disk template '%s' size requirement"
8025 " is unknown" % disk_template)
8027 return req_size_dict[disk_template]
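# Worked example (editor's illustration): for disks of 512 MB and 1024 MB,
# DT_PLAIN requires 512 + 1024 == 1536 MB, while DT_DRBD8 requires
# (512 + 128) + (1024 + 128) == 1792 MB of free space.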
8030 def _FilterVmNodes(lu, nodenames):
8031 """Filters out non-vm_capable nodes from a list.
8033 @type lu: L{LogicalUnit}
8034 @param lu: the logical unit for which we check
8035 @type nodenames: list
8036 @param nodenames: the list of nodes on which we should check
8038   @return: the list of vm-capable nodes
8040   """
8041   vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
8042 return [name for name in nodenames if name not in vm_nodes]
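# Note (editor's clarification): despite its name, vm_nodes above holds the
# *non*-vm_capable nodes; the list comprehension keeps every name that is
# not in that set, which is exactly the vm-capable subset.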
8045 def _CheckHVParams(lu, nodenames, hvname, hvparams):
8046 """Hypervisor parameter validation.
8048   This function abstracts the hypervisor parameter validation to be
8049   used in both instance create and instance modify.
8051 @type lu: L{LogicalUnit}
8052 @param lu: the logical unit for which we check
8053 @type nodenames: list
8054 @param nodenames: the list of nodes on which we should check
8055 @type hvname: string
8056 @param hvname: the name of the hypervisor we should use
8057 @type hvparams: dict
8058 @param hvparams: the parameters which we need to check
8059   @raise errors.OpPrereqError: if the parameters are not valid
8061   """
8062   nodenames = _FilterVmNodes(lu, nodenames)
8063   hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
8064                                                   hvname,
8065                                                   [hvparams])
8066   for node in nodenames:
8067     info = hvinfo[node]
8068     if info.offline:
8069       continue
8070     info.Raise("Hypervisor parameter validation failed on node %s" % node)
8073 def _CheckOSParams(lu, required, nodenames, osname, osparams):
8074 """OS parameters validation.
8076 @type lu: L{LogicalUnit}
8077 @param lu: the logical unit for which we check
8078 @type required: boolean
8079   @param required: whether the validation should fail if the OS is not
8080       found
8081   @type nodenames: list
8082 @param nodenames: the list of nodes on which we should check
8083 @type osname: string
8084   @param osname: the name of the OS we should use
8085   @type osparams: dict
8086   @param osparams: the parameters which we need to check
8087   @raise errors.OpPrereqError: if the parameters are not valid
8089   """
8090   nodenames = _FilterVmNodes(lu, nodenames)
8091   result = lu.rpc.call_os_validate(required, nodenames, osname,
8092                                    [constants.OS_VALIDATE_PARAMETERS],
8093                                    osparams)
8094 for node, nres in result.items():
8095 # we don't check for offline cases since this should be run only
8096 # against the master node and/or an instance's nodes
8097 nres.Raise("OS Parameters validation failed on node %s" % node)
8098 if not nres.payload:
8099       lu.LogInfo("OS %s not found on node %s, validation skipped",
8100                  osname, node)
8103 class LUInstanceCreate(LogicalUnit):
8104   """Create an instance.
8106   """
8107   HPATH = "instance-add"
8108   HTYPE = constants.HTYPE_INSTANCE
8109   REQ_BGL = False
8111   def CheckArguments(self):
8112     """Check arguments.
8114     """
8115     # do not require name_check to ease forward/backward compatibility
8116     # in tools
8117     if self.op.no_install and self.op.start:
8118 self.LogInfo("No-installation mode selected, disabling startup")
8119 self.op.start = False
8120 # validate/normalize the instance name
8121 self.op.instance_name = \
8122 netutils.Hostname.GetNormalizedName(self.op.instance_name)
8124 if self.op.ip_check and not self.op.name_check:
8125 # TODO: make the ip check more flexible and not depend on the name check
8126 raise errors.OpPrereqError("Cannot do IP address check without a name"
8127 " check", errors.ECODE_INVAL)
8129 # check nics' parameter names
8130 for nic in self.op.nics:
8131 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8133 # check disks. parameter names and consistent adopt/no-adopt strategy
8134 has_adopt = has_no_adopt = False
8135 for disk in self.op.disks:
8136 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8137       if constants.IDISK_ADOPT in disk:
8138         has_adopt = True
8139       else:
8140         has_no_adopt = True
8141     if has_adopt and has_no_adopt:
8142       raise errors.OpPrereqError("Either all disks are adopted or none is",
8143                                  errors.ECODE_INVAL)
8144     if has_adopt:
8145       if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8146 raise errors.OpPrereqError("Disk adoption is not supported for the"
8147 " '%s' disk template" %
8148                                    self.op.disk_template,
8149                                    errors.ECODE_INVAL)
8150       if self.op.iallocator is not None:
8151 raise errors.OpPrereqError("Disk adoption not allowed with an"
8152 " iallocator script", errors.ECODE_INVAL)
8153 if self.op.mode == constants.INSTANCE_IMPORT:
8154 raise errors.OpPrereqError("Disk adoption not allowed for"
8155 " instance import", errors.ECODE_INVAL)
8157 if self.op.disk_template in constants.DTS_MUST_ADOPT:
8158 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8159 " but no 'adopt' parameter given" %
8160                                    self.op.disk_template,
8161                                    errors.ECODE_INVAL)
8163     self.adopt_disks = has_adopt
8165 # instance name verification
8166 if self.op.name_check:
8167 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8168 self.op.instance_name = self.hostname1.name
8169 # used in CheckPrereq for ip ping check
8170       self.check_ip = self.hostname1.ip
8171     else:
8172       self.check_ip = None
8174 # file storage checks
8175 if (self.op.file_driver and
8176 not self.op.file_driver in constants.FILE_DRIVER):
8177 raise errors.OpPrereqError("Invalid file driver name '%s'" %
8178 self.op.file_driver, errors.ECODE_INVAL)
8180 if self.op.disk_template == constants.DT_FILE:
8181 opcodes.RequireFileStorage()
8182 elif self.op.disk_template == constants.DT_SHARED_FILE:
8183 opcodes.RequireSharedFileStorage()
8185 ### Node/iallocator related checks
8186 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8188 if self.op.pnode is not None:
8189 if self.op.disk_template in constants.DTS_INT_MIRROR:
8190 if self.op.snode is None:
8191 raise errors.OpPrereqError("The networked disk templates need"
8192                                      " a mirror node", errors.ECODE_INVAL)
8193       elif self.op.snode:
8194         self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8195                         " template")
8196         self.op.snode = None
8198 self._cds = _GetClusterDomainSecret()
8200 if self.op.mode == constants.INSTANCE_IMPORT:
8201 # On import force_variant must be True, because if we forced it at
8202       # initial install, our only chance when importing it back is that it
8203       # works again!
8204       self.op.force_variant = True
8206 if self.op.no_install:
8207 self.LogInfo("No-installation mode has no effect during import")
8209 elif self.op.mode == constants.INSTANCE_CREATE:
8210 if self.op.os_type is None:
8211         raise errors.OpPrereqError("No guest OS specified",
8212                                    errors.ECODE_INVAL)
8213       if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
8214         raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
8215                                    " installation" % self.op.os_type,
8216                                    errors.ECODE_INVAL)
8217       if self.op.disk_template is None:
8218         raise errors.OpPrereqError("No disk template specified",
8219                                    errors.ECODE_INVAL)
8221     elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8222 # Check handshake to ensure both clusters have the same domain secret
8223 src_handshake = self.op.source_handshake
8224 if not src_handshake:
8225         raise errors.OpPrereqError("Missing source handshake",
8226                                    errors.ECODE_INVAL)
8228       errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
8229                                                            src_handshake)
8230       if errmsg:
8231         raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
8232                                    errors.ECODE_INVAL)
8234       # Load and check source CA
8235       self.source_x509_ca_pem = self.op.source_x509_ca
8236       if not self.source_x509_ca_pem:
8237         raise errors.OpPrereqError("Missing source X509 CA",
8238                                    errors.ECODE_INVAL)
8240       try:
8241         (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
8242                                                     self._cds)
8243       except OpenSSL.crypto.Error, err:
8244         raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
8245                                    (err, ), errors.ECODE_INVAL)
8247       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
8248       if errcode is not None:
8249         raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
8250                                    errors.ECODE_INVAL)
8252       self.source_x509_ca = cert
8254 src_instance_name = self.op.source_instance_name
8255 if not src_instance_name:
8256         raise errors.OpPrereqError("Missing source instance name",
8257                                    errors.ECODE_INVAL)
8259       self.source_instance_name = \
8260         netutils.GetHostname(name=src_instance_name).name
8262     else:
8263       raise errors.OpPrereqError("Invalid instance creation mode %r" %
8264                                  self.op.mode, errors.ECODE_INVAL)
8266 def ExpandNames(self):
8267     """ExpandNames for CreateInstance.
8269     Figure out the right locks for instance creation.
8271     """
8272     self.needed_locks = {}
8274 instance_name = self.op.instance_name
8275 # this is just a preventive check, but someone might still add this
8276 # instance in the meantime, and creation will fail at lock-add time
8277 if instance_name in self.cfg.GetInstanceList():
8278 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8279 instance_name, errors.ECODE_EXISTS)
8281 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
8283 if self.op.iallocator:
8284       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8285     else:
8286       self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8287 nodelist = [self.op.pnode]
8288 if self.op.snode is not None:
8289 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8290 nodelist.append(self.op.snode)
8291 self.needed_locks[locking.LEVEL_NODE] = nodelist
8293 # in case of import lock the source node too
8294 if self.op.mode == constants.INSTANCE_IMPORT:
8295 src_node = self.op.src_node
8296 src_path = self.op.src_path
8298 if src_path is None:
8299 self.op.src_path = src_path = self.op.instance_name
8301 if src_node is None:
8302 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8303 self.op.src_node = None
8304 if os.path.isabs(src_path):
8305 raise errors.OpPrereqError("Importing an instance from a path"
8306                                      " requires a source node option",
8307                                      errors.ECODE_INVAL)
8308       else:
8309         self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8310 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8311 self.needed_locks[locking.LEVEL_NODE].append(src_node)
8312 if not os.path.isabs(src_path):
8313 self.op.src_path = src_path = \
8314 utils.PathJoin(constants.EXPORT_DIR, src_path)
8316 def _RunAllocator(self):
8317     """Run the allocator based on input opcode.
8319     """
8320     nics = [n.ToDict() for n in self.nics]
8321     ial = IAllocator(self.cfg, self.rpc,
8322                      mode=constants.IALLOCATOR_MODE_ALLOC,
8323                      name=self.op.instance_name,
8324                      disk_template=self.op.disk_template,
8325                      tags=self.op.tags,
8326                      os=self.op.os_type,
8327                      vcpus=self.be_full[constants.BE_VCPUS],
8328                      memory=self.be_full[constants.BE_MEMORY],
8329                      disks=self.disks,
8330                      nics=nics,
8331                      hypervisor=self.op.hypervisor,
8332                      )
8334     ial.Run(self.op.iallocator)
8336     if not ial.success:
8337       raise errors.OpPrereqError("Can't compute nodes using"
8338                                  " iallocator '%s': %s" %
8339                                  (self.op.iallocator, ial.info),
8340                                  errors.ECODE_NORES)
8341     if len(ial.result) != ial.required_nodes:
8342 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8343 " of nodes (%s), required %s" %
8344 (self.op.iallocator, len(ial.result),
8345 ial.required_nodes), errors.ECODE_FAULT)
8346 self.op.pnode = ial.result[0]
8347 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8348 self.op.instance_name, self.op.iallocator,
8349 utils.CommaJoin(ial.result))
8350 if ial.required_nodes == 2:
8351 self.op.snode = ial.result[1]
8353   def BuildHooksEnv(self):
8354     """Build hooks env.
8356     This runs on master, primary and secondary nodes of the instance.
8358     """
8359     env = {
8360       "ADD_MODE": self.op.mode,
8361       }
8362 if self.op.mode == constants.INSTANCE_IMPORT:
8363 env["SRC_NODE"] = self.op.src_node
8364 env["SRC_PATH"] = self.op.src_path
8365 env["SRC_IMAGES"] = self.src_images
8367 env.update(_BuildInstanceHookEnv(
8368 name=self.op.instance_name,
8369 primary_node=self.op.pnode,
8370 secondary_nodes=self.secondaries,
8371 status=self.op.start,
8372 os_type=self.op.os_type,
8373 memory=self.be_full[constants.BE_MEMORY],
8374 vcpus=self.be_full[constants.BE_VCPUS],
8375 nics=_NICListToTuple(self, self.nics),
8376 disk_template=self.op.disk_template,
8377       disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8378              for d in self.disks],
8379       bep=self.be_full,
8380       hvp=self.hv_full,
8381       hypervisor_name=self.op.hypervisor,
8382       tags=self.op.tags,
8383     ))
8385     return env
8387 def BuildHooksNodes(self):
8388     """Build hooks nodes.
8390     """
8391     nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8392     return nl, nl
8394 def _ReadExportInfo(self):
8395 """Reads the export information from disk.
8397 It will override the opcode source node and path with the actual
8398 information, if these two were not specified before.
8400     @return: the export information
8402     """
8403     assert self.op.mode == constants.INSTANCE_IMPORT
8405 src_node = self.op.src_node
8406 src_path = self.op.src_path
8408 if src_node is None:
8409 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
8410       exp_list = self.rpc.call_export_list(locked_nodes)
8411       found = False
8412       for node in exp_list:
8413         if exp_list[node].fail_msg:
8414           continue
8415         if src_path in exp_list[node].payload:
8416           found = True
8417           self.op.src_node = src_node = node
8418           self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
8419                                                        src_path)
8420           break
8421       if not found:
8422         raise errors.OpPrereqError("No export found for relative path %s" %
8423                                    src_path, errors.ECODE_INVAL)
8425 _CheckNodeOnline(self, src_node)
8426 result = self.rpc.call_export_info(src_node, src_path)
8427 result.Raise("No export or invalid export found in dir %s" % src_path)
8429 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8430 if not export_info.has_section(constants.INISECT_EXP):
8431 raise errors.ProgrammerError("Corrupted export config",
8432 errors.ECODE_ENVIRON)
8434 ei_version = export_info.get(constants.INISECT_EXP, "version")
8435 if (int(ei_version) != constants.EXPORT_VERSION):
8436 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8437 (ei_version, constants.EXPORT_VERSION),
8438                                  errors.ECODE_ENVIRON)
8440     return export_info
8441 def _ReadExportParams(self, einfo):
8442 """Use export parameters as defaults.
8444     In case the opcode doesn't specify (as in override) some instance
8445     parameters, then try to use them from the export information, if
8446     that declares them.
8448     """
8449     self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
8451 if self.op.disk_template is None:
8452 if einfo.has_option(constants.INISECT_INS, "disk_template"):
8453         self.op.disk_template = einfo.get(constants.INISECT_INS,
8454                                           "disk_template")
8455       else:
8456         raise errors.OpPrereqError("No disk template specified and the export"
8457                                    " is missing the disk_template information",
8458                                    errors.ECODE_INVAL)
8460     if not self.op.disks:
8461       if einfo.has_option(constants.INISECT_INS, "disk_count"):
8462         disks = []
8463         # TODO: import the disk iv_name too
8464         for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
8465 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
8466 disks.append({constants.IDISK_SIZE: disk_sz})
8467         self.op.disks = disks
8468       else:
8469         raise errors.OpPrereqError("No disk info specified and the export"
8470                                    " is missing the disk information",
8471                                    errors.ECODE_INVAL)
8473     if (not self.op.nics and
8474         einfo.has_option(constants.INISECT_INS, "nic_count")):
8475       nics = []
8476       for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
8477         ndict = {}
8478         for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
8479           v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
8480           ndict[name] = v
8481         nics.append(ndict)
8482       self.op.nics = nics
8484 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
8485 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
8487 if (self.op.hypervisor is None and
8488 einfo.has_option(constants.INISECT_INS, "hypervisor")):
8489 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
8491 if einfo.has_section(constants.INISECT_HYP):
8492 # use the export parameters but do not override the ones
8493 # specified by the user
8494 for name, value in einfo.items(constants.INISECT_HYP):
8495 if name not in self.op.hvparams:
8496 self.op.hvparams[name] = value
8498 if einfo.has_section(constants.INISECT_BEP):
8499 # use the parameters, without overriding
8500 for name, value in einfo.items(constants.INISECT_BEP):
8501 if name not in self.op.beparams:
8502 self.op.beparams[name] = value
8504 # try to read the parameters old style, from the main section
8505 for name in constants.BES_PARAMETERS:
8506 if (name not in self.op.beparams and
8507 einfo.has_option(constants.INISECT_INS, name)):
8508 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
8510 if einfo.has_section(constants.INISECT_OSP):
8511 # use the parameters, without overriding
8512 for name, value in einfo.items(constants.INISECT_OSP):
8513 if name not in self.op.osparams:
8514 self.op.osparams[name] = value
8516 def _RevertToDefaults(self, cluster):
8517     """Revert the instance parameters to the default values.
8519     """
8520     # hvparams
8521     hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
8522 for name in self.op.hvparams.keys():
8523 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
8524         del self.op.hvparams[name]
8525     # beparams
8526     be_defs = cluster.SimpleFillBE({})
8527 for name in self.op.beparams.keys():
8528 if name in be_defs and be_defs[name] == self.op.beparams[name]:
8529         del self.op.beparams[name]
8530     # nicparams
8531     nic_defs = cluster.SimpleFillNIC({})
8532 for nic in self.op.nics:
8533 for name in constants.NICS_PARAMETERS:
8534         if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
8535           del nic[name]
8536     # osparams
8537     os_defs = cluster.SimpleFillOS(self.op.os_type, {})
8538 for name in self.op.osparams.keys():
8539 if name in os_defs and os_defs[name] == self.op.osparams[name]:
8540 del self.op.osparams[name]
8542 def _CalculateFileStorageDir(self):
8543     """Calculate final instance file storage dir.
8545     """
8546     # file storage dir calculation/check
8547 self.instance_file_storage_dir = None
8548 if self.op.disk_template in constants.DTS_FILEBASED:
8549       # build the full file storage dir path
8550       joinargs = []
8552       if self.op.disk_template == constants.DT_SHARED_FILE:
8553         get_fsd_fn = self.cfg.GetSharedFileStorageDir
8554       else:
8555         get_fsd_fn = self.cfg.GetFileStorageDir
8557 cfg_storagedir = get_fsd_fn()
8558 if not cfg_storagedir:
8559 raise errors.OpPrereqError("Cluster file storage dir not defined")
8560 joinargs.append(cfg_storagedir)
8562 if self.op.file_storage_dir is not None:
8563 joinargs.append(self.op.file_storage_dir)
8565 joinargs.append(self.op.instance_name)
8567 # pylint: disable=W0142
8568 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
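    # Resulting layout (editor's illustration): with a cluster file storage
    # dir of /srv/ganeti/file-storage, an opcode file_storage_dir of "prod"
    # and an instance named inst1.example.com, the final directory becomes
    # /srv/ganeti/file-storage/prod/inst1.example.com.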
8570 def CheckPrereq(self):
8571     """Check prerequisites.
8573     """
8574     self._CalculateFileStorageDir()
8576 if self.op.mode == constants.INSTANCE_IMPORT:
8577 export_info = self._ReadExportInfo()
8578 self._ReadExportParams(export_info)
8580 if (not self.cfg.GetVGName() and
8581 self.op.disk_template not in constants.DTS_NOT_LVM):
8582 raise errors.OpPrereqError("Cluster does not support lvm-based"
8583 " instances", errors.ECODE_STATE)
8585 if self.op.hypervisor is None:
8586 self.op.hypervisor = self.cfg.GetHypervisorType()
8588 cluster = self.cfg.GetClusterInfo()
8589 enabled_hvs = cluster.enabled_hypervisors
8590 if self.op.hypervisor not in enabled_hvs:
8591 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
8592                                  " cluster (%s)" % (self.op.hypervisor,
8593                                                     ",".join(enabled_hvs)),
8594                                  errors.ECODE_STATE)
8596     # Check tag validity
8597 for tag in self.op.tags:
8598 objects.TaggableObject.ValidateTag(tag)
8600 # check hypervisor parameter syntax (locally)
8601 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
8602     filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
8603                                       self.op.hvparams)
8604     hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
8605 hv_type.CheckParameterSyntax(filled_hvp)
8606 self.hv_full = filled_hvp
8607 # check that we don't specify global parameters on an instance
8608 _CheckGlobalHvParams(self.op.hvparams)
8610 # fill and remember the beparams dict
8611 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
8612 self.be_full = cluster.SimpleFillBE(self.op.beparams)
8614 # build os parameters
8615 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
8617     # now that hvp/bep are in final format, let's reset to defaults,
8618     # if told to do so
8619     if self.op.identify_defaults:
8620       self._RevertToDefaults(cluster)
8622     # NIC buildup
8623     self.nics = []
8624     for idx, nic in enumerate(self.op.nics):
8625 nic_mode_req = nic.get(constants.INIC_MODE, None)
8626 nic_mode = nic_mode_req
8627 if nic_mode is None:
8628 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
8630 # in routed mode, for the first nic, the default ip is 'auto'
8631 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
8632 default_ip_mode = constants.VALUE_AUTO
8634 default_ip_mode = constants.VALUE_NONE
8636 # ip validity checks
8637 ip = nic.get(constants.INIC_IP, default_ip_mode)
8638       if ip is None or ip.lower() == constants.VALUE_NONE:
8639         nic_ip = None
8640       elif ip.lower() == constants.VALUE_AUTO:
8641 if not self.op.name_check:
8642 raise errors.OpPrereqError("IP address set to auto but name checks"
8643                                      " have been skipped",
8644                                      errors.ECODE_INVAL)
8645         nic_ip = self.hostname1.ip
8646       else:
8647         if not netutils.IPAddress.IsValid(ip):
8648           raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
8649                                      errors.ECODE_INVAL)
8650         nic_ip = ip
8652       # TODO: check the ip address for uniqueness
8653 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
8654         raise errors.OpPrereqError("Routed nic mode requires an ip address",
8655                                    errors.ECODE_INVAL)
8657       # MAC address verification
8658 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
8659 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8660         mac = utils.NormalizeAndValidateMac(mac)
8662         try:
8663           self.cfg.ReserveMAC(mac, self.proc.GetECId())
8664 except errors.ReservationError:
8665 raise errors.OpPrereqError("MAC address %s already in use"
8666 " in cluster" % mac,
8667 errors.ECODE_NOTUNIQUE)
8669 # Build nic parameters
8670       link = nic.get(constants.INIC_LINK, None)
8671       nicparams = {}
8672       if nic_mode_req:
8673         nicparams[constants.NIC_MODE] = nic_mode_req
8674       if link:
8675         nicparams[constants.NIC_LINK] = link
8677       check_params = cluster.SimpleFillNIC(nicparams)
8678 objects.NIC.CheckParameterSyntax(check_params)
8679 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
8681 # disk checks/pre-build
8682     default_vg = self.cfg.GetVGName()
8683     self.disks = []
8684     for disk in self.op.disks:
8685 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
8686 if mode not in constants.DISK_ACCESS_SET:
8687 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
8688 mode, errors.ECODE_INVAL)
8689       size = disk.get(constants.IDISK_SIZE, None)
8690       if size is None:
8691         raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
8692       try:
8693         size = int(size)
8694       except (TypeError, ValueError):
8695         raise errors.OpPrereqError("Invalid disk size '%s'" % size,
8696                                    errors.ECODE_INVAL)
8698       data_vg = disk.get(constants.IDISK_VG, default_vg)
8699       new_disk = {
8700         constants.IDISK_SIZE: size,
8701 constants.IDISK_MODE: mode,
8702 constants.IDISK_VG: data_vg,
8703         constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
8704         }
8705       if constants.IDISK_ADOPT in disk:
8706 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
8707 self.disks.append(new_disk)
8709 if self.op.mode == constants.INSTANCE_IMPORT:
8711 # Check that the new instance doesn't have less disks than the export
8712 instance_disks = len(self.disks)
8713 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
8714 if instance_disks < export_disks:
8715 raise errors.OpPrereqError("Not enough disks to import."
8716 " (instance: %d, export: %d)" %
8717                                    (instance_disks, export_disks),
8718                                    errors.ECODE_INVAL)
8720       disk_images = []
8721       for idx in range(export_disks):
8722 option = "disk%d_dump" % idx
8723 if export_info.has_option(constants.INISECT_INS, option):
8724 # FIXME: are the old os-es, disk sizes, etc. useful?
8725 export_name = export_info.get(constants.INISECT_INS, option)
8726 image = utils.PathJoin(self.op.src_path, export_name)
8727           disk_images.append(image)
8728         else:
8729           disk_images.append(False)
8731 self.src_images = disk_images
8733       old_name = export_info.get(constants.INISECT_INS, "name")
8734       try:
8735         exp_nic_count = export_info.getint(constants.INISECT_INS, "nic_count")
8736 except (TypeError, ValueError), err:
8737 raise errors.OpPrereqError("Invalid export file, nic_count is not"
8738                                    " an integer: %s" % str(err),
8739                                    errors.ECODE_STATE)
8740       if self.op.instance_name == old_name:
8741 for idx, nic in enumerate(self.nics):
8742 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
8743 nic_mac_ini = "nic%d_mac" % idx
8744 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8746 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8748 # ip ping checks (we use the same ip that was resolved in ExpandNames)
8749 if self.op.ip_check:
8750 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8751 raise errors.OpPrereqError("IP %s of instance %s already in use" %
8752 (self.check_ip, self.op.instance_name),
8753 errors.ECODE_NOTUNIQUE)
8755 #### mac address generation
8756 # By generating here the mac address both the allocator and the hooks get
8757 # the real final mac address rather than the 'auto' or 'generate' value.
8758 # There is a race condition between the generation and the instance object
8759 # creation, which means that we know the mac is valid now, but we're not
8760 # sure it will be when we actually add the instance. If things go bad
8761 # adding the instance will abort because of a duplicate mac, and the
8762 # creation job will fail.
8763 for nic in self.nics:
8764 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8765         nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
8767     #### allocator run
8769     if self.op.iallocator is not None:
8770 self._RunAllocator()
8772 #### node related checks
8774 # check primary node
8775 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8776 assert self.pnode is not None, \
8777       "Cannot retrieve locked node %s" % self.op.pnode
8778     if pnode.offline:
8779       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8780                                  pnode.name, errors.ECODE_STATE)
8781     if pnode.drained:
8782       raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8783                                  pnode.name, errors.ECODE_STATE)
8784 if not pnode.vm_capable:
8785 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8786 " '%s'" % pnode.name, errors.ECODE_STATE)
8788 self.secondaries = []
8790 # mirror node verification
8791 if self.op.disk_template in constants.DTS_INT_MIRROR:
8792 if self.op.snode == pnode.name:
8793 raise errors.OpPrereqError("The secondary node cannot be the"
8794 " primary node", errors.ECODE_INVAL)
8795 _CheckNodeOnline(self, self.op.snode)
8796 _CheckNodeNotDrained(self, self.op.snode)
8797 _CheckNodeVmCapable(self, self.op.snode)
8798 self.secondaries.append(self.op.snode)
8800 nodenames = [pnode.name] + self.secondaries
8802 if not self.adopt_disks:
8803 # Check lv size requirements, if not adopting
8804 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
8805 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
8807 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
8808 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
8809 disk[constants.IDISK_ADOPT])
8810 for disk in self.disks])
8811       if len(all_lvs) != len(self.disks):
8812         raise errors.OpPrereqError("Duplicate volume names given for adoption",
8813                                    errors.ECODE_INVAL)
8814       for lv_name in all_lvs:
8815         try:
8816           # FIXME: lv_name here is "vg/lv" need to ensure that other calls
8817 # to ReserveLV uses the same syntax
8818 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
8819 except errors.ReservationError:
8820 raise errors.OpPrereqError("LV named %s used by another instance" %
8821 lv_name, errors.ECODE_NOTUNIQUE)
8823 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
8824 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
8826 node_lvs = self.rpc.call_lv_list([pnode.name],
8827 vg_names.payload.keys())[pnode.name]
8828 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
8829 node_lvs = node_lvs.payload
8831       delta = all_lvs.difference(node_lvs.keys())
8832       if delta:
8833         raise errors.OpPrereqError("Missing logical volume(s): %s" %
8834                                    utils.CommaJoin(delta),
8835                                    errors.ECODE_INVAL)
8836       online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
8837       if online_lvs:
8838         raise errors.OpPrereqError("Online logical volumes found, cannot"
8839                                    " adopt: %s" % utils.CommaJoin(online_lvs),
8840                                    errors.ECODE_STATE)
8841 # update the size of disk based on what is found
8842 for dsk in self.disks:
8843 dsk[constants.IDISK_SIZE] = \
8844 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
8845 dsk[constants.IDISK_ADOPT])][0]))
8847 elif self.op.disk_template == constants.DT_BLOCK:
8848 # Normalize and de-duplicate device paths
8849 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
8850 for disk in self.disks])
8851       if len(all_disks) != len(self.disks):
8852         raise errors.OpPrereqError("Duplicate disk names given for adoption",
8853                                    errors.ECODE_INVAL)
8854       baddisks = [d for d in all_disks
8855                   if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
8856       if baddisks:
8857         raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
8858                                    " cannot be adopted" %
8859                                    (", ".join(baddisks),
8860                                     constants.ADOPTABLE_BLOCKDEV_ROOT),
8861                                    errors.ECODE_INVAL)
8863 node_disks = self.rpc.call_bdev_sizes([pnode.name],
8864 list(all_disks))[pnode.name]
8865       node_disks.Raise("Cannot get block device information from node %s" %
8866                        pnode.name)
8867       node_disks = node_disks.payload
8868       delta = all_disks.difference(node_disks.keys())
8869       if delta:
8870         raise errors.OpPrereqError("Missing block device(s): %s" %
8871                                    utils.CommaJoin(delta),
8872                                    errors.ECODE_INVAL)
8873 for dsk in self.disks:
8874 dsk[constants.IDISK_SIZE] = \
8875 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
8877 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
8879 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
8880 # check OS parameters (remotely)
8881 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
8883 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
8885     # memory check on primary node
8886     if self.op.start:
8887       _CheckNodeFreeMemory(self, self.pnode.name,
8888                            "creating instance %s" % self.op.instance_name,
8889                            self.be_full[constants.BE_MEMORY],
8890                            self.op.hypervisor)
8892 self.dry_run_result = list(nodenames)
8894 def Exec(self, feedback_fn):
8895     """Create and add the instance to the cluster.
8897     """
8898     instance = self.op.instance_name
8899 pnode_name = self.pnode.name
8901 ht_kind = self.op.hypervisor
8902 if ht_kind in constants.HTS_REQ_PORT:
8903       network_port = self.cfg.AllocatePort()
8904     else:
8905       network_port = None
8907     disks = _GenerateDiskTemplate(self,
8908 self.op.disk_template,
8909                                   instance, pnode_name,
8910                                   self.secondaries,
8911                                   self.disks,
8912                                   self.instance_file_storage_dir,
8913                                   self.op.file_driver,
8914                                   0,
8915                                   feedback_fn)
8917     iobj = objects.Instance(name=instance, os=self.op.os_type,
8918 primary_node=pnode_name,
8919 nics=self.nics, disks=disks,
8920                             disk_template=self.op.disk_template,
8921                             admin_up=False,
8922                             network_port=network_port,
8923 beparams=self.op.beparams,
8924 hvparams=self.op.hvparams,
8925 hypervisor=self.op.hypervisor,
8926                             osparams=self.op.osparams,
8927                             )
8929     if self.op.tags:
8930       for tag in self.op.tags:
8931         iobj.AddTag(tag)
8933     if self.adopt_disks:
8934 if self.op.disk_template == constants.DT_PLAIN:
8935 # rename LVs to the newly-generated names; we need to construct
8936 # 'fake' LV disks with the old data, plus the new unique_id
8937         tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
8938         rename_to = []
8939         for t_dsk, a_dsk in zip(tmp_disks, self.disks):
8940 rename_to.append(t_dsk.logical_id)
8941 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
8942 self.cfg.SetDiskID(t_dsk, pnode_name)
8943 result = self.rpc.call_blockdev_rename(pnode_name,
8944 zip(tmp_disks, rename_to))
8945         result.Raise("Failed to rename adopted LVs")
8946     else:
8947       feedback_fn("* creating instance disks...")
8948       try:
8949         _CreateDisks(self, iobj)
8950       except errors.OpExecError:
8951         self.LogWarning("Device creation failed, reverting...")
8952         try:
8953           _RemoveDisks(self, iobj)
8954         finally:
8955           self.cfg.ReleaseDRBDMinors(instance)
8956           raise
8958 feedback_fn("adding instance %s to cluster config" % instance)
8960 self.cfg.AddInstance(iobj, self.proc.GetECId())
8962 # Declare that we don't want to remove the instance lock anymore, as we've
8963 # added the instance to the config
8964 del self.remove_locks[locking.LEVEL_INSTANCE]
8966 if self.op.mode == constants.INSTANCE_IMPORT:
8967 # Release unused nodes
8968       _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
8969     else:
8970       # Release all nodes
8971       _ReleaseLocks(self, locking.LEVEL_NODE)
8973     disk_abort = False
8974     if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
8975 feedback_fn("* wiping instance disks...")
8977 _WipeDisks(self, iobj)
8978 except errors.OpExecError, err:
8979 logging.exception("Wiping disks failed")
8980         self.LogWarning("Wiping instance disks failed (%s)", err)
8981         disk_abort = True
8983     if disk_abort:
8984       # Something is already wrong with the disks, don't do anything else
8985       pass
8986     elif self.op.wait_for_sync:
8987 disk_abort = not _WaitForSync(self, iobj)
8988 elif iobj.disk_template in constants.DTS_INT_MIRROR:
8989 # make sure the disks are not degraded (still sync-ing is ok)
8990 feedback_fn("* checking mirrors status")
8991       disk_abort = not _WaitForSync(self, iobj, oneshot=True)
8992     else:
8993       disk_abort = False
8995     if disk_abort:
8996       _RemoveDisks(self, iobj)
8997 self.cfg.RemoveInstance(iobj.name)
8998 # Make sure the instance lock gets removed
8999 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
9000       raise errors.OpExecError("There are some degraded disks for"
9001                                " this instance")
9003     if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
9004 if self.op.mode == constants.INSTANCE_CREATE:
9005 if not self.op.no_install:
9006 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
9007                       not self.op.wait_for_sync)
9008         if pause_sync:
9009           feedback_fn("* pausing disk sync to install instance OS")
9010           result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9011                                                             iobj.disks, True)
9012           for idx, success in enumerate(result.payload):
9013             if not success:
9014               logging.warn("pause-sync of instance %s for disk %d failed",
9015                            instance, idx)
9017 feedback_fn("* running the instance OS create scripts...")
9018 # FIXME: pass debug option from opcode to backend
9019 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
9020                                                self.op.debug_level)
9021         if pause_sync:
9022           feedback_fn("* resuming disk sync")
9023           result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9024                                                             iobj.disks, False)
9025           for idx, success in enumerate(result.payload):
9026             if not success:
9027               logging.warn("resume-sync of instance %s for disk %d failed",
9028                            instance, idx)
9030 result.Raise("Could not add os for instance %s"
9031 " on node %s" % (instance, pnode_name))
9033 elif self.op.mode == constants.INSTANCE_IMPORT:
9034         feedback_fn("* running the instance OS import scripts...")
9036         transfers = []
9038         for idx, image in enumerate(self.src_images):
9039           if not image:
9040             continue
9042 # FIXME: pass debug option from opcode to backend
9043 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
9044 constants.IEIO_FILE, (image, ),
9045 constants.IEIO_SCRIPT,
9046                                              (iobj.disks[idx], idx),
9047                                              None)
9048           transfers.append(dt)
9050         import_result = \
9051           masterd.instance.TransferInstanceData(self, feedback_fn,
9052                                                 self.op.src_node, pnode_name,
9053                                                 self.pnode.secondary_ip,
9054                                                 iobj, transfers)
9055 if not compat.all(import_result):
9056 self.LogWarning("Some disks for instance %s on node %s were not"
9057 " imported successfully" % (instance, pnode_name))
9059 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9060 feedback_fn("* preparing remote import...")
9061 # The source cluster will stop the instance before attempting to make a
9062 # connection. In some cases stopping an instance can take a long time,
9063 # hence the shutdown timeout is added to the connection timeout.
9064 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
9065 self.op.source_shutdown_timeout)
9066 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9068         assert iobj.primary_node == self.pnode.name
9069         disk_results = \
9070           masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
9071 self.source_x509_ca,
9072 self._cds, timeouts)
9073 if not compat.all(disk_results):
9074 # TODO: Should the instance still be started, even if some disks
9075 # failed to import (valid for local imports, too)?
9076 self.LogWarning("Some disks for instance %s on node %s were not"
9077 " imported successfully" % (instance, pnode_name))
9079 # Run rename script on newly imported instance
9080 assert iobj.name == instance
9081 feedback_fn("Running rename script for %s" % instance)
9082 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
9083 self.source_instance_name,
9084                                                    self.op.debug_level)
9085         if result.fail_msg:
9086           self.LogWarning("Failed to run rename script for %s on node"
9087                           " %s: %s" % (instance, pnode_name, result.fail_msg))
9089       else:
9090         # also checked in the prereq part
9091         raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
9092                                      % self.op.mode)
9094     if self.op.start:
9095       iobj.admin_up = True
9096 self.cfg.Update(iobj, feedback_fn)
9097 logging.info("Starting instance %s on node %s", instance, pnode_name)
9098 feedback_fn("* starting instance...")
9099       result = self.rpc.call_instance_start(pnode_name, iobj,
9100                                             None, None, False)
9101       result.Raise("Could not start instance")
9103 return list(iobj.all_nodes)
9106 class LUInstanceConsole(NoHooksLU):
9107 """Connect to an instance's console.
9109 This is somewhat special in that it returns the command line that
9110   you need to run on the master node in order to connect to the
9111   console.
9113   """
9114   REQ_BGL = False
9116   def ExpandNames(self):
9117 self._ExpandAndLockInstance()
9119 def CheckPrereq(self):
9120 """Check prerequisites.
9122     This checks that the instance is in the cluster.
9124     """
9125     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9126 assert self.instance is not None, \
9127 "Cannot retrieve locked instance %s" % self.op.instance_name
9128 _CheckNodeOnline(self, self.instance.primary_node)
9130 def Exec(self, feedback_fn):
9131     """Connect to the console of an instance.
9133     """
9134     instance = self.instance
9135 node = instance.primary_node
9137 node_insts = self.rpc.call_instance_list([node],
9138 [instance.hypervisor])[node]
9139 node_insts.Raise("Can't get node information from %s" % node)
9141 if instance.name not in node_insts.payload:
9142 if instance.admin_up:
9143 state = constants.INSTST_ERRORDOWN
9145 state = constants.INSTST_ADMINDOWN
9146 raise errors.OpExecError("Instance %s is not running (state %s)" %
9147 (instance.name, state))
9149 logging.debug("Connecting to console of %s on %s", instance.name, node)
9151 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
9154 def _GetInstanceConsole(cluster, instance):
9155 """Returns console information for an instance.
9157 @type cluster: L{objects.Cluster}
9158   @type instance: L{objects.Instance}
9159   @rtype: dict
9161   """
9162   hyper = hypervisor.GetHypervisor(instance.hypervisor)
9163 # beparams and hvparams are passed separately, to avoid editing the
9164 # instance and then saving the defaults in the instance itself.
9165 hvparams = cluster.FillHV(instance)
9166 beparams = cluster.FillBE(instance)
9167 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
9169 assert console.instance == instance.name
9170 assert console.Validate()
9172 return console.ToDict()
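# Usage sketch (editor's illustration): the returned dict is the serialized
# form of an objects.InstanceConsole, e.g. containing "instance" and "kind"
# keys; clients such as gnt-console use it to build the actual connection
# command for the instance's hypervisor.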
9175 class LUInstanceReplaceDisks(LogicalUnit):
9176   """Replace the disks of an instance.
9178   """
9179   HPATH = "mirrors-replace"
9180   HTYPE = constants.HTYPE_INSTANCE
9181   REQ_BGL = False
9183   def CheckArguments(self):
9184     TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
9185                                   self.op.iallocator)
9187   def ExpandNames(self):
9188 self._ExpandAndLockInstance()
9190 assert locking.LEVEL_NODE not in self.needed_locks
9191 assert locking.LEVEL_NODEGROUP not in self.needed_locks
9193 assert self.op.iallocator is None or self.op.remote_node is None, \
9194 "Conflicting options"
9196 if self.op.remote_node is not None:
9197 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9199 # Warning: do not remove the locking of the new secondary here
9200 # unless DRBD8.AddChildren is changed to work in parallel;
9201 # currently it doesn't since parallel invocations of
9202 # FindUnusedMinor will conflict
9203 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
9204       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
9205     else:
9206       self.needed_locks[locking.LEVEL_NODE] = []
9207 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9209 if self.op.iallocator is not None:
9210 # iallocator will select a new node in the same group
9211 self.needed_locks[locking.LEVEL_NODEGROUP] = []
9213 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
9214 self.op.iallocator, self.op.remote_node,
9215 self.op.disks, False, self.op.early_release)
9217 self.tasklets = [self.replacer]
9219 def DeclareLocks(self, level):
9220 if level == locking.LEVEL_NODEGROUP:
9221 assert self.op.remote_node is None
9222 assert self.op.iallocator is not None
9223 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
9225 self.share_locks[locking.LEVEL_NODEGROUP] = 1
9226 self.needed_locks[locking.LEVEL_NODEGROUP] = \
9227 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9229 elif level == locking.LEVEL_NODE:
9230 if self.op.iallocator is not None:
9231 assert self.op.remote_node is None
9232 assert not self.needed_locks[locking.LEVEL_NODE]
9234 # Lock member nodes of all locked groups
9235 self.needed_locks[locking.LEVEL_NODE] = [node_name
9236 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
9237           for node_name in self.cfg.GetNodeGroup(group_uuid).members]
9238       else:
9239         self._LockInstancesNodes()
9241   def BuildHooksEnv(self):
9242     """Build hooks env.
9244     This runs on the master, the primary and all the secondaries.
9246     """
9247     instance = self.replacer.instance
9248     env = {
9249       "MODE": self.op.mode,
9250       "NEW_SECONDARY": self.op.remote_node,
9251       "OLD_SECONDARY": instance.secondary_nodes[0],
9252       }
9253     env.update(_BuildInstanceHookEnvByObject(self, instance))
9254     return env
9256 def BuildHooksNodes(self):
9257     """Build hooks nodes.
9259     """
9260     instance = self.replacer.instance
9261     nl = [
9262       self.cfg.GetMasterNode(),
9263       instance.primary_node,
9264       ]
9265     if self.op.remote_node is not None:
9266       nl.append(self.op.remote_node)
9267     return nl, nl
9269 def CheckPrereq(self):
9270     """Check prerequisites.
9272     """
9273     assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
9274 self.op.iallocator is None)
9276     owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
9277     if owned_groups:
9278       _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
9280 return LogicalUnit.CheckPrereq(self)
9283 class TLReplaceDisks(Tasklet):
9284 """Replaces disks for an instance.
9286   Note: Locking is not within the scope of this class.
9288   """
9289   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9290 disks, delay_iallocator, early_release):
9291     """Initializes this class.
9293     """
9294     Tasklet.__init__(self, lu)
9296     # Parameters
9297     self.instance_name = instance_name
9298     self.mode = mode
9299     self.iallocator_name = iallocator_name
9300     self.remote_node = remote_node
9301     self.disks = disks
9302     self.delay_iallocator = delay_iallocator
9303     self.early_release = early_release
9305     # Runtime data
9306     self.instance = None
9307 self.new_node = None
9308 self.target_node = None
9309 self.other_node = None
9310 self.remote_node_info = None
9311     self.node_secondary_ip = None
9313   @staticmethod
9314   def CheckArguments(mode, remote_node, iallocator):
9315     """Helper function for users of this class.
9317     """
9318     # check for valid parameter combination
9319 if mode == constants.REPLACE_DISK_CHG:
9320 if remote_node is None and iallocator is None:
9321 raise errors.OpPrereqError("When changing the secondary either an"
9322 " iallocator script must be used or the"
9323 " new node given", errors.ECODE_INVAL)
9325 if remote_node is not None and iallocator is not None:
9326 raise errors.OpPrereqError("Give either the iallocator or the new"
9327 " secondary, not both", errors.ECODE_INVAL)
9329 elif remote_node is not None or iallocator is not None:
9330 # Not replacing the secondary
9331 raise errors.OpPrereqError("The iallocator and new node options can"
9332 " only be used when changing the"
9333                                  " secondary node", errors.ECODE_INVAL)
9335   @staticmethod
9336   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9337     """Compute a new secondary node using an IAllocator.
9339     """
9340     ial = IAllocator(lu.cfg, lu.rpc,
9341 mode=constants.IALLOCATOR_MODE_RELOC,
9343 relocate_from=list(relocate_from))
9345 ial.Run(iallocator_name)
9348 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9349 " %s" % (iallocator_name, ial.info),
9352 if len(ial.result) != ial.required_nodes:
9353 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9354 " of nodes (%s), required %s" %
9356 len(ial.result), ial.required_nodes),
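    # ial.result for a relocation request is a list of target node names whose
    # length must match ial.required_nodes (one node when changing a DRBD8
    # secondary); e.g. a successful run might yield ["node4.example.com"]
    # (example value only)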
9359 remote_node_name = ial.result[0]
9361 lu.LogInfo("Selected new secondary for instance '%s': %s",
9362 instance_name, remote_node_name)
9364 return remote_node_name
9366 def _FindFaultyDisks(self, node_name):
9367 """Wrapper for L{_FindFaultyInstanceDisks}.
9370 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
9373 def _CheckDisksActivated(self, instance):
9374 """Checks if the instance disks are activated.
9376 @param instance: The instance to check disks
    @return: True if they are activated, False otherwise

    """
    nodes = instance.all_nodes

    for idx, dev in enumerate(instance.disks):
      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
        self.cfg.SetDiskID(dev, node)

        result = self.rpc.call_blockdev_find(node, dev)

        if result.offline:
          continue
        elif result.fail_msg or not result.payload:
          return False

    return True
9396 def CheckPrereq(self):
9397 """Check prerequisites.
    This checks that the instance is in the cluster.

    """
9402 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9403 assert instance is not None, \
9404 "Cannot retrieve locked instance %s" % self.instance_name
9406 if instance.disk_template != constants.DT_DRBD8:
9407 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9408 " instances", errors.ECODE_INVAL)
9410 if len(instance.secondary_nodes) != 1:
9411 raise errors.OpPrereqError("The instance has a strange layout,"
9412 " expected one secondary but found %d" %
                                 len(instance.secondary_nodes),
                                 errors.ECODE_FAULT)
9416 if not self.delay_iallocator:
9417 self._CheckPrereq2()
9419 def _CheckPrereq2(self):
9420 """Check prerequisites, second part.
    This function should always be part of CheckPrereq. It was separated and is
    now called from Exec because during node evacuation iallocator was only
    called with an unmodified cluster model, not taking planned changes into
    account.

    """
9428 instance = self.instance
9429 secondary_node = instance.secondary_nodes[0]
9431 if self.iallocator_name is None:
9432 remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
9435 instance.name, instance.secondary_nodes)
9437 if remote_node is None:
9438 self.remote_node_info = None
    else:
      assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
9441 "Remote node '%s' is not locked" % remote_node
9443 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
9444 assert self.remote_node_info is not None, \
9445 "Cannot retrieve locked node %s" % remote_node
9447 if remote_node == self.instance.primary_node:
9448 raise errors.OpPrereqError("The specified node is the primary node of"
9449 " the instance", errors.ECODE_INVAL)
9451 if remote_node == secondary_node:
9452 raise errors.OpPrereqError("The specified node is already the"
9453 " secondary node of the instance",
9456 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
9457 constants.REPLACE_DISK_CHG):
9458 raise errors.OpPrereqError("Cannot specify disks to be replaced",
9461 if self.mode == constants.REPLACE_DISK_AUTO:
9462 if not self._CheckDisksActivated(instance):
9463 raise errors.OpPrereqError("Please run activate-disks on instance %s"
9464 " first" % self.instance_name,
9466 faulty_primary = self._FindFaultyDisks(instance.primary_node)
9467 faulty_secondary = self._FindFaultyDisks(secondary_node)
9469 if faulty_primary and faulty_secondary:
9470 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
9471 " one node and can not be repaired"
9472 " automatically" % self.instance_name,
9476 self.disks = faulty_primary
9477 self.target_node = instance.primary_node
9478 self.other_node = secondary_node
9479 check_nodes = [self.target_node, self.other_node]
9480 elif faulty_secondary:
9481 self.disks = faulty_secondary
9482 self.target_node = secondary_node
9483 self.other_node = instance.primary_node
9484 check_nodes = [self.target_node, self.other_node]
      else:
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
9491 if self.mode == constants.REPLACE_DISK_PRI:
9492 self.target_node = instance.primary_node
9493 self.other_node = secondary_node
9494 check_nodes = [self.target_node, self.other_node]
9496 elif self.mode == constants.REPLACE_DISK_SEC:
9497 self.target_node = secondary_node
9498 self.other_node = instance.primary_node
9499 check_nodes = [self.target_node, self.other_node]
9501 elif self.mode == constants.REPLACE_DISK_CHG:
9502 self.new_node = remote_node
9503 self.other_node = instance.primary_node
9504 self.target_node = secondary_node
9505 check_nodes = [self.new_node, self.other_node]
9507 _CheckNodeNotDrained(self.lu, remote_node)
9508 _CheckNodeVmCapable(self.lu, remote_node)
9510 old_node_info = self.cfg.GetNodeInfo(secondary_node)
9511 assert old_node_info is not None
9512 if old_node_info.offline and not self.early_release:
9513 # doesn't make sense to delay the release
9514 self.early_release = True
9515 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
9516 " early-release mode", secondary_node)
9519 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
      # If not specified all disks should be replaced
      if not self.disks:
        self.disks = range(len(self.instance.disks))
9526 for node in check_nodes:
9527 _CheckNodeOnline(self.lu, node)
    touched_nodes = frozenset(node_name for node_name in [self.new_node,
                                                          self.other_node,
                                                          self.target_node]
                              if node_name is not None)
9534 # Release unneeded node locks
9535 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
9537 # Release any owned node group
9538 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
9539 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
9541 # Check whether disks are valid
9542 for disk_idx in self.disks:
9543 instance.FindDisk(disk_idx)
9545 # Get secondary node IP addresses
9546 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
9547 in self.cfg.GetMultiNodeInfo(touched_nodes))
9549 def Exec(self, feedback_fn):
9550 """Execute disk replacement.
9552 This dispatches the disk replacement to the appropriate handler.
9555 if self.delay_iallocator:
9556 self._CheckPrereq2()
9559 # Verify owned locks before starting operation
9560 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9561 assert set(owned_nodes) == set(self.node_secondary_ip), \
9562 ("Incorrect node locks, owning %s, expected %s" %
9563 (owned_nodes, self.node_secondary_ip.keys()))
9565 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
9566 assert list(owned_instances) == [self.instance_name], \
9567 "Instance '%s' not locked" % self.instance_name
9569 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
9570 "Should not own any node group lock at this point"
9573 feedback_fn("No disks need replacement")
9576 feedback_fn("Replacing disk(s) %s for %s" %
9577 (utils.CommaJoin(self.disks), self.instance.name))
9579 activate_disks = (not self.instance.admin_up)
    # Activate the instance disks if we're replacing them on a down instance
    if activate_disks:
      _StartInstanceDisks(self.lu, self.instance, True)
9586 # Should we replace the secondary node?
9587 if self.new_node is not None:
9588 fn = self._ExecDrbd8Secondary
9590 fn = self._ExecDrbd8DiskOnly
9592 result = fn(feedback_fn)
    # Deactivate the instance disks if we're replacing them on a
    # down instance
    if activate_disks:
      _SafeShutdownInstanceDisks(self.lu, self.instance)
9600 # Verify owned locks
9601 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9602 nodes = frozenset(self.node_secondary_ip)
9603 assert ((self.early_release and not owned_nodes) or
9604 (not self.early_release and not (set(owned_nodes) - nodes))), \
9605 ("Not owning the correct locks, early_release=%s, owned=%r,"
9606 " nodes=%r" % (self.early_release, owned_nodes, nodes))
9610 def _CheckVolumeGroup(self, nodes):
9611 self.lu.LogInfo("Checking volume groups")
9613 vgname = self.cfg.GetVGName()
9615 # Make sure volume group exists on all involved nodes
    results = self.rpc.call_vg_list(nodes)
    if not results:
      raise errors.OpExecError("Can't list volume groups on the nodes")

    for node in nodes:
      res = results[node]
      res.Raise("Error checking node %s" % node)
      if vgname not in res.payload:
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
                                 (vgname, node))
9627 def _CheckDisksExistence(self, nodes):
9628 # Check disk existence
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
        self.cfg.SetDiskID(dev, node)

        result = self.rpc.call_blockdev_find(node, dev)

        msg = result.fail_msg
        if msg or not result.payload:
          if not msg:
            msg = "disk not found"
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
                                   (idx, node, msg))
9646 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
9647 for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
                      (idx, node_name))

      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
                                   ldisk=ldisk):
9656 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
9657 " replace disks for instance %s" %
9658 (node_name, self.instance.name))
9660 def _CreateNewStorage(self, node_name):
9661 """Create new storage on the primary or secondary node.
9663 This is only used for same-node replaces, not for changing the
    secondary node, hence we don't want to modify the existing disk.

    """
    iv_names = {}
9669 for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue
9673 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
9675 self.cfg.SetDiskID(dev, node_name)
9677 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
9678 names = _GenerateUniqueNames(self.lu, lv_names)
9680 vg_data = dev.children[0].logical_id[0]
9681 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
9682 logical_id=(vg_data, names[0]))
9683 vg_meta = dev.children[1].logical_id[0]
9684 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
9685 logical_id=(vg_meta, names[1]))
9687 new_lvs = [lv_data, lv_meta]
9688 old_lvs = [child.Copy() for child in dev.children]
9689 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
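      # iv_names thus maps e.g. "disk/0" -> (the DRBD disk object,
      # [old data LV, old meta LV], [new data LV, new meta LV]); the old
      # children are copied so the later renames don't touch the originals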
9691 # we pass force_create=True to force the LVM creation
9692 for new_lv in new_lvs:
9693 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    return iv_names
9698 def _CheckDevices(self, node_name, iv_names):
9699 for name, (dev, _, _) in iv_names.iteritems():
9700 self.cfg.SetDiskID(dev, node_name)
9702 result = self.rpc.call_blockdev_find(node_name, dev)
9704 msg = result.fail_msg
      if msg or not result.payload:
        if not msg:
          msg = "disk not found"
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
                                 (name, msg))
9711 if result.payload.is_degraded:
9712 raise errors.OpExecError("DRBD device %s is degraded!" % name)
9714 def _RemoveOldStorage(self, node_name, iv_names):
9715 for name, (_, old_lvs, _) in iv_names.iteritems():
9716 self.lu.LogInfo("Remove logical volumes for %s" % name)
9719 self.cfg.SetDiskID(lv, node_name)
9721 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
9723 self.lu.LogWarning("Can't remove old LV: %s" % msg,
9724 hint="remove unused LVs manually")
9726 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
9727 """Replace a disk on the primary or secondary for DRBD 8.
9729 The algorithm for replace is quite complicated:
9731 1. for each disk to be replaced:
9733 1. create new LVs on the target node with unique names
9734 1. detach old LVs from the drbd device
9735 1. rename old LVs to name_replaced.<time_t>
9736 1. rename new LVs to old LVs
9737 1. attach the new LVs (with the old names now) to the drbd device
9739 1. wait for sync across all devices
9741 1. for each modified disk:
      1. remove old LVs (which have the name name_replaced.<time_t>)
    Failures are not very well handled.

    """
    steps_total = 6

    # Step: check device existence
9751 self.lu.LogStep(1, steps_total, "Check device existence")
9752 self._CheckDisksExistence([self.other_node, self.target_node])
9753 self._CheckVolumeGroup([self.target_node, self.other_node])
9755 # Step: check other node consistency
9756 self.lu.LogStep(2, steps_total, "Check peer consistency")
9757 self._CheckDisksConsistency(self.other_node,
                                self.other_node == self.instance.primary_node,
                                False)
9761 # Step: create new storage
9762 self.lu.LogStep(3, steps_total, "Allocate new storage")
9763 iv_names = self._CreateNewStorage(self.target_node)
9765 # Step: for each lv, detach+rename*2+attach
9766 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9767 for dev, old_lvs, new_lvs in iv_names.itervalues():
9768 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
                                                     old_lvs)
9772 result.Raise("Can't detach drbd from local storage on node"
9773 " %s for device %s" % (self.target_node, dev.iv_name))
9775 #cfg.Update(instance)
9777 # ok, we created the new LVs, so now we know we have the needed
9778 # storage; as such, we proceed on the target node to rename
9779 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
9780 # using the assumption that logical_id == physical_id (which in
9781 # turn is the unique_id on that node)
9783 # FIXME(iustin): use a better name for the replaced LVs
9784 temp_suffix = int(time.time())
9785 ren_fn = lambda d, suff: (d.physical_id[0],
9786 d.physical_id[1] + "_replaced-%s" % suff)
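        # e.g. for a hypothetical LV ("xenvg", "4cbe8ad6.disk0_data") and
        # temp_suffix 1400000000, ren_fn returns
        # ("xenvg", "4cbe8ad6.disk0_data_replaced-1400000000")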
9788 # Build the rename list based on what LVs exist on the node
9789 rename_old_to_new = []
9790 for to_ren in old_lvs:
9791 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
9792 if not result.fail_msg and result.payload:
9794 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
9796 self.lu.LogInfo("Renaming the old LVs on the target node")
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_old_to_new)
9799 result.Raise("Can't rename old LVs on node %s" % self.target_node)
9801 # Now we rename the new LVs to the old LVs
9802 self.lu.LogInfo("Renaming the new LVs on the target node")
9803 rename_new_to_old = [(new, old.physical_id)
9804 for old, new in zip(old_lvs, new_lvs)]
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_new_to_old)
9807 result.Raise("Can't rename new LVs on node %s" % self.target_node)
9809 # Intermediate steps of in memory modifications
9810 for old, new in zip(old_lvs, new_lvs):
9811 new.logical_id = old.logical_id
9812 self.cfg.SetDiskID(new, self.target_node)
9814 # We need to modify old_lvs so that removal later removes the
      # right LVs, not the newly added ones; note that old_lvs is a
      # copy here
9817 for disk in old_lvs:
9818 disk.logical_id = ren_fn(disk, temp_suffix)
9819 self.cfg.SetDiskID(disk, self.target_node)
9821 # Now that the new lvs have the old name, we can add them to the device
9822 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
                                                  new_lvs)
      msg = result.fail_msg
      if msg:
        for new_lv in new_lvs:
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
                                               new_lv).fail_msg
          if msg2:
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("cleanup manually the unused logical"
                                     " volumes"))
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
    cstep = 5
    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
9841 # WARNING: we release both node locks here, do not do other RPCs
9842 # than WaitForSync to the primary node
9843 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9844 names=[self.target_node, self.other_node])
9847 # This can fail as the old devices are degraded and _WaitForSync
9848 # does a combined result over all disks, so we don't check its return value
9849 self.lu.LogStep(cstep, steps_total, "Sync devices")
9851 _WaitForSync(self.lu, self.instance)
9853 # Check all devices manually
9854 self._CheckDevices(self.instance.primary_node, iv_names)
9856 # Step: remove old storage
9857 if not self.early_release:
9858 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9860 self._RemoveOldStorage(self.target_node, iv_names)
9862 def _ExecDrbd8Secondary(self, feedback_fn):
9863 """Replace the secondary node for DRBD 8.
9865 The algorithm for replace is quite complicated:
9866 - for all disks of the instance:
9867 - create new LVs on the new node with same names
9868 - shutdown the drbd device on the old secondary
9869 - disconnect the drbd network on the primary
9870 - create the drbd device on the new secondary
9871 - network attach the drbd on the primary, using an artifice:
9872 the drbd code for Attach() will connect to the network if it
9873 finds a device which is connected to the good local disks but
9875 - wait for sync across all devices
9876 - remove all disks from the old secondary
    Failures are not very well handled.

    """
    steps_total = 6

    pnode = self.instance.primary_node
9885 # Step: check device activation
9886 self.lu.LogStep(1, steps_total, "Check device existence")
9887 self._CheckDisksExistence([self.instance.primary_node])
9888 self._CheckVolumeGroup([self.instance.primary_node])
9890 # Step: check other node consistency
9891 self.lu.LogStep(2, steps_total, "Check peer consistency")
9892 self._CheckDisksConsistency(self.instance.primary_node, True, True)
9894 # Step: create new storage
9895 self.lu.LogStep(3, steps_total, "Allocate new storage")
9896 for idx, dev in enumerate(self.instance.disks):
9897 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
9898 (self.new_node, idx))
9899 # we pass force_create=True to force LVM creation
9900 for new_lv in dev.children:
9901 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
9902 _GetInstanceInfoText(self.instance), False)
    # Step 4: drbd minors and drbd setup changes
9905 # after this, we must manually remove the drbd minors on both the
9906 # error and the success paths
9907 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    minors = self.cfg.AllocateDRBDMinor([self.new_node
                                         for dev in self.instance.disks],
                                        self.instance.name)
9911 logging.debug("Allocated minors %r", minors)
9914 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
9915 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
9916 (self.new_node, idx))
9917 # create new devices on new_node; note that we create two IDs:
9918 # one without port, so the drbd will be activated without
9919 # networking information on the new node at this stage, and one
9920 # with network, for the latter activation in step 4
9921 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
      if self.instance.primary_node == o_node1:
        p_minor = o_minor1
      else:
        assert self.instance.primary_node == o_node2, "Three-node instance?"
        p_minor = o_minor2
9928 new_alone_id = (self.instance.primary_node, self.new_node, None,
9929 p_minor, new_minor, o_secret)
9930 new_net_id = (self.instance.primary_node, self.new_node, o_port,
9931 p_minor, new_minor, o_secret)
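      # The DRBD8 logical_id is (node_a, node_b, port, minor_a, minor_b,
      # secret); new_alone_id omits the port so the device first comes up
      # without networking, while new_net_id is kept for the later attach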
9933 iv_names[idx] = (dev, dev.children, new_net_id)
9934 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
9936 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
9937 logical_id=new_alone_id,
9938 children=dev.children,
9941 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
9942 _GetInstanceInfoText(self.instance), False)
9943 except errors.GenericError:
        self.cfg.ReleaseDRBDMinors(self.instance.name)
        raise
9947 # We have new devices, shutdown the drbd on the old secondary
9948 for idx, dev in enumerate(self.instance.disks):
9949 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
9950 self.cfg.SetDiskID(dev, self.target_node)
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
      if msg:
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           " node: %s" % (idx, msg),
9955 hint=("Please cleanup this device manually as"
9956 " soon as possible"))
9958 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
9959 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
9960 self.instance.disks)[pnode]
    msg = result.fail_msg
    if msg:
9964 # detaches didn't succeed (unlikely)
9965 self.cfg.ReleaseDRBDMinors(self.instance.name)
9966 raise errors.OpExecError("Can't detach the disks from the network on"
9967 " old node: %s" % (msg,))
9969 # if we managed to detach at least one, we update all the disks of
9970 # the instance to point to the new secondary
9971 self.lu.LogInfo("Updating instance configuration")
9972 for dev, _, new_logical_id in iv_names.itervalues():
9973 dev.logical_id = new_logical_id
9974 self.cfg.SetDiskID(dev, self.instance.primary_node)
9976 self.cfg.Update(self.instance, feedback_fn)
9978 # and now perform the drbd attach
9979 self.lu.LogInfo("Attaching primary drbds to new secondary"
9980 " (standalone => connected)")
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
                                            self.new_node],
                                           self.node_secondary_ip,
                                           self.instance.disks,
                                           self.instance.name,
                                           False)
9987 for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
                           to_node, msg,
                           hint=("please do a gnt-instance info to see the"
9993 " status of disks"))
    cstep = 5
    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
9998 self._RemoveOldStorage(self.target_node, iv_names)
9999 # WARNING: we release all node locks here, do not do other RPCs
10000 # than WaitForSync to the primary node
10001 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
                    names=[self.instance.primary_node,
                           self.target_node,
                           self.new_node])
10007 # This can fail as the old devices are degraded and _WaitForSync
10008 # does a combined result over all disks, so we don't check its return value
10009 self.lu.LogStep(cstep, steps_total, "Sync devices")
10011 _WaitForSync(self.lu, self.instance)
10013 # Check all devices manually
10014 self._CheckDevices(self.instance.primary_node, iv_names)
10016 # Step: remove old storage
10017 if not self.early_release:
10018 self.lu.LogStep(cstep, steps_total, "Removing old storage")
10019 self._RemoveOldStorage(self.target_node, iv_names)
10022 class LURepairNodeStorage(NoHooksLU):
10023 """Repairs the volume group on a node.
10028 def CheckArguments(self):
10029 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10031 storage_type = self.op.storage_type
10033 if (constants.SO_FIX_CONSISTENCY not in
10034 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
10035 raise errors.OpPrereqError("Storage units of type '%s' can not be"
10036 " repaired" % storage_type,
10037 errors.ECODE_INVAL)
10039 def ExpandNames(self):
10040 self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }
10044 def _CheckFaultyDisks(self, instance, node_name):
10045 """Ensure faulty disks abort the opcode or at least warn."""
    try:
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
                                  node_name, True):
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
                                   " node '%s'" % (instance.name, node_name),
                                   errors.ECODE_STATE)
    except errors.OpPrereqError, err:
      if self.op.ignore_consistency:
        self.proc.LogWarning(str(err.args[0]))
      else:
        raise
10058 def CheckPrereq(self):
10059 """Check prerequisites.
10062 # Check whether any instance on this node has faulty disks
10063 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
      if not inst.admin_up:
        continue
10066 check_nodes = set(inst.all_nodes)
10067 check_nodes.discard(self.op.node_name)
10068 for inst_node_name in check_nodes:
10069 self._CheckFaultyDisks(inst, inst_node_name)
10071 def Exec(self, feedback_fn):
10072 feedback_fn("Repairing storage unit '%s' on %s ..." %
10073 (self.op.name, self.op.node_name))
10075 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
10076 result = self.rpc.call_storage_execute(self.op.node_name,
10077 self.op.storage_type, st_args,
10079 constants.SO_FIX_CONSISTENCY)
10080 result.Raise("Failed to repair storage unit '%s' on %s" %
10081 (self.op.name, self.op.node_name))
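# A hypothetical invocation of the LU above via the command line could look
# like "gnt-node repair-storage node1.example.com lvm-vg xenvg", which fixes
# the consistency of the "xenvg" volume group on that node (names are
# examples only).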
10084 class LUNodeEvacuate(NoHooksLU):
10085 """Evacuates instances off a list of nodes.
10090 def CheckArguments(self):
10091 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10093 def ExpandNames(self):
10094 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10096 if self.op.remote_node is not None:
10097 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10098 assert self.op.remote_node
10100 if self.op.remote_node == self.op.node_name:
10101 raise errors.OpPrereqError("Can not use evacuated node as a new"
10102 " secondary node", errors.ECODE_INVAL)
10104 if self.op.mode != constants.IALLOCATOR_NEVAC_SEC:
10105 raise errors.OpPrereqError("Without the use of an iallocator only"
10106 " secondary instances can be evacuated",
10107 errors.ECODE_INVAL)
10110 self.share_locks = _ShareAll()
10111 self.needed_locks = {
10112 locking.LEVEL_INSTANCE: [],
10113 locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }
10117 # Determine nodes (via group) optimistically, needs verification once locks
10118 # have been acquired
10119 self.lock_nodes = self._DetermineNodes()
10121 def _DetermineNodes(self):
10122 """Gets the list of nodes to operate on.
10125 if self.op.remote_node is None:
10126 # Iallocator will choose any node(s) in the same group
10127 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
    else:
      group_nodes = frozenset([self.op.remote_node])
10131 # Determine nodes to be locked
10132 return set([self.op.node_name]) | group_nodes
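    # Example (hypothetical names): evacuating "node1" whose group also
    # contains "node2" and "node3" yields set(["node1", "node2", "node3"]),
    # while an explicit remote node "node9" yields set(["node1", "node9"])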
10134 def _DetermineInstances(self):
10135 """Builds list of instances to operate on.
10138 assert self.op.mode in constants.IALLOCATOR_NEVAC_MODES
10140 if self.op.mode == constants.IALLOCATOR_NEVAC_PRI:
10141 # Primary instances only
10142 inst_fn = _GetNodePrimaryInstances
10143 assert self.op.remote_node is None, \
10144 "Evacuating primary instances requires iallocator"
10145 elif self.op.mode == constants.IALLOCATOR_NEVAC_SEC:
10146 # Secondary instances only
10147 inst_fn = _GetNodeSecondaryInstances
    else:
      assert self.op.mode == constants.IALLOCATOR_NEVAC_ALL
10151 inst_fn = _GetNodeInstances
10152 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
10154 raise errors.OpPrereqError("Due to an issue with the iallocator"
10155 " interface it is not possible to evacuate"
10156 " all instances at once; specify explicitly"
10157 " whether to evacuate primary or secondary"
10159 errors.ECODE_INVAL)
10161 return inst_fn(self.cfg, self.op.node_name)
10163 def DeclareLocks(self, level):
10164 if level == locking.LEVEL_INSTANCE:
10165 # Lock instances optimistically, needs verification once node and group
10166 # locks have been acquired
10167 self.needed_locks[locking.LEVEL_INSTANCE] = \
10168 set(i.name for i in self._DetermineInstances())
10170 elif level == locking.LEVEL_NODEGROUP:
10171 # Lock node groups for all potential target nodes optimistically, needs
10172 # verification once nodes have been acquired
10173 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10174 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
10176 elif level == locking.LEVEL_NODE:
10177 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
10179 def CheckPrereq(self):
10181 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
10182 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
10183 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10185 need_nodes = self._DetermineNodes()
10187 if not owned_nodes.issuperset(need_nodes):
10188 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
10189 " locks were acquired, current nodes are"
10190 " are '%s', used to be '%s'; retry the"
10192 (self.op.node_name,
10193 utils.CommaJoin(need_nodes),
10194 utils.CommaJoin(owned_nodes)),
10195 errors.ECODE_STATE)
10197 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
10198 if owned_groups != wanted_groups:
10199 raise errors.OpExecError("Node groups changed since locks were acquired,"
10200 " current groups are '%s', used to be '%s';"
10201 " retry the operation" %
10202 (utils.CommaJoin(wanted_groups),
10203 utils.CommaJoin(owned_groups)))
10205 # Determine affected instances
10206 self.instances = self._DetermineInstances()
10207 self.instance_names = [i.name for i in self.instances]
10209 if set(self.instance_names) != owned_instances:
10210 raise errors.OpExecError("Instances on node '%s' changed since locks"
10211 " were acquired, current instances are '%s',"
10212 " used to be '%s'; retry the operation" %
10213 (self.op.node_name,
10214 utils.CommaJoin(self.instance_names),
10215 utils.CommaJoin(owned_instances)))
10217 if self.instance_names:
10218 self.LogInfo("Evacuating instances from node '%s': %s",
10220 utils.CommaJoin(utils.NiceSort(self.instance_names)))
10222 self.LogInfo("No instances to evacuate from node '%s'",
10225 if self.op.remote_node is not None:
10226 for i in self.instances:
10227 if i.primary_node == self.op.remote_node:
10228 raise errors.OpPrereqError("Node %s is the primary node of"
10229 " instance %s, cannot use it as"
10231 (self.op.remote_node, i.name),
10232 errors.ECODE_INVAL)
10234 def Exec(self, feedback_fn):
10235 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10237 if not self.instance_names:
      # No instances to evacuate
      jobs = []
10241 elif self.op.iallocator is not None:
10242 # TODO: Implement relocation to other group
10243 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10244 evac_mode=self.op.mode,
10245 instances=list(self.instance_names))
10247 ial.Run(self.op.iallocator)
10249 if not ial.success:
10250 raise errors.OpPrereqError("Can't compute node evacuation using"
10251 " iallocator '%s': %s" %
10252 (self.op.iallocator, ial.info),
10253 errors.ECODE_NORES)
10255 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10257 elif self.op.remote_node is not None:
10258 assert self.op.mode == constants.IALLOCATOR_NEVAC_SEC
      jobs = [
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
                                        remote_node=self.op.remote_node,
                                        disks=[],
                                        mode=constants.REPLACE_DISK_CHG,
                                        early_release=self.op.early_release)]
        for instance_name in self.instance_names
        ]

    else:
      raise errors.ProgrammerError("No iallocator or remote node")
10271 return ResultWithJobs(jobs)
10274 def _SetOpEarlyRelease(early_release, op):
10275 """Sets C{early_release} flag on opcodes if available.
10279 op.early_release = early_release
10280 except AttributeError:
10281 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
10286 def _NodeEvacDest(use_nodes, group, nodes):
10287 """Returns group or nodes depending on caller's choice.
10291 return utils.CommaJoin(nodes)
10296 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
10297 """Unpacks the result of change-group and node-evacuate iallocator requests.
10299 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
10300 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
10302 @type lu: L{LogicalUnit}
10303 @param lu: Logical unit instance
10304 @type alloc_result: tuple/list
10305 @param alloc_result: Result from iallocator
10306 @type early_release: bool
10307 @param early_release: Whether to release locks early if possible
10308 @type use_nodes: bool
10309 @param use_nodes: Whether to display node names instead of groups
10312 (moved, failed, jobs) = alloc_result
  if failed:
    failreason = utils.CommaJoin("%s (%s)" % (name, reason)
                                 for (name, reason) in failed)
    lu.LogWarning("Unable to evacuate instances %s", failreason)
    raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
10321 lu.LogInfo("Instances to be moved: %s",
10322 utils.CommaJoin("%s (to %s)" %
10323 (name, _NodeEvacDest(use_nodes, group, nodes))
10324 for (name, group, nodes) in moved))
10326 return [map(compat.partial(_SetOpEarlyRelease, early_release),
10327 map(opcodes.OpCode.LoadOpCode, ops))
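# Sketch of the alloc_result shape consumed above (hypothetical values):
#   moved = [("inst1", "group1", ["node2"])], failed = [] and
#   jobs = [[<serialized opcode dict>]] produce one job whose opcodes have
#   early_release already applied via _SetOpEarlyRelease.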
10331 class LUInstanceGrowDisk(LogicalUnit):
10332 """Grow a disk of an instance.
10335 HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False
10339 def ExpandNames(self):
10340 self._ExpandAndLockInstance()
10341 self.needed_locks[locking.LEVEL_NODE] = []
10342 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10344 def DeclareLocks(self, level):
10345 if level == locking.LEVEL_NODE:
10346 self._LockInstancesNodes()
10348 def BuildHooksEnv(self):
10349 """Build hooks env.
10351 This runs on the master, the primary and all the secondaries.
10355 "DISK": self.op.disk,
10356 "AMOUNT": self.op.amount,
10358 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10361 def BuildHooksNodes(self):
10362 """Build hooks nodes.
10365 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10368 def CheckPrereq(self):
10369 """Check prerequisites.
10371 This checks that the instance is in the cluster.
10374 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10375 assert instance is not None, \
10376 "Cannot retrieve locked instance %s" % self.op.instance_name
10377 nodenames = list(instance.all_nodes)
10378 for node in nodenames:
10379 _CheckNodeOnline(self, node)
10381 self.instance = instance
10383 if instance.disk_template not in constants.DTS_GROWABLE:
10384 raise errors.OpPrereqError("Instance's disk layout does not support"
10385 " growing", errors.ECODE_INVAL)
10387 self.disk = instance.FindDisk(self.op.disk)
10389 if instance.disk_template not in (constants.DT_FILE,
10390 constants.DT_SHARED_FILE):
10391 # TODO: check the free disk space for file, when that feature will be
10393 _CheckNodesFreeDiskPerVG(self, nodenames,
10394 self.disk.ComputeGrowth(self.op.amount))
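    # E.g. growing disk 0 of a DRBD8 instance by 2048 MB requires roughly
    # 2048 MB of free space in the disk's volume group on every node of the
    # instance (numbers are illustrative; ComputeGrowth returns the exact
    # per-VG requirements)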
10396 def Exec(self, feedback_fn):
10397 """Execute disk grow.
    instance = self.instance
    disk = self.disk
10403 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
10405 raise errors.OpExecError("Cannot activate block device to grow")
10407 # First run all grow ops in dry-run mode
10408 for node in instance.all_nodes:
10409 self.cfg.SetDiskID(disk, node)
10410 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
10411 result.Raise("Grow request failed to node %s" % node)
10413 # We know that (as far as we can test) operations across different
10414 # nodes will succeed, time to run it for real
10415 for node in instance.all_nodes:
10416 self.cfg.SetDiskID(disk, node)
10417 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
10418 result.Raise("Grow request failed to node %s" % node)
10420 # TODO: Rewrite code to work properly
10421 # DRBD goes into sync mode for a short amount of time after executing the
10422 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
10423 # calling "resize" in sync mode fails. Sleeping for a short amount of
    # time is a work-around.
    time.sleep(5)
10427 disk.RecordGrow(self.op.amount)
10428 self.cfg.Update(instance, feedback_fn)
10429 if self.op.wait_for_sync:
10430 disk_abort = not _WaitForSync(self, instance, disks=[disk])
10432 self.proc.LogWarning("Disk sync-ing has not returned a good"
10433 " status; please check the instance")
10434 if not instance.admin_up:
10435 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
10436 elif not instance.admin_up:
10437 self.proc.LogWarning("Not shutting down the disk even if the instance is"
10438 " not supposed to be running because no wait for"
10439 " sync mode was requested")
10442 class LUInstanceQueryData(NoHooksLU):
10443 """Query runtime instance data.
10448 def ExpandNames(self):
10449 self.needed_locks = {}
10451 # Use locking if requested or when non-static information is wanted
10452 if not (self.op.static or self.op.use_locking):
10453 self.LogWarning("Non-static data requested, locks need to be acquired")
10454 self.op.use_locking = True
10456 if self.op.instances or not self.op.use_locking:
10457 # Expand instance names right here
10458 self.wanted_names = _GetWantedInstances(self, self.op.instances)
    else:
      # Will use acquired locks
10461 self.wanted_names = None
10463 if self.op.use_locking:
10464 self.share_locks = _ShareAll()
10466 if self.wanted_names is None:
10467 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
      else:
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
10471 self.needed_locks[locking.LEVEL_NODE] = []
10472 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10474 def DeclareLocks(self, level):
10475 if self.op.use_locking and level == locking.LEVEL_NODE:
10476 self._LockInstancesNodes()
10478 def CheckPrereq(self):
10479 """Check prerequisites.
10481 This only checks the optional instance list against the existing names.
10484 if self.wanted_names is None:
10485 assert self.op.use_locking, "Locking was not used"
10486 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
10488 self.wanted_instances = \
10489 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
10491 def _ComputeBlockdevStatus(self, node, instance_name, dev):
10492 """Returns the status of a block device
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_find(node, dev)
    if result.offline:
      return None

    result.Raise("Can't compute disk status for %s" % instance_name)

    status = result.payload
    if status is None:
      return None
10510 return (status.dev_path, status.major, status.minor,
10511 status.sync_percent, status.estimated_time,
10512 status.is_degraded, status.ldisk_status)
10514 def _ComputeDiskStatus(self, instance, snode, dev):
10515 """Compute block device status.
10518 if dev.dev_type in constants.LDS_DRBD:
10519 # we change the snode then (otherwise we use the one passed in)
10520 if dev.logical_id[0] == instance.primary_node:
10521 snode = dev.logical_id[1]
10523 snode = dev.logical_id[0]
10525 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
10526 instance.name, dev)
10527 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
    if dev.children:
      dev_children = map(compat.partial(self._ComputeDiskStatus,
                                        instance, snode),
                         dev.children)
    else:
      dev_children = []

    return {
10537 "iv_name": dev.iv_name,
10538 "dev_type": dev.dev_type,
10539 "logical_id": dev.logical_id,
10540 "physical_id": dev.physical_id,
10541 "pstatus": dev_pstatus,
10542 "sstatus": dev_sstatus,
10543 "children": dev_children,
10548 def Exec(self, feedback_fn):
10549 """Gather and return data"""
10552 cluster = self.cfg.GetClusterInfo()
10554 pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
10555 for i in self.wanted_instances)
10556 for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
10557 if self.op.static or pnode.offline:
        remote_state = None
        if pnode.offline:
          self.LogWarning("Primary node %s is marked offline, returning static"
10561 " information only for instance %s" %
10562 (pnode.name, instance.name))
      else:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
10567 remote_info.Raise("Error checking node %s" % instance.primary_node)
10568 remote_info = remote_info.payload
10569 if remote_info and "state" in remote_info:
10570 remote_state = "up"
10572 remote_state = "down"
10574 if instance.admin_up:
10575 config_state = "up"
10577 config_state = "down"
      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
                  instance.disks)
10582 result[instance.name] = {
10583 "name": instance.name,
10584 "config_state": config_state,
10585 "run_state": remote_state,
10586 "pnode": instance.primary_node,
10587 "snodes": instance.secondary_nodes,
10589 # this happens to be the same format used for hooks
10590 "nics": _NICListToTuple(self, instance.nics),
10591 "disk_template": instance.disk_template,
10593 "hypervisor": instance.hypervisor,
10594 "network_port": instance.network_port,
10595 "hv_instance": instance.hvparams,
10596 "hv_actual": cluster.FillHV(instance, skip_globals=True),
10597 "be_instance": instance.beparams,
10598 "be_actual": cluster.FillBE(instance),
10599 "os_instance": instance.osparams,
10600 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
10601 "serial_no": instance.serial_no,
10602 "mtime": instance.mtime,
10603 "ctime": instance.ctime,
10604 "uuid": instance.uuid,
10610 class LUInstanceSetParams(LogicalUnit):
10611 """Modifies an instances's parameters.
10614 HPATH = "instance-modify"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False
10618 def CheckArguments(self):
10619 if not (self.op.nics or self.op.disks or self.op.disk_template or
10620 self.op.hvparams or self.op.beparams or self.op.os_name):
10621 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
10623 if self.op.hvparams:
10624 _CheckGlobalHvParams(self.op.hvparams)
    # Disk validation
    disk_addremove = 0
    for disk_op, disk_dict in self.op.disks:
10629 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
      if disk_op == constants.DDM_REMOVE:
        disk_addremove += 1
        continue
      elif disk_op == constants.DDM_ADD:
        disk_addremove += 1
      else:
        if not isinstance(disk_op, int):
10637 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
10638 if not isinstance(disk_dict, dict):
10639 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
10640 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10642 if disk_op == constants.DDM_ADD:
10643 mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
10644 if mode not in constants.DISK_ACCESS_SET:
10645 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
10646 errors.ECODE_INVAL)
        size = disk_dict.get(constants.IDISK_SIZE, None)
        if size is None:
          raise errors.OpPrereqError("Required disk parameter size missing",
                                     errors.ECODE_INVAL)
        try:
          size = int(size)
        except (TypeError, ValueError), err:
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
                                     str(err), errors.ECODE_INVAL)
        disk_dict[constants.IDISK_SIZE] = size
      else:
        # modification of disk
10659 if constants.IDISK_SIZE in disk_dict:
10660 raise errors.OpPrereqError("Disk size change not possible, use"
10661 " grow-disk", errors.ECODE_INVAL)
10663 if disk_addremove > 1:
10664 raise errors.OpPrereqError("Only one disk add or remove operation"
10665 " supported at a time", errors.ECODE_INVAL)
10667 if self.op.disks and self.op.disk_template is not None:
10668 raise errors.OpPrereqError("Disk template conversion and other disk"
10669 " changes not supported at the same time",
10670 errors.ECODE_INVAL)
10672 if (self.op.disk_template and
10673 self.op.disk_template in constants.DTS_INT_MIRROR and
10674 self.op.remote_node is None):
10675 raise errors.OpPrereqError("Changing the disk template to a mirrored"
10676 " one requires specifying a secondary node",
10677 errors.ECODE_INVAL)
    # NIC validation
    nic_addremove = 0
    for nic_op, nic_dict in self.op.nics:
10682 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
      if nic_op == constants.DDM_REMOVE:
        nic_addremove += 1
        continue
      elif nic_op == constants.DDM_ADD:
        nic_addremove += 1
      else:
        if not isinstance(nic_op, int):
10690 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
10691 if not isinstance(nic_dict, dict):
10692 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
10693 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10695 # nic_dict should be a dict
10696 nic_ip = nic_dict.get(constants.INIC_IP, None)
10697 if nic_ip is not None:
10698 if nic_ip.lower() == constants.VALUE_NONE:
10699 nic_dict[constants.INIC_IP] = None
10701 if not netutils.IPAddress.IsValid(nic_ip):
10702 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
10703 errors.ECODE_INVAL)
10705 nic_bridge = nic_dict.get("bridge", None)
10706 nic_link = nic_dict.get(constants.INIC_LINK, None)
10707 if nic_bridge and nic_link:
10708 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
10709 " at the same time", errors.ECODE_INVAL)
10710 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
10711 nic_dict["bridge"] = None
10712 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
10713 nic_dict[constants.INIC_LINK] = None
10715 if nic_op == constants.DDM_ADD:
10716 nic_mac = nic_dict.get(constants.INIC_MAC, None)
10717 if nic_mac is None:
10718 nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
10720 if constants.INIC_MAC in nic_dict:
10721 nic_mac = nic_dict[constants.INIC_MAC]
10722 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10723 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
10725 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
10726 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
10727 " modifying an existing nic",
10728 errors.ECODE_INVAL)
10730 if nic_addremove > 1:
10731 raise errors.OpPrereqError("Only one NIC add or remove operation"
10732 " supported at a time", errors.ECODE_INVAL)
10734 def ExpandNames(self):
10735 self._ExpandAndLockInstance()
10736 self.needed_locks[locking.LEVEL_NODE] = []
10737 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10739 def DeclareLocks(self, level):
10740 if level == locking.LEVEL_NODE:
10741 self._LockInstancesNodes()
10742 if self.op.disk_template and self.op.remote_node:
10743 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10744 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
10746 def BuildHooksEnv(self):
10747 """Build hooks env.
    This runs on the master, primary and secondaries.

    """
    args = dict()
    if constants.BE_MEMORY in self.be_new:
10754 args["memory"] = self.be_new[constants.BE_MEMORY]
10755 if constants.BE_VCPUS in self.be_new:
10756 args["vcpus"] = self.be_new[constants.BE_VCPUS]
10757 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
10758 # information at all.
    args["nics"] = []
    nic_override = dict(self.op.nics)
10762 for idx, nic in enumerate(self.instance.nics):
10763 if idx in nic_override:
10764 this_nic_override = nic_override[idx]
10766 this_nic_override = {}
10767 if constants.INIC_IP in this_nic_override:
          ip = this_nic_override[constants.INIC_IP]
        else:
          ip = nic.ip
10771 if constants.INIC_MAC in this_nic_override:
          mac = this_nic_override[constants.INIC_MAC]
        else:
          mac = nic.mac
10775 if idx in self.nic_pnew:
10776 nicparams = self.nic_pnew[idx]
        else:
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
10779 mode = nicparams[constants.NIC_MODE]
10780 link = nicparams[constants.NIC_LINK]
10781 args["nics"].append((ip, mac, mode, link))
10782 if constants.DDM_ADD in nic_override:
10783 ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
10784 mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
10785 nicparams = self.nic_pnew[constants.DDM_ADD]
10786 mode = nicparams[constants.NIC_MODE]
10787 link = nicparams[constants.NIC_LINK]
10788 args["nics"].append((ip, mac, mode, link))
10789 elif constants.DDM_REMOVE in nic_override:
10790 del args["nics"][-1]
10792 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
10793 if self.op.disk_template:
10794 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
10798 def BuildHooksNodes(self):
10799 """Build hooks nodes.
10802 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10805 def CheckPrereq(self):
10806 """Check prerequisites.
10808 This only checks the instance list against the existing names.
10811 # checking the new params on the primary/secondary nodes
10813 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10814 cluster = self.cluster = self.cfg.GetClusterInfo()
10815 assert self.instance is not None, \
10816 "Cannot retrieve locked instance %s" % self.op.instance_name
10817 pnode = instance.primary_node
10818 nodelist = list(instance.all_nodes)
10821 if self.op.os_name and not self.op.force:
10822 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
10823 self.op.force_variant)
10824 instance_os = self.op.os_name
10826 instance_os = instance.os
10828 if self.op.disk_template:
10829 if instance.disk_template == self.op.disk_template:
10830 raise errors.OpPrereqError("Instance already has disk template %s" %
10831 instance.disk_template, errors.ECODE_INVAL)
10833 if (instance.disk_template,
10834 self.op.disk_template) not in self._DISK_CONVERSIONS:
10835 raise errors.OpPrereqError("Unsupported disk template conversion from"
10836 " %s to %s" % (instance.disk_template,
10837 self.op.disk_template),
10838 errors.ECODE_INVAL)
10839 _CheckInstanceDown(self, instance, "cannot change disk template")
10840 if self.op.disk_template in constants.DTS_INT_MIRROR:
10841 if self.op.remote_node == pnode:
10842 raise errors.OpPrereqError("Given new secondary node %s is the same"
10843 " as the primary node of the instance" %
10844 self.op.remote_node, errors.ECODE_STATE)
10845 _CheckNodeOnline(self, self.op.remote_node)
10846 _CheckNodeNotDrained(self, self.op.remote_node)
10847 # FIXME: here we assume that the old instance type is DT_PLAIN
10848 assert instance.disk_template == constants.DT_PLAIN
10849 disks = [{constants.IDISK_SIZE: d.size,
10850 constants.IDISK_VG: d.logical_id[0]}
10851 for d in instance.disks]
10852 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
10853 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
10855 # hvparams processing
10856 if self.op.hvparams:
10857 hv_type = instance.hypervisor
10858 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
10859 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
10860 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
10863 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
10864 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
10865 self.hv_new = hv_new # the new actual values
10866 self.hv_inst = i_hvdict # the new dict (without defaults)
10868 self.hv_new = self.hv_inst = {}
10870 # beparams processing
10871 if self.op.beparams:
10872 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
10874 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
10875 be_new = cluster.SimpleFillBE(i_bedict)
10876 self.be_new = be_new # the new actual values
10877 self.be_inst = i_bedict # the new dict (without defaults)
10879 self.be_new = self.be_inst = {}
10880 be_old = cluster.FillBE(instance)
10882 # osparams processing
10883 if self.op.osparams:
10884 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
10885 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = {}

    self.warn = []
10892 if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
10893 be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
10894 mem_check_list = [pnode]
10895 if be_new[constants.BE_AUTO_BALANCE]:
10896 # either we changed auto_balance to yes or it was from before
10897 mem_check_list.extend(instance.secondary_nodes)
10898 instance_info = self.rpc.call_instance_info(pnode, instance.name,
10899 instance.hypervisor)
10900 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
10901 instance.hypervisor)
10902 pninfo = nodeinfo[pnode]
      msg = pninfo.fail_msg
      if msg:
        # Assume the primary node is unreachable and go ahead
        self.warn.append("Can't get info from primary node %s: %s" %
                         (pnode, msg))
10908 elif not isinstance(pninfo.payload.get("memory_free", None), int):
10909 self.warn.append("Node data from primary node %s doesn't contain"
10910 " free memory information" % pnode)
10911 elif instance_info.fail_msg:
10912 self.warn.append("Can't get instance runtime information: %s" %
10913 instance_info.fail_msg)
10915 if instance_info.payload:
10916 current_mem = int(instance_info.payload["memory"])
        else:
          # Assume instance not running
10919 # (there is a slight race condition here, but it's not very probable,
          # and we have no other way to check)
          current_mem = 0
10922 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
10923 pninfo.payload["memory_free"])
10925 raise errors.OpPrereqError("This change will prevent the instance"
10926 " from starting, due to %d MB of memory"
10927 " missing on its primary node" % miss_mem,
10928 errors.ECODE_NORES)
10930 if be_new[constants.BE_AUTO_BALANCE]:
10931 for node, nres in nodeinfo.items():
          if node not in instance.secondary_nodes:
            continue
10934 nres.Raise("Can't get info from secondary node %s" % node,
10935 prereq=True, ecode=errors.ECODE_STATE)
10936 if not isinstance(nres.payload.get("memory_free", None), int):
10937 raise errors.OpPrereqError("Secondary node %s didn't return free"
10938 " memory information" % node,
10939 errors.ECODE_STATE)
10940 elif be_new[constants.BE_MEMORY] > nres.payload["memory_free"]:
10941 raise errors.OpPrereqError("This change will prevent the instance"
10942 " from failover to its secondary node"
10943 " %s, due to not enough memory" % node,
10944 errors.ECODE_STATE)
    # NIC processing
    self.nic_pnew = {}
    self.nic_pinst = {}
10949 for nic_op, nic_dict in self.op.nics:
10950 if nic_op == constants.DDM_REMOVE:
10951 if not instance.nics:
10952 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
                                     errors.ECODE_INVAL)
        continue
10955 if nic_op != constants.DDM_ADD:
10957 if not instance.nics:
10958 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
10959 " no NICs" % nic_op,
10960 errors.ECODE_INVAL)
        if nic_op < 0 or nic_op >= len(instance.nics):
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
                                     " are between 0 and %d" %
                                     (nic_op, len(instance.nics) - 1),
                                     errors.ECODE_INVAL)
        old_nic_params = instance.nics[nic_op].nicparams
        old_nic_ip = instance.nics[nic_op].ip
      else:
        old_nic_params = {}
        old_nic_ip = None
10972 update_params_dict = dict([(key, nic_dict[key])
10973 for key in constants.NICS_PARAMETERS
10974 if key in nic_dict])
10976 if "bridge" in nic_dict:
10977 update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
10979 new_nic_params = _GetUpdatedParams(old_nic_params,
10980 update_params_dict)
10981 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
10982 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
10983 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
10984 self.nic_pinst[nic_op] = new_nic_params
10985 self.nic_pnew[nic_op] = new_filled_nic_params
10986 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
10988 if new_nic_mode == constants.NIC_MODE_BRIDGED:
10989 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
        if msg:
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
          if self.op.force:
            self.warn.append(msg)
          else:
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
10997 if new_nic_mode == constants.NIC_MODE_ROUTED:
10998 if constants.INIC_IP in nic_dict:
10999 nic_ip = nic_dict[constants.INIC_IP]
        else:
          nic_ip = old_nic_ip

        if nic_ip is None:
          raise errors.OpPrereqError("Cannot set the nic ip to None"
11004 " on a routed nic", errors.ECODE_INVAL)
11005 if constants.INIC_MAC in nic_dict:
11006 nic_mac = nic_dict[constants.INIC_MAC]
11007 if nic_mac is None:
11008 raise errors.OpPrereqError("Cannot set the nic mac to None",
11009 errors.ECODE_INVAL)
11010 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11011 # otherwise generate the mac
11012 nic_dict[constants.INIC_MAC] = \
11013 self.cfg.GenerateMAC(self.proc.GetECId())
        else:
          # or validate/reserve the current one
          try:
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
11018 except errors.ReservationError:
11019 raise errors.OpPrereqError("MAC address %s already in use"
11020 " in cluster" % nic_mac,
11021 errors.ECODE_NOTUNIQUE)
11024 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
11025 raise errors.OpPrereqError("Disk operations not supported for"
11026 " diskless instances",
11027 errors.ECODE_INVAL)
11028 for disk_op, _ in self.op.disks:
11029 if disk_op == constants.DDM_REMOVE:
11030 if len(instance.disks) == 1:
11031 raise errors.OpPrereqError("Cannot remove the last disk of"
11032 " an instance", errors.ECODE_INVAL)
11033 _CheckInstanceDown(self, instance, "cannot remove disks")
11035 if (disk_op == constants.DDM_ADD and
11036 len(instance.disks) >= constants.MAX_DISKS):
11037 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
11038 " add more" % constants.MAX_DISKS,
11039 errors.ECODE_STATE)
11040 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
11042 if disk_op < 0 or disk_op >= len(instance.disks):
11043 raise errors.OpPrereqError("Invalid disk index %s, valid values"
11045 (disk_op, len(instance.disks)),
11046 errors.ECODE_INVAL)
11050 def _ConvertPlainToDrbd(self, feedback_fn):
11051 """Converts an instance from plain to drbd.
11054 feedback_fn("Converting template to drbd")
11055 instance = self.instance
11056 pnode = instance.primary_node
11057 snode = self.op.remote_node
11059 # create a fake disk info for _GenerateDiskTemplate
11060 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
11061 constants.IDISK_VG: d.logical_id[0]}
11062 for d in instance.disks]
11063 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
11064 instance.name, pnode, [snode],
11065 disk_info, None, None, 0, feedback_fn)
11066 info = _GetInstanceInfoText(instance)
11067 feedback_fn("Creating additional volumes...")
11068 # first, create the missing data and meta devices
11069 for disk in new_disks:
11070 # unfortunately this is... not too nice
11071 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
11072 info, True)
11073 for child in disk.children:
11074 _CreateSingleBlockDev(self, snode, instance, child, info, True)
11075 # at this stage, all new LVs have been created, we can rename the
11076 # old ones
11077 feedback_fn("Renaming original volumes...")
11078 rename_list = [(o, n.children[0].logical_id)
11079 for (o, n) in zip(instance.disks, new_disks)]
11080 result = self.rpc.call_blockdev_rename(pnode, rename_list)
11081 result.Raise("Failed to rename original LVs")
11083 feedback_fn("Initializing DRBD devices...")
11084 # all child devices are in place, we can now create the DRBD devices
11085 for disk in new_disks:
11086 for node in [pnode, snode]:
11087 f_create = node == pnode
11088 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
11090 # at this point, the instance has been modified
11091 instance.disk_template = constants.DT_DRBD8
11092 instance.disks = new_disks
11093 self.cfg.Update(instance, feedback_fn)
11095 # disks are created, waiting for sync
11096 disk_abort = not _WaitForSync(self, instance,
11097 oneshot=not self.op.wait_for_sync)
11098 if disk_abort:
11099 raise errors.OpExecError("There are some degraded disks for"
11100 " this instance, please clean up manually")
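# The resulting disk layout, assuming the usual DRBD8-over-LVM tree built by
# _GenerateDiskTemplate (a sketch, not part of the original logic):
#
#   new_disk = objects.Disk(dev_type=constants.LD_DRBD8,
#                           children=[data_lv,   # original LV, renamed above
#                                     meta_lv])  # created from scratch
#
# Only children[1] (metadata) and the secondary node's volumes are created
# anew; the original plain LVs are renamed into the children[0] slots.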
11102 def _ConvertDrbdToPlain(self, feedback_fn):
11103 """Converts an instance from drbd to plain.
11106 instance = self.instance
11107 assert len(instance.secondary_nodes) == 1
11108 pnode = instance.primary_node
11109 snode = instance.secondary_nodes[0]
11110 feedback_fn("Converting template to plain")
11112 old_disks = instance.disks
11113 new_disks = [d.children[0] for d in old_disks]
11115 # copy over size and mode
11116 for parent, child in zip(old_disks, new_disks):
11117 child.size = parent.size
11118 child.mode = parent.mode
11120 # update instance structure
11121 instance.disks = new_disks
11122 instance.disk_template = constants.DT_PLAIN
11123 self.cfg.Update(instance, feedback_fn)
11125 feedback_fn("Removing volumes on the secondary node...")
11126 for disk in old_disks:
11127 self.cfg.SetDiskID(disk, snode)
11128 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
11129 if msg:
11130 self.LogWarning("Could not remove block device %s on node %s,"
11131 " continuing anyway: %s", disk.iv_name, snode, msg)
11133 feedback_fn("Removing unneeded volumes on the primary node...")
11134 for idx, disk in enumerate(old_disks):
11135 meta = disk.children[1]
11136 self.cfg.SetDiskID(meta, pnode)
11137 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
11138 if msg:
11139 self.LogWarning("Could not remove metadata for disk %d on node %s,"
11140 " continuing anyway: %s", idx, pnode, msg)
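# Both converters are reached through LUInstanceSetParams below; a minimal
# sketch of the opcode a client would submit to trigger the plain-to-drbd
# path (instance and node names are illustrative):
#
#   op = opcodes.OpInstanceSetParams(instance_name="inst1.example.com",
#                                    disk_template=constants.DT_DRBD8,
#                                    remote_node="node2.example.com")
#
# The reverse conversion uses disk_template=constants.DT_PLAIN and needs no
# remote node.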
11142 def Exec(self, feedback_fn):
11143 """Modifies an instance.
11145 All parameters take effect only at the next restart of the instance.
11148 # Process here the warnings from CheckPrereq, as we don't have a
11149 # feedback_fn there.
11150 for warn in self.warn:
11151 feedback_fn("WARNING: %s" % warn)
11153 result = []
11154 instance = self.instance
11155 # disk changes
11156 for disk_op, disk_dict in self.op.disks:
11157 if disk_op == constants.DDM_REMOVE:
11158 # remove the last disk
11159 device = instance.disks.pop()
11160 device_idx = len(instance.disks)
11161 for node, disk in device.ComputeNodeTree(instance.primary_node):
11162 self.cfg.SetDiskID(disk, node)
11163 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
11164 if msg:
11165 self.LogWarning("Could not remove disk/%d on node %s: %s,"
11166 " continuing anyway", device_idx, node, msg)
11167 result.append(("disk/%d" % device_idx, "remove"))
11168 elif disk_op == constants.DDM_ADD:
11169 # add a new disk
11170 if instance.disk_template in (constants.DT_FILE,
11171 constants.DT_SHARED_FILE):
11172 file_driver, file_path = instance.disks[0].logical_id
11173 file_path = os.path.dirname(file_path)
11174 else:
11175 file_driver = file_path = None
11176 disk_idx_base = len(instance.disks)
11177 new_disk = _GenerateDiskTemplate(self,
11178 instance.disk_template,
11179 instance.name, instance.primary_node,
11180 instance.secondary_nodes,
11181 [disk_dict],
11182 file_path,
11183 file_driver,
11184 disk_idx_base, feedback_fn)[0]
11185 instance.disks.append(new_disk)
11186 info = _GetInstanceInfoText(instance)
11188 logging.info("Creating volume %s for instance %s",
11189 new_disk.iv_name, instance.name)
11190 # Note: this needs to be kept in sync with _CreateDisks
11192 for node in instance.all_nodes:
11193 f_create = node == instance.primary_node
11194 try:
11195 _CreateBlockDev(self, node, instance, new_disk,
11196 f_create, info, f_create)
11197 except errors.OpExecError, err:
11198 self.LogWarning("Failed to create volume %s (%s) on"
11199 " node %s: %s",
11200 new_disk.iv_name, new_disk, node, err)
11201 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
11202 (new_disk.size, new_disk.mode)))
11203 else:
11204 # change a given disk
11205 instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
11206 result.append(("disk.mode/%d" % disk_op,
11207 disk_dict[constants.IDISK_MODE]))
11209 if self.op.disk_template:
11210 r_shut = _ShutdownInstanceDisks(self, instance)
11211 if not r_shut:
11212 raise errors.OpExecError("Cannot shut down instance disks, unable to"
11213 " proceed with disk template conversion")
11214 mode = (instance.disk_template, self.op.disk_template)
11215 try:
11216 self._DISK_CONVERSIONS[mode](self, feedback_fn)
11217 except:
11218 self.cfg.ReleaseDRBDMinors(instance.name)
11219 raise
11220 result.append(("disk_template", self.op.disk_template))
11223 for nic_op, nic_dict in self.op.nics:
11224 if nic_op == constants.DDM_REMOVE:
11225 # remove the last nic
11226 del instance.nics[-1]
11227 result.append(("nic.%d" % len(instance.nics), "remove"))
11228 elif nic_op == constants.DDM_ADD:
11229 # mac and bridge should be set by now
11230 mac = nic_dict[constants.INIC_MAC]
11231 ip = nic_dict.get(constants.INIC_IP, None)
11232 nicparams = self.nic_pinst[constants.DDM_ADD]
11233 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
11234 instance.nics.append(new_nic)
11235 result.append(("nic.%d" % (len(instance.nics) - 1),
11236 "add:mac=%s,ip=%s,mode=%s,link=%s" %
11237 (new_nic.mac, new_nic.ip,
11238 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
11239 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
11240 )))
11241 else:
11242 for key in (constants.INIC_MAC, constants.INIC_IP):
11243 if key in nic_dict:
11244 setattr(instance.nics[nic_op], key, nic_dict[key])
11245 if nic_op in self.nic_pinst:
11246 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
11247 for key, val in nic_dict.iteritems():
11248 result.append(("nic.%s/%d" % (key, nic_op), val))
11251 if self.op.hvparams:
11252 instance.hvparams = self.hv_inst
11253 for key, val in self.op.hvparams.iteritems():
11254 result.append(("hv/%s" % key, val))
11257 if self.op.beparams:
11258 instance.beparams = self.be_inst
11259 for key, val in self.op.beparams.iteritems():
11260 result.append(("be/%s" % key, val))
11263 if self.op.os_name:
11264 instance.os = self.op.os_name
11267 if self.op.osparams:
11268 instance.osparams = self.os_inst
11269 for key, val in self.op.osparams.iteritems():
11270 result.append(("os/%s" % key, val))
11272 self.cfg.Update(instance, feedback_fn)
11274 return result
11276 _DISK_CONVERSIONS = {
11277 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
11278 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
11279 }
11282 class LUInstanceChangeGroup(LogicalUnit):
11283 HPATH = "instance-change-group"
11284 HTYPE = constants.HTYPE_INSTANCE
11285 REQ_BGL = False
11287 def ExpandNames(self):
11288 self.share_locks = _ShareAll()
11289 self.needed_locks = {
11290 locking.LEVEL_NODEGROUP: [],
11291 locking.LEVEL_NODE: [],
11292 }
11294 self._ExpandAndLockInstance()
11296 if self.op.target_groups:
11297 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
11298 self.op.target_groups)
11299 else:
11300 self.req_target_uuids = None
11302 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
11304 def DeclareLocks(self, level):
11305 if level == locking.LEVEL_NODEGROUP:
11306 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11308 if self.req_target_uuids:
11309 lock_groups = set(self.req_target_uuids)
11311 # Lock all groups used by instance optimistically; this requires going
11312 # via the node before it's locked, requiring verification later on
11313 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11314 lock_groups.update(instance_groups)
11315 else:
11316 # No target groups, need to lock all of them
11317 lock_groups = locking.ALL_SET
11319 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
11321 elif level == locking.LEVEL_NODE:
11322 if self.req_target_uuids:
11323 # Lock all nodes used by instances
11324 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11325 self._LockInstancesNodes()
11327 # Lock all nodes in all potential target groups
11328 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
11329 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
11330 member_nodes = [node_name
11331 for group in lock_groups
11332 for node_name in self.cfg.GetNodeGroup(group).members]
11333 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
11334 else:
11335 # Lock all nodes as all groups are potential targets
11336 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11338 def CheckPrereq(self):
11339 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11340 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11341 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11343 assert (self.req_target_uuids is None or
11344 owned_groups.issuperset(self.req_target_uuids))
11345 assert owned_instances == set([self.op.instance_name])
11347 # Get instance information
11348 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11350 # Check if node groups for locked instance are still correct
11351 assert owned_nodes.issuperset(self.instance.all_nodes), \
11352 ("Instance %s's nodes changed while we kept the lock" %
11353 self.op.instance_name)
11355 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
11356 owned_groups)
11358 if self.req_target_uuids:
11359 # User requested specific target groups
11360 self.target_uuids = self.req_target_uuids
11361 else:
11362 # All groups except those used by the instance are potential targets
11363 self.target_uuids = owned_groups - inst_groups
11365 conflicting_groups = self.target_uuids & inst_groups
11366 if conflicting_groups:
11367 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
11368 " used by the instance '%s'" %
11369 (utils.CommaJoin(conflicting_groups),
11370 self.op.instance_name),
11371 errors.ECODE_INVAL)
11373 if not self.target_uuids:
11374 raise errors.OpPrereqError("There are no possible target groups",
11375 errors.ECODE_INVAL)
11377 def BuildHooksEnv(self):
11378 """Build hooks env.
11380 """
11381 assert self.target_uuids
11383 env = {
11384 "TARGET_GROUPS": " ".join(self.target_uuids),
11385 }
11387 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11389 return env
11391 def BuildHooksNodes(self):
11392 """Build hooks nodes.
11395 mn = self.cfg.GetMasterNode()
11396 return ([mn], [mn])
11398 def Exec(self, feedback_fn):
11399 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
11401 assert instances == [self.op.instance_name], "Instance not locked"
11403 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
11404 instances=instances, target_groups=list(self.target_uuids))
11406 ial.Run(self.op.iallocator)
11408 if not ial.success:
11409 raise errors.OpPrereqError("Can't compute solution for changing group of"
11410 " instance '%s' using iallocator '%s': %s" %
11411 (self.op.instance_name, self.op.iallocator,
11412 ial.info),
11413 errors.ECODE_NORES)
11415 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
11417 self.LogInfo("Iallocator returned %s job(s) for changing group of"
11418 " instance '%s'", len(jobs), self.op.instance_name)
11420 return ResultWithJobs(jobs)
11423 class LUBackupQuery(NoHooksLU):
11424 """Query the exports list
11429 def ExpandNames(self):
11430 self.needed_locks = {}
11431 self.share_locks[locking.LEVEL_NODE] = 1
11432 if not self.op.nodes:
11433 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11434 else:
11435 self.needed_locks[locking.LEVEL_NODE] = \
11436 _GetWantedNodes(self, self.op.nodes)
11438 def Exec(self, feedback_fn):
11439 """Compute the list of all the exported system images.
11441 @rtype: dict
11442 @return: a dictionary with the structure node->(export-list)
11443 where export-list is a list of the instances exported on
11444 that node.
11446 """
11447 self.nodes = self.owned_locks(locking.LEVEL_NODE)
11448 rpcresult = self.rpc.call_export_list(self.nodes)
11449 result = {}
11450 for node in rpcresult:
11451 if rpcresult[node].fail_msg:
11452 result[node] = False
11453 else:
11454 result[node] = rpcresult[node].payload
11456 return result
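# The dictionary returned above mixes success and failure per node; a
# hypothetical result for a two-node cluster where one node failed to
# answer might look like:
#
#   {"node1.example.com": ["inst1.example.com", "inst2.example.com"],
#    "node2.example.com": False}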
11459 class LUBackupPrepare(NoHooksLU):
11460 """Prepares an instance for an export and returns useful information.
11465 def ExpandNames(self):
11466 self._ExpandAndLockInstance()
11468 def CheckPrereq(self):
11469 """Check prerequisites.
11472 instance_name = self.op.instance_name
11474 self.instance = self.cfg.GetInstanceInfo(instance_name)
11475 assert self.instance is not None, \
11476 "Cannot retrieve locked instance %s" % self.op.instance_name
11477 _CheckNodeOnline(self, self.instance.primary_node)
11479 self._cds = _GetClusterDomainSecret()
11481 def Exec(self, feedback_fn):
11482 """Prepares an instance for an export.
11485 instance = self.instance
11487 if self.op.mode == constants.EXPORT_MODE_REMOTE:
11488 salt = utils.GenerateSecret(8)
11490 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
11491 result = self.rpc.call_x509_cert_create(instance.primary_node,
11492 constants.RIE_CERT_VALIDITY)
11493 result.Raise("Can't create X509 key and certificate on %s" % result.node)
11495 (name, cert_pem) = result.payload
11497 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
11498 cert_pem)
11500 return {
11501 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
11502 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
11503 salt),
11504 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
11505 }
11507 return None
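# The dictionary returned above is what a client needs to start a remote
# export against another cluster; a sketch of obtaining it, assuming the
# standard cli helper (names are illustrative):
#
#   op = opcodes.OpBackupPrepare(instance_name="inst1.example.com",
#                                mode=constants.EXPORT_MODE_REMOTE)
#   info = cli.SubmitOpCode(op)
#   # info["handshake"], info["x509_key_name"], info["x509_ca"]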
11510 class LUBackupExport(LogicalUnit):
11511 """Export an instance to an image in the cluster.
11514 HPATH = "instance-export"
11515 HTYPE = constants.HTYPE_INSTANCE
11518 def CheckArguments(self):
11519 """Check the arguments.
11522 self.x509_key_name = self.op.x509_key_name
11523 self.dest_x509_ca_pem = self.op.destination_x509_ca
11525 if self.op.mode == constants.EXPORT_MODE_REMOTE:
11526 if not self.x509_key_name:
11527 raise errors.OpPrereqError("Missing X509 key name for encryption",
11528 errors.ECODE_INVAL)
11530 if not self.dest_x509_ca_pem:
11531 raise errors.OpPrereqError("Missing destination X509 CA",
11532 errors.ECODE_INVAL)
11534 def ExpandNames(self):
11535 self._ExpandAndLockInstance()
11537 # Lock all nodes for local exports
11538 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11539 # FIXME: lock only instance primary and destination node
11541 # Sad but true, for now we have to lock all nodes, as we don't know where
11542 # the previous export might be, and in this LU we search for it and
11543 # remove it from its current node. In the future we could fix this by:
11544 # - making a tasklet to search (share-lock all), then create the
11545 # new one, then one to remove, after
11546 # - removing the removal operation altogether
11547 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11549 def DeclareLocks(self, level):
11550 """Last minute lock declaration."""
11551 # All nodes are locked anyway, so nothing to do here.
11553 def BuildHooksEnv(self):
11554 """Build hooks env.
11556 This will run on the master, primary node and target node.
11558 """
11559 env = {
11560 "EXPORT_MODE": self.op.mode,
11561 "EXPORT_NODE": self.op.target_node,
11562 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
11563 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
11564 # TODO: Generic function for boolean env variables
11565 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
11566 }
11568 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11570 return env
11572 def BuildHooksNodes(self):
11573 """Build hooks nodes.
11575 """
11576 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
11578 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11579 nl.append(self.op.target_node)
11581 return (nl, nl)
11583 def CheckPrereq(self):
11584 """Check prerequisites.
11586 This checks that the instance and node names are valid.
11589 instance_name = self.op.instance_name
11591 self.instance = self.cfg.GetInstanceInfo(instance_name)
11592 assert self.instance is not None, \
11593 "Cannot retrieve locked instance %s" % self.op.instance_name
11594 _CheckNodeOnline(self, self.instance.primary_node)
11596 if (self.op.remove_instance and self.instance.admin_up and
11597 not self.op.shutdown):
11598 raise errors.OpPrereqError("Cannot remove instance without shutting it"
11599 " down first")
11601 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11602 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
11603 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
11604 assert self.dst_node is not None
11606 _CheckNodeOnline(self, self.dst_node.name)
11607 _CheckNodeNotDrained(self, self.dst_node.name)
11609 self._cds = None
11610 self.dest_disk_info = None
11611 self.dest_x509_ca = None
11613 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11614 self.dst_node = None
11616 if len(self.op.target_node) != len(self.instance.disks):
11617 raise errors.OpPrereqError(("Received destination information for %s"
11618 " disks, but instance %s has %s disks") %
11619 (len(self.op.target_node), instance_name,
11620 len(self.instance.disks)),
11621 errors.ECODE_INVAL)
11623 cds = _GetClusterDomainSecret()
11625 # Check X509 key name
11626 try:
11627 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
11628 except (TypeError, ValueError), err:
11629 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
11631 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
11632 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
11633 errors.ECODE_INVAL)
11635 # Load and verify CA
11636 try:
11637 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
11638 except OpenSSL.crypto.Error, err:
11639 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
11640 (err, ), errors.ECODE_INVAL)
11642 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
11643 if errcode is not None:
11644 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
11645 (msg, ), errors.ECODE_INVAL)
11647 self.dest_x509_ca = cert
11649 # Verify target information
11650 disk_info = []
11651 for idx, disk_data in enumerate(self.op.target_node):
11652 try:
11653 (host, port, magic) = \
11654 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
11655 except errors.GenericError, err:
11656 raise errors.OpPrereqError("Target info for disk %s: %s" %
11657 (idx, err), errors.ECODE_INVAL)
11659 disk_info.append((host, port, magic))
11661 assert len(disk_info) == len(self.op.target_node)
11662 self.dest_disk_info = disk_info
11664 else:
11665 raise errors.ProgrammerError("Unhandled export mode %r" %
11666 self.op.mode)
11668 # instance disk type verification
11669 # TODO: Implement export support for file-based disks
11670 for disk in self.instance.disks:
11671 if disk.dev_type == constants.LD_FILE:
11672 raise errors.OpPrereqError("Export not supported for instances with"
11673 " file-based disks", errors.ECODE_INVAL)
11675 def _CleanupExports(self, feedback_fn):
11676 """Removes exports of current instance from all other nodes.
11678 If an instance in a cluster with nodes A..D was exported to node C, its
11679 exports will be removed from the nodes A, B and D.
11682 assert self.op.mode != constants.EXPORT_MODE_REMOTE
11684 nodelist = self.cfg.GetNodeList()
11685 nodelist.remove(self.dst_node.name)
11687 # on one-node clusters nodelist will be empty after the removal
11688 # if we proceed, the backup would be removed because OpBackupQuery
11689 # substitutes an empty list with the full cluster node list.
11690 iname = self.instance.name
11691 if nodelist:
11692 feedback_fn("Removing old exports for instance %s" % iname)
11693 exportlist = self.rpc.call_export_list(nodelist)
11694 for node in exportlist:
11695 if exportlist[node].fail_msg:
11696 continue
11697 if iname in exportlist[node].payload:
11698 msg = self.rpc.call_export_remove(node, iname).fail_msg
11699 if msg:
11700 self.LogWarning("Could not remove older export for instance %s"
11701 " on node %s: %s", iname, node, msg)
11703 def Exec(self, feedback_fn):
11704 """Export an instance to an image in the cluster.
11707 assert self.op.mode in constants.EXPORT_MODES
11709 instance = self.instance
11710 src_node = instance.primary_node
11712 if self.op.shutdown:
11713 # shutdown the instance, but not the disks
11714 feedback_fn("Shutting down instance %s" % instance.name)
11715 result = self.rpc.call_instance_shutdown(src_node, instance,
11716 self.op.shutdown_timeout)
11717 # TODO: Maybe ignore failures if ignore_remove_failures is set
11718 result.Raise("Could not shut down instance %s on"
11719 " node %s" % (instance.name, src_node))
11721 # set the disks ID correctly since call_instance_start needs the
11722 # correct drbd minor to create the symlinks
11723 for disk in instance.disks:
11724 self.cfg.SetDiskID(disk, src_node)
11726 activate_disks = (not instance.admin_up)
11728 if activate_disks:
11729 # Activate the instance disks if we're exporting a stopped instance
11730 feedback_fn("Activating disks for %s" % instance.name)
11731 _StartInstanceDisks(self, instance, None)
11733 try:
11734 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
11735 instance)
11737 helper.CreateSnapshots()
11738 try:
11739 if (self.op.shutdown and instance.admin_up and
11740 not self.op.remove_instance):
11741 assert not activate_disks
11742 feedback_fn("Starting instance %s" % instance.name)
11743 result = self.rpc.call_instance_start(src_node, instance,
11744 None, None, False)
11745 msg = result.fail_msg
11746 if msg:
11747 feedback_fn("Failed to start instance: %s" % msg)
11748 _ShutdownInstanceDisks(self, instance)
11749 raise errors.OpExecError("Could not start instance: %s" % msg)
11751 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11752 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
11753 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11754 connect_timeout = constants.RIE_CONNECT_TIMEOUT
11755 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11757 (key_name, _, _) = self.x509_key_name
11759 dest_ca_pem = \
11760 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
11761 self.dest_x509_ca)
11763 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
11764 key_name, dest_ca_pem,
11765 timeouts)
11766 finally:
11767 helper.Cleanup()
11769 # Check for backwards compatibility
11770 assert len(dresults) == len(instance.disks)
11771 assert compat.all(isinstance(i, bool) for i in dresults), \
11772 "Not all results are boolean: %r" % dresults
11774 finally:
11775 if activate_disks:
11776 feedback_fn("Deactivating disks for %s" % instance.name)
11777 _ShutdownInstanceDisks(self, instance)
11779 if not (compat.all(dresults) and fin_resu):
11780 failures = []
11781 if not fin_resu:
11782 failures.append("export finalization")
11783 if not compat.all(dresults):
11784 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
11785 if not dsk)
11786 failures.append("disk export: disk(s) %s" % fdsk)
11788 raise errors.OpExecError("Export failed, errors in %s" %
11789 utils.CommaJoin(failures))
11791 # At this point, the export was successful, we can cleanup/finish
11793 # Remove instance if requested
11794 if self.op.remove_instance:
11795 feedback_fn("Removing instance %s" % instance.name)
11796 _RemoveInstance(self, feedback_fn, instance,
11797 self.op.ignore_remove_failures)
11799 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11800 self._CleanupExports(feedback_fn)
11802 return fin_resu, dresults
11805 class LUBackupRemove(NoHooksLU):
11806 """Remove exports related to the named instance.
11811 def ExpandNames(self):
11812 self.needed_locks = {}
11813 # We need all nodes to be locked in order for RemoveExport to work, but we
11814 # don't need to lock the instance itself, as nothing will happen to it (and
11815 # we can remove exports also for a removed instance)
11816 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11818 def Exec(self, feedback_fn):
11819 """Remove any export.
11822 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
11823 # If the instance was not found we'll try with the name that was passed in.
11824 # This will only work if it was an FQDN, though.
11825 fqdn_warn = False
11826 if not instance_name:
11827 fqdn_warn = True
11828 instance_name = self.op.instance_name
11830 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
11831 exportlist = self.rpc.call_export_list(locked_nodes)
11832 found = False
11833 for node in exportlist:
11834 msg = exportlist[node].fail_msg
11835 if msg:
11836 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
11837 continue
11838 if instance_name in exportlist[node].payload:
11839 found = True
11840 result = self.rpc.call_export_remove(node, instance_name)
11841 msg = result.fail_msg
11842 if msg:
11843 logging.error("Could not remove export for instance %s"
11844 " on node %s: %s", instance_name, node, msg)
11846 if fqdn_warn and not found:
11847 feedback_fn("Export not found. If trying to remove an export belonging"
11848 " to a deleted instance please use its Fully Qualified"
11849 " Domain Name.")
11852 class LUGroupAdd(LogicalUnit):
11853 """Logical unit for creating node groups.
11856 HPATH = "group-add"
11857 HTYPE = constants.HTYPE_GROUP
11860 def ExpandNames(self):
11861 # We need the new group's UUID here so that we can create and acquire the
11862 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
11863 # that it should not check whether the UUID exists in the configuration.
11864 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
11865 self.needed_locks = {}
11866 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11868 def CheckPrereq(self):
11869 """Check prerequisites.
11871 This checks that the given group name is not an existing node group
11872 already.
11874 """
11875 try:
11876 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11877 except errors.OpPrereqError:
11878 pass
11879 else:
11880 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
11881 " node group (UUID: %s)" %
11882 (self.op.group_name, existing_uuid),
11883 errors.ECODE_EXISTS)
11885 if self.op.ndparams:
11886 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
11888 def BuildHooksEnv(self):
11889 """Build hooks env.
11891 """
11892 return {
11893 "GROUP_NAME": self.op.group_name,
11894 }
11896 def BuildHooksNodes(self):
11897 """Build hooks nodes.
11900 mn = self.cfg.GetMasterNode()
11901 return ([mn], [mn])
11903 def Exec(self, feedback_fn):
11904 """Add the node group to the cluster.
11907 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
11908 uuid=self.group_uuid,
11909 alloc_policy=self.op.alloc_policy,
11910 ndparams=self.op.ndparams)
11912 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
11913 del self.remove_locks[locking.LEVEL_NODEGROUP]
11916 class LUGroupAssignNodes(NoHooksLU):
11917 """Logical unit for assigning nodes to groups.
11922 def ExpandNames(self):
11923 # These raise errors.OpPrereqError on their own:
11924 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11925 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
11927 # We want to lock all the affected nodes and groups. We have readily
11928 # available the list of nodes, and the *destination* group. To gather the
11929 # list of "source" groups, we need to fetch node information later on.
11930 self.needed_locks = {
11931 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
11932 locking.LEVEL_NODE: self.op.nodes,
11933 }
11935 def DeclareLocks(self, level):
11936 if level == locking.LEVEL_NODEGROUP:
11937 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
11939 # Try to get all affected nodes' groups without having the group or node
11940 # lock yet. Needs verification later in the code flow.
11941 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
11943 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
11945 def CheckPrereq(self):
11946 """Check prerequisites.
11949 assert self.needed_locks[locking.LEVEL_NODEGROUP]
11950 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
11951 frozenset(self.op.nodes))
11953 expected_locks = (set([self.group_uuid]) |
11954 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
11955 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
11956 if actual_locks != expected_locks:
11957 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
11958 " current groups are '%s', used to be '%s'" %
11959 (utils.CommaJoin(expected_locks),
11960 utils.CommaJoin(actual_locks)))
11962 self.node_data = self.cfg.GetAllNodesInfo()
11963 self.group = self.cfg.GetNodeGroup(self.group_uuid)
11964 instance_data = self.cfg.GetAllInstancesInfo()
11966 if self.group is None:
11967 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11968 (self.op.group_name, self.group_uuid))
11970 (new_splits, previous_splits) = \
11971 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
11972 for node in self.op.nodes],
11973 self.node_data, instance_data)
11975 if new_splits:
11976 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
11978 if not self.op.force:
11979 raise errors.OpExecError("The following instances get split by this"
11980 " change and --force was not given: %s" %
11981 fmt_new_splits)
11982 else:
11983 self.LogWarning("This operation will split the following instances: %s",
11984 fmt_new_splits)
11986 if previous_splits:
11987 self.LogWarning("In addition, these already-split instances continue"
11988 " to be split across groups: %s",
11989 utils.CommaJoin(utils.NiceSort(previous_splits)))
11991 def Exec(self, feedback_fn):
11992 """Assign nodes to a new group.
11995 for node in self.op.nodes:
11996 self.node_data[node].group = self.group_uuid
11998 # FIXME: Depends on side-effects of modifying the result of
11999 # C{cfg.GetAllNodesInfo}
12001 self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
12003 @staticmethod
12004 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
12005 """Check for split instances after a node assignment.
12007 This method considers a series of node assignments as an atomic operation,
12008 and returns information about split instances after applying the set of
12009 changes.
12011 In particular, it returns information about newly split instances, and
12012 instances that were already split, and remain so after the change.
12014 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
12015 considered.
12017 @type changes: list of (node_name, new_group_uuid) pairs.
12018 @param changes: list of node assignments to consider.
12019 @param node_data: a dict with data for all nodes
12020 @param instance_data: a dict with all instances to consider
12021 @rtype: a two-tuple
12022 @return: a list of instances that were previously okay and result split as a
12023 consequence of this change, and a list of instances that were previously
12024 split and this change does not fix.
12027 changed_nodes = dict((node, group) for node, group in changes
12028 if node_data[node].group != group)
12030 all_split_instances = set()
12031 previously_split_instances = set()
12033 def InstanceNodes(instance):
12034 return [instance.primary_node] + list(instance.secondary_nodes)
12036 for inst in instance_data.values():
12037 if inst.disk_template not in constants.DTS_INT_MIRROR:
12038 continue
12040 instance_nodes = InstanceNodes(inst)
12042 if len(set(node_data[node].group for node in instance_nodes)) > 1:
12043 previously_split_instances.add(inst.name)
12045 if len(set(changed_nodes.get(node, node_data[node].group)
12046 for node in instance_nodes)) > 1:
12047 all_split_instances.add(inst.name)
12049 return (list(all_split_instances - previously_split_instances),
12050 list(previously_split_instances & all_split_instances))
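# A small worked example of the split computation above, with hypothetical
# names: assume nodes n1 and n2 are in group g1, node n3 in group g2, and a
# DRBD instance inst1 lives on (n1, n2). Then:
#
#   changes = [("n2", "g2")]
#   # inst1 was previously intact (both nodes in g1) but would now span
#   # g1 and g2, so it is returned in the first list:
#   #   (["inst1"], [])
#
# Moving n1 and n2 together would keep inst1 unsplit and return ([], []).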
12053 class _GroupQuery(_QueryBase):
12054 FIELDS = query.GROUP_FIELDS
12056 def ExpandNames(self, lu):
12057 lu.needed_locks = {}
12059 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
12060 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
12062 if not self.names:
12063 self.wanted = [name_to_uuid[name]
12064 for name in utils.NiceSort(name_to_uuid.keys())]
12065 else:
12066 # Accept names to be either names or UUIDs.
12067 missing = []
12068 self.wanted = []
12069 all_uuid = frozenset(self._all_groups.keys())
12071 for name in self.names:
12072 if name in all_uuid:
12073 self.wanted.append(name)
12074 elif name in name_to_uuid:
12075 self.wanted.append(name_to_uuid[name])
12076 else:
12077 missing.append(name)
12079 if missing:
12080 raise errors.OpPrereqError("Some groups do not exist: %s" %
12081 utils.CommaJoin(missing),
12082 errors.ECODE_NOENT)
12084 def DeclareLocks(self, lu, level):
12085 pass
12087 def _GetQueryData(self, lu):
12088 """Computes the list of node groups and their attributes.
12091 do_nodes = query.GQ_NODE in self.requested_data
12092 do_instances = query.GQ_INST in self.requested_data
12094 group_to_nodes = None
12095 group_to_instances = None
12097 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
12098 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
12099 # latter GetAllInstancesInfo() is not enough, for we have to go through
12100 # instance->node. Hence, we will need to process nodes even if we only need
12101 # instance information.
12102 if do_nodes or do_instances:
12103 all_nodes = lu.cfg.GetAllNodesInfo()
12104 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
12105 node_to_group = {}
12107 for node in all_nodes.values():
12108 if node.group in group_to_nodes:
12109 group_to_nodes[node.group].append(node.name)
12110 node_to_group[node.name] = node.group
12112 if do_instances:
12113 all_instances = lu.cfg.GetAllInstancesInfo()
12114 group_to_instances = dict((uuid, []) for uuid in self.wanted)
12116 for instance in all_instances.values():
12117 node = instance.primary_node
12118 if node in node_to_group:
12119 group_to_instances[node_to_group[node]].append(instance.name)
12121 if not do_nodes:
12122 # Do not pass on node information if it was not requested.
12123 group_to_nodes = None
12125 return query.GroupQueryData([self._all_groups[uuid]
12126 for uuid in self.wanted],
12127 group_to_nodes, group_to_instances)
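# With two requested groups, the two auxiliary mappings built above might
# look like (names are illustrative):
#
#   group_to_nodes     = {"uuid-g1": ["node1", "node2"], "uuid-g2": ["node3"]}
#   group_to_instances = {"uuid-g1": ["inst1"], "uuid-g2": []}
#
# Instances are keyed by the group of their primary node, which is why the
# node pass is needed even when only instance data was requested.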
12130 class LUGroupQuery(NoHooksLU):
12131 """Logical unit for querying node groups.
12136 def CheckArguments(self):
12137 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
12138 self.op.output_fields, False)
12140 def ExpandNames(self):
12141 self.gq.ExpandNames(self)
12143 def DeclareLocks(self, level):
12144 self.gq.DeclareLocks(self, level)
12146 def Exec(self, feedback_fn):
12147 return self.gq.OldStyleQuery(self)
12150 class LUGroupSetParams(LogicalUnit):
12151 """Modifies the parameters of a node group.
12154 HPATH = "group-modify"
12155 HTYPE = constants.HTYPE_GROUP
12158 def CheckArguments(self):
12159 all_changes = [
12160 self.op.ndparams,
12161 self.op.alloc_policy,
12162 ]
12164 if all_changes.count(None) == len(all_changes):
12165 raise errors.OpPrereqError("Please pass at least one modification",
12166 errors.ECODE_INVAL)
12168 def ExpandNames(self):
12169 # This raises errors.OpPrereqError on its own:
12170 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12172 self.needed_locks = {
12173 locking.LEVEL_NODEGROUP: [self.group_uuid],
12174 }
12176 def CheckPrereq(self):
12177 """Check prerequisites.
12180 self.group = self.cfg.GetNodeGroup(self.group_uuid)
12182 if self.group is None:
12183 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12184 (self.op.group_name, self.group_uuid))
12186 if self.op.ndparams:
12187 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
12188 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12189 self.new_ndparams = new_ndparams
12191 def BuildHooksEnv(self):
12192 """Build hooks env.
12195 return {
12196 "GROUP_NAME": self.op.group_name,
12197 "NEW_ALLOC_POLICY": self.op.alloc_policy,
12198 }
12200 def BuildHooksNodes(self):
12201 """Build hooks nodes.
12204 mn = self.cfg.GetMasterNode()
12205 return ([mn], [mn])
12207 def Exec(self, feedback_fn):
12208 """Modifies the node group.
12210 """
12211 result = []
12213 if self.op.ndparams:
12214 self.group.ndparams = self.new_ndparams
12215 result.append(("ndparams", str(self.group.ndparams)))
12217 if self.op.alloc_policy:
12218 self.group.alloc_policy = self.op.alloc_policy
12220 self.cfg.Update(self.group, feedback_fn)
12221 return result
12224 class LUGroupRemove(LogicalUnit):
12225 HPATH = "group-remove"
12226 HTYPE = constants.HTYPE_GROUP
12229 def ExpandNames(self):
12230 # This raises errors.OpPrereqError on its own:
12231 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12232 self.needed_locks = {
12233 locking.LEVEL_NODEGROUP: [self.group_uuid],
12234 }
12236 def CheckPrereq(self):
12237 """Check prerequisites.
12239 This checks that the given group name exists as a node group, that it is
12240 empty (i.e., contains no nodes), and that it is not the last group of the
12241 cluster.
12243 """
12244 # Verify that the group is empty.
12245 group_nodes = [node.name
12246 for node in self.cfg.GetAllNodesInfo().values()
12247 if node.group == self.group_uuid]
12249 if group_nodes:
12250 raise errors.OpPrereqError("Group '%s' not empty, has the following"
12251 " nodes: %s" %
12252 (self.op.group_name,
12253 utils.CommaJoin(utils.NiceSort(group_nodes))),
12254 errors.ECODE_STATE)
12256 # Verify the cluster would not be left group-less.
12257 if len(self.cfg.GetNodeGroupList()) == 1:
12258 raise errors.OpPrereqError("Group '%s' is the only group,"
12259 " cannot be removed" %
12260 self.op.group_name,
12261 errors.ECODE_STATE)
12263 def BuildHooksEnv(self):
12264 """Build hooks env.
12266 """
12267 return {
12268 "GROUP_NAME": self.op.group_name,
12269 }
12271 def BuildHooksNodes(self):
12272 """Build hooks nodes.
12275 mn = self.cfg.GetMasterNode()
12276 return ([mn], [mn])
12278 def Exec(self, feedback_fn):
12279 """Remove the node group.
12282 try:
12283 self.cfg.RemoveNodeGroup(self.group_uuid)
12284 except errors.ConfigurationError:
12285 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
12286 (self.op.group_name, self.group_uuid))
12288 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12291 class LUGroupRename(LogicalUnit):
12292 HPATH = "group-rename"
12293 HTYPE = constants.HTYPE_GROUP
12296 def ExpandNames(self):
12297 # This raises errors.OpPrereqError on its own:
12298 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12300 self.needed_locks = {
12301 locking.LEVEL_NODEGROUP: [self.group_uuid],
12304 def CheckPrereq(self):
12305 """Check prerequisites.
12307 Ensures requested new name is not yet used.
12310 try:
12311 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
12312 except errors.OpPrereqError:
12313 pass
12314 else:
12315 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
12316 " node group (UUID: %s)" %
12317 (self.op.new_name, new_name_uuid),
12318 errors.ECODE_EXISTS)
12320 def BuildHooksEnv(self):
12321 """Build hooks env.
12323 """
12324 return {
12325 "OLD_NAME": self.op.group_name,
12326 "NEW_NAME": self.op.new_name,
12327 }
12329 def BuildHooksNodes(self):
12330 """Build hooks nodes.
12333 mn = self.cfg.GetMasterNode()
12335 all_nodes = self.cfg.GetAllNodesInfo()
12336 all_nodes.pop(mn, None)
12338 run_nodes = [mn]
12339 run_nodes.extend(node.name for node in all_nodes.values()
12340 if node.group == self.group_uuid)
12342 return (run_nodes, run_nodes)
12344 def Exec(self, feedback_fn):
12345 """Rename the node group.
12348 group = self.cfg.GetNodeGroup(self.group_uuid)
12350 if group is None:
12351 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12352 (self.op.group_name, self.group_uuid))
12354 group.name = self.op.new_name
12355 self.cfg.Update(group, feedback_fn)
12357 return self.op.new_name
12360 class LUGroupEvacuate(LogicalUnit):
12361 HPATH = "group-evacuate"
12362 HTYPE = constants.HTYPE_GROUP
12365 def ExpandNames(self):
12366 # This raises errors.OpPrereqError on its own:
12367 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12369 if self.op.target_groups:
12370 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12371 self.op.target_groups)
12372 else:
12373 self.req_target_uuids = []
12375 if self.group_uuid in self.req_target_uuids:
12376 raise errors.OpPrereqError("Group to be evacuated (%s) cannot be used"
12377 " as a target group (targets are %s)" %
12378 (self.group_uuid,
12379 utils.CommaJoin(self.req_target_uuids)),
12380 errors.ECODE_INVAL)
12382 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12384 self.share_locks = _ShareAll()
12385 self.needed_locks = {
12386 locking.LEVEL_INSTANCE: [],
12387 locking.LEVEL_NODEGROUP: [],
12388 locking.LEVEL_NODE: [],
12389 }
12391 def DeclareLocks(self, level):
12392 if level == locking.LEVEL_INSTANCE:
12393 assert not self.needed_locks[locking.LEVEL_INSTANCE]
12395 # Lock instances optimistically, needs verification once node and group
12396 # locks have been acquired
12397 self.needed_locks[locking.LEVEL_INSTANCE] = \
12398 self.cfg.GetNodeGroupInstances(self.group_uuid)
12400 elif level == locking.LEVEL_NODEGROUP:
12401 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12403 if self.req_target_uuids:
12404 lock_groups = set([self.group_uuid] + self.req_target_uuids)
12405 else:
12406 # Lock all groups used by instances optimistically; this requires going
12407 # via the node before it's locked, requiring verification later on
12408 lock_groups.update(group_uuid
12409 for instance_name in
12410 self.owned_locks(locking.LEVEL_INSTANCE)
12411 for group_uuid in
12412 self.cfg.GetInstanceNodeGroups(instance_name))
12413 else:
12414 # No target groups, need to lock all of them
12415 lock_groups = locking.ALL_SET
12417 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12419 elif level == locking.LEVEL_NODE:
12420 # This will only lock the nodes in the group to be evacuated which
12421 # contain actual instances
12422 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12423 self._LockInstancesNodes()
12425 # Lock all nodes in group to be evacuated and target groups
12426 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12427 assert self.group_uuid in owned_groups
12428 member_nodes = [node_name
12429 for group in owned_groups
12430 for node_name in self.cfg.GetNodeGroup(group).members]
12431 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12433 def CheckPrereq(self):
12434 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12435 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12436 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12438 assert owned_groups.issuperset(self.req_target_uuids)
12439 assert self.group_uuid in owned_groups
12441 # Check if locked instances are still correct
12442 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
12444 # Get instance information
12445 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
12447 # Check if node groups for locked instances are still correct
12448 for instance_name in owned_instances:
12449 inst = self.instances[instance_name]
12450 assert owned_nodes.issuperset(inst.all_nodes), \
12451 "Instance %s's nodes changed while we kept the lock" % instance_name
12453 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
12454 owned_groups)
12456 assert self.group_uuid in inst_groups, \
12457 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
12459 if self.req_target_uuids:
12460 # User requested specific target groups
12461 self.target_uuids = self.req_target_uuids
12462 else:
12463 # All groups except the one to be evacuated are potential targets
12464 self.target_uuids = [group_uuid for group_uuid in owned_groups
12465 if group_uuid != self.group_uuid]
12467 if not self.target_uuids:
12468 raise errors.OpPrereqError("There are no possible target groups",
12469 errors.ECODE_INVAL)
12471 def BuildHooksEnv(self):
12472 """Build hooks env.
12474 """
12475 return {
12476 "GROUP_NAME": self.op.group_name,
12477 "TARGET_GROUPS": " ".join(self.target_uuids),
12478 }
12480 def BuildHooksNodes(self):
12481 """Build hooks nodes.
12484 mn = self.cfg.GetMasterNode()
12486 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
12488 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
12490 return (run_nodes, run_nodes)
12492 def Exec(self, feedback_fn):
12493 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12495 assert self.group_uuid not in self.target_uuids
12497 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12498 instances=instances, target_groups=self.target_uuids)
12500 ial.Run(self.op.iallocator)
12502 if not ial.success:
12503 raise errors.OpPrereqError("Can't compute group evacuation using"
12504 " iallocator '%s': %s" %
12505 (self.op.iallocator, ial.info),
12506 errors.ECODE_NORES)
12508 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12510 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
12511 len(jobs), self.op.group_name)
12513 return ResultWithJobs(jobs)
12516 class TagsLU(NoHooksLU): # pylint: disable=W0223
12517 """Generic tags LU.
12519 This is an abstract class which is the parent of all the other tags LUs.
12522 def ExpandNames(self):
12523 self.group_uuid = None
12524 self.needed_locks = {}
12525 if self.op.kind == constants.TAG_NODE:
12526 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
12527 self.needed_locks[locking.LEVEL_NODE] = self.op.name
12528 elif self.op.kind == constants.TAG_INSTANCE:
12529 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
12530 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
12531 elif self.op.kind == constants.TAG_NODEGROUP:
12532 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
12534 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
12535 # not possible to acquire the BGL based on opcode parameters)
12537 def CheckPrereq(self):
12538 """Check prerequisites.
12541 if self.op.kind == constants.TAG_CLUSTER:
12542 self.target = self.cfg.GetClusterInfo()
12543 elif self.op.kind == constants.TAG_NODE:
12544 self.target = self.cfg.GetNodeInfo(self.op.name)
12545 elif self.op.kind == constants.TAG_INSTANCE:
12546 self.target = self.cfg.GetInstanceInfo(self.op.name)
12547 elif self.op.kind == constants.TAG_NODEGROUP:
12548 self.target = self.cfg.GetNodeGroup(self.group_uuid)
12549 else:
12550 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
12551 str(self.op.kind), errors.ECODE_INVAL)
12554 class LUTagsGet(TagsLU):
12555 """Returns the tags of a given object.
12560 def ExpandNames(self):
12561 TagsLU.ExpandNames(self)
12563 # Share locks as this is only a read operation
12564 self.share_locks = _ShareAll()
12566 def Exec(self, feedback_fn):
12567 """Returns the tag list.
12570 return list(self.target.GetTags())
12573 class LUTagsSearch(NoHooksLU):
12574 """Searches the tags for a given pattern.
12579 def ExpandNames(self):
12580 self.needed_locks = {}
12582 def CheckPrereq(self):
12583 """Check prerequisites.
12585 This checks the pattern passed for validity by compiling it.
12587 """
12588 try:
12589 self.re = re.compile(self.op.pattern)
12590 except re.error, err:
12591 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
12592 (self.op.pattern, err), errors.ECODE_INVAL)
12594 def Exec(self, feedback_fn):
12595 """Returns the tag list.
12598 cfg = self.cfg
12599 tgts = [("/cluster", cfg.GetClusterInfo())]
12600 ilist = cfg.GetAllInstancesInfo().values()
12601 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
12602 nlist = cfg.GetAllNodesInfo().values()
12603 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
12604 tgts.extend(("/nodegroup/%s" % n.name, n)
12605 for n in cfg.GetAllNodeGroupsInfo().values())
12606 results = []
12607 for path, target in tgts:
12608 for tag in target.GetTags():
12609 if self.re.search(tag):
12610 results.append((path, tag))
12612 return results
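# A hypothetical search for the pattern "web" might return:
#
#   [("/cluster", "web-cluster"),
#    ("/instances/inst1.example.com", "webserver"),
#    ("/nodegroup/group1", "web-tier")]
#
# i.e. a list of (path, tag) pairs covering the cluster, instances, nodes
# and node groups.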
12614 class LUTagsSet(TagsLU):
12615 """Sets a tag on a given object.
12620 def CheckPrereq(self):
12621 """Check prerequisites.
12623 This checks the type and length of the tag name and value.
12626 TagsLU.CheckPrereq(self)
12627 for tag in self.op.tags:
12628 objects.TaggableObject.ValidateTag(tag)
12630 def Exec(self, feedback_fn):
12631 """Sets the tag.
12633 """
12634 try:
12635 for tag in self.op.tags:
12636 self.target.AddTag(tag)
12637 except errors.TagError, err:
12638 raise errors.OpExecError("Error while setting tag: %s" % str(err))
12639 self.cfg.Update(self.target, feedback_fn)
12642 class LUTagsDel(TagsLU):
12643 """Delete a list of tags from a given object.
12648 def CheckPrereq(self):
12649 """Check prerequisites.
12651 This checks that we have the given tag.
12654 TagsLU.CheckPrereq(self)
12655 for tag in self.op.tags:
12656 objects.TaggableObject.ValidateTag(tag)
12657 del_tags = frozenset(self.op.tags)
12658 cur_tags = self.target.GetTags()
12660 diff_tags = del_tags - cur_tags
12661 if diff_tags:
12662 diff_names = ("'%s'" % i for i in sorted(diff_tags))
12663 raise errors.OpPrereqError("Tag(s) %s not found" %
12664 (utils.CommaJoin(diff_names), ),
12665 errors.ECODE_NOENT)
12667 def Exec(self, feedback_fn):
12668 """Remove the tag from the object.
12671 for tag in self.op.tags:
12672 self.target.RemoveTag(tag)
12673 self.cfg.Update(self.target, feedback_fn)
12676 class LUTestDelay(NoHooksLU):
12677 """Sleep for a specified amount of time.
12679 This LU sleeps on the master and/or nodes for a specified amount of
12680 time.
12685 def ExpandNames(self):
12686 """Expand names and set required locks.
12688 This expands the node list, if any.
12691 self.needed_locks = {}
12692 if self.op.on_nodes:
12693 # _GetWantedNodes can be used here, but is not always appropriate to use
12694 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
12695 # more information.
12696 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
12697 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
12699 def _TestDelay(self):
12700 """Do the actual sleep.
12703 if self.op.on_master:
12704 if not utils.TestDelay(self.op.duration):
12705 raise errors.OpExecError("Error during master delay test")
12706 if self.op.on_nodes:
12707 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
12708 for node, node_result in result.items():
12709 node_result.Raise("Failure during rpc call to node %s" % node)
12711 def Exec(self, feedback_fn):
12712 """Execute the test delay opcode, with the wanted repetitions.
12715 if self.op.repeat == 0:
12716 self._TestDelay()
12717 else:
12718 top_value = self.op.repeat - 1
12719 for i in range(self.op.repeat):
12720 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
12721 self._TestDelay()
12724 class LUTestJqueue(NoHooksLU):
12725 """Utility LU to test some aspects of the job queue.
12730 # Must be lower than default timeout for WaitForJobChange to see whether it
12731 # notices changed jobs
12732 _CLIENT_CONNECT_TIMEOUT = 20.0
12733 _CLIENT_CONFIRM_TIMEOUT = 60.0
12735 @classmethod
12736 def _NotifyUsingSocket(cls, cb, errcls):
12737 """Opens a Unix socket and waits for another program to connect.
12739 @type cb: callable
12740 @param cb: Callback to send socket name to client
12741 @type errcls: class
12742 @param errcls: Exception class to use for errors
12745 # Using a temporary directory as there's no easy way to create temporary
12746 # sockets without writing a custom loop around tempfile.mktemp and
12747 # socket.bind
12748 tmpdir = tempfile.mkdtemp()
12749 try:
12750 tmpsock = utils.PathJoin(tmpdir, "sock")
12752 logging.debug("Creating temporary socket at %s", tmpsock)
12753 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
12754 try:
12755 sock.bind(tmpsock)
12756 sock.listen(1)
12758 # Send details to client
12759 cb(tmpsock)
12761 # Wait for client to connect before continuing
12762 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
12763 try:
12764 (conn, _) = sock.accept()
12765 except socket.error, err:
12766 raise errcls("Client didn't connect in time (%s)" % err)
12767 finally:
12768 sock.close()
12769 finally:
12770 # Remove as soon as client is connected
12771 shutil.rmtree(tmpdir)
12773 # Wait for client to close
12774 try:
12775 try:
12776 # pylint: disable=E1101
12777 # Instance of '_socketobject' has no ... member
12778 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
12779 conn.recv(1)
12780 except socket.error, err:
12781 raise errcls("Client failed to confirm notification (%s)" % err)
12782 finally:
12783 conn.close()
12785 def _SendNotification(self, test, arg, sockname):
12786 """Sends a notification to the client.
12789 @param test: Test name
12790 @param arg: Test argument (depends on test)
12791 @type sockname: string
12792 @param sockname: Socket path
12795 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
12797 def _Notify(self, prereq, test, arg):
12798 """Notifies the client of a test.
12801 @param prereq: Whether this is a prereq-phase test
12803 @param test: Test name
12804 @param arg: Test argument (depends on test)
12807 if prereq:
12808 errcls = errors.OpPrereqError
12809 else:
12810 errcls = errors.OpExecError
12812 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
12813 test, arg),
12814 errcls)
12816 def CheckArguments(self):
12817 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
12818 self.expandnames_calls = 0
12820 def ExpandNames(self):
12821 checkargs_calls = getattr(self, "checkargs_calls", 0)
12822 if checkargs_calls < 1:
12823 raise errors.ProgrammerError("CheckArguments was not called")
12825 self.expandnames_calls += 1
12827 if self.op.notify_waitlock:
12828 self._Notify(True, constants.JQT_EXPANDNAMES, None)
12830 self.LogInfo("Expanding names")
12832 # Get lock on master node (just to get a lock, not for a particular reason)
12833 self.needed_locks = {
12834 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
12835 }
12837 def Exec(self, feedback_fn):
12838 if self.expandnames_calls < 1:
12839 raise errors.ProgrammerError("ExpandNames was not called")
12841 if self.op.notify_exec:
12842 self._Notify(False, constants.JQT_EXEC, None)
12844 self.LogInfo("Executing")
12846 if self.op.log_messages:
12847 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
12848 for idx, msg in enumerate(self.op.log_messages):
12849 self.LogInfo("Sending log message %s", idx + 1)
12850 feedback_fn(constants.JQT_MSGPREFIX + msg)
12851 # Report how many test messages have been sent
12852 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
12854 if self.op.fail:
12855 raise errors.OpExecError("Opcode failure was requested")
12857 return True
12860 class IAllocator(object):
12861 """IAllocator framework.
12863 An IAllocator instance has four sets of attributes:
12864 - cfg that is needed to query the cluster
12865 - input data (all members of the _KEYS class attribute are required)
12866 - four buffer attributes (in_text, out_text, in_data, out_data), that
12867 represent the input (to the external script) in text and data structure
12868 format, and the output from it, again in two formats
12869 - the result variables from the script (success, info, nodes) for
12870 easy usage
12873 # pylint: disable=R0902
12874 # lots of instance attributes
12876 def __init__(self, cfg, rpc, mode, **kwargs):
12877 self.cfg = cfg
12878 self.rpc = rpc
12879 # init buffer variables
12880 self.in_text = self.out_text = self.in_data = self.out_data = None
12881 # init all input fields so that pylint is happy
12882 self.mode = mode
12883 self.memory = self.disks = self.disk_template = None
12884 self.os = self.tags = self.nics = self.vcpus = None
12885 self.hypervisor = None
12886 self.relocate_from = None
12887 self.name = None
12888 self.instances = None
12889 self.evac_mode = None
12890 self.target_groups = []
12892 self.required_nodes = None
12893 # init result fields
12894 self.success = self.info = self.result = None
12896 try:
12897 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
12898 except KeyError:
12899 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
12900 " IAllocator" % self.mode)
12902 keyset = [n for (n, _) in keydata]
12904 for key in kwargs:
12905 if key not in keyset:
12906 raise errors.ProgrammerError("Invalid input parameter '%s' to"
12907 " IAllocator" % key)
12908 setattr(self, key, kwargs[key])
12910 for key in keyset:
12911 if key not in kwargs:
12912 raise errors.ProgrammerError("Missing input parameter '%s' to"
12913 " IAllocator" % key)
12914 self._BuildInputData(compat.partial(fn, self), keydata)
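# A minimal sketch of constructing an allocator request, mirroring the
# call made by LUGroupEvacuate above (instance and group values are
# illustrative):
#
#   ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
#                    instances=["inst1.example.com"],
#                    target_groups=["uuid-of-target-group"])
#   ial.Run(self.op.iallocator)
#   if not ial.success:
#       ...  # ial.info carries the error message from the script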
12916 def _ComputeClusterData(self):
12917 """Compute the generic allocator input data.
12919 This is the data that is independent of the actual operation.
12922 cfg = self.cfg
12923 cluster_info = cfg.GetClusterInfo()
12924 # cluster data
12925 data = {
12926 "version": constants.IALLOCATOR_VERSION,
12927 "cluster_name": cfg.GetClusterName(),
12928 "cluster_tags": list(cluster_info.GetTags()),
12929 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
12930 # we don't have job IDs
12931 }
12932 ninfo = cfg.GetAllNodesInfo()
12933 iinfo = cfg.GetAllInstancesInfo().values()
12934 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
12937 node_list = [n.name for n in ninfo.values() if n.vm_capable]
12939 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
12940 hypervisor_name = self.hypervisor
12941 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
12942 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
12943 else:
12944 hypervisor_name = cluster_info.enabled_hypervisors[0]
12946 node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
12947 hypervisor_name)
12948 node_iinfo = \
12949 self.rpc.call_all_instances_info(node_list,
12950 cluster_info.enabled_hypervisors)
12952 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
12954 config_ndata = self._ComputeBasicNodeData(ninfo)
12955 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
12956 i_list, config_ndata)
12957 assert len(data["nodes"]) == len(ninfo), \
12958 "Incomplete node data computed"
12960 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
12962 self.in_data = data
12964 @staticmethod
12965 def _ComputeNodeGroupData(cfg):
12966 """Compute node groups data.
12969 ng = dict((guuid, {
12970 "name": gdata.name,
12971 "alloc_policy": gdata.alloc_policy,
12973 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
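
  # Example of the returned mapping (UUID and name hypothetical):
  #   {"2584e26d-...": {"name": "default", "alloc_policy": "preferred"}}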

  @staticmethod
  def _ComputeBasicNodeData(node_cfg):
    """Compute global node data.

    @rtype: dict
    @returns: a dict mapping node name to static (config-based) node data

    """
    # fill in static (config-based) values
    node_results = dict((ninfo.name, {
      "tags": list(ninfo.GetTags()),
      "primary_ip": ninfo.primary_ip,
      "secondary_ip": ninfo.secondary_ip,
      "offline": ninfo.offline,
      "drained": ninfo.drained,
      "master_candidate": ninfo.master_candidate,
      "group": ninfo.group,
      "master_capable": ninfo.master_capable,
      "vm_capable": ninfo.vm_capable,
      })
      for ninfo in node_cfg.values())

    return node_results
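
  # A single entry of the returned dict looks roughly like (hypothetical
  # node):
  #   "node1.example.com": {"tags": [], "primary_ip": "192.0.2.10",
  #                         "secondary_ip": "192.0.2.10", "offline": False,
  #                         "drained": False, "master_candidate": True,
  #                         "group": "<group UUID>", "master_capable": True,
  #                         "vm_capable": True}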

  @staticmethod
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
                              node_results):
    """Compute global node data.

    @param node_results: the basic node structures as filled from the config

    """
    # make a copy of the current dict
    node_results = dict(node_results)
    for nname, nresult in node_data.items():
      assert nname in node_results, "Missing basic data for node %s" % nname
      ninfo = node_cfg[nname]

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ["memory_total", "memory_free", "memory_dom0",
                     "vg_size", "vg_free", "cpu_total"]:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info["memory_free"] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info["memory_total"],
          "reserved_memory": remote_info["memory_dom0"],
          "free_memory": remote_info["memory_free"],
          "total_disk": remote_info["vg_size"],
          "free_disk": remote_info["vg_free"],
          "total_cpus": remote_info["cpu_total"],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr_dyn.update(node_results[nname])
        node_results[nname] = pnr_dyn

    return node_results
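
  # Worked example for the memory_free correction above (numbers
  # hypothetical): an instance with BE_MEMORY=1024 that the hypervisor
  # reports as currently using 768 MiB yields
  #   i_mem_diff = 1024 - 768               # 256 MiB not yet claimed
  #   remote_info["memory_free"] -= 256     # treat it as already committed
  # so the allocator never over-commits memory that a running instance is
  # still entitled to grow into.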

  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.

    """
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {
          "mac": nic.mac,
          "ip": nic.ip,
          "mode": filled_params[constants.NIC_MODE],
          "link": filled_params[constants.NIC_LINK],
          }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{constants.IDISK_SIZE: dsk.size,
                   constants.IDISK_MODE: dsk.mode}
                  for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data
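
  # One entry of the returned dict, sketched with hypothetical values
  # (disk_space_total assumes one 1024 MiB disk plus 128 MiB DRBD metadata):
  #   "inst1.example.com": {
  #     "tags": [], "admin_up": True, "vcpus": 1, "memory": 1024,
  #     "os": "debootstrap+default",
  #     "nodes": ["node1.example.com", "node2.example.com"],
  #     "nics": [{"mac": "aa:00:00:dd:11:e7", "ip": None,
  #               "mode": "bridged", "link": "xen-br0",
  #               "bridge": "xen-br0"}],
  #     "disks": [{"size": 1024, "mode": "rw"}],
  #     "disk_template": "drbd", "hypervisor": "xen-pvm",
  #     "disk_space_total": 1152,
  #   }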

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_INT_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1

    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.memory,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      "hypervisor": self.hypervisor,
      }

    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_MIRRORED:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if instance.disk_template in constants.DTS_INT_MIRROR and \
        len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddNodeEvacuate(self):
    """Get data for node-evacuate requests.

    """
    return {
      "instances": self.instances,
      "evac_mode": self.evac_mode,
      }

  def _AddChangeGroup(self):
    """Get data for group-change requests.

    """
    return {
      "instances": self.instances,
      "target_groups": self.target_groups,
      }

  def _BuildInputData(self, fn, keydata):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    for keyname, keytype in keydata:
      if keyname not in request:
        raise errors.ProgrammerError("Request parameter %s is missing" %
                                     keyname)
      val = request[keyname]
      if not keytype(val):
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
                                     " validation, value %s, expected"
                                     " type %s" % (keyname, val, keytype))
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)
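
  # The final self.in_text is a single JSON document combining cluster data
  # and the per-mode request, e.g. (sketch, abbreviated):
  #   {"version": ..., "cluster_name": ..., "nodegroups": {...},
  #    "nodes": {...}, "instances": {...},
  #    "request": {"type": "allocate", "name": ..., ...}}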

  _STRING_LIST = ht.TListOf(ht.TString)
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
     # pylint: disable=E1101
     # Class '...' has no 'OP_ID' member
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
                          opcodes.OpInstanceMigrate.OP_ID,
                          opcodes.OpInstanceReplaceDisks.OP_ID])
     })))

  _NEVAC_MOVED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TNonEmptyString,
                                  ht.TListOf(ht.TNonEmptyString),
                                  ])))
  _NEVAC_FAILED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TMaybeString,
                                  ])))
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
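
  # A minimal value accepted by _NEVAC_RESULT (hypothetical names):
  #   ([["inst1.example.com", "group1", ["node3.example.com"]]],  # moved
  #    [["inst2.example.com", "disk template not mirrored"]],     # failed
  #    [])                                                        # jobs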

  _MODE_DATA = {
    constants.IALLOCATOR_MODE_ALLOC:
      (_AddNewInstance,
       [
        ("name", ht.TString),
        ("memory", ht.TInt),
        ("disks", ht.TListOf(ht.TDict)),
        ("disk_template", ht.TString),
        ("os", ht.TString),
        ("tags", _STRING_LIST),
        ("nics", ht.TListOf(ht.TDict)),
        ("vcpus", ht.TInt),
        ("hypervisor", ht.TString),
        ], ht.TList),
    constants.IALLOCATOR_MODE_RELOC:
      (_AddRelocateInstance,
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
       ht.TList),
    constants.IALLOCATOR_MODE_NODE_EVAC:
      (_AddNodeEvacuate, [
        ("instances", _STRING_LIST),
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
        ], _NEVAC_RESULT),
    constants.IALLOCATOR_MODE_CHG_GROUP:
      (_AddChangeGroup, [
        ("instances", _STRING_LIST),
        ("target_groups", _STRING_LIST),
        ], _NEVAC_RESULT),
    }

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()
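
  # Hedged sketch of how an LU typically consumes this (attribute names as
  # used elsewhere in this module):
  #   ial.Run(self.op.iallocator)
  #   if not ial.success:
  #     raise errors.OpPrereqError("Can't compute nodes using"
  #                                " iallocator '%s': %s" %
  #                                (self.op.iallocator, ial.info),
  #                                errors.ECODE_NORES)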

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not self._result_check(self.result):
      raise errors.OpExecError("Iallocator returned invalid result,"
                               " expected %s, got %s" %
                               (self._result_check, self.result),
                               errors.ECODE_INVAL)

    if self.mode == constants.IALLOCATOR_MODE_RELOC:
      assert self.relocate_from is not None
      assert self.required_nodes == 1

      node2group = dict((name, ndata["group"])
                        for (name, ndata) in self.in_data["nodes"].items())

      fn = compat.partial(self._NodesToGroups, node2group,
                          self.in_data["nodegroups"])

      instance = self.cfg.GetInstanceInfo(self.name)
      request_groups = fn(self.relocate_from + [instance.primary_node])
      result_groups = fn(rdict["result"] + [instance.primary_node])

      if self.success and not set(result_groups).issubset(request_groups):
        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
                                 " differ from original groups (%s)" %
                                 (utils.CommaJoin(result_groups),
                                  utils.CommaJoin(request_groups)))

    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES

    self.out_data = rdict

  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
    """Returns a list of unique group names for a list of nodes.

    @type node2group: dict
    @param node2group: Map from node name to group UUID
    @type groups: dict
    @param groups: Group information
    @type nodes: list
    @param nodes: Node names

    """
    result = set()

    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        pass
      else:
        try:
          group = groups[group_uuid]
        except KeyError:
          # Can't find group, let's use UUID
          group_name = group_uuid
        else:
          group_name = group["name"]

        result.add(group_name)

    return sorted(result)
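
  # Behaviour sketch with hypothetical data:
  #   node2group = {"node1": "uuid-a", "node2": "uuid-b"}
  #   groups = {"uuid-a": {"name": "default"}}
  #   _NodesToGroups(node2group, groups, ["node1", "node2", "ghost"])
  #   => ["default", "uuid-b"]
  # Unknown nodes are skipped and a group missing from the map falls back to
  # its UUID.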


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode of
    the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
        list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      raise errors.ProgrammerError("Uncaught mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
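
# Usage sketch (not from this excerpt): a query LU resolves the class and
# instantiates it, roughly
#   qcls = _GetQueryImplementation(self.op.what)
#   impl = qcls(qfilter, self.op.fields, False)
# where the constructor arguments follow the query base class used
# elsewhere in this module.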