4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable-msg=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
# C0302: since we have way too many lines in this module
45 from ganeti import ssh
46 from ganeti import utils
47 from ganeti import errors
48 from ganeti import hypervisor
49 from ganeti import locking
50 from ganeti import constants
51 from ganeti import objects
52 from ganeti import serializer
53 from ganeti import ssconf
54 from ganeti import uidpool
55 from ganeti import compat
56 from ganeti import masterd
57 from ganeti import netutils
58 from ganeti import query
59 from ganeti import qlang
60 from ganeti import opcodes
63 import ganeti.masterd.instance # pylint: disable-msg=W0611
67 """Data container for LU results with jobs.
69 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
70 by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
71 contained in the C{jobs} attribute and include the job IDs in the opcode
75 def __init__(self, jobs, **kwargs):
76 """Initializes this class.
78 Additional return values can be specified as keyword arguments.
@type jobs: list of lists of L{opcodes.OpCode}
81 @param jobs: A list of lists of opcode objects
88 class LogicalUnit(object):
89 """Logical Unit base class.
91 Subclasses must follow these rules:
92 - implement ExpandNames
93 - implement CheckPrereq (except when tasklets are used)
94 - implement Exec (except when tasklets are used)
95 - implement BuildHooksEnv
96 - implement BuildHooksNodes
97 - redefine HPATH and HTYPE
98 - optionally redefine their run requirements:
99 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
101 Note that all commands require root permissions.
103 @ivar dry_run_result: the value (if any) that will be returned to the caller
104 in dry-run mode (signalled by opcode dry_run parameter)
111 def __init__(self, processor, op, context, rpc):
112 """Constructor for LogicalUnit.
114 This needs to be overridden in derived classes in order to check op
118 self.proc = processor
120 self.cfg = context.cfg
121 self.glm = context.glm
123 self.owned_locks = context.glm.list_owned
124 self.context = context
126 # Dicts used to declare locking needs to mcpu
127 self.needed_locks = None
128 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
130 self.remove_locks = {}
131 # Used to force good behavior when calling helper functions
132 self.recalculate_locks = {}
134 self.Log = processor.Log # pylint: disable-msg=C0103
135 self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
136 self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
137 self.LogStep = processor.LogStep # pylint: disable-msg=C0103
138 # support for dry-run
139 self.dry_run_result = None
140 # support for generic debug attribute
141 if (not hasattr(self.op, "debug_level") or
142 not isinstance(self.op.debug_level, int)):
143 self.op.debug_level = 0
148 # Validate opcode parameters and set defaults
149 self.op.Validate(True)
151 self.CheckArguments()
153 def CheckArguments(self):
154 """Check syntactic validity for the opcode arguments.
This method is for doing a simple syntactic check and ensuring the
validity of opcode parameters, without any cluster-related
checks. While the same can be accomplished in ExpandNames and/or
CheckPrereq, doing these separately is better because:
- ExpandNames is left as purely a lock-related function
162 - CheckPrereq is run after we have acquired locks (and possible
165 The function is allowed to change the self.op attribute so that
166 later methods can no longer worry about missing parameters.
171 def ExpandNames(self):
172 """Expand names for this LU.
174 This method is called before starting to execute the opcode, and it should
175 update all the parameters of the opcode to their canonical form (e.g. a
176 short node name must be fully expanded after this method has successfully
177 completed). This way locking, hooks, logging, etc. can work correctly.
179 LUs which implement this method must also populate the self.needed_locks
180 member, as a dict with lock levels as keys, and a list of needed lock names
183 - use an empty dict if you don't need any lock
184 - if you don't need any lock at a particular level omit that level
185 - don't put anything for the BGL level
186 - if you want all locks at a level use locking.ALL_SET as a value
188 If you need to share locks (rather than acquire them exclusively) at one
189 level you can modify self.share_locks, setting a true value (usually 1) for
190 that level. By default locks are not shared.
192 This function can also define a list of tasklets, which then will be
193 executed in order instead of the usual LU-level CheckPrereq and Exec
194 functions, if those are not defined by the LU.
198 # Acquire all nodes and one instance
199 self.needed_locks = {
200 locking.LEVEL_NODE: locking.ALL_SET,
201 locking.LEVEL_INSTANCE: ['instance1.example.com'],
203 # Acquire just two nodes
204 self.needed_locks = {
205 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
208 self.needed_locks = {} # No, you can't leave it to the default value None
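# A further illustrative sketch (not from the original docstring): to take
# all node locks in shared rather than exclusive mode, as described above
# for self.share_locks:
self.share_locks[locking.LEVEL_NODE] = 1
self.needed_locks = {
  locking.LEVEL_NODE: locking.ALL_SET,
}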
211 # The implementation of this method is mandatory only if the new LU is
212 # concurrent, so that old LUs don't need to be changed all at the same
215 self.needed_locks = {} # Exclusive LUs don't need locks.
217 raise NotImplementedError
219 def DeclareLocks(self, level):
220 """Declare LU locking needs for a level
222 While most LUs can just declare their locking needs at ExpandNames time,
223 sometimes there's the need to calculate some locks after having acquired
224 the ones before. This function is called just before acquiring locks at a
225 particular level, but after acquiring the ones at lower levels, and permits
226 such calculations. It can be used to modify self.needed_locks, and by
227 default it does nothing.
229 This function is only called if you have something already set in
230 self.needed_locks for the level.
232 @param level: Locking level which is going to be locked
233 @type level: member of ganeti.locking.LEVELS
237 def CheckPrereq(self):
238 """Check prerequisites for this LU.
240 This method should check that the prerequisites for the execution
241 of this LU are fulfilled. It can do internode communication, but
242 it should be idempotent - no cluster or system changes are
245 The method should raise errors.OpPrereqError in case something is
246 not fulfilled. Its return value is ignored.
248 This method should also update all the parameters of the opcode to
249 their canonical form if it hasn't been done by ExpandNames before.
252 if self.tasklets is not None:
253 for (idx, tl) in enumerate(self.tasklets):
254 logging.debug("Checking prerequisites for tasklet %s/%s",
255 idx + 1, len(self.tasklets))
260 def Exec(self, feedback_fn):
263 This method should implement the actual work. It should raise
264 errors.OpExecError for failures that are somewhat dealt with in
268 if self.tasklets is not None:
269 for (idx, tl) in enumerate(self.tasklets):
270 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
273 raise NotImplementedError
275 def BuildHooksEnv(self):
276 """Build hooks environment for this LU.
279 @return: Dictionary containing the environment that will be used for
280 running the hooks for this LU. The keys of the dict must not be prefixed
281 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
282 will extend the environment with additional variables. If no environment
283 should be defined, an empty dictionary should be returned (not C{None}).
284 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
288 raise NotImplementedError
290 def BuildHooksNodes(self):
291 """Build list of nodes to run LU's hooks.
293 @rtype: tuple; (list, list)
294 @return: Tuple containing a list of node names on which the hook
295 should run before the execution and a list of node names on which the
296 hook should run after the execution. No nodes should be returned as an
297 empty list (and not None).
298 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
302 raise NotImplementedError
304 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
305 """Notify the LU about the results of its hooks.
307 This method is called every time a hooks phase is executed, and notifies
308 the Logical Unit about the hooks' result. The LU can then use it to alter
309 its result based on the hooks. By default the method does nothing and the
310 previous result is passed back unchanged but any LU can define it if it
311 wants to use the local cluster hook-scripts somehow.
313 @param phase: one of L{constants.HOOKS_PHASE_POST} or
314 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
315 @param hook_results: the results of the multi-node hooks rpc call
@param feedback_fn: function used to send feedback back to the caller
317 @param lu_result: the previous Exec result this LU had, or None
319 @return: the new Exec result, based on the previous result
# API must be kept, thus we ignore the unused-argument and
# could-be-a-function warnings
325 # pylint: disable-msg=W0613,R0201
328 def _ExpandAndLockInstance(self):
329 """Helper function to expand and lock an instance.
331 Many LUs that work on an instance take its name in self.op.instance_name
332 and need to expand it and then declare the expanded name for locking. This
333 function does it, and then updates self.op.instance_name to the expanded
334 name. It also initializes needed_locks as a dict, if this hasn't been done
338 if self.needed_locks is None:
339 self.needed_locks = {}
341 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
342 "_ExpandAndLockInstance called with instance-level locks set"
343 self.op.instance_name = _ExpandInstanceName(self.cfg,
344 self.op.instance_name)
345 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
347 def _LockInstancesNodes(self, primary_only=False):
348 """Helper function to declare instances' nodes for locking.
350 This function should be called after locking one or more instances to lock
351 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
352 with all primary or secondary nodes for instances already locked and
353 present in self.needed_locks[locking.LEVEL_INSTANCE].
355 It should be called from DeclareLocks, and for safety only works if
356 self.recalculate_locks[locking.LEVEL_NODE] is set.
358 In the future it may grow parameters to just lock some instance's nodes, or
359 to just lock primaries or secondary nodes, if needed.
It should be called in DeclareLocks in a way similar to::
363 if level == locking.LEVEL_NODE:
364 self._LockInstancesNodes()
366 @type primary_only: boolean
367 @param primary_only: only lock primary nodes of locked instances
370 assert locking.LEVEL_NODE in self.recalculate_locks, \
371 "_LockInstancesNodes helper function called with no nodes to recalculate"
# TODO: check if we've really been called with the instance locks held
375 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
376 # future we might want to have different behaviors depending on the value
377 # of self.recalculate_locks[locking.LEVEL_NODE]
379 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
380 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
381 wanted_nodes.append(instance.primary_node)
383 wanted_nodes.extend(instance.secondary_nodes)
385 if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
386 self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
387 elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
388 self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
390 del self.recalculate_locks[locking.LEVEL_NODE]
393 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
394 """Simple LU which runs no hooks.
396 This LU is intended as a parent for other LogicalUnits which will
397 run no hooks, in order to reduce duplicate code.
403 def BuildHooksEnv(self):
404 """Empty BuildHooksEnv for NoHooksLu.
406 This just raises an error.
409 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
411 def BuildHooksNodes(self):
412 """Empty BuildHooksNodes for NoHooksLU.
415 raise AssertionError("BuildHooksNodes called for NoHooksLU")
419 """Tasklet base class.
421 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
422 they can mix legacy code with tasklets. Locking needs to be done in the LU,
423 tasklets know nothing about locks.
425 Subclasses must follow these rules:
426 - Implement CheckPrereq
430 def __init__(self, lu):
437 def CheckPrereq(self):
438 """Check prerequisites for this tasklets.
440 This method should check whether the prerequisites for the execution of
441 this tasklet are fulfilled. It can do internode communication, but it
442 should be idempotent - no cluster or system changes are allowed.
444 The method should raise errors.OpPrereqError in case something is not
445 fulfilled. Its return value is ignored.
447 This method should also update all parameters to their canonical form if it
448 hasn't been done before.
453 def Exec(self, feedback_fn):
454 """Execute the tasklet.
456 This method should implement the actual work. It should raise
457 errors.OpExecError for failures that are somewhat dealt with in code, or
461 raise NotImplementedError
465 """Base for query utility classes.
468 #: Attribute holding field definitions
471 def __init__(self, filter_, fields, use_locking):
472 """Initializes this class.
475 self.use_locking = use_locking
477 self.query = query.Query(self.FIELDS, fields, filter_=filter_,
479 self.requested_data = self.query.RequestedData()
480 self.names = self.query.RequestedNames()
482 # Sort only if no names were requested
483 self.sort_by_name = not self.names
485 self.do_locking = None
488 def _GetNames(self, lu, all_names, lock_level):
489 """Helper function to determine names asked for in the query.
493 names = lu.owned_locks(lock_level)
497 if self.wanted == locking.ALL_SET:
498 assert not self.names
499 # caller didn't specify names, so ordering is not important
500 return utils.NiceSort(names)
502 # caller specified names and we must keep the same order
504 assert not self.do_locking or lu.glm.is_owned(lock_level)
506 missing = set(self.wanted).difference(names)
508 raise errors.OpExecError("Some items were removed before retrieving"
509 " their data: %s" % missing)
511 # Return expanded names
514 def ExpandNames(self, lu):
515 """Expand names for this query.
517 See L{LogicalUnit.ExpandNames}.
520 raise NotImplementedError()
522 def DeclareLocks(self, lu, level):
523 """Declare locks for this query.
525 See L{LogicalUnit.DeclareLocks}.
528 raise NotImplementedError()
530 def _GetQueryData(self, lu):
531 """Collects all data for this query.
533 @return: Query data object
536 raise NotImplementedError()
538 def NewStyleQuery(self, lu):
539 """Collect data and execute query.
542 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
543 sort_by_name=self.sort_by_name)
545 def OldStyleQuery(self, lu):
546 """Collect data and execute query.
549 return self.query.OldStyleQuery(self._GetQueryData(lu),
550 sort_by_name=self.sort_by_name)
554 """Returns a dict declaring all lock levels shared.
557 return dict.fromkeys(locking.LEVELS, 1)
560 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
561 """Checks if the owned node groups are still correct for an instance.
563 @type cfg: L{config.ConfigWriter}
564 @param cfg: The cluster configuration
565 @type instance_name: string
566 @param instance_name: Instance name
567 @type owned_groups: set or frozenset
568 @param owned_groups: List of currently owned node groups
571 inst_groups = cfg.GetInstanceNodeGroups(instance_name)
573 if not owned_groups.issuperset(inst_groups):
574 raise errors.OpPrereqError("Instance %s's node groups changed since"
575 " locks were acquired, current groups are"
576 " are '%s', owning groups '%s'; retry the"
579 utils.CommaJoin(inst_groups),
580 utils.CommaJoin(owned_groups)),
586 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
587 """Checks if the instances in a node group are still correct.
589 @type cfg: L{config.ConfigWriter}
590 @param cfg: The cluster configuration
591 @type group_uuid: string
592 @param group_uuid: Node group UUID
593 @type owned_instances: set or frozenset
594 @param owned_instances: List of currently owned instances
597 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
598 if owned_instances != wanted_instances:
599 raise errors.OpPrereqError("Instances in node group '%s' changed since"
600 " locks were acquired, wanted '%s', have '%s';"
601 " retry the operation" %
603 utils.CommaJoin(wanted_instances),
604 utils.CommaJoin(owned_instances)),
607 return wanted_instances
610 def _SupportsOob(cfg, node):
611 """Tells if node supports OOB.
613 @type cfg: L{config.ConfigWriter}
614 @param cfg: The cluster configuration
615 @type node: L{objects.Node}
616 @param node: The node
617 @return: The OOB script if supported or an empty string otherwise
620 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
623 def _GetWantedNodes(lu, nodes):
624 """Returns list of checked and expanded node names.
626 @type lu: L{LogicalUnit}
627 @param lu: the logical unit on whose behalf we execute
629 @param nodes: list of node names or None for all nodes
631 @return: the list of nodes, sorted
@raise errors.ProgrammerError: if the nodes parameter is of the wrong type
636 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
638 return utils.NiceSort(lu.cfg.GetNodeList())
641 def _GetWantedInstances(lu, instances):
642 """Returns list of checked and expanded instance names.
644 @type lu: L{LogicalUnit}
645 @param lu: the logical unit on whose behalf we execute
646 @type instances: list
647 @param instances: list of instance names or None for all instances
649 @return: the list of instances, sorted
@raise errors.OpPrereqError: if the instances parameter is of the wrong type
651 @raise errors.OpPrereqError: if any of the passed instances is not found
655 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
657 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
661 def _GetUpdatedParams(old_params, update_dict,
662 use_default=True, use_none=False):
663 """Return the new version of a parameter dictionary.
665 @type old_params: dict
666 @param old_params: old parameters
667 @type update_dict: dict
668 @param update_dict: dict containing new parameter values, or
669 constants.VALUE_DEFAULT to reset the parameter to its default
@type use_default: boolean
@param use_default: whether to recognise L{constants.VALUE_DEFAULT}
values as 'to be deleted' values
@type use_none: boolean
@param use_none: whether to recognise C{None} values as 'to be
678 @return: the new parameter dictionary
681 params_copy = copy.deepcopy(old_params)
682 for key, val in update_dict.iteritems():
683 if ((use_default and val == constants.VALUE_DEFAULT) or
684 (use_none and val is None)):
690 params_copy[key] = val
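# Worked example (illustrative, not part of the original module): with
# old_params = {"a": 1, "b": 2} and
# update_dict = {"a": constants.VALUE_DEFAULT, "c": 3} and the default
# use_default=True, the result is {"b": 2, "c": 3}: "a" is removed so it
# falls back to its default, "c" is added and "b" is left untouched.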
694 def _ReleaseLocks(lu, level, names=None, keep=None):
695 """Releases locks owned by an LU.
697 @type lu: L{LogicalUnit}
698 @param level: Lock level
699 @type names: list or None
700 @param names: Names of locks to release
701 @type keep: list or None
702 @param keep: Names of locks to retain
705 assert not (keep is not None and names is not None), \
706 "Only one of the 'names' and the 'keep' parameters can be given"
708 if names is not None:
709 should_release = names.__contains__
711 should_release = lambda name: name not in keep
713 should_release = None
719 # Determine which locks to release
720 for name in lu.owned_locks(level):
721 if should_release(name):
726 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
728 # Release just some locks
729 lu.glm.release(level, names=release)
731 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
734 lu.glm.release(level)
736 assert not lu.glm.is_owned(level), "No locks should be owned"
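# Usage sketch (illustrative): an LU that has narrowed down its node list
# can drop the locks it no longer needs with something like
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.node_name])
# or release everything it owns at that level with
#   _ReleaseLocks(self, locking.LEVEL_NODE)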
739 def _MapInstanceDisksToNodes(instances):
740 """Creates a map from (node, volume) to instance name.
742 @type instances: list of L{objects.Instance}
743 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
746 return dict(((node, vol), inst.name)
747 for inst in instances
748 for (node, vols) in inst.MapLVsByNode().items()
752 def _RunPostHook(lu, node_name):
753 """Runs the post-hook for an opcode on a single node.
756 hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
758 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
760 # pylint: disable-msg=W0702
761 lu.LogWarning("Errors occurred running hooks on %s" % node_name)
764 def _CheckOutputFields(static, dynamic, selected):
765 """Checks whether all selected fields are valid.
767 @type static: L{utils.FieldSet}
768 @param static: static fields set
769 @type dynamic: L{utils.FieldSet}
770 @param dynamic: dynamic fields set
777 delta = f.NonMatching(selected)
779 raise errors.OpPrereqError("Unknown output fields selected: %s"
780 % ",".join(delta), errors.ECODE_INVAL)
783 def _CheckGlobalHvParams(params):
784 """Validates that given hypervisor params are not global ones.
786 This will ensure that instances don't get customised versions of
790 used_globals = constants.HVC_GLOBALS.intersection(params)
792 msg = ("The following hypervisor parameters are global and cannot"
793 " be customized at instance level, please modify them at"
794 " cluster level: %s" % utils.CommaJoin(used_globals))
795 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
798 def _CheckNodeOnline(lu, node, msg=None):
799 """Ensure that a given node is online.
801 @param lu: the LU on behalf of which we make the check
802 @param node: the node to check
803 @param msg: if passed, should be a message to replace the default one
804 @raise errors.OpPrereqError: if the node is offline
808 msg = "Can't use offline node"
809 if lu.cfg.GetNodeInfo(node).offline:
810 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
813 def _CheckNodeNotDrained(lu, node):
814 """Ensure that a given node is not drained.
816 @param lu: the LU on behalf of which we make the check
817 @param node: the node to check
818 @raise errors.OpPrereqError: if the node is drained
821 if lu.cfg.GetNodeInfo(node).drained:
822 raise errors.OpPrereqError("Can't use drained node %s" % node,
826 def _CheckNodeVmCapable(lu, node):
827 """Ensure that a given node is vm capable.
829 @param lu: the LU on behalf of which we make the check
830 @param node: the node to check
831 @raise errors.OpPrereqError: if the node is not vm capable
834 if not lu.cfg.GetNodeInfo(node).vm_capable:
835 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
839 def _CheckNodeHasOS(lu, node, os_name, force_variant):
840 """Ensure that a node supports a given OS.
842 @param lu: the LU on behalf of which we make the check
843 @param node: the node to check
844 @param os_name: the OS to query about
845 @param force_variant: whether to ignore variant errors
846 @raise errors.OpPrereqError: if the node is not supporting the OS
849 result = lu.rpc.call_os_get(node, os_name)
850 result.Raise("OS '%s' not in supported OS list for node %s" %
852 prereq=True, ecode=errors.ECODE_INVAL)
853 if not force_variant:
854 _CheckOSVariant(result.payload, os_name)
857 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
858 """Ensure that a node has the given secondary ip.
860 @type lu: L{LogicalUnit}
861 @param lu: the LU on behalf of which we make the check
863 @param node: the node to check
864 @type secondary_ip: string
865 @param secondary_ip: the ip to check
866 @type prereq: boolean
867 @param prereq: whether to throw a prerequisite or an execute error
868 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
869 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
872 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
873 result.Raise("Failure checking secondary ip on node %s" % node,
874 prereq=prereq, ecode=errors.ECODE_ENVIRON)
875 if not result.payload:
876 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
877 " please fix and re-run this command" % secondary_ip)
879 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
881 raise errors.OpExecError(msg)
884 def _GetClusterDomainSecret():
885 """Reads the cluster domain secret.
888 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
892 def _CheckInstanceDown(lu, instance, reason):
893 """Ensure that an instance is not running."""
894 if instance.admin_up:
895 raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
896 (instance.name, reason), errors.ECODE_STATE)
898 pnode = instance.primary_node
899 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
900 ins_l.Raise("Can't contact node %s for instance information" % pnode,
901 prereq=True, ecode=errors.ECODE_ENVIRON)
903 if instance.name in ins_l.payload:
904 raise errors.OpPrereqError("Instance %s is running, %s" %
905 (instance.name, reason), errors.ECODE_STATE)
908 def _ExpandItemName(fn, name, kind):
909 """Expand an item name.
911 @param fn: the function to use for expansion
912 @param name: requested item name
913 @param kind: text description ('Node' or 'Instance')
914 @return: the resolved (full) name
915 @raise errors.OpPrereqError: if the item is not found
919 if full_name is None:
920 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
925 def _ExpandNodeName(cfg, name):
926 """Wrapper over L{_ExpandItemName} for nodes."""
927 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
930 def _ExpandInstanceName(cfg, name):
931 """Wrapper over L{_ExpandItemName} for instance."""
932 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
935 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
936 memory, vcpus, nics, disk_template, disks,
937 bep, hvp, hypervisor_name, tags):
938 """Builds instance related env variables for hooks
940 This builds the hook environment from individual variables.
943 @param name: the name of the instance
944 @type primary_node: string
945 @param primary_node: the name of the instance's primary node
946 @type secondary_nodes: list
947 @param secondary_nodes: list of secondary nodes as strings
948 @type os_type: string
949 @param os_type: the name of the instance's OS
950 @type status: boolean
951 @param status: the should_run status of the instance
953 @param memory: the memory size of the instance
955 @param vcpus: the count of VCPUs the instance has
957 @param nics: list of tuples (ip, mac, mode, link) representing
958 the NICs the instance has
959 @type disk_template: string
960 @param disk_template: the disk template of the instance
962 @param disks: the list of (size, mode) pairs
964 @param bep: the backend parameters for the instance
966 @param hvp: the hypervisor parameters for the instance
967 @type hypervisor_name: string
968 @param hypervisor_name: the hypervisor for the instance
970 @param tags: list of instance tags as strings
972 @return: the hook environment for this instance
981 "INSTANCE_NAME": name,
982 "INSTANCE_PRIMARY": primary_node,
983 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
984 "INSTANCE_OS_TYPE": os_type,
985 "INSTANCE_STATUS": str_status,
986 "INSTANCE_MEMORY": memory,
987 "INSTANCE_VCPUS": vcpus,
988 "INSTANCE_DISK_TEMPLATE": disk_template,
989 "INSTANCE_HYPERVISOR": hypervisor_name,
993 nic_count = len(nics)
994 for idx, (ip, mac, mode, link) in enumerate(nics):
997 env["INSTANCE_NIC%d_IP" % idx] = ip
998 env["INSTANCE_NIC%d_MAC" % idx] = mac
999 env["INSTANCE_NIC%d_MODE" % idx] = mode
1000 env["INSTANCE_NIC%d_LINK" % idx] = link
1001 if mode == constants.NIC_MODE_BRIDGED:
1002 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1006 env["INSTANCE_NIC_COUNT"] = nic_count
1009 disk_count = len(disks)
1010 for idx, (size, mode) in enumerate(disks):
1011 env["INSTANCE_DISK%d_SIZE" % idx] = size
1012 env["INSTANCE_DISK%d_MODE" % idx] = mode
1016 env["INSTANCE_DISK_COUNT"] = disk_count
1021 env["INSTANCE_TAGS"] = " ".join(tags)
1023 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1024 for key, value in source.items():
1025 env["INSTANCE_%s_%s" % (kind, key)] = value
1030 def _NICListToTuple(lu, nics):
1031 """Build a list of nic information tuples.
1033 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1034 value in LUInstanceQueryData.
1036 @type lu: L{LogicalUnit}
1037 @param lu: the logical unit on whose behalf we execute
1038 @type nics: list of L{objects.NIC}
1039 @param nics: list of nics to convert to hooks tuples
1043 cluster = lu.cfg.GetClusterInfo()
1047 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1048 mode = filled_params[constants.NIC_MODE]
1049 link = filled_params[constants.NIC_LINK]
1050 hooks_nics.append((ip, mac, mode, link))
1054 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1055 """Builds instance related env variables for hooks from an object.
1057 @type lu: L{LogicalUnit}
1058 @param lu: the logical unit on whose behalf we execute
1059 @type instance: L{objects.Instance}
1060 @param instance: the instance for which we should build the
1062 @type override: dict
1063 @param override: dictionary with key/values that will override
1066 @return: the hook environment dictionary
1069 cluster = lu.cfg.GetClusterInfo()
1070 bep = cluster.FillBE(instance)
1071 hvp = cluster.FillHV(instance)
1073 "name": instance.name,
1074 "primary_node": instance.primary_node,
1075 "secondary_nodes": instance.secondary_nodes,
1076 "os_type": instance.os,
1077 "status": instance.admin_up,
1078 "memory": bep[constants.BE_MEMORY],
1079 "vcpus": bep[constants.BE_VCPUS],
1080 "nics": _NICListToTuple(lu, instance.nics),
1081 "disk_template": instance.disk_template,
1082 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1085 "hypervisor_name": instance.hypervisor,
1086 "tags": instance.tags,
1089 args.update(override)
1090 return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
1093 def _AdjustCandidatePool(lu, exceptions):
1094 """Adjust the candidate pool after node operations.
1097 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1099 lu.LogInfo("Promoted nodes to master candidate role: %s",
1100 utils.CommaJoin(node.name for node in mod_list))
1101 for name in mod_list:
1102 lu.context.ReaddNode(name)
1103 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1105 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1109 def _DecideSelfPromotion(lu, exceptions=None):
1110 """Decide whether I should promote myself as a master candidate.
1113 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1114 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
# the new node will increase mc_max by one, so:
1116 mc_should = min(mc_should + 1, cp_size)
1117 return mc_now < mc_should
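# Worked example (illustrative): with candidate_pool_size = 10, 5 current
# master candidates and 5 desired ones, adding this node gives
# mc_should = min(5 + 1, 10) = 6 > mc_now = 5, so the node promotes itself.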
1120 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1121 """Check that the brigdes needed by a list of nics exist.
1124 cluster = lu.cfg.GetClusterInfo()
1125 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1126 brlist = [params[constants.NIC_LINK] for params in paramslist
1127 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1129 result = lu.rpc.call_bridges_exist(target_node, brlist)
1130 result.Raise("Error checking bridges on destination node '%s'" %
1131 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1134 def _CheckInstanceBridgesExist(lu, instance, node=None):
1135 """Check that the brigdes needed by an instance exist.
1139 node = instance.primary_node
1140 _CheckNicsBridgesExist(lu, instance.nics, node)
1143 def _CheckOSVariant(os_obj, name):
1144 """Check whether an OS name conforms to the os variants specification.
1146 @type os_obj: L{objects.OS}
1147 @param os_obj: OS object to check
1149 @param name: OS name passed by the user, to check for validity
1152 variant = objects.OS.GetVariant(name)
1153 if not os_obj.supported_variants:
1155 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1156 " passed)" % (os_obj.name, variant),
1160 raise errors.OpPrereqError("OS name must include a variant",
1163 if variant not in os_obj.supported_variants:
1164 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1167 def _GetNodeInstancesInner(cfg, fn):
1168 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1171 def _GetNodeInstances(cfg, node_name):
1172 """Returns a list of all primary and secondary instances on a node.
1176 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1179 def _GetNodePrimaryInstances(cfg, node_name):
1180 """Returns primary instances on a node.
1183 return _GetNodeInstancesInner(cfg,
1184 lambda inst: node_name == inst.primary_node)
1187 def _GetNodeSecondaryInstances(cfg, node_name):
1188 """Returns secondary instances on a node.
1191 return _GetNodeInstancesInner(cfg,
1192 lambda inst: node_name in inst.secondary_nodes)
1195 def _GetStorageTypeArgs(cfg, storage_type):
1196 """Returns the arguments for a storage type.
1199 # Special case for file storage
1200 if storage_type == constants.ST_FILE:
1201 # storage.FileStorage wants a list of storage directories
1202 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1207 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1210 for dev in instance.disks:
1211 cfg.SetDiskID(dev, node_name)
1213 result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1214 result.Raise("Failed to get disk status from node %s" % node_name,
1215 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1217 for idx, bdev_status in enumerate(result.payload):
1218 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1224 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1225 """Check the sanity of iallocator and node arguments and use the
1226 cluster-wide iallocator if appropriate.
1228 Check that at most one of (iallocator, node) is specified. If none is
1229 specified, then the LU's opcode's iallocator slot is filled with the
1230 cluster-wide default iallocator.
1232 @type iallocator_slot: string
1233 @param iallocator_slot: the name of the opcode iallocator slot
1234 @type node_slot: string
1235 @param node_slot: the name of the opcode target node slot
1238 node = getattr(lu.op, node_slot, None)
1239 iallocator = getattr(lu.op, iallocator_slot, None)
1241 if node is not None and iallocator is not None:
1242 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1244 elif node is None and iallocator is None:
1245 default_iallocator = lu.cfg.GetDefaultIAllocator()
1246 if default_iallocator:
1247 setattr(lu.op, iallocator_slot, default_iallocator)
1249 raise errors.OpPrereqError("No iallocator or node given and no"
1250 " cluster-wide default iallocator found;"
1251 " please specify either an iallocator or a"
1252 " node, or set a cluster-wide default"
1256 def _GetDefaultIAllocator(cfg, iallocator):
1257 """Decides on which iallocator to use.
1259 @type cfg: L{config.ConfigWriter}
1260 @param cfg: Cluster configuration object
1261 @type iallocator: string or None
1262 @param iallocator: Iallocator specified in opcode
1264 @return: Iallocator name
1268 # Use default iallocator
1269 iallocator = cfg.GetDefaultIAllocator()
1272 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1273 " opcode nor as a cluster-wide default",
1279 class LUClusterPostInit(LogicalUnit):
1280 """Logical unit for running hooks after cluster initialization.
1283 HPATH = "cluster-init"
1284 HTYPE = constants.HTYPE_CLUSTER
1286 def BuildHooksEnv(self):
1291 "OP_TARGET": self.cfg.GetClusterName(),
1294 def BuildHooksNodes(self):
1295 """Build hooks nodes.
1298 return ([], [self.cfg.GetMasterNode()])
1300 def Exec(self, feedback_fn):
1307 class LUClusterDestroy(LogicalUnit):
1308 """Logical unit for destroying the cluster.
1311 HPATH = "cluster-destroy"
1312 HTYPE = constants.HTYPE_CLUSTER
1314 def BuildHooksEnv(self):
1319 "OP_TARGET": self.cfg.GetClusterName(),
1322 def BuildHooksNodes(self):
1323 """Build hooks nodes.
1328 def CheckPrereq(self):
1329 """Check prerequisites.
1331 This checks whether the cluster is empty.
1333 Any errors are signaled by raising errors.OpPrereqError.
1336 master = self.cfg.GetMasterNode()
1338 nodelist = self.cfg.GetNodeList()
1339 if len(nodelist) != 1 or nodelist[0] != master:
1340 raise errors.OpPrereqError("There are still %d node(s) in"
1341 " this cluster." % (len(nodelist) - 1),
1343 instancelist = self.cfg.GetInstanceList()
1345 raise errors.OpPrereqError("There are still %d instance(s) in"
1346 " this cluster." % len(instancelist),
1349 def Exec(self, feedback_fn):
1350 """Destroys the cluster.
1353 master = self.cfg.GetMasterNode()
1355 # Run post hooks on master node before it's removed
1356 _RunPostHook(self, master)
1358 result = self.rpc.call_node_stop_master(master, False)
1359 result.Raise("Could not disable the master role")
1364 def _VerifyCertificate(filename):
1365 """Verifies a certificate for L{LUClusterVerifyConfig}.
1367 @type filename: string
1368 @param filename: Path to PEM file
1372 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1373 utils.ReadFile(filename))
1374 except Exception, err: # pylint: disable-msg=W0703
1375 return (LUClusterVerifyConfig.ETYPE_ERROR,
1376 "Failed to load X509 certificate %s: %s" % (filename, err))
1379 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1380 constants.SSL_CERT_EXPIRATION_ERROR)
1383 fnamemsg = "While verifying %s: %s" % (filename, msg)
1388 return (None, fnamemsg)
1389 elif errcode == utils.CERT_WARNING:
1390 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1391 elif errcode == utils.CERT_ERROR:
1392 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1394 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1397 def _GetAllHypervisorParameters(cluster, instances):
1398 """Compute the set of all hypervisor parameters.
1400 @type cluster: L{objects.Cluster}
1401 @param cluster: the cluster object
1402 @param instances: list of L{objects.Instance}
1403 @param instances: additional instances from which to obtain parameters
1404 @rtype: list of (origin, hypervisor, parameters)
1405 @return: a list with all parameters found, indicating the hypervisor they
1406 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1411 for hv_name in cluster.enabled_hypervisors:
1412 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1414 for os_name, os_hvp in cluster.os_hvp.items():
1415 for hv_name, hv_params in os_hvp.items():
1417 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1418 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1420 # TODO: collapse identical parameter values in a single one
1421 for instance in instances:
1422 if instance.hvparams:
1423 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1424 cluster.FillHV(instance)))
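# Illustrative shape of the result (hypothetical names and values):
#   [("cluster", "xen-pvm", {...cluster defaults...}),
#    ("os debian-image", "xen-pvm", {...os overrides applied...}),
#    ("instance web1.example.com", "xen-pvm", {...fully filled params...})]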
1429 class _VerifyErrors(object):
1430 """Mix-in for cluster/group verify LUs.
1432 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1433 self.op and self._feedback_fn to be available.)
1436 TCLUSTER = "cluster"
1438 TINSTANCE = "instance"
1440 ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1441 ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1442 ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
1443 ECLUSTERDANGLINGNODES = (TNODE, "ECLUSTERDANGLINGNODES")
1444 ECLUSTERDANGLINGINST = (TNODE, "ECLUSTERDANGLINGINST")
1445 EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1446 EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1447 EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1448 EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1449 EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1450 EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1451 EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
1452 ENODEDRBD = (TNODE, "ENODEDRBD")
1453 ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1454 ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1455 ENODEHOOKS = (TNODE, "ENODEHOOKS")
1456 ENODEHV = (TNODE, "ENODEHV")
1457 ENODELVM = (TNODE, "ENODELVM")
1458 ENODEN1 = (TNODE, "ENODEN1")
1459 ENODENET = (TNODE, "ENODENET")
1460 ENODEOS = (TNODE, "ENODEOS")
1461 ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1462 ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1463 ENODERPC = (TNODE, "ENODERPC")
1464 ENODESSH = (TNODE, "ENODESSH")
1465 ENODEVERSION = (TNODE, "ENODEVERSION")
1466 ENODESETUP = (TNODE, "ENODESETUP")
1467 ENODETIME = (TNODE, "ENODETIME")
1468 ENODEOOBPATH = (TNODE, "ENODEOOBPATH")
1470 ETYPE_FIELD = "code"
1471 ETYPE_ERROR = "ERROR"
1472 ETYPE_WARNING = "WARNING"
1474 def _Error(self, ecode, item, msg, *args, **kwargs):
1475 """Format an error message.
1477 Based on the opcode's error_codes parameter, either format a
1478 parseable error code, or a simpler error string.
1480 This must be called only from Exec and functions called from Exec.
1483 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1485 # first complete the msg
1488 # then format the whole message
1489 if self.op.error_codes: # This is a mix-in. pylint: disable-msg=E1101
1490 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1496 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1497 # and finally report it via the feedback_fn
1498 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable-msg=E1101
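# Illustrative example (hypothetical node and message): with
# self.op.error_codes set, an entry is emitted in the parseable form
#   " - ERROR:ENODELVM:node:node1.example.com:unable to check volume groups"
# otherwise the simpler human-readable form built above is used.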
1500 def _ErrorIf(self, cond, *args, **kwargs):
1501 """Log an error message if the passed condition is True.
1505 or self.op.debug_simulate_errors) # pylint: disable-msg=E1101
1507 self._Error(*args, **kwargs)
1508 # do not mark the operation as failed for WARN cases only
1509 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1510 self.bad = self.bad or cond
1513 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1514 """Verifies the cluster config.
1519 def _VerifyHVP(self, hvp_data):
1520 """Verifies locally the syntax of the hypervisor parameters.
1523 for item, hv_name, hv_params in hvp_data:
1524 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1527 hv_class = hypervisor.GetHypervisor(hv_name)
1528 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1529 hv_class.CheckParameterSyntax(hv_params)
1530 except errors.GenericError, err:
1531 self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
1533 def ExpandNames(self):
1534 # Information can be safely retrieved as the BGL is acquired in exclusive
1536 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1537 self.all_node_info = self.cfg.GetAllNodesInfo()
1538 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1539 self.needed_locks = {}
1541 def Exec(self, feedback_fn):
1542 """Verify integrity of cluster, performing various test on nodes.
1546 self._feedback_fn = feedback_fn
1548 feedback_fn("* Verifying cluster config")
1550 for msg in self.cfg.VerifyConfig():
1551 self._ErrorIf(True, self.ECLUSTERCFG, None, msg)
1553 feedback_fn("* Verifying cluster certificate files")
1555 for cert_filename in constants.ALL_CERT_FILES:
1556 (errcode, msg) = _VerifyCertificate(cert_filename)
1557 self._ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1559 feedback_fn("* Verifying hypervisor parameters")
1561 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1562 self.all_inst_info.values()))
1564 feedback_fn("* Verifying all nodes belong to an existing group")
1566 # We do this verification here because, should this bogus circumstance
1567 # occur, it would never be caught by VerifyGroup, which only acts on
1568 # nodes/instances reachable from existing node groups.
1570 dangling_nodes = set(node.name for node in self.all_node_info.values()
1571 if node.group not in self.all_group_info)
1573 dangling_instances = {}
1574 no_node_instances = []
1576 for inst in self.all_inst_info.values():
1577 if inst.primary_node in dangling_nodes:
1578 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1579 elif inst.primary_node not in self.all_node_info:
1580 no_node_instances.append(inst.name)
1585 utils.CommaJoin(dangling_instances.get(node.name,
1587 for node in dangling_nodes]
1589 self._ErrorIf(bool(dangling_nodes), self.ECLUSTERDANGLINGNODES, None,
1590 "the following nodes (and their instances) belong to a non"
1591 " existing group: %s", utils.CommaJoin(pretty_dangling))
1593 self._ErrorIf(bool(no_node_instances), self.ECLUSTERDANGLINGINST, None,
1594 "the following instances have a non-existing primary-node:"
1595 " %s", utils.CommaJoin(no_node_instances))
1597 return (not self.bad, [g.name for g in self.all_group_info.values()])
1600 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1601 """Verifies the status of a node group.
1604 HPATH = "cluster-verify"
1605 HTYPE = constants.HTYPE_CLUSTER
1608 _HOOKS_INDENT_RE = re.compile("^", re.M)
1610 class NodeImage(object):
1611 """A class representing the logical and physical status of a node.
1614 @ivar name: the node name to which this object refers
1615 @ivar volumes: a structure as returned from
1616 L{ganeti.backend.GetVolumeList} (runtime)
1617 @ivar instances: a list of running instances (runtime)
1618 @ivar pinst: list of configured primary instances (config)
1619 @ivar sinst: list of configured secondary instances (config)
1620 @ivar sbp: dictionary of {primary-node: list of instances} for all
1621 instances for which this node is secondary (config)
1622 @ivar mfree: free memory, as reported by hypervisor (runtime)
1623 @ivar dfree: free disk, as reported by the node (runtime)
1624 @ivar offline: the offline status (config)
1625 @type rpc_fail: boolean
@ivar rpc_fail: whether the RPC verify call was successful (overall,
1627 not whether the individual keys were correct) (runtime)
1628 @type lvm_fail: boolean
1629 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1630 @type hyp_fail: boolean
1631 @ivar hyp_fail: whether the RPC call didn't return the instance list
1632 @type ghost: boolean
1633 @ivar ghost: whether this is a known node or not (config)
1634 @type os_fail: boolean
1635 @ivar os_fail: whether the RPC call didn't return valid OS data
1637 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1638 @type vm_capable: boolean
1639 @ivar vm_capable: whether the node can host instances
1642 def __init__(self, offline=False, name=None, vm_capable=True):
1651 self.offline = offline
1652 self.vm_capable = vm_capable
1653 self.rpc_fail = False
1654 self.lvm_fail = False
1655 self.hyp_fail = False
1657 self.os_fail = False
1660 def ExpandNames(self):
1661 # This raises errors.OpPrereqError on its own:
1662 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1664 # Get instances in node group; this is unsafe and needs verification later
1665 inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
1667 self.needed_locks = {
1668 locking.LEVEL_INSTANCE: inst_names,
1669 locking.LEVEL_NODEGROUP: [self.group_uuid],
1670 locking.LEVEL_NODE: [],
1673 self.share_locks = _ShareAll()
1675 def DeclareLocks(self, level):
1676 if level == locking.LEVEL_NODE:
1677 # Get members of node group; this is unsafe and needs verification later
1678 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1680 all_inst_info = self.cfg.GetAllInstancesInfo()
1682 # In Exec(), we warn about mirrored instances that have primary and
1683 # secondary living in separate node groups. To fully verify that
1684 # volumes for these instances are healthy, we will need to do an
1685 # extra call to their secondaries. We ensure here those nodes will
1687 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
1688 # Important: access only the instances whose lock is owned
1689 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1690 nodes.update(all_inst_info[inst].secondary_nodes)
1692 self.needed_locks[locking.LEVEL_NODE] = nodes
1694 def CheckPrereq(self):
1695 group_nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1696 group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
1699 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1701 unlocked_instances = \
1702 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
1705 raise errors.OpPrereqError("Missing lock for nodes: %s" %
1706 utils.CommaJoin(unlocked_nodes))
1708 if unlocked_instances:
1709 raise errors.OpPrereqError("Missing lock for instances: %s" %
1710 utils.CommaJoin(unlocked_instances))
1712 self.all_node_info = self.cfg.GetAllNodesInfo()
1713 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1715 self.my_node_names = utils.NiceSort(group_nodes)
1716 self.my_inst_names = utils.NiceSort(group_instances)
1718 self.my_node_info = dict((name, self.all_node_info[name])
1719 for name in self.my_node_names)
1721 self.my_inst_info = dict((name, self.all_inst_info[name])
1722 for name in self.my_inst_names)
1724 # We detect here the nodes that will need the extra RPC calls for verifying
1725 # split LV volumes; they should be locked.
1726 extra_lv_nodes = set()
1728 for inst in self.my_inst_info.values():
1729 if inst.disk_template in constants.DTS_INT_MIRROR:
1730 group = self.my_node_info[inst.primary_node].group
1731 for nname in inst.secondary_nodes:
1732 if self.all_node_info[nname].group != group:
1733 extra_lv_nodes.add(nname)
1735 unlocked_lv_nodes = \
1736 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1738 if unlocked_lv_nodes:
1739 raise errors.OpPrereqError("these nodes could be locked: %s" %
1740 utils.CommaJoin(unlocked_lv_nodes))
1741 self.extra_lv_nodes = list(extra_lv_nodes)
1743 def _VerifyNode(self, ninfo, nresult):
1744 """Perform some basic validation on data returned from a node.
1746 - check the result data structure is well formed and has all the
1748 - check ganeti version
1750 @type ninfo: L{objects.Node}
1751 @param ninfo: the node to check
1752 @param nresult: the results from the node
1754 @return: whether overall this call was successful (and we can expect
reasonable values in the response)
1759 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1761 # main result, nresult should be a non-empty dict
1762 test = not nresult or not isinstance(nresult, dict)
1763 _ErrorIf(test, self.ENODERPC, node,
1764 "unable to verify node: no data returned")
1768 # compares ganeti version
1769 local_version = constants.PROTOCOL_VERSION
1770 remote_version = nresult.get("version", None)
1771 test = not (remote_version and
1772 isinstance(remote_version, (list, tuple)) and
1773 len(remote_version) == 2)
1774 _ErrorIf(test, self.ENODERPC, node,
1775 "connection to node returned invalid data")
1779 test = local_version != remote_version[0]
1780 _ErrorIf(test, self.ENODEVERSION, node,
1781 "incompatible protocol versions: master %s,"
1782 " node %s", local_version, remote_version[0])
1786 # node seems compatible, we can actually try to look into its results
1788 # full package version
1789 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1790 self.ENODEVERSION, node,
1791 "software version mismatch: master %s, node %s",
1792 constants.RELEASE_VERSION, remote_version[1],
1793 code=self.ETYPE_WARNING)
1795 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1796 if ninfo.vm_capable and isinstance(hyp_result, dict):
1797 for hv_name, hv_result in hyp_result.iteritems():
1798 test = hv_result is not None
1799 _ErrorIf(test, self.ENODEHV, node,
1800 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1802 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1803 if ninfo.vm_capable and isinstance(hvp_result, list):
1804 for item, hv_name, hv_result in hvp_result:
1805 _ErrorIf(True, self.ENODEHV, node,
1806 "hypervisor %s parameter verify failure (source %s): %s",
1807 hv_name, item, hv_result)
1809 test = nresult.get(constants.NV_NODESETUP,
1810 ["Missing NODESETUP results"])
1811 _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1816 def _VerifyNodeTime(self, ninfo, nresult,
1817 nvinfo_starttime, nvinfo_endtime):
1818 """Check the node time.
1820 @type ninfo: L{objects.Node}
1821 @param ninfo: the node to check
1822 @param nresult: the remote results for the node
1823 @param nvinfo_starttime: the start time of the RPC call
1824 @param nvinfo_endtime: the end time of the RPC call
1828 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1830 ntime = nresult.get(constants.NV_TIME, None)
1832 ntime_merged = utils.MergeTime(ntime)
1833 except (ValueError, TypeError):
1834 _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1837 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1838 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1839 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1840 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1844 _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1845 "Node time diverges by at least %s from master node time",
1848 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1849 """Check the node LVM results.
1851 @type ninfo: L{objects.Node}
1852 @param ninfo: the node to check
1853 @param nresult: the remote results for the node
1854 @param vg_name: the configured VG name
1861 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1863 # checks vg existence and size > 20G
1864 vglist = nresult.get(constants.NV_VGLIST, None)
1866 _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1868 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1869 constants.MIN_VG_SIZE)
1870 _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1873 pvlist = nresult.get(constants.NV_PVLIST, None)
1874 test = pvlist is None
1875 _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1877 # check that ':' is not present in PV names, since it's a
1878 # special character for lvcreate (denotes the range of PEs to
1880 for _, pvname, owner_vg in pvlist:
1881 test = ":" in pvname
1882 _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1883 " '%s' of VG '%s'", pvname, owner_vg)
1885 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
1886 """Check the node bridges.
1888 @type ninfo: L{objects.Node}
1889 @param ninfo: the node to check
1890 @param nresult: the remote results for the node
1891 @param bridges: the expected list of bridges
1898 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1900 missing = nresult.get(constants.NV_BRIDGES, None)
1901 test = not isinstance(missing, list)
1902 _ErrorIf(test, self.ENODENET, node,
1903 "did not return valid bridge information")
1905 _ErrorIf(bool(missing), self.ENODENET, node, "missing bridges: %s" %
1906 utils.CommaJoin(sorted(missing)))
1908 def _VerifyNodeNetwork(self, ninfo, nresult):
1909 """Check the node network connectivity results.
1911 @type ninfo: L{objects.Node}
1912 @param ninfo: the node to check
1913 @param nresult: the remote results for the node
1917 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1919 test = constants.NV_NODELIST not in nresult
1920 _ErrorIf(test, self.ENODESSH, node,
1921 "node hasn't returned node ssh connectivity data")
1923 if nresult[constants.NV_NODELIST]:
1924 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1925 _ErrorIf(True, self.ENODESSH, node,
1926 "ssh communication with node '%s': %s", a_node, a_msg)
1928 test = constants.NV_NODENETTEST not in nresult
1929 _ErrorIf(test, self.ENODENET, node,
1930 "node hasn't returned node tcp connectivity data")
1932 if nresult[constants.NV_NODENETTEST]:
1933 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1935 _ErrorIf(True, self.ENODENET, node,
1936 "tcp communication with node '%s': %s",
1937 anode, nresult[constants.NV_NODENETTEST][anode])
1939 test = constants.NV_MASTERIP not in nresult
1940 _ErrorIf(test, self.ENODENET, node,
1941 "node hasn't returned node master IP reachability data")
1943 if not nresult[constants.NV_MASTERIP]:
1944 if node == self.master_node:
1945 msg = "the master node cannot reach the master IP (not configured?)"
1947 msg = "cannot reach the master IP"
1948 _ErrorIf(True, self.ENODENET, node, msg)
1950 def _VerifyInstance(self, instance, instanceconfig, node_image,
1952 """Verify an instance.
1954 This function checks whether the required block devices are
1955 available on the instance's nodes.
1958 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1959 node_current = instanceconfig.primary_node
1961 node_vol_should = {}
1962 instanceconfig.MapLVsByNode(node_vol_should)
1964 for node in node_vol_should:
1965 n_img = node_image[node]
1966 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1967 # ignore missing volumes on offline or broken nodes
1969 for volume in node_vol_should[node]:
1970 test = volume not in n_img.volumes
1971 _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1972 "volume %s missing on node %s", volume, node)
1974 if instanceconfig.admin_up:
1975 pri_img = node_image[node_current]
1976 test = instance not in pri_img.instances and not pri_img.offline
1977 _ErrorIf(test, self.EINSTANCEDOWN, instance,
1978 "instance not running on its primary node %s",
1981 diskdata = [(nname, success, status, idx)
1982 for (nname, disks) in diskstatus.items()
1983 for idx, (success, status) in enumerate(disks)]
1985 for nname, success, bdev_status, idx in diskdata:
1986 # the 'ghost node' construction in Exec() ensures that we have a
1988 snode = node_image[nname]
1989 bad_snode = snode.ghost or snode.offline
1990 _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
1991 self.EINSTANCEFAULTYDISK, instance,
1992 "couldn't retrieve status for disk/%s on %s: %s",
1993 idx, nname, bdev_status)
1994 _ErrorIf((instanceconfig.admin_up and success and
1995 bdev_status.ldisk_status == constants.LDS_FAULTY),
1996 self.EINSTANCEFAULTYDISK, instance,
1997 "disk/%s on %s is faulty", idx, nname)
1999 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2000 """Verify if there are any unknown volumes in the cluster.
2002 The .os, .swap and backup volumes are ignored. All other volumes are
2003 reported as unknown.
2005 @type reserved: L{ganeti.utils.FieldSet}
2006 @param reserved: a FieldSet of reserved volume names
2009 for node, n_img in node_image.items():
2010 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2011 # skip non-healthy nodes
2013 for volume in n_img.volumes:
2014 test = ((node not in node_vol_should or
2015 volume not in node_vol_should[node]) and
2016 not reserved.Matches(volume))
2017 self._ErrorIf(test, self.ENODEORPHANLV, node,
2018 "volume %s is unknown", volume)
2020 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2021 """Verify N+1 Memory Resilience.
2023 Check that if one single node dies we can still start all the
2024 instances it was primary for.
2027 cluster_info = self.cfg.GetClusterInfo()
2028 for node, n_img in node_image.items():
2029 # This code checks that every node which is now listed as
2030 # secondary has enough memory to host all instances it is
2031 # supposed to, should a single other node in the cluster fail.
2032 # FIXME: not ready for failover to an arbitrary node
2033 # FIXME: does not support file-backed instances
2034 # WARNING: we currently take into account down instances as well
2035 # as up ones, considering that even if they're down someone
2036 # might want to start them even in the event of a node failure.
2038 # we're skipping offline nodes from the N+1 warning, since
2039 # most likely we don't have good memory information from them;
2040 # we already list instances living on such nodes, and that's
2043 for prinode, instances in n_img.sbp.items():
2045 for instance in instances:
2046 bep = cluster_info.FillBE(instance_cfg[instance])
2047 if bep[constants.BE_AUTO_BALANCE]:
2048 needed_mem += bep[constants.BE_MEMORY]
2049 test = n_img.mfree < needed_mem
2050 self._ErrorIf(test, self.ENODEN1, node,
2051 "not enough memory to accomodate instance failovers"
2052 " should node %s fail (%dMiB needed, %dMiB available)",
2053 prinode, needed_mem, n_img.mfree)
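# Hedged sketch of the N+1 memory arithmetic above: for every primary node
# whose instances this node would have to absorb, sum the backend memory of
# the auto-balanced instances and compare it with the node's free memory.
# All names and sizes below are invented.
def _example_n1_shortfall(sbp, inst_mem, mfree):
  """sbp: {primary: [instances secondary here]}; return primaries we can't absorb."""
  shortfall = {}
  for prinode, instances in sbp.items():
    needed = sum(inst_mem.get(inst, 0) for inst in instances)
    if mfree < needed:
      shortfall[prinode] = needed
  return shortfall

# _example_n1_shortfall({"node2": ["inst1", "inst2"]},
#                       {"inst1": 512, "inst2": 1024}, mfree=1024)
# -> {"node2": 1536}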
2056 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2057 (files_all, files_all_opt, files_mc, files_vm)):
2058 """Verifies file checksums collected from all nodes.
2060 @param errorif: Callback for reporting errors
2061 @param nodeinfo: List of L{objects.Node} objects
2062 @param master_node: Name of master node
2063 @param all_nvinfo: RPC results
2066 node_names = frozenset(node.name for node in nodeinfo if not node.offline)
2068 assert master_node in node_names
2069 assert (len(files_all | files_all_opt | files_mc | files_vm) ==
2070 sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
2071 "Found file listed in more than one file list"
2073 # Define functions determining which nodes to consider for a file
2074 file2nodefn = dict([(filename, fn)
2075 for (files, fn) in [(files_all, None),
2076 (files_all_opt, None),
2077 (files_mc, lambda node: (node.master_candidate or
2078 node.name == master_node)),
2079 (files_vm, lambda node: node.vm_capable)]
2080 for filename in files])
2082 fileinfo = dict((filename, {}) for filename in file2nodefn.keys())
2084 for node in nodeinfo:
2088 nresult = all_nvinfo[node.name]
2090 if nresult.fail_msg or not nresult.payload:
2093 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2095 test = not (node_files and isinstance(node_files, dict))
2096 errorif(test, cls.ENODEFILECHECK, node.name,
2097 "Node did not return file checksum data")
2101 for (filename, checksum) in node_files.items():
2102 # Check if the file should be considered for a node
2103 fn = file2nodefn[filename]
2104 if fn is None or fn(node):
2105 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2107 for (filename, checksums) in fileinfo.items():
2108 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2110 # Nodes having the file
2111 with_file = frozenset(node_name
2112 for nodes in fileinfo[filename].values()
2113 for node_name in nodes)
2115 # Nodes missing file
2116 missing_file = node_names - with_file
2118 if filename in files_all_opt:
2120 errorif(missing_file and missing_file != node_names,
2121 cls.ECLUSTERFILECHECK, None,
2122 "File %s is optional, but it must exist on all or no"
2123 " nodes (not found on %s)",
2124 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2126 errorif(missing_file, cls.ECLUSTERFILECHECK, None,
2127 "File %s is missing from node(s) %s", filename,
2128 utils.CommaJoin(utils.NiceSort(missing_file)))
2130 # See if there are multiple versions of the file
2131 test = len(checksums) > 1
2133 variants = ["variant %s on %s" %
2134 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2135 for (idx, (checksum, nodes)) in
2136 enumerate(sorted(checksums.items()))]
2140 errorif(test, cls.ECLUSTERFILECHECK, None,
2141 "File %s found with %s different checksums (%s)",
2142 filename, len(checksums), "; ".join(variants))
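# Illustrative sketch of the checksum bookkeeping used above: per file, group
# node names by the checksum they reported; one group means the file is
# consistent, several groups mean diverging copies, and nodes absent from all
# groups are missing the file. The names here are invented.
def _example_file_report(node_names, reported):
  """reported: {node: checksum}; return (missing_nodes, nodes_by_checksum)."""
  by_checksum = {}
  for node, checksum in reported.items():
    by_checksum.setdefault(checksum, set()).add(node)
  missing = set(node_names) - set(reported)
  return (missing, by_checksum)

# _example_file_report(["n1", "n2", "n3"], {"n1": "abc", "n2": "def"})
# -> (set(["n3"]), {"abc": set(["n1"]), "def": set(["n2"])})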
2144 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2146 """Verifies and the node DRBD status.
2148 @type ninfo: L{objects.Node}
2149 @param ninfo: the node to check
2150 @param nresult: the remote results for the node
2151 @param instanceinfo: the dict of instances
2152 @param drbd_helper: the configured DRBD usermode helper
2153 @param drbd_map: the DRBD map as returned by
2154 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2158 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2161 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2162 test = (helper_result is None)
2163 _ErrorIf(test, self.ENODEDRBDHELPER, node,
2164 "no drbd usermode helper returned")
2166 status, payload = helper_result
2168 _ErrorIf(test, self.ENODEDRBDHELPER, node,
2169 "drbd usermode helper check unsuccessful: %s", payload)
2170 test = status and (payload != drbd_helper)
2171 _ErrorIf(test, self.ENODEDRBDHELPER, node,
2172 "wrong drbd usermode helper: %s", payload)
2174 # compute the DRBD minors
2176 for minor, instance in drbd_map[node].items():
2177 test = instance not in instanceinfo
2178 _ErrorIf(test, self.ECLUSTERCFG, None,
2179 "ghost instance '%s' in temporary DRBD map", instance)
2180 # ghost instance should not be running, but otherwise we
2181 # don't give double warnings (both ghost instance and
2182 # unallocated minor in use)
2184 node_drbd[minor] = (instance, False)
2186 instance = instanceinfo[instance]
2187 node_drbd[minor] = (instance.name, instance.admin_up)
2189 # and now check them
2190 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2191 test = not isinstance(used_minors, (tuple, list))
2192 _ErrorIf(test, self.ENODEDRBD, node,
2193 "cannot parse drbd status file: %s", str(used_minors))
2195 # we cannot check drbd status
2198 for minor, (iname, must_exist) in node_drbd.items():
2199 test = minor not in used_minors and must_exist
2200 _ErrorIf(test, self.ENODEDRBD, node,
2201 "drbd minor %d of instance %s is not active", minor, iname)
2202 for minor in used_minors:
2203 test = minor not in node_drbd
2204 _ErrorIf(test, self.ENODEDRBD, node,
2205 "unallocated drbd minor %d is in use", minor)
2207 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2208 """Builds the node OS structures.
2210 @type ninfo: L{objects.Node}
2211 @param ninfo: the node to check
2212 @param nresult: the remote results for the node
2213 @param nimg: the node image object
2217 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2219 remote_os = nresult.get(constants.NV_OSLIST, None)
2220 test = (not isinstance(remote_os, list) or
2221 not compat.all(isinstance(v, list) and len(v) == 7
2222 for v in remote_os))
2224 _ErrorIf(test, self.ENODEOS, node,
2225 "node hasn't returned valid OS data")
2234 for (name, os_path, status, diagnose,
2235 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2237 if name not in os_dict:
2240 # parameters is a list of lists instead of list of tuples due to
2241 # JSON lacking a real tuple type, fix it:
2242 parameters = [tuple(v) for v in parameters]
2243 os_dict[name].append((os_path, status, diagnose,
2244 set(variants), set(parameters), set(api_ver)))
2246 nimg.oslist = os_dict
2248 def _VerifyNodeOS(self, ninfo, nimg, base):
2249 """Verifies the node OS list.
2251 @type ninfo: L{objects.Node}
2252 @param ninfo: the node to check
2253 @param nimg: the node image object
2254 @param base: the 'template' node we match against (e.g. from the master)
2258 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2260 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2262 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2263 for os_name, os_data in nimg.oslist.items():
2264 assert os_data, "Empty OS status for OS %s?!" % os_name
2265 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2266 _ErrorIf(not f_status, self.ENODEOS, node,
2267 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2268 _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
2269 "OS '%s' has multiple entries (first one shadows the rest): %s",
2270 os_name, utils.CommaJoin([v[0] for v in os_data]))
2271 # comparisons with the 'base' image
2272 test = os_name not in base.oslist
2273 _ErrorIf(test, self.ENODEOS, node,
2274 "Extra OS %s not present on reference node (%s)",
2278 assert base.oslist[os_name], "Base node has empty OS status?"
2279 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2281 # base OS is invalid, skipping
2283 for kind, a, b in [("API version", f_api, b_api),
2284 ("variants list", f_var, b_var),
2285 ("parameters", beautify_params(f_param),
2286 beautify_params(b_param))]:
2287 _ErrorIf(a != b, self.ENODEOS, node,
2288 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2289 kind, os_name, base.name,
2290 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2292 # check any missing OSes
2293 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2294 _ErrorIf(missing, self.ENODEOS, node,
2295 "OSes present on reference node %s but missing on this node: %s",
2296 base.name, utils.CommaJoin(missing))
2298 def _VerifyOob(self, ninfo, nresult):
2299 """Verifies out of band functionality of a node.
2301 @type ninfo: L{objects.Node}
2302 @param ninfo: the node to check
2303 @param nresult: the remote results for the node
2307 # We just have to verify the paths on master and/or master candidates
2308 # as the oob helper is invoked on the master
2309 if ((ninfo.master_candidate or ninfo.master_capable) and
2310 constants.NV_OOB_PATHS in nresult):
2311 for path_result in nresult[constants.NV_OOB_PATHS]:
2312 self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
2314 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2315 """Verifies and updates the node volume data.
2317 This function will update a L{NodeImage}'s internal structures
2318 with data from the remote call.
2320 @type ninfo: L{objects.Node}
2321 @param ninfo: the node to check
2322 @param nresult: the remote results for the node
2323 @param nimg: the node image object
2324 @param vg_name: the configured VG name
2328 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2330 nimg.lvm_fail = True
2331 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2334 elif isinstance(lvdata, basestring):
2335 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
2336 utils.SafeEncode(lvdata))
2337 elif not isinstance(lvdata, dict):
2338 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
2340 nimg.volumes = lvdata
2341 nimg.lvm_fail = False
2343 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2344 """Verifies and updates the node instance list.
2346 If the listing was successful, the method updates this node's instance
2347 list. Otherwise, it marks the RPC call as failed for the instance
2350 @type ninfo: L{objects.Node}
2351 @param ninfo: the node to check
2352 @param nresult: the remote results for the node
2353 @param nimg: the node image object
2356 idata = nresult.get(constants.NV_INSTANCELIST, None)
2357 test = not isinstance(idata, list)
2358 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
2359 " (instancelist): %s", utils.SafeEncode(str(idata)))
2361 nimg.hyp_fail = True
2363 nimg.instances = idata
2365 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2366 """Verifies and computes a node information map
2368 @type ninfo: L{objects.Node}
2369 @param ninfo: the node to check
2370 @param nresult: the remote results for the node
2371 @param nimg: the node image object
2372 @param vg_name: the configured VG name
2376 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2378 # try to read free memory (from the hypervisor)
2379 hv_info = nresult.get(constants.NV_HVINFO, None)
2380 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2381 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
2384 nimg.mfree = int(hv_info["memory_free"])
2385 except (ValueError, TypeError):
2386 _ErrorIf(True, self.ENODERPC, node,
2387 "node returned invalid nodeinfo, check hypervisor")
2389 # FIXME: devise a free space model for file based instances as well
2390 if vg_name is not None:
2391 test = (constants.NV_VGLIST not in nresult or
2392 vg_name not in nresult[constants.NV_VGLIST])
2393 _ErrorIf(test, self.ENODELVM, node,
2394 "node didn't return data for the volume group '%s'"
2395 " - it is either missing or broken", vg_name)
2398 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2399 except (ValueError, TypeError):
2400 _ErrorIf(True, self.ENODERPC, node,
2401 "node returned invalid LVM info, check LVM status")
2403 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2404 """Gets per-disk status information for all instances.
2406 @type nodelist: list of strings
2407 @param nodelist: Node names
2408 @type node_image: dict of (name, L{objects.Node})
2409 @param node_image: Node objects
2410 @type instanceinfo: dict of (name, L{objects.Instance})
2411 @param instanceinfo: Instance objects
2412 @rtype: {instance: {node: [(success, payload)]}}
2413 @return: a dictionary of per-instance dictionaries with nodes as
2414 keys and disk information as values; the disk information is a
2415 list of tuples (success, payload)
2418 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2421 node_disks_devonly = {}
2422 diskless_instances = set()
2423 diskless = constants.DT_DISKLESS
2425 for nname in nodelist:
2426 node_instances = list(itertools.chain(node_image[nname].pinst,
2427 node_image[nname].sinst))
2428 diskless_instances.update(inst for inst in node_instances
2429 if instanceinfo[inst].disk_template == diskless)
2430 disks = [(inst, disk)
2431 for inst in node_instances
2432 for disk in instanceinfo[inst].disks]
2435 # No need to collect data
2438 node_disks[nname] = disks
2440 # Creating copies as SetDiskID below will modify the objects and that can
2441 # lead to incorrect data returned from nodes
2442 devonly = [dev.Copy() for (_, dev) in disks]
2445 self.cfg.SetDiskID(dev, nname)
2447 node_disks_devonly[nname] = devonly
2449 assert len(node_disks) == len(node_disks_devonly)
2451 # Collect data from all nodes with disks
2452 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2455 assert len(result) == len(node_disks)
2459 for (nname, nres) in result.items():
2460 disks = node_disks[nname]
2463 # No data from this node
2464 data = len(disks) * [(False, "node offline")]
2467 _ErrorIf(msg, self.ENODERPC, nname,
2468 "while getting disk information: %s", msg)
2470 # No data from this node
2471 data = len(disks) * [(False, msg)]
2474 for idx, i in enumerate(nres.payload):
2475 if isinstance(i, (tuple, list)) and len(i) == 2:
2478 logging.warning("Invalid result from node %s, entry %d: %s",
2480 data.append((False, "Invalid result from the remote node"))
2482 for ((inst, _), status) in zip(disks, data):
2483 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2485 # Add empty entries for diskless instances.
2486 for inst in diskless_instances:
2487 assert inst not in instdisk
2490 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2491 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2492 compat.all(isinstance(s, (tuple, list)) and
2493 len(s) == 2 for s in statuses)
2494 for inst, nnames in instdisk.items()
2495 for nname, statuses in nnames.items())
2496 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
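# Hedged sketch of the result shape asserted above: a nested mapping keyed by
# instance name, then node name, holding one (success, payload) pair per disk.
# The instance names, node names and payloads below are invented placeholders.
_example_instdisk = {
  "inst1": {"node1": [(True, "status-disk0"), (True, "status-disk1")]},
  "inst2": {"node2": [(False, "node offline")]},
}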
2500 def BuildHooksEnv(self):
2503 Cluster-Verify hooks are run only in the post phase; if they fail, their
2504 output is logged in the verify output and the verification fails.
2508 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2511 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2512 for node in self.my_node_info.values())
2516 def BuildHooksNodes(self):
2517 """Build hooks nodes.
2520 return ([], self.my_node_names)
2522 def Exec(self, feedback_fn):
2523 """Verify integrity of the node group, performing various test on nodes.
2526 # This method has too many local variables. pylint: disable-msg=R0914
2528 if not self.my_node_names:
2530 feedback_fn("* Empty node group, skipping verification")
2534 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2535 verbose = self.op.verbose
2536 self._feedback_fn = feedback_fn
2538 vg_name = self.cfg.GetVGName()
2539 drbd_helper = self.cfg.GetDRBDHelper()
2540 cluster = self.cfg.GetClusterInfo()
2541 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2542 hypervisors = cluster.enabled_hypervisors
2543 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2545 i_non_redundant = [] # Non redundant instances
2546 i_non_a_balanced = [] # Non auto-balanced instances
2547 n_offline = 0 # Count of offline nodes
2548 n_drained = 0 # Count of nodes being drained
2549 node_vol_should = {}
2551 # FIXME: verify OS list
2554 filemap = _ComputeAncillaryFiles(cluster, False)
2556 # do local checksums
2557 master_node = self.master_node = self.cfg.GetMasterNode()
2558 master_ip = self.cfg.GetMasterIP()
2560 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2562 # We will make nodes contact all nodes in their group, and one node from
2563 # every other group.
2564 # TODO: should it be a *random* node, different every time?
2565 online_nodes = [node.name for node in node_data_list if not node.offline]
2566 other_group_nodes = {}
2568 for name in sorted(self.all_node_info):
2569 node = self.all_node_info[name]
2570 if (node.group not in other_group_nodes
2571 and node.group != self.group_uuid
2572 and not node.offline):
2573 other_group_nodes[node.group] = node.name
2575 node_verify_param = {
2576 constants.NV_FILELIST:
2577 utils.UniqueSequence(filename
2578 for files in filemap
2579 for filename in files),
2580 constants.NV_NODELIST: online_nodes + other_group_nodes.values(),
2581 constants.NV_HYPERVISOR: hypervisors,
2582 constants.NV_HVPARAMS:
2583 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2584 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2585 for node in node_data_list
2586 if not node.offline],
2587 constants.NV_INSTANCELIST: hypervisors,
2588 constants.NV_VERSION: None,
2589 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2590 constants.NV_NODESETUP: None,
2591 constants.NV_TIME: None,
2592 constants.NV_MASTERIP: (master_node, master_ip),
2593 constants.NV_OSLIST: None,
2594 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2597 if vg_name is not None:
2598 node_verify_param[constants.NV_VGLIST] = None
2599 node_verify_param[constants.NV_LVLIST] = vg_name
2600 node_verify_param[constants.NV_PVLIST] = [vg_name]
2601 node_verify_param[constants.NV_DRBDLIST] = None
2604 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2607 # FIXME: this needs to be changed per node-group, not cluster-wide
2609 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2610 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2611 bridges.add(default_nicpp[constants.NIC_LINK])
2612 for instance in self.my_inst_info.values():
2613 for nic in instance.nics:
2614 full_nic = cluster.SimpleFillNIC(nic.nicparams)
2615 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2616 bridges.add(full_nic[constants.NIC_LINK])
2619 node_verify_param[constants.NV_BRIDGES] = list(bridges)
2621 # Build our expected cluster state
2622 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2624 vm_capable=node.vm_capable))
2625 for node in node_data_list)
2629 for node in self.all_node_info.values():
2630 path = _SupportsOob(self.cfg, node)
2631 if path and path not in oob_paths:
2632 oob_paths.append(path)
2635 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2637 for instance in self.my_inst_names:
2638 inst_config = self.my_inst_info[instance]
2640 for nname in inst_config.all_nodes:
2641 if nname not in node_image:
2642 gnode = self.NodeImage(name=nname)
2643 gnode.ghost = (nname not in self.all_node_info)
2644 node_image[nname] = gnode
2646 inst_config.MapLVsByNode(node_vol_should)
2648 pnode = inst_config.primary_node
2649 node_image[pnode].pinst.append(instance)
2651 for snode in inst_config.secondary_nodes:
2652 nimg = node_image[snode]
2653 nimg.sinst.append(instance)
2654 if pnode not in nimg.sbp:
2655 nimg.sbp[pnode] = []
2656 nimg.sbp[pnode].append(instance)
2658 # At this point, we have the in-memory data structures complete,
2659 # except for the runtime information, which we'll gather next
2661 # Due to the way our RPC system works, exact response times cannot be
2662 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2663 # time before and after executing the request, we can at least have a time
2665 nvinfo_starttime = time.time()
2666 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2668 self.cfg.GetClusterName())
2669 nvinfo_endtime = time.time()
2671 if self.extra_lv_nodes and vg_name is not None:
2673 self.rpc.call_node_verify(self.extra_lv_nodes,
2674 {constants.NV_LVLIST: vg_name},
2675 self.cfg.GetClusterName())
2677 extra_lv_nvinfo = {}
2679 all_drbd_map = self.cfg.ComputeDRBDMap()
2681 feedback_fn("* Gathering disk information (%s nodes)" %
2682 len(self.my_node_names))
2683 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2686 feedback_fn("* Verifying configuration file consistency")
2688 # If not all nodes are being checked, we need to make sure the master node
2689 # and a non-checked vm_capable node are in the list.
2690 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2692 vf_nvinfo = all_nvinfo.copy()
2693 vf_node_info = list(self.my_node_info.values())
2694 additional_nodes = []
2695 if master_node not in self.my_node_info:
2696 additional_nodes.append(master_node)
2697 vf_node_info.append(self.all_node_info[master_node])
2698 # Add the first vm_capable node we find which is not included
2699 for node in absent_nodes:
2700 nodeinfo = self.all_node_info[node]
2701 if nodeinfo.vm_capable and not nodeinfo.offline:
2702 additional_nodes.append(node)
2703 vf_node_info.append(self.all_node_info[node])
2705 key = constants.NV_FILELIST
2706 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2707 {key: node_verify_param[key]},
2708 self.cfg.GetClusterName()))
2710 vf_nvinfo = all_nvinfo
2711 vf_node_info = self.my_node_info.values()
2713 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2715 feedback_fn("* Verifying node status")
2719 for node_i in node_data_list:
2721 nimg = node_image[node]
2725 feedback_fn("* Skipping offline node %s" % (node,))
2729 if node == master_node:
2731 elif node_i.master_candidate:
2732 ntype = "master candidate"
2733 elif node_i.drained:
2739 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2741 msg = all_nvinfo[node].fail_msg
2742 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2744 nimg.rpc_fail = True
2747 nresult = all_nvinfo[node].payload
2749 nimg.call_ok = self._VerifyNode(node_i, nresult)
2750 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2751 self._VerifyNodeNetwork(node_i, nresult)
2752 self._VerifyOob(node_i, nresult)
2755 self._VerifyNodeLVM(node_i, nresult, vg_name)
2756 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2759 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2760 self._UpdateNodeInstances(node_i, nresult, nimg)
2761 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2762 self._UpdateNodeOS(node_i, nresult, nimg)
2764 if not nimg.os_fail:
2765 if refos_img is None:
2767 self._VerifyNodeOS(node_i, nimg, refos_img)
2768 self._VerifyNodeBridges(node_i, nresult, bridges)
2770 # Check whether all running instances are primary for the node. (This
2771 # can no longer be done from _VerifyInstance below, since some of the
2772 # wrong instances could be from other node groups.)
2773 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2775 for inst in non_primary_inst:
2776 test = inst in self.all_inst_info
2777 _ErrorIf(test, self.EINSTANCEWRONGNODE, inst,
2778 "instance should not run on node %s", node_i.name)
2779 _ErrorIf(not test, self.ENODEORPHANINSTANCE, node_i.name,
2780 "node is running unknown instance %s", inst)
2782 for node, result in extra_lv_nvinfo.items():
2783 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
2784 node_image[node], vg_name)
2786 feedback_fn("* Verifying instance status")
2787 for instance in self.my_inst_names:
2789 feedback_fn("* Verifying instance %s" % instance)
2790 inst_config = self.my_inst_info[instance]
2791 self._VerifyInstance(instance, inst_config, node_image,
2793 inst_nodes_offline = []
2795 pnode = inst_config.primary_node
2796 pnode_img = node_image[pnode]
2797 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2798 self.ENODERPC, pnode, "instance %s, connection to"
2799 " primary node failed", instance)
2801 _ErrorIf(inst_config.admin_up and pnode_img.offline,
2802 self.EINSTANCEBADNODE, instance,
2803 "instance is marked as running and lives on offline node %s",
2804 inst_config.primary_node)
2806 # If the instance is non-redundant we cannot survive losing its primary
2807 # node, so we are not N+1 compliant. On the other hand we have no disk
2808 # templates with more than one secondary so that situation is not well
2810 # FIXME: does not support file-backed instances
2811 if not inst_config.secondary_nodes:
2812 i_non_redundant.append(instance)
2814 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2815 instance, "instance has multiple secondary nodes: %s",
2816 utils.CommaJoin(inst_config.secondary_nodes),
2817 code=self.ETYPE_WARNING)
2819 if inst_config.disk_template in constants.DTS_INT_MIRROR:
2820 pnode = inst_config.primary_node
2821 instance_nodes = utils.NiceSort(inst_config.all_nodes)
2822 instance_groups = {}
2824 for node in instance_nodes:
2825 instance_groups.setdefault(self.all_node_info[node].group,
2829 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2830 # Sort so that we always list the primary node first.
2831 for group, nodes in sorted(instance_groups.items(),
2832 key=lambda (_, nodes): pnode in nodes,
2835 self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2836 instance, "instance has primary and secondary nodes in"
2837 " different groups: %s", utils.CommaJoin(pretty_list),
2838 code=self.ETYPE_WARNING)
2840 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2841 i_non_a_balanced.append(instance)
2843 for snode in inst_config.secondary_nodes:
2844 s_img = node_image[snode]
2845 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2846 "instance %s, connection to secondary node failed", instance)
2849 inst_nodes_offline.append(snode)
2851 # warn that the instance lives on offline nodes
2852 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2853 "instance has offline secondary node(s) %s",
2854 utils.CommaJoin(inst_nodes_offline))
2855 # ... or ghost/non-vm_capable nodes
2856 for node in inst_config.all_nodes:
2857 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2858 "instance lives on ghost node %s", node)
2859 _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2860 instance, "instance lives on non-vm_capable node %s", node)
2862 feedback_fn("* Verifying orphan volumes")
2863 reserved = utils.FieldSet(*cluster.reserved_lvs)
2865 # We will get spurious "unknown volume" warnings if any node of this group
2866 # is secondary for an instance whose primary is in another group. To avoid
2867 # them, we find these instances and add their volumes to node_vol_should.
2868 for inst in self.all_inst_info.values():
2869 for secondary in inst.secondary_nodes:
2870 if (secondary in self.my_node_info
2871 and inst.name not in self.my_inst_info):
2872 inst.MapLVsByNode(node_vol_should)
2875 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2877 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2878 feedback_fn("* Verifying N+1 Memory redundancy")
2879 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
2881 feedback_fn("* Other Notes")
2883 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
2884 % len(i_non_redundant))
2886 if i_non_a_balanced:
2887 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
2888 % len(i_non_a_balanced))
2891 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
2894 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
2898 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2899 """Analyze the post-hooks' result
2901 This method analyses the hook result, handles it, and sends some
2902 nicely-formatted feedback back to the user.
2904 @param phase: one of L{constants.HOOKS_PHASE_POST} or
2905 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2906 @param hooks_results: the results of the multi-node hooks rpc call
2907 @param feedback_fn: function used to send feedback back to the caller
2908 @param lu_result: previous Exec result
2909 @return: the new Exec result, based on the previous result
2913 # We only really run POST phase hooks, only for non-empty groups,
2914 # and are only interested in their results
2915 if not self.my_node_names:
2918 elif phase == constants.HOOKS_PHASE_POST:
2919 # Used to change hooks' output to proper indentation
2920 feedback_fn("* Hooks Results")
2921 assert hooks_results, "invalid result from hooks"
2923 for node_name in hooks_results:
2924 res = hooks_results[node_name]
2926 test = msg and not res.offline
2927 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2928 "Communication failure in hooks execution: %s", msg)
2929 if res.offline or msg:
2930 # No need to investigate payload if node is offline or gave an error.
2931 # manually override lu_result here, as _ErrorIf only
2932 # overrides self.bad
2935 for script, hkr, output in res.payload:
2936 test = hkr == constants.HKR_FAIL
2937 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2938 "Script %s failed, output:", script)
2940 output = self._HOOKS_INDENT_RE.sub(" ", output)
2941 feedback_fn("%s" % output)
2947 class LUClusterVerifyDisks(NoHooksLU):
2948 """Verifies the cluster disks status.
2953 def ExpandNames(self):
2954 self.share_locks = _ShareAll()
2955 self.needed_locks = {
2956 locking.LEVEL_NODEGROUP: locking.ALL_SET,
2959 def Exec(self, feedback_fn):
2960 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
2962 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
2963 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
2964 for group in group_names])
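# Illustrative sketch (invented group names): the Exec above fans out one
# child job per node group, each holding a single OpGroupVerifyDisks opcode,
# so the processor submits them as separate jobs and returns their IDs.
def _example_per_group_jobs(group_names):
  return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
                         for group in group_names])

# _example_per_group_jobs(["default", "storage"]) -> two single-opcode jobs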
2967 class LUGroupVerifyDisks(NoHooksLU):
2968 """Verifies the status of all disks in a node group.
2973 def ExpandNames(self):
2974 # Raises errors.OpPrereqError on its own if group can't be found
2975 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2977 self.share_locks = _ShareAll()
2978 self.needed_locks = {
2979 locking.LEVEL_INSTANCE: [],
2980 locking.LEVEL_NODEGROUP: [],
2981 locking.LEVEL_NODE: [],
2984 def DeclareLocks(self, level):
2985 if level == locking.LEVEL_INSTANCE:
2986 assert not self.needed_locks[locking.LEVEL_INSTANCE]
2988 # Lock instances optimistically, needs verification once node and group
2989 # locks have been acquired
2990 self.needed_locks[locking.LEVEL_INSTANCE] = \
2991 self.cfg.GetNodeGroupInstances(self.group_uuid)
2993 elif level == locking.LEVEL_NODEGROUP:
2994 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
2996 self.needed_locks[locking.LEVEL_NODEGROUP] = \
2997 set([self.group_uuid] +
2998 # Lock all groups used by instances optimistically; this requires
2999 # going via the node before it's locked, requiring verification
3002 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3003 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3005 elif level == locking.LEVEL_NODE:
3006 # This will only lock the nodes in the group to be verified which contain
3008 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3009 self._LockInstancesNodes()
3011 # Lock all nodes in group to be verified
3012 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3013 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3014 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3016 def CheckPrereq(self):
3017 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3018 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3019 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3021 assert self.group_uuid in owned_groups
3023 # Check if locked instances are still correct
3024 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3026 # Get instance information
3027 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3029 # Check if node groups for locked instances are still correct
3030 for (instance_name, inst) in self.instances.items():
3031 assert owned_nodes.issuperset(inst.all_nodes), \
3032 "Instance %s's nodes changed while we kept the lock" % instance_name
3034 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3037 assert self.group_uuid in inst_groups, \
3038 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3040 def Exec(self, feedback_fn):
3041 """Verify integrity of cluster disks.
3043 @rtype: tuple of three items
3044 @return: a tuple of (dict of node-to-node_error, list of instances
3045 which need activate-disks, dict of instance: (node, volume) for
3050 res_instances = set()
3053 nv_dict = _MapInstanceDisksToNodes([inst
3054 for inst in self.instances.values()
3058 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3059 set(self.cfg.GetVmCapableNodeList()))
3061 node_lvs = self.rpc.call_lv_list(nodes, [])
3063 for (node, node_res) in node_lvs.items():
3064 if node_res.offline:
3067 msg = node_res.fail_msg
3069 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3070 res_nodes[node] = msg
3073 for lv_name, (_, _, lv_online) in node_res.payload.items():
3074 inst = nv_dict.pop((node, lv_name), None)
3075 if not (lv_online or inst is None):
3076 res_instances.add(inst)
3078 # any leftover items in nv_dict are missing LVs, let's arrange the data
3080 for key, inst in nv_dict.iteritems():
3081 res_missing.setdefault(inst, []).append(key)
3083 return (res_nodes, list(res_instances), res_missing)
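# Hypothetical sketch of the per-node LV matching performed above: LVs that
# are reported but offline point at instances needing activate-disks, and any
# (node, lv) pair left in nv_dict afterwards is a missing LV.
def _example_match_lvs(nv_dict, node, payload):
  """nv_dict: {(node, lv_name): instance}; payload: {lv_name: (size, _, online)}."""
  needs_activation = set()
  for lv_name, (_, _, lv_online) in payload.items():
    inst = nv_dict.pop((node, lv_name), None)
    if inst is not None and not lv_online:
      needs_activation.add(inst)
  return needs_activation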
3086 class LUClusterRepairDiskSizes(NoHooksLU):
3087 """Verifies the cluster disks sizes.
3092 def ExpandNames(self):
3093 if self.op.instances:
3094 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3095 self.needed_locks = {
3096 locking.LEVEL_NODE: [],
3097 locking.LEVEL_INSTANCE: self.wanted_names,
3099 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3101 self.wanted_names = None
3102 self.needed_locks = {
3103 locking.LEVEL_NODE: locking.ALL_SET,
3104 locking.LEVEL_INSTANCE: locking.ALL_SET,
3106 self.share_locks = _ShareAll()
3108 def DeclareLocks(self, level):
3109 if level == locking.LEVEL_NODE and self.wanted_names is not None:
3110 self._LockInstancesNodes(primary_only=True)
3112 def CheckPrereq(self):
3113 """Check prerequisites.
3115 This only checks the optional instance list against the existing names.
3118 if self.wanted_names is None:
3119 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3121 self.wanted_instances = \
3122 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3124 def _EnsureChildSizes(self, disk):
3125 """Ensure children of the disk have the needed disk size.
3127 This is valid mainly for DRBD8 and fixes an issue where the
3128 children have a smaller disk size.
3130 @param disk: an L{ganeti.objects.Disk} object
3133 if disk.dev_type == constants.LD_DRBD8:
3134 assert disk.children, "Empty children for DRBD8?"
3135 fchild = disk.children[0]
3136 mismatch = fchild.size < disk.size
3138 self.LogInfo("Child disk has size %d, parent %d, fixing",
3139 fchild.size, disk.size)
3140 fchild.size = disk.size
3142 # and we recurse on this child only, not on the metadev
3143 return self._EnsureChildSizes(fchild) or mismatch
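# Minimal sketch (hypothetical sizes, plain integers instead of Disk objects)
# of the rule enforced above: a DRBD8 parent whose data child is smaller has
# the child grown to the parent's size, and the caller is told to save the
# configuration.
def _example_fix_child_size(parent_size, child_size):
  """Return (new_child_size, needs_config_update)."""
  if child_size < parent_size:
    return (parent_size, True)
  return (child_size, False)

# _example_fix_child_size(10240, 10112) -> (10240, True)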
3147 def Exec(self, feedback_fn):
3148 """Verify the size of cluster disks.
3151 # TODO: check child disks too
3152 # TODO: check differences in size between primary/secondary nodes
3154 for instance in self.wanted_instances:
3155 pnode = instance.primary_node
3156 if pnode not in per_node_disks:
3157 per_node_disks[pnode] = []
3158 for idx, disk in enumerate(instance.disks):
3159 per_node_disks[pnode].append((instance, idx, disk))
3162 for node, dskl in per_node_disks.items():
3163 newl = [v[2].Copy() for v in dskl]
3165 self.cfg.SetDiskID(dsk, node)
3166 result = self.rpc.call_blockdev_getsize(node, newl)
3168 self.LogWarning("Failure in blockdev_getsize call to node"
3169 " %s, ignoring", node)
3171 if len(result.payload) != len(dskl):
3172 logging.warning("Invalid result from node %s: len(dskl)=%d,"
3173 " result.payload=%s", node, len(dskl), result.payload)
3174 self.LogWarning("Invalid result from node %s, ignoring node results",
3177 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3179 self.LogWarning("Disk %d of instance %s did not return size"
3180 " information, ignoring", idx, instance.name)
3182 if not isinstance(size, (int, long)):
3183 self.LogWarning("Disk %d of instance %s did not return valid"
3184 " size information, ignoring", idx, instance.name)
3187 if size != disk.size:
3188 self.LogInfo("Disk %d of instance %s has mismatched size,"
3189 " correcting: recorded %d, actual %d", idx,
3190 instance.name, disk.size, size)
3192 self.cfg.Update(instance, feedback_fn)
3193 changed.append((instance.name, idx, size))
3194 if self._EnsureChildSizes(disk):
3195 self.cfg.Update(instance, feedback_fn)
3196 changed.append((instance.name, idx, disk.size))
3200 class LUClusterRename(LogicalUnit):
3201 """Rename the cluster.
3204 HPATH = "cluster-rename"
3205 HTYPE = constants.HTYPE_CLUSTER
3207 def BuildHooksEnv(self):
3212 "OP_TARGET": self.cfg.GetClusterName(),
3213 "NEW_NAME": self.op.name,
3216 def BuildHooksNodes(self):
3217 """Build hooks nodes.
3220 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3222 def CheckPrereq(self):
3223 """Verify that the passed name is a valid one.
3226 hostname = netutils.GetHostname(name=self.op.name,
3227 family=self.cfg.GetPrimaryIPFamily())
3229 new_name = hostname.name
3230 self.ip = new_ip = hostname.ip
3231 old_name = self.cfg.GetClusterName()
3232 old_ip = self.cfg.GetMasterIP()
3233 if new_name == old_name and new_ip == old_ip:
3234 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3235 " cluster has changed",
3237 if new_ip != old_ip:
3238 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3239 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3240 " reachable on the network" %
3241 new_ip, errors.ECODE_NOTUNIQUE)
3243 self.op.name = new_name
3245 def Exec(self, feedback_fn):
3246 """Rename the cluster.
3249 clustername = self.op.name
3252 # shutdown the master IP
3253 master = self.cfg.GetMasterNode()
3254 result = self.rpc.call_node_stop_master(master, False)
3255 result.Raise("Could not disable the master role")
3258 cluster = self.cfg.GetClusterInfo()
3259 cluster.cluster_name = clustername
3260 cluster.master_ip = ip
3261 self.cfg.Update(cluster, feedback_fn)
3263 # update the known hosts file
3264 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3265 node_list = self.cfg.GetOnlineNodeList()
3267 node_list.remove(master)
3270 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3272 result = self.rpc.call_node_start_master(master, False, False)
3273 msg = result.fail_msg
3275 self.LogWarning("Could not re-enable the master role on"
3276 " the master, please restart manually: %s", msg)
3281 class LUClusterSetParams(LogicalUnit):
3282 """Change the parameters of the cluster.
3285 HPATH = "cluster-modify"
3286 HTYPE = constants.HTYPE_CLUSTER
3289 def CheckArguments(self):
3293 if self.op.uid_pool:
3294 uidpool.CheckUidPool(self.op.uid_pool)
3296 if self.op.add_uids:
3297 uidpool.CheckUidPool(self.op.add_uids)
3299 if self.op.remove_uids:
3300 uidpool.CheckUidPool(self.op.remove_uids)
3302 def ExpandNames(self):
3303 # FIXME: in the future maybe other cluster params won't require checking on
3304 # all nodes to be modified.
3305 self.needed_locks = {
3306 locking.LEVEL_NODE: locking.ALL_SET,
3308 self.share_locks[locking.LEVEL_NODE] = 1
3310 def BuildHooksEnv(self):
3315 "OP_TARGET": self.cfg.GetClusterName(),
3316 "NEW_VG_NAME": self.op.vg_name,
3319 def BuildHooksNodes(self):
3320 """Build hooks nodes.
3323 mn = self.cfg.GetMasterNode()
3326 def CheckPrereq(self):
3327 """Check prerequisites.
3329 This checks that the given parameters don't conflict and
3330 that the given volume group is valid.
3333 if self.op.vg_name is not None and not self.op.vg_name:
3334 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3335 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3336 " instances exist", errors.ECODE_INVAL)
3338 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3339 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3340 raise errors.OpPrereqError("Cannot disable drbd helper while"
3341 " drbd-based instances exist",
3344 node_list = self.owned_locks(locking.LEVEL_NODE)
3346 # if vg_name not None, checks given volume group on all nodes
3348 vglist = self.rpc.call_vg_list(node_list)
3349 for node in node_list:
3350 msg = vglist[node].fail_msg
3352 # ignoring down node
3353 self.LogWarning("Error while gathering data on node %s"
3354 " (ignoring node): %s", node, msg)
3356 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3358 constants.MIN_VG_SIZE)
3360 raise errors.OpPrereqError("Error on node '%s': %s" %
3361 (node, vgstatus), errors.ECODE_ENVIRON)
3363 if self.op.drbd_helper:
3364 # checks given drbd helper on all nodes
3365 helpers = self.rpc.call_drbd_helper(node_list)
3366 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3368 self.LogInfo("Not checking drbd helper on offline node %s", node)
3370 msg = helpers[node].fail_msg
3372 raise errors.OpPrereqError("Error checking drbd helper on node"
3373 " '%s': %s" % (node, msg),
3374 errors.ECODE_ENVIRON)
3375 node_helper = helpers[node].payload
3376 if node_helper != self.op.drbd_helper:
3377 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3378 (node, node_helper), errors.ECODE_ENVIRON)
3380 self.cluster = cluster = self.cfg.GetClusterInfo()
3381 # validate params changes
3382 if self.op.beparams:
3383 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3384 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3386 if self.op.ndparams:
3387 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3388 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3390 # TODO: we need a more general way to handle resetting
3391 # cluster-level parameters to default values
3392 if self.new_ndparams["oob_program"] == "":
3393 self.new_ndparams["oob_program"] = \
3394 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3396 if self.op.nicparams:
3397 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3398 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3399 objects.NIC.CheckParameterSyntax(self.new_nicparams)
3402 # check all instances for consistency
3403 for instance in self.cfg.GetAllInstancesInfo().values():
3404 for nic_idx, nic in enumerate(instance.nics):
3405 params_copy = copy.deepcopy(nic.nicparams)
3406 params_filled = objects.FillDict(self.new_nicparams, params_copy)
3408 # check parameter syntax
3410 objects.NIC.CheckParameterSyntax(params_filled)
3411 except errors.ConfigurationError, err:
3412 nic_errors.append("Instance %s, nic/%d: %s" %
3413 (instance.name, nic_idx, err))
3415 # if we're moving instances to routed, check that they have an ip
3416 target_mode = params_filled[constants.NIC_MODE]
3417 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3418 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3419 " address" % (instance.name, nic_idx))
3421 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3422 "\n".join(nic_errors))
3424 # hypervisor list/parameters
3425 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3426 if self.op.hvparams:
3427 for hv_name, hv_dict in self.op.hvparams.items():
3428 if hv_name not in self.new_hvparams:
3429 self.new_hvparams[hv_name] = hv_dict
3431 self.new_hvparams[hv_name].update(hv_dict)
3433 # os hypervisor parameters
3434 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3436 for os_name, hvs in self.op.os_hvp.items():
3437 if os_name not in self.new_os_hvp:
3438 self.new_os_hvp[os_name] = hvs
3440 for hv_name, hv_dict in hvs.items():
3441 if hv_name not in self.new_os_hvp[os_name]:
3442 self.new_os_hvp[os_name][hv_name] = hv_dict
3444 self.new_os_hvp[os_name][hv_name].update(hv_dict)
3447 self.new_osp = objects.FillDict(cluster.osparams, {})
3448 if self.op.osparams:
3449 for os_name, osp in self.op.osparams.items():
3450 if os_name not in self.new_osp:
3451 self.new_osp[os_name] = {}
3453 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3456 if not self.new_osp[os_name]:
3457 # we removed all parameters
3458 del self.new_osp[os_name]
3460 # check the parameter validity (remote check)
3461 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3462 os_name, self.new_osp[os_name])
3464 # changes to the hypervisor list
3465 if self.op.enabled_hypervisors is not None:
3466 self.hv_list = self.op.enabled_hypervisors
3467 for hv in self.hv_list:
3468 # if the hypervisor doesn't already exist in the cluster
3469 # hvparams, we initialize it to empty, and then (in both
3470 # cases) we make sure to fill the defaults, as we might not
3471 # have a complete defaults list if the hypervisor wasn't
3473 if hv not in new_hvp:
3475 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3476 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3478 self.hv_list = cluster.enabled_hypervisors
3480 if self.op.hvparams or self.op.enabled_hypervisors is not None:
3481 # either the enabled list has changed, or the parameters have, validate
3482 for hv_name, hv_params in self.new_hvparams.items():
3483 if ((self.op.hvparams and hv_name in self.op.hvparams) or
3484 (self.op.enabled_hypervisors and
3485 hv_name in self.op.enabled_hypervisors)):
3486 # either this is a new hypervisor, or its parameters have changed
3487 hv_class = hypervisor.GetHypervisor(hv_name)
3488 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3489 hv_class.CheckParameterSyntax(hv_params)
3490 _CheckHVParams(self, node_list, hv_name, hv_params)
3493 # no need to check any newly-enabled hypervisors, since the
3494 # defaults have already been checked in the above code-block
3495 for os_name, os_hvp in self.new_os_hvp.items():
3496 for hv_name, hv_params in os_hvp.items():
3497 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3498 # we need to fill in the new os_hvp on top of the actual hv_p
3499 cluster_defaults = self.new_hvparams.get(hv_name, {})
3500 new_osp = objects.FillDict(cluster_defaults, hv_params)
3501 hv_class = hypervisor.GetHypervisor(hv_name)
3502 hv_class.CheckParameterSyntax(new_osp)
3503 _CheckHVParams(self, node_list, hv_name, new_osp)
3505 if self.op.default_iallocator:
3506 alloc_script = utils.FindFile(self.op.default_iallocator,
3507 constants.IALLOCATOR_SEARCH_PATH,
3509 if alloc_script is None:
3510 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3511 " specified" % self.op.default_iallocator,
3514 def Exec(self, feedback_fn):
3515 """Change the parameters of the cluster.
3518 if self.op.vg_name is not None:
3519 new_volume = self.op.vg_name
3522 if new_volume != self.cfg.GetVGName():
3523 self.cfg.SetVGName(new_volume)
3525 feedback_fn("Cluster LVM configuration already in desired"
3526 " state, not changing")
3527 if self.op.drbd_helper is not None:
3528 new_helper = self.op.drbd_helper
3531 if new_helper != self.cfg.GetDRBDHelper():
3532 self.cfg.SetDRBDHelper(new_helper)
3534 feedback_fn("Cluster DRBD helper already in desired state,"
3536 if self.op.hvparams:
3537 self.cluster.hvparams = self.new_hvparams
3539 self.cluster.os_hvp = self.new_os_hvp
3540 if self.op.enabled_hypervisors is not None:
3541 self.cluster.hvparams = self.new_hvparams
3542 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3543 if self.op.beparams:
3544 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3545 if self.op.nicparams:
3546 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3547 if self.op.osparams:
3548 self.cluster.osparams = self.new_osp
3549 if self.op.ndparams:
3550 self.cluster.ndparams = self.new_ndparams
3552 if self.op.candidate_pool_size is not None:
3553 self.cluster.candidate_pool_size = self.op.candidate_pool_size
3554 # we need to update the pool size here, otherwise the save will fail
3555 _AdjustCandidatePool(self, [])
3557 if self.op.maintain_node_health is not None:
3558 self.cluster.maintain_node_health = self.op.maintain_node_health
3560 if self.op.prealloc_wipe_disks is not None:
3561 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3563 if self.op.add_uids is not None:
3564 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3566 if self.op.remove_uids is not None:
3567 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3569 if self.op.uid_pool is not None:
3570 self.cluster.uid_pool = self.op.uid_pool
3572 if self.op.default_iallocator is not None:
3573 self.cluster.default_iallocator = self.op.default_iallocator
3575 if self.op.reserved_lvs is not None:
3576 self.cluster.reserved_lvs = self.op.reserved_lvs
3578 def helper_os(aname, mods, desc):
3580 lst = getattr(self.cluster, aname)
3581 for key, val in mods:
3582 if key == constants.DDM_ADD:
3584 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3587 elif key == constants.DDM_REMOVE:
3591 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3593 raise errors.ProgrammerError("Invalid modification '%s'" % key)
3595 if self.op.hidden_os:
3596 helper_os("hidden_os", self.op.hidden_os, "hidden")
3598 if self.op.blacklisted_os:
3599 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3601 if self.op.master_netdev:
3602 master = self.cfg.GetMasterNode()
3603 feedback_fn("Shutting down master ip on the current netdev (%s)" %
3604 self.cluster.master_netdev)
3605 result = self.rpc.call_node_stop_master(master, False)
3606 result.Raise("Could not disable the master ip")
3607 feedback_fn("Changing master_netdev from %s to %s" %
3608 (self.cluster.master_netdev, self.op.master_netdev))
3609 self.cluster.master_netdev = self.op.master_netdev
3611 self.cfg.Update(self.cluster, feedback_fn)
3613 if self.op.master_netdev:
3614 feedback_fn("Starting the master ip on the new master netdev (%s)" %
3615 self.op.master_netdev)
3616 result = self.rpc.call_node_start_master(master, False, False)
3618 self.LogWarning("Could not re-enable the master ip on"
3619 " the master, please restart manually: %s",
3623 def _UploadHelper(lu, nodes, fname):
3624 """Helper for uploading a file and showing warnings.
3627 if os.path.exists(fname):
3628 result = lu.rpc.call_upload_file(nodes, fname)
3629 for to_node, to_result in result.items():
3630 msg = to_result.fail_msg
3632 msg = ("Copy of file %s to node %s failed: %s" %
3633 (fname, to_node, msg))
3634 lu.proc.LogWarning(msg)
3637 def _ComputeAncillaryFiles(cluster, redist):
3638 """Compute files external to Ganeti which need to be consistent.
3640 @type redist: boolean
3641 @param redist: Whether to include files which need to be redistributed
3644 # Compute files for all nodes
3646 constants.SSH_KNOWN_HOSTS_FILE,
3647 constants.CONFD_HMAC_KEY,
3648 constants.CLUSTER_DOMAIN_SECRET_FILE,
3652 files_all.update(constants.ALL_CERT_FILES)
3653 files_all.update(ssconf.SimpleStore().GetFileList())
3655 if cluster.modify_etc_hosts:
3656 files_all.add(constants.ETC_HOSTS)
3658 # Files which must either exist on all nodes or on none
3659 files_all_opt = set([
3660 constants.RAPI_USERS_FILE,
3663 # Files which should only be on master candidates
3666 files_mc.add(constants.CLUSTER_CONF_FILE)
3668 # Files which should only be on VM-capable nodes
3669 files_vm = set(filename
3670 for hv_name in cluster.enabled_hypervisors
3671 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())
3673 # Filenames must be unique
3674 assert (len(files_all | files_all_opt | files_mc | files_vm) ==
3675 sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
3676 "Found file listed in more than one file list"
3678 return (files_all, files_all_opt, files_mc, files_vm)
3681 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3682 """Distribute additional files which are part of the cluster configuration.
3684 ConfigWriter takes care of distributing the config and ssconf files, but
3685 there are more files which should be distributed to all nodes. This function
3686 makes sure those are copied.
3688 @param lu: calling logical unit
3689 @param additional_nodes: list of nodes not in the config to distribute to
3690 @type additional_vm: boolean
3691 @param additional_vm: whether the additional nodes are vm-capable or not
3694 # Gather target nodes
3695 cluster = lu.cfg.GetClusterInfo()
3696 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3698 online_nodes = lu.cfg.GetOnlineNodeList()
3699 vm_nodes = lu.cfg.GetVmCapableNodeList()
3701 if additional_nodes is not None:
3702 online_nodes.extend(additional_nodes)
3704 vm_nodes.extend(additional_nodes)
3706 # Never distribute to master node
3707 for nodelist in [online_nodes, vm_nodes]:
3708 if master_info.name in nodelist:
3709 nodelist.remove(master_info.name)
3712 (files_all, files_all_opt, files_mc, files_vm) = \
3713 _ComputeAncillaryFiles(cluster, True)
3715 # Never re-distribute configuration file from here
3716 assert not (constants.CLUSTER_CONF_FILE in files_all or
3717 constants.CLUSTER_CONF_FILE in files_vm)
3718 assert not files_mc, "Master candidates not handled in this function"
3721 (online_nodes, files_all),
3722 (online_nodes, files_all_opt),
3723 (vm_nodes, files_vm),
3727 for (node_list, files) in filemap:
3729 _UploadHelper(lu, node_list, fname)
3732 class LUClusterRedistConf(NoHooksLU):
3733 """Force the redistribution of cluster configuration.
3735 This is a very simple LU.
3740 def ExpandNames(self):
3741 self.needed_locks = {
3742 locking.LEVEL_NODE: locking.ALL_SET,
3744 self.share_locks[locking.LEVEL_NODE] = 1
3746 def Exec(self, feedback_fn):
3747 """Redistribute the configuration.
3750 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3751 _RedistributeAncillaryFiles(self)
3754 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3755 """Sleep and poll for an instance's disk to sync.
3758 if not instance.disks or disks is not None and not disks:
3761 disks = _ExpandCheckDisks(instance, disks)
3764 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3766 node = instance.primary_node
3769 lu.cfg.SetDiskID(dev, node)
3771 # TODO: Convert to utils.Retry
3774 degr_retries = 10 # in seconds, as we sleep 1 second each time
3778 cumul_degraded = False
3779 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3780 msg = rstats.fail_msg
3782 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3785 raise errors.RemoteError("Can't contact node %s for mirror data,"
3786 " aborting." % node)
3789 rstats = rstats.payload
3791 for i, mstat in enumerate(rstats):
3793 lu.LogWarning("Can't compute data for node %s/%s",
3794 node, disks[i].iv_name)
3797 cumul_degraded = (cumul_degraded or
3798 (mstat.is_degraded and mstat.sync_percent is None))
3799 if mstat.sync_percent is not None:
3801 if mstat.estimated_time is not None:
3802 rem_time = ("%s remaining (estimated)" %
3803 utils.FormatSeconds(mstat.estimated_time))
3804 max_time = mstat.estimated_time
3806 rem_time = "no time estimate"
3807 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3808 (disks[i].iv_name, mstat.sync_percent, rem_time))
3810 # if we're done but degraded, let's do a few small retries, to
3811 # make sure we see a stable and not transient situation; therefore
3812 # we force restart of the loop
3813 if (done or oneshot) and cumul_degraded and degr_retries > 0:
3814 logging.info("Degraded disks found, %d retries left", degr_retries)
3822 time.sleep(min(60, max_time))
3825 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3826 return not cumul_degraded
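# Illustrative caller pattern (a sketch, not taken verbatim from this file):
# LUs that create or resync disks typically do something like
#   if not _WaitForSync(self, instance):
#     raise errors.OpExecError("Disk sync did not complete cleanly")
# where the error message is hypothetical; only the call shape matters.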
3829 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3830 """Check that mirrors are not degraded.
3832 The ldisk parameter, if True, will change the test from the
3833 is_degraded attribute (which represents overall non-ok status for
3834 the device(s)) to the ldisk (representing the local storage status).
3837 lu.cfg.SetDiskID(dev, node)
3841 if on_primary or dev.AssembleOnSecondary():
3842 rstats = lu.rpc.call_blockdev_find(node, dev)
3843 msg = rstats.fail_msg
3845 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3847 elif not rstats.payload:
3848 lu.LogWarning("Can't find disk on node %s", node)
3852 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3854 result = result and not rstats.payload.is_degraded
3857 for child in dev.children:
3858 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3863 class LUOobCommand(NoHooksLU):
3864 """Logical unit for OOB handling.
3868 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
3870 def ExpandNames(self):
3871 """Gather locks we need.
3874 if self.op.node_names:
3875 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
3876 lock_names = self.op.node_names
3878 lock_names = locking.ALL_SET
3880 self.needed_locks = {
3881 locking.LEVEL_NODE: lock_names,
3884 def CheckPrereq(self):
3885 """Check prerequisites.
3888 - the node exists in the configuration
3891 Any errors are signaled by raising errors.OpPrereqError.
3895 self.master_node = self.cfg.GetMasterNode()
3897 assert self.op.power_delay >= 0.0
3899 if self.op.node_names:
3900 if (self.op.command in self._SKIP_MASTER and
3901 self.master_node in self.op.node_names):
3902 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
3903 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
3905 if master_oob_handler:
3906 additional_text = ("run '%s %s %s' if you want to operate on the"
3907 " master regardless") % (master_oob_handler,
3911 additional_text = "it does not support out-of-band operations"
3913 raise errors.OpPrereqError(("Operating on the master node %s is not"
3914 " allowed for %s; %s") %
3915 (self.master_node, self.op.command,
3916 additional_text), errors.ECODE_INVAL)
3918 self.op.node_names = self.cfg.GetNodeList()
3919 if self.op.command in self._SKIP_MASTER:
3920 self.op.node_names.remove(self.master_node)
3922 if self.op.command in self._SKIP_MASTER:
3923 assert self.master_node not in self.op.node_names
3925 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
3927 raise errors.OpPrereqError("Node %s not found" % node_name,
3930 self.nodes.append(node)
3932 if (not self.op.ignore_status and
3933 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
3934 raise errors.OpPrereqError(("Cannot power off node %s because it is"
3935 " not marked offline") % node_name,
3938 def Exec(self, feedback_fn):
3939 """Execute OOB and return result if we expect any.
3942 master_node = self.master_node
3945 for idx, node in enumerate(utils.NiceSort(self.nodes,
3946 key=lambda node: node.name)):
3947 node_entry = [(constants.RS_NORMAL, node.name)]
3948 ret.append(node_entry)
3950 oob_program = _SupportsOob(self.cfg, node)
3953 node_entry.append((constants.RS_UNAVAIL, None))
3956 logging.info("Executing out-of-band command '%s' using '%s' on %s",
3957 self.op.command, oob_program, node.name)
3958 result = self.rpc.call_run_oob(master_node, oob_program,
3959 self.op.command, node.name,
3963 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
3964 node.name, result.fail_msg)
3965 node_entry.append((constants.RS_NODATA, None))
3968 self._CheckPayload(result)
3969 except errors.OpExecError, err:
3970 self.LogWarning("Payload returned by node '%s' is not valid: %s",
3972 node_entry.append((constants.RS_NODATA, None))
3974 if self.op.command == constants.OOB_HEALTH:
3975 # For health we should log important events
3976 for item, status in result.payload:
3977 if status in [constants.OOB_STATUS_WARNING,
3978 constants.OOB_STATUS_CRITICAL]:
3979 self.LogWarning("Item '%s' on node '%s' has status '%s'",
3980 item, node.name, status)
3982 if self.op.command == constants.OOB_POWER_ON:
3984 elif self.op.command == constants.OOB_POWER_OFF:
3985 node.powered = False
3986 elif self.op.command == constants.OOB_POWER_STATUS:
3987 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
3988 if powered != node.powered:
3989 logging.warning(("Recorded power state (%s) of node '%s' does not"
3990 " match actual power state (%s)"), node.powered,
3993 # For configuration changing commands we should update the node
3994 if self.op.command in (constants.OOB_POWER_ON,
3995 constants.OOB_POWER_OFF):
3996 self.cfg.Update(node, feedback_fn)
3998 node_entry.append((constants.RS_NORMAL, result.payload))
4000 if (self.op.command == constants.OOB_POWER_ON and
4001 idx < len(self.nodes) - 1):
4002 time.sleep(self.op.power_delay)
4006 def _CheckPayload(self, result):
4007 """Checks if the payload is valid.
4009 @param result: RPC result
4010 @raises errors.OpExecError: If payload is not valid
4014 if self.op.command == constants.OOB_HEALTH:
4015 if not isinstance(result.payload, list):
4016 errs.append("command 'health' is expected to return a list but got %s" %
4017 type(result.payload))
4019 for item, status in result.payload:
4020 if status not in constants.OOB_STATUSES:
4021 errs.append("health item '%s' has invalid status '%s'" %
4024 if self.op.command == constants.OOB_POWER_STATUS:
4025 if not isinstance(result.payload, dict):
4026 errs.append("power-status is expected to return a dict but got %s" %
4027 type(result.payload))
4029 if self.op.command in [
4030 constants.OOB_POWER_ON,
4031 constants.OOB_POWER_OFF,
4032 constants.OOB_POWER_CYCLE,
4034 if result.payload is not None:
4035 errs.append("%s is expected to not return payload but got '%s'" %
4036 (self.op.command, result.payload))
4039 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4040 utils.CommaJoin(errs))
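# For reference, the payload shapes accepted by LUOobCommand._CheckPayload
# above (derived from its checks; the concrete values are only examples):
#   health:              [("PSU0", constants.OOB_STATUS_WARNING), ...]
#   power-status:        {constants.OOB_POWER_STATUS_POWERED: True}
#   power-on/off/cycle:  None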
4042 class _OsQuery(_QueryBase):
4043 FIELDS = query.OS_FIELDS
4045 def ExpandNames(self, lu):
4046 # Lock all nodes in shared mode
4047 # Temporary removal of locks, should be reverted later
4048 # TODO: reintroduce locks when they are lighter-weight
4049 lu.needed_locks = {}
4050 #self.share_locks[locking.LEVEL_NODE] = 1
4051 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4053 # The following variables interact with _QueryBase._GetNames
4055 self.wanted = self.names
4057 self.wanted = locking.ALL_SET
4059 self.do_locking = self.use_locking
4061 def DeclareLocks(self, lu, level):
4065 def _DiagnoseByOS(rlist):
4066 """Remaps a per-node return list into an a per-os per-node dictionary
4068 @param rlist: a map with node names as keys and OS objects as values
4071 @return: a dictionary with osnames as keys and as value another
4072 map, with nodes as keys and tuples of (path, status, diagnose,
4073 variants, parameters, api_versions) as values, eg::
4075 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4076 (/srv/..., False, "invalid api")],
4077 "node2": [(/srv/..., True, "", [], [])]}
4082 # we build here the list of nodes that didn't fail the RPC (at RPC
4083 # level), so that nodes with a non-responding node daemon don't
4084 # make all OSes invalid
4085 good_nodes = [node_name for node_name in rlist
4086 if not rlist[node_name].fail_msg]
4087 for node_name, nr in rlist.items():
4088 if nr.fail_msg or not nr.payload:
4090 for (name, path, status, diagnose, variants,
4091 params, api_versions) in nr.payload:
4092 if name not in all_os:
4093 # build a list of nodes for this os containing empty lists
4094 # for each node in node_list
4096 for nname in good_nodes:
4097 all_os[name][nname] = []
4098 # convert params from [name, help] to (name, help)
4099 params = [tuple(v) for v in params]
4100 all_os[name][node_name].append((path, status, diagnose,
4101 variants, params, api_versions))
4104 def _GetQueryData(self, lu):
4105 """Computes the list of nodes and their attributes.
4108 # Locking is not used
4109 assert not (compat.any(lu.glm.is_owned(level)
4110 for level in locking.LEVELS
4111 if level != locking.LEVEL_CLUSTER) or
4112 self.do_locking or self.use_locking)
4114 valid_nodes = [node.name
4115 for node in lu.cfg.GetAllNodesInfo().values()
4116 if not node.offline and node.vm_capable]
4117 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4118 cluster = lu.cfg.GetClusterInfo()
4122 for (os_name, os_data) in pol.items():
4123 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4124 hidden=(os_name in cluster.hidden_os),
4125 blacklisted=(os_name in cluster.blacklisted_os))
4129 api_versions = set()
4131 for idx, osl in enumerate(os_data.values()):
4132 info.valid = bool(info.valid and osl and osl[0][1])
4136 (node_variants, node_params, node_api) = osl[0][3:6]
4139 variants.update(node_variants)
4140 parameters.update(node_params)
4141 api_versions.update(node_api)
4143 # Filter out inconsistent values
4144 variants.intersection_update(node_variants)
4145 parameters.intersection_update(node_params)
4146 api_versions.intersection_update(node_api)
4148 info.variants = list(variants)
4149 info.parameters = list(parameters)
4150 info.api_versions = list(api_versions)
4152 data[os_name] = info
4154 # Prepare data in requested order
4155 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4159 class LUOsDiagnose(NoHooksLU):
4160 """Logical unit for OS diagnose/query.
4166 def _BuildFilter(fields, names):
4167 """Builds a filter for querying OSes.
4170 name_filter = qlang.MakeSimpleFilter("name", names)
4172 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4173 # respective field is not requested
4174 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4175 for fname in ["hidden", "blacklisted"]
4176 if fname not in fields]
4177 if "valid" not in fields:
4178 status_filter.append([qlang.OP_TRUE, "valid"])
4181 status_filter.insert(0, qlang.OP_AND)
4183 status_filter = None
4185 if name_filter and status_filter:
4186 return [qlang.OP_AND, name_filter, status_filter]
4190 return status_filter
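# Worked example, derived from the logic above: for fields=["name", "variants"]
# and names=["debian-etch"], none of "hidden", "blacklisted" or "valid" is
# requested, so the status filter becomes
#   [qlang.OP_AND,
#    [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
#    [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
#    [qlang.OP_TRUE, "valid"]]
# and the final result is [qlang.OP_AND, <name filter>, <status filter>].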
4192 def CheckArguments(self):
4193 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4194 self.op.output_fields, False)
4196 def ExpandNames(self):
4197 self.oq.ExpandNames(self)
4199 def Exec(self, feedback_fn):
4200 return self.oq.OldStyleQuery(self)
4203 class LUNodeRemove(LogicalUnit):
4204 """Logical unit for removing a node.
4207 HPATH = "node-remove"
4208 HTYPE = constants.HTYPE_NODE
4210 def BuildHooksEnv(self):
4213 This doesn't run on the target node in the pre phase as a failed
4214 node would then be impossible to remove.
4218 "OP_TARGET": self.op.node_name,
4219 "NODE_NAME": self.op.node_name,
4222 def BuildHooksNodes(self):
4223 """Build hooks nodes.
4226 all_nodes = self.cfg.GetNodeList()
4228 all_nodes.remove(self.op.node_name)
4230 logging.warning("Node '%s', which is about to be removed, was not found"
4231 " in the list of all nodes", self.op.node_name)
4232 return (all_nodes, all_nodes)
4234 def CheckPrereq(self):
4235 """Check prerequisites.
4238 - the node exists in the configuration
4239 - it does not have primary or secondary instances
4240 - it's not the master
4242 Any errors are signaled by raising errors.OpPrereqError.
4245 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4246 node = self.cfg.GetNodeInfo(self.op.node_name)
4247 assert node is not None
4249 masternode = self.cfg.GetMasterNode()
4250 if node.name == masternode:
4251 raise errors.OpPrereqError("Node is the master node, failover to another"
4252 " node is required", errors.ECODE_INVAL)
4254 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4255 if node.name in instance.all_nodes:
4256 raise errors.OpPrereqError("Instance %s is still running on the node,"
4257 " please remove first" % instance_name,
4259 self.op.node_name = node.name
4262 def Exec(self, feedback_fn):
4263 """Removes the node from the cluster.
4267 logging.info("Stopping the node daemon and removing configs from node %s",
4270 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4272 # Promote nodes to master candidate as needed
4273 _AdjustCandidatePool(self, exceptions=[node.name])
4274 self.context.RemoveNode(node.name)
4276 # Run post hooks on the node before it's removed
4277 _RunPostHook(self, node.name)
4279 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4280 msg = result.fail_msg
4282 self.LogWarning("Errors encountered on the remote node while leaving"
4283 " the cluster: %s", msg)
4285 # Remove node from our /etc/hosts
4286 if self.cfg.GetClusterInfo().modify_etc_hosts:
4287 master_node = self.cfg.GetMasterNode()
4288 result = self.rpc.call_etc_hosts_modify(master_node,
4289 constants.ETC_HOSTS_REMOVE,
4291 result.Raise("Can't update hosts file with new host data")
4292 _RedistributeAncillaryFiles(self)
4295 class _NodeQuery(_QueryBase):
4296 FIELDS = query.NODE_FIELDS
4298 def ExpandNames(self, lu):
4299 lu.needed_locks = {}
4300 lu.share_locks[locking.LEVEL_NODE] = 1
4303 self.wanted = _GetWantedNodes(lu, self.names)
4305 self.wanted = locking.ALL_SET
4307 self.do_locking = (self.use_locking and
4308 query.NQ_LIVE in self.requested_data)
4311 # if we don't request only static fields, we need to lock the nodes
4312 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4314 def DeclareLocks(self, lu, level):
4317 def _GetQueryData(self, lu):
4318 """Computes the list of nodes and their attributes.
4321 all_info = lu.cfg.GetAllNodesInfo()
4323 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4325 # Gather data as requested
4326 if query.NQ_LIVE in self.requested_data:
4327 # filter out non-vm_capable nodes
4328 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4330 node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
4331 lu.cfg.GetHypervisorType())
4332 live_data = dict((name, nresult.payload)
4333 for (name, nresult) in node_data.items()
4334 if not nresult.fail_msg and nresult.payload)
4338 if query.NQ_INST in self.requested_data:
4339 node_to_primary = dict([(name, set()) for name in nodenames])
4340 node_to_secondary = dict([(name, set()) for name in nodenames])
4342 inst_data = lu.cfg.GetAllInstancesInfo()
4344 for inst in inst_data.values():
4345 if inst.primary_node in node_to_primary:
4346 node_to_primary[inst.primary_node].add(inst.name)
4347 for secnode in inst.secondary_nodes:
4348 if secnode in node_to_secondary:
4349 node_to_secondary[secnode].add(inst.name)
4351 node_to_primary = None
4352 node_to_secondary = None
4354 if query.NQ_OOB in self.requested_data:
4355 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4356 for name, node in all_info.iteritems())
4360 if query.NQ_GROUP in self.requested_data:
4361 groups = lu.cfg.GetAllNodeGroupsInfo()
4365 return query.NodeQueryData([all_info[name] for name in nodenames],
4366 live_data, lu.cfg.GetMasterNode(),
4367 node_to_primary, node_to_secondary, groups,
4368 oob_support, lu.cfg.GetClusterInfo())
4371 class LUNodeQuery(NoHooksLU):
4372 """Logical unit for querying nodes.
4375 # pylint: disable-msg=W0142
4378 def CheckArguments(self):
4379 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4380 self.op.output_fields, self.op.use_locking)
4382 def ExpandNames(self):
4383 self.nq.ExpandNames(self)
4385 def Exec(self, feedback_fn):
4386 return self.nq.OldStyleQuery(self)
4389 class LUNodeQueryvols(NoHooksLU):
4390 """Logical unit for getting volumes on node(s).
4394 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4395 _FIELDS_STATIC = utils.FieldSet("node")
4397 def CheckArguments(self):
4398 _CheckOutputFields(static=self._FIELDS_STATIC,
4399 dynamic=self._FIELDS_DYNAMIC,
4400 selected=self.op.output_fields)
4402 def ExpandNames(self):
4403 self.needed_locks = {}
4404 self.share_locks[locking.LEVEL_NODE] = 1
4405 if not self.op.nodes:
4406 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4408 self.needed_locks[locking.LEVEL_NODE] = \
4409 _GetWantedNodes(self, self.op.nodes)
4411 def Exec(self, feedback_fn):
4412 """Computes the list of nodes and their attributes.
4415 nodenames = self.owned_locks(locking.LEVEL_NODE)
4416 volumes = self.rpc.call_node_volumes(nodenames)
4418 ilist = self.cfg.GetAllInstancesInfo()
4419 vol2inst = _MapInstanceDisksToNodes(ilist.values())
4422 for node in nodenames:
4423 nresult = volumes[node]
4426 msg = nresult.fail_msg
4428 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
4431 node_vols = sorted(nresult.payload,
4432 key=operator.itemgetter("dev"))
4434 for vol in node_vols:
4436 for field in self.op.output_fields:
4439 elif field == "phys":
4443 elif field == "name":
4445 elif field == "size":
4446 val = int(float(vol["size"]))
4447 elif field == "instance":
4448 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
4450 raise errors.ParameterError(field)
4451 node_output.append(str(val))
4453 output.append(node_output)
4458 class LUNodeQueryStorage(NoHooksLU):
4459 """Logical unit for getting information on storage units on node(s).
4462 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4465 def CheckArguments(self):
4466 _CheckOutputFields(static=self._FIELDS_STATIC,
4467 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4468 selected=self.op.output_fields)
4470 def ExpandNames(self):
4471 self.needed_locks = {}
4472 self.share_locks[locking.LEVEL_NODE] = 1
4475 self.needed_locks[locking.LEVEL_NODE] = \
4476 _GetWantedNodes(self, self.op.nodes)
4478 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4480 def Exec(self, feedback_fn):
4481 """Computes the list of nodes and their attributes.
4484 self.nodes = self.owned_locks(locking.LEVEL_NODE)
4486 # Always get name to sort by
4487 if constants.SF_NAME in self.op.output_fields:
4488 fields = self.op.output_fields[:]
4490 fields = [constants.SF_NAME] + self.op.output_fields
4492 # Never ask for node or type as it's only known to the LU
4493 for extra in [constants.SF_NODE, constants.SF_TYPE]:
4494 while extra in fields:
4495 fields.remove(extra)
4497 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4498 name_idx = field_idx[constants.SF_NAME]
4500 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4501 data = self.rpc.call_storage_list(self.nodes,
4502 self.op.storage_type, st_args,
4503 self.op.name, fields)
4507 for node in utils.NiceSort(self.nodes):
4508 nresult = data[node]
4512 msg = nresult.fail_msg
4514 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4517 rows = dict([(row[name_idx], row) for row in nresult.payload])
4519 for name in utils.NiceSort(rows.keys()):
4524 for field in self.op.output_fields:
4525 if field == constants.SF_NODE:
4527 elif field == constants.SF_TYPE:
4528 val = self.op.storage_type
4529 elif field in field_idx:
4530 val = row[field_idx[field]]
4532 raise errors.ParameterError(field)
4541 class _InstanceQuery(_QueryBase):
4542 FIELDS = query.INSTANCE_FIELDS
4544 def ExpandNames(self, lu):
4545 lu.needed_locks = {}
4546 lu.share_locks = _ShareAll()
4549 self.wanted = _GetWantedInstances(lu, self.names)
4551 self.wanted = locking.ALL_SET
4553 self.do_locking = (self.use_locking and
4554 query.IQ_LIVE in self.requested_data)
4556 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4557 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
4558 lu.needed_locks[locking.LEVEL_NODE] = []
4559 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4561 self.do_grouplocks = (self.do_locking and
4562 query.IQ_NODES in self.requested_data)
4564 def DeclareLocks(self, lu, level):
4566 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
4567 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
4569 # Lock all groups used by instances optimistically; this requires going
4570 # via the node before it's locked, requiring verification later on
4571 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
4573 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
4574 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
4575 elif level == locking.LEVEL_NODE:
4576 lu._LockInstancesNodes() # pylint: disable-msg=W0212
4579 def _CheckGroupLocks(lu):
4580 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
4581 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
4583 # Check if node groups for locked instances are still correct
4584 for instance_name in owned_instances:
4585 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
4587 def _GetQueryData(self, lu):
4588 """Computes the list of instances and their attributes.
4591 if self.do_grouplocks:
4592 self._CheckGroupLocks(lu)
4594 cluster = lu.cfg.GetClusterInfo()
4595 all_info = lu.cfg.GetAllInstancesInfo()
4597 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4599 instance_list = [all_info[name] for name in instance_names]
4600 nodes = frozenset(itertools.chain(*(inst.all_nodes
4601 for inst in instance_list)))
4602 hv_list = list(set([inst.hypervisor for inst in instance_list]))
4605 wrongnode_inst = set()
4607 # Gather data as requested
4608 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4610 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4612 result = node_data[name]
4614 # offline nodes will be in both lists
4615 assert result.fail_msg
4616 offline_nodes.append(name)
4618 bad_nodes.append(name)
4619 elif result.payload:
4620 for inst in result.payload:
4621 if inst in all_info:
4622 if all_info[inst].primary_node == name:
4623 live_data.update(result.payload)
4625 wrongnode_inst.add(inst)
4627 # orphan instance; we don't list it here as we don't
4628 # handle this case yet in the output of instance listing
4629 logging.warning("Orphan instance '%s' found on node %s",
4631 # else no instance is alive
4635 if query.IQ_DISKUSAGE in self.requested_data:
4636 disk_usage = dict((inst.name,
4637 _ComputeDiskSize(inst.disk_template,
4638 [{constants.IDISK_SIZE: disk.size}
4639 for disk in inst.disks]))
4640 for inst in instance_list)
4644 if query.IQ_CONSOLE in self.requested_data:
4646 for inst in instance_list:
4647 if inst.name in live_data:
4648 # Instance is running
4649 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4651 consinfo[inst.name] = None
4652 assert set(consinfo.keys()) == set(instance_names)
4656 if query.IQ_NODES in self.requested_data:
4657 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
4659 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
4660 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
4661 for uuid in set(map(operator.attrgetter("group"),
4667 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4668 disk_usage, offline_nodes, bad_nodes,
4669 live_data, wrongnode_inst, consinfo,
4673 class LUQuery(NoHooksLU):
4674 """Query for resources/items of a certain kind.
4677 # pylint: disable-msg=W0142
4680 def CheckArguments(self):
4681 qcls = _GetQueryImplementation(self.op.what)
4683 self.impl = qcls(self.op.filter, self.op.fields, False)
4685 def ExpandNames(self):
4686 self.impl.ExpandNames(self)
4688 def DeclareLocks(self, level):
4689 self.impl.DeclareLocks(self, level)
4691 def Exec(self, feedback_fn):
4692 return self.impl.NewStyleQuery(self)
4695 class LUQueryFields(NoHooksLU):
4696 """Query for resources/items of a certain kind.
4699 # pylint: disable-msg=W0142
4702 def CheckArguments(self):
4703 self.qcls = _GetQueryImplementation(self.op.what)
4705 def ExpandNames(self):
4706 self.needed_locks = {}
4708 def Exec(self, feedback_fn):
4709 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4712 class LUNodeModifyStorage(NoHooksLU):
4713 """Logical unit for modifying a storage volume on a node.
4718 def CheckArguments(self):
4719 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4721 storage_type = self.op.storage_type
4724 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4726 raise errors.OpPrereqError("Storage units of type '%s' can not be"
4727 " modified" % storage_type,
4730 diff = set(self.op.changes.keys()) - modifiable
4732 raise errors.OpPrereqError("The following fields can not be modified for"
4733 " storage units of type '%s': %r" %
4734 (storage_type, list(diff)),
4737 def ExpandNames(self):
4738 self.needed_locks = {
4739 locking.LEVEL_NODE: self.op.node_name,
4742 def Exec(self, feedback_fn):
4743 """Computes the list of nodes and their attributes.
4746 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4747 result = self.rpc.call_storage_modify(self.op.node_name,
4748 self.op.storage_type, st_args,
4749 self.op.name, self.op.changes)
4750 result.Raise("Failed to modify storage unit '%s' on %s" %
4751 (self.op.name, self.op.node_name))
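# Illustrative opcode for this LU (a sketch; it assumes LVM PV storage
# exposes a modifiable "allocatable" field, and the node/device names are
# made up):
#   opcodes.OpNodeModifyStorage(node_name="node1.example.com",
#                               storage_type=constants.ST_LVM_PV,
#                               name="/dev/sdb1",
#                               changes={constants.SF_ALLOCATABLE: False})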
4754 class LUNodeAdd(LogicalUnit):
4755 """Logical unit for adding node to the cluster.
4759 HTYPE = constants.HTYPE_NODE
4760 _NFLAGS = ["master_capable", "vm_capable"]
4762 def CheckArguments(self):
4763 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4764 # validate/normalize the node name
4765 self.hostname = netutils.GetHostname(name=self.op.node_name,
4766 family=self.primary_ip_family)
4767 self.op.node_name = self.hostname.name
4769 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
4770 raise errors.OpPrereqError("Cannot readd the master node",
4773 if self.op.readd and self.op.group:
4774 raise errors.OpPrereqError("Cannot pass a node group when a node is"
4775 " being readded", errors.ECODE_INVAL)
4777 def BuildHooksEnv(self):
4780 This will run on all nodes before, and on all nodes + the new node after.
4784 "OP_TARGET": self.op.node_name,
4785 "NODE_NAME": self.op.node_name,
4786 "NODE_PIP": self.op.primary_ip,
4787 "NODE_SIP": self.op.secondary_ip,
4788 "MASTER_CAPABLE": str(self.op.master_capable),
4789 "VM_CAPABLE": str(self.op.vm_capable),
4792 def BuildHooksNodes(self):
4793 """Build hooks nodes.
4796 # Exclude added node
4797 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
4798 post_nodes = pre_nodes + [self.op.node_name, ]
4800 return (pre_nodes, post_nodes)
4802 def CheckPrereq(self):
4803 """Check prerequisites.
4806 - the new node is not already in the config
4808 - its parameters (single/dual homed) matches the cluster
4810 Any errors are signaled by raising errors.OpPrereqError.
4814 hostname = self.hostname
4815 node = hostname.name
4816 primary_ip = self.op.primary_ip = hostname.ip
4817 if self.op.secondary_ip is None:
4818 if self.primary_ip_family == netutils.IP6Address.family:
4819 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
4820 " IPv4 address must be given as secondary",
4822 self.op.secondary_ip = primary_ip
4824 secondary_ip = self.op.secondary_ip
4825 if not netutils.IP4Address.IsValid(secondary_ip):
4826 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4827 " address" % secondary_ip, errors.ECODE_INVAL)
4829 node_list = cfg.GetNodeList()
4830 if not self.op.readd and node in node_list:
4831 raise errors.OpPrereqError("Node %s is already in the configuration" %
4832 node, errors.ECODE_EXISTS)
4833 elif self.op.readd and node not in node_list:
4834 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4837 self.changed_primary_ip = False
4839 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
4840 if self.op.readd and node == existing_node_name:
4841 if existing_node.secondary_ip != secondary_ip:
4842 raise errors.OpPrereqError("Readded node doesn't have the same IP"
4843 " address configuration as before",
4845 if existing_node.primary_ip != primary_ip:
4846 self.changed_primary_ip = True
4850 if (existing_node.primary_ip == primary_ip or
4851 existing_node.secondary_ip == primary_ip or
4852 existing_node.primary_ip == secondary_ip or
4853 existing_node.secondary_ip == secondary_ip):
4854 raise errors.OpPrereqError("New node ip address(es) conflict with"
4855 " existing node %s" % existing_node.name,
4856 errors.ECODE_NOTUNIQUE)
4858 # After this 'if' block, None is no longer a valid value for the
4859 # _capable op attributes
4861 old_node = self.cfg.GetNodeInfo(node)
4862 assert old_node is not None, "Can't retrieve locked node %s" % node
4863 for attr in self._NFLAGS:
4864 if getattr(self.op, attr) is None:
4865 setattr(self.op, attr, getattr(old_node, attr))
4867 for attr in self._NFLAGS:
4868 if getattr(self.op, attr) is None:
4869 setattr(self.op, attr, True)
4871 if self.op.readd and not self.op.vm_capable:
4872 pri, sec = cfg.GetNodeInstances(node)
4874 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
4875 " flag set to false, but it already holds"
4876 " instances" % node,
4879 # check that the type of the node (single versus dual homed) is the
4880 # same as for the master
4881 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
4882 master_singlehomed = myself.secondary_ip == myself.primary_ip
4883 newbie_singlehomed = secondary_ip == primary_ip
4884 if master_singlehomed != newbie_singlehomed:
4885 if master_singlehomed:
4886 raise errors.OpPrereqError("The master has no secondary ip but the"
4887 " new node has one",
4890 raise errors.OpPrereqError("The master has a secondary ip but the"
4891 " new node doesn't have one",
4894 # checks reachability
4895 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
4896 raise errors.OpPrereqError("Node not reachable by ping",
4897 errors.ECODE_ENVIRON)
4899 if not newbie_singlehomed:
4900 # check reachability from my secondary ip to newbie's secondary ip
4901 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
4902 source=myself.secondary_ip):
4903 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4904 " based ping to node daemon port",
4905 errors.ECODE_ENVIRON)
4912 if self.op.master_capable:
4913 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
4915 self.master_candidate = False
4918 self.new_node = old_node
4920 node_group = cfg.LookupNodeGroup(self.op.group)
4921 self.new_node = objects.Node(name=node,
4922 primary_ip=primary_ip,
4923 secondary_ip=secondary_ip,
4924 master_candidate=self.master_candidate,
4925 offline=False, drained=False,
4928 if self.op.ndparams:
4929 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4931 def Exec(self, feedback_fn):
4932 """Adds the new node to the cluster.
4935 new_node = self.new_node
4936 node = new_node.name
4938 # We are adding a new node, so we assume it's powered
4939 new_node.powered = True
4941 # for re-adds, reset the offline/drained/master-candidate flags;
4942 # we need to reset here, otherwise offline would prevent RPC calls
4943 # later in the procedure; this also means that if the re-add
4944 # fails, we are left with a non-offlined, broken node
4946 new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
4947 self.LogInfo("Readding a node, the offline/drained flags were reset")
4948 # if we demote the node, we do cleanup later in the procedure
4949 new_node.master_candidate = self.master_candidate
4950 if self.changed_primary_ip:
4951 new_node.primary_ip = self.op.primary_ip
4953 # copy the master/vm_capable flags
4954 for attr in self._NFLAGS:
4955 setattr(new_node, attr, getattr(self.op, attr))
4957 # notify the user about any possible mc promotion
4958 if new_node.master_candidate:
4959 self.LogInfo("Node will be a master candidate")
4961 if self.op.ndparams:
4962 new_node.ndparams = self.op.ndparams
4964 new_node.ndparams = {}
4966 # check connectivity
4967 result = self.rpc.call_version([node])[node]
4968 result.Raise("Can't get version information from node %s" % node)
4969 if constants.PROTOCOL_VERSION == result.payload:
4970 logging.info("Communication to node %s fine, sw version %s match",
4971 node, result.payload)
4973 raise errors.OpExecError("Version mismatch master version %s,"
4974 " node version %s" %
4975 (constants.PROTOCOL_VERSION, result.payload))
4977 # Add node to our /etc/hosts, and add key to known_hosts
4978 if self.cfg.GetClusterInfo().modify_etc_hosts:
4979 master_node = self.cfg.GetMasterNode()
4980 result = self.rpc.call_etc_hosts_modify(master_node,
4981 constants.ETC_HOSTS_ADD,
4984 result.Raise("Can't update hosts file with new host data")
4986 if new_node.secondary_ip != new_node.primary_ip:
4987 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
4990 node_verify_list = [self.cfg.GetMasterNode()]
4991 node_verify_param = {
4992 constants.NV_NODELIST: [node],
4993 # TODO: do a node-net-test as well?
4996 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
4997 self.cfg.GetClusterName())
4998 for verifier in node_verify_list:
4999 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5000 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5002 for failed in nl_payload:
5003 feedback_fn("ssh/hostname verification failed"
5004 " (checking from %s): %s" %
5005 (verifier, nl_payload[failed]))
5006 raise errors.OpExecError("ssh/hostname verification failed")
5009 _RedistributeAncillaryFiles(self)
5010 self.context.ReaddNode(new_node)
5011 # make sure we redistribute the config
5012 self.cfg.Update(new_node, feedback_fn)
5013 # and make sure the new node will not have old files around
5014 if not new_node.master_candidate:
5015 result = self.rpc.call_node_demote_from_mc(new_node.name)
5016 msg = result.fail_msg
5018 self.LogWarning("Node failed to demote itself from master"
5019 " candidate status: %s" % msg)
5021 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5022 additional_vm=self.op.vm_capable)
5023 self.context.AddNode(new_node, self.proc.GetECId())
5026 class LUNodeSetParams(LogicalUnit):
5027 """Modifies the parameters of a node.
5029 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5030 to the node role (as _ROLE_*)
5031 @cvar _R2F: a dictionary from node role to tuples of flags
5032 @cvar _FLAGS: a list of attribute names corresponding to the flags
5035 HPATH = "node-modify"
5036 HTYPE = constants.HTYPE_NODE
5038 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5040 (True, False, False): _ROLE_CANDIDATE,
5041 (False, True, False): _ROLE_DRAINED,
5042 (False, False, True): _ROLE_OFFLINE,
5043 (False, False, False): _ROLE_REGULAR,
5045 _R2F = dict((v, k) for k, v in _F2R.items())
5046 _FLAGS = ["master_candidate", "drained", "offline"]
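# Worked example of the mappings above: the flag tuple follows the order of
# _FLAGS, so _F2R[(True, False, False)] is _ROLE_CANDIDATE and
# _R2F[_ROLE_OFFLINE] gives back (False, False, True).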
5048 def CheckArguments(self):
5049 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5050 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5051 self.op.master_capable, self.op.vm_capable,
5052 self.op.secondary_ip, self.op.ndparams]
5053 if all_mods.count(None) == len(all_mods):
5054 raise errors.OpPrereqError("Please pass at least one modification",
5056 if all_mods.count(True) > 1:
5057 raise errors.OpPrereqError("Can't set the node into more than one"
5058 " state at the same time",
5061 # Boolean value that tells us whether we might be demoting from MC
5062 self.might_demote = (self.op.master_candidate == False or
5063 self.op.offline == True or
5064 self.op.drained == True or
5065 self.op.master_capable == False)
5067 if self.op.secondary_ip:
5068 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5069 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5070 " address" % self.op.secondary_ip,
5073 self.lock_all = self.op.auto_promote and self.might_demote
5074 self.lock_instances = self.op.secondary_ip is not None
5076 def ExpandNames(self):
5078 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5080 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5082 if self.lock_instances:
5083 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
5085 def DeclareLocks(self, level):
5086 # If we have locked all instances, release the ones living on nodes
5087 # unrelated to the current operation before waiting to lock nodes.
5088 if level == locking.LEVEL_NODE and self.lock_instances:
5089 self.affected_instances = []
5090 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
5093 # Build list of instances to release
5094 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
5095 for instance_name, instance in self.cfg.GetMultiInstanceInfo(locked_i):
5096 if (instance.disk_template in constants.DTS_INT_MIRROR and
5097 self.op.node_name in instance.all_nodes):
5098 instances_keep.append(instance_name)
5099 self.affected_instances.append(instance)
5101 _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)
5103 assert (set(self.owned_locks(locking.LEVEL_INSTANCE)) ==
5104 set(instances_keep))
5106 def BuildHooksEnv(self):
5109 This runs on the master node.
5113 "OP_TARGET": self.op.node_name,
5114 "MASTER_CANDIDATE": str(self.op.master_candidate),
5115 "OFFLINE": str(self.op.offline),
5116 "DRAINED": str(self.op.drained),
5117 "MASTER_CAPABLE": str(self.op.master_capable),
5118 "VM_CAPABLE": str(self.op.vm_capable),
5121 def BuildHooksNodes(self):
5122 """Build hooks nodes.
5125 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5128 def CheckPrereq(self):
5129 """Check prerequisites.
5131 This only checks the instance list against the existing names.
5134 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5136 if (self.op.master_candidate is not None or
5137 self.op.drained is not None or
5138 self.op.offline is not None):
5139 # we can't change the master's node flags
5140 if self.op.node_name == self.cfg.GetMasterNode():
5141 raise errors.OpPrereqError("The master role can be changed"
5142 " only via master-failover",
5145 if self.op.master_candidate and not node.master_capable:
5146 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5147 " it a master candidate" % node.name,
5150 if self.op.vm_capable == False:
5151 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5153 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5154 " the vm_capable flag" % node.name,
5157 if node.master_candidate and self.might_demote and not self.lock_all:
5158 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5159 # check if after removing the current node, we're missing master candidates
5161 (mc_remaining, mc_should, _) = \
5162 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5163 if mc_remaining < mc_should:
5164 raise errors.OpPrereqError("Not enough master candidates, please"
5165 " pass auto promote option to allow"
5166 " promotion", errors.ECODE_STATE)
5168 self.old_flags = old_flags = (node.master_candidate,
5169 node.drained, node.offline)
5170 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5171 self.old_role = old_role = self._F2R[old_flags]
5173 # Check for ineffective changes
5174 for attr in self._FLAGS:
5175 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5176 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5177 setattr(self.op, attr, None)
5179 # Past this point, any flag change to False means a transition
5180 # away from the respective state, as only real changes are kept
5182 # TODO: We might query the real power state if it supports OOB
5183 if _SupportsOob(self.cfg, node):
5184 if self.op.offline is False and not (node.powered or
5185 self.op.powered == True):
5186 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5187 " offline status can be reset") %
5189 elif self.op.powered is not None:
5190 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5191 " as it does not support out-of-band"
5192 " handling") % self.op.node_name)
5194 # If we're being deofflined/drained, we'll MC ourself if needed
5195 if (self.op.drained == False or self.op.offline == False or
5196 (self.op.master_capable and not node.master_capable)):
5197 if _DecideSelfPromotion(self):
5198 self.op.master_candidate = True
5199 self.LogInfo("Auto-promoting node to master candidate")
5201 # If we're no longer master capable, we'll demote ourselves from MC
5202 if self.op.master_capable == False and node.master_candidate:
5203 self.LogInfo("Demoting from master candidate")
5204 self.op.master_candidate = False
5207 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5208 if self.op.master_candidate:
5209 new_role = self._ROLE_CANDIDATE
5210 elif self.op.drained:
5211 new_role = self._ROLE_DRAINED
5212 elif self.op.offline:
5213 new_role = self._ROLE_OFFLINE
5214 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5215 # False is still in the new flags, which means we're un-setting (the old) state
5217 new_role = self._ROLE_REGULAR
5218 else: # no new flags, nothing, keep old role
5221 self.new_role = new_role
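# Examples of the decision above: requesting drained=True on a regular node
# yields _ROLE_DRAINED; requesting offline=False on an offline node falls
# through to _ROLE_REGULAR (un-setting the old state); with no flag changes
# at all the old role is kept.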
5223 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5224 # Trying to transition out of offline status
5225 result = self.rpc.call_version([node.name])[node.name]
5227 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5228 " to report its version: %s" %
5229 (node.name, result.fail_msg),
5232 self.LogWarning("Transitioning node from offline to online state"
5233 " without using re-add. Please make sure the node"
5236 if self.op.secondary_ip:
5237 # Ok even without locking, because this can't be changed by any LU
5238 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5239 master_singlehomed = master.secondary_ip == master.primary_ip
5240 if master_singlehomed and self.op.secondary_ip:
5241 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5242 " homed cluster", errors.ECODE_INVAL)
5245 if self.affected_instances:
5246 raise errors.OpPrereqError("Cannot change secondary ip: offline"
5247 " node has instances (%s) configured"
5248 " to use it" % self.affected_instances)
5250 # On online nodes, check that no instances are running, and that
5251 # the node has the new ip and we can reach it.
5252 for instance in self.affected_instances:
5253 _CheckInstanceDown(self, instance, "cannot change secondary ip")
5255 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5256 if master.name != node.name:
5257 # check reachability from master secondary ip to new secondary ip
5258 if not netutils.TcpPing(self.op.secondary_ip,
5259 constants.DEFAULT_NODED_PORT,
5260 source=master.secondary_ip):
5261 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5262 " based ping to node daemon port",
5263 errors.ECODE_ENVIRON)
5265 if self.op.ndparams:
5266 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5267 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5268 self.new_ndparams = new_ndparams
5270 def Exec(self, feedback_fn):
5275 old_role = self.old_role
5276 new_role = self.new_role
5280 if self.op.ndparams:
5281 node.ndparams = self.new_ndparams
5283 if self.op.powered is not None:
5284 node.powered = self.op.powered
5286 for attr in ["master_capable", "vm_capable"]:
5287 val = getattr(self.op, attr)
5289 setattr(node, attr, val)
5290 result.append((attr, str(val)))
5292 if new_role != old_role:
5293 # Tell the node to demote itself, if no longer MC and not offline
5294 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5295 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5297 self.LogWarning("Node failed to demote itself: %s", msg)
5299 new_flags = self._R2F[new_role]
5300 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5302 result.append((desc, str(nf)))
5303 (node.master_candidate, node.drained, node.offline) = new_flags
5305 # we locked all nodes, we adjust the CP before updating this node
5307 _AdjustCandidatePool(self, [node.name])
5309 if self.op.secondary_ip:
5310 node.secondary_ip = self.op.secondary_ip
5311 result.append(("secondary_ip", self.op.secondary_ip))
5313 # this will trigger configuration file update, if needed
5314 self.cfg.Update(node, feedback_fn)
5316 # this will trigger job queue propagation or cleanup if the mc status changed
5318 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5319 self.context.ReaddNode(node)
5324 class LUNodePowercycle(NoHooksLU):
5325 """Powercycles a node.
5330 def CheckArguments(self):
5331 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5332 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5333 raise errors.OpPrereqError("The node is the master and the force"
5334 " parameter was not set",
5337 def ExpandNames(self):
5338 """Locking for PowercycleNode.
5340 This is a last-resort option and shouldn't block on other
5341 jobs. Therefore, we grab no locks.
5344 self.needed_locks = {}
5346 def Exec(self, feedback_fn):
5350 result = self.rpc.call_node_powercycle(self.op.node_name,
5351 self.cfg.GetHypervisorType())
5352 result.Raise("Failed to schedule the reboot")
5353 return result.payload
5356 class LUClusterQuery(NoHooksLU):
5357 """Query cluster configuration.
5362 def ExpandNames(self):
5363 self.needed_locks = {}
5365 def Exec(self, feedback_fn):
5366 """Return cluster config.
5369 cluster = self.cfg.GetClusterInfo()
5372 # Filter just for enabled hypervisors
5373 for os_name, hv_dict in cluster.os_hvp.items():
5374 os_hvp[os_name] = {}
5375 for hv_name, hv_params in hv_dict.items():
5376 if hv_name in cluster.enabled_hypervisors:
5377 os_hvp[os_name][hv_name] = hv_params
5379 # Convert ip_family to ip_version
5380 primary_ip_version = constants.IP4_VERSION
5381 if cluster.primary_ip_family == netutils.IP6Address.family:
5382 primary_ip_version = constants.IP6_VERSION
5385 "software_version": constants.RELEASE_VERSION,
5386 "protocol_version": constants.PROTOCOL_VERSION,
5387 "config_version": constants.CONFIG_VERSION,
5388 "os_api_version": max(constants.OS_API_VERSIONS),
5389 "export_version": constants.EXPORT_VERSION,
5390 "architecture": (platform.architecture()[0], platform.machine()),
5391 "name": cluster.cluster_name,
5392 "master": cluster.master_node,
5393 "default_hypervisor": cluster.enabled_hypervisors[0],
5394 "enabled_hypervisors": cluster.enabled_hypervisors,
5395 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
5396 for hypervisor_name in cluster.enabled_hypervisors]),
5398 "beparams": cluster.beparams,
5399 "osparams": cluster.osparams,
5400 "nicparams": cluster.nicparams,
5401 "ndparams": cluster.ndparams,
5402 "candidate_pool_size": cluster.candidate_pool_size,
5403 "master_netdev": cluster.master_netdev,
5404 "volume_group_name": cluster.volume_group_name,
5405 "drbd_usermode_helper": cluster.drbd_usermode_helper,
5406 "file_storage_dir": cluster.file_storage_dir,
5407 "shared_file_storage_dir": cluster.shared_file_storage_dir,
5408 "maintain_node_health": cluster.maintain_node_health,
5409 "ctime": cluster.ctime,
5410 "mtime": cluster.mtime,
5411 "uuid": cluster.uuid,
5412 "tags": list(cluster.GetTags()),
5413 "uid_pool": cluster.uid_pool,
5414 "default_iallocator": cluster.default_iallocator,
5415 "reserved_lvs": cluster.reserved_lvs,
5416 "primary_ip_version": primary_ip_version,
5417 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
5418 "hidden_os": cluster.hidden_os,
5419 "blacklisted_os": cluster.blacklisted_os,
5425 class LUClusterConfigQuery(NoHooksLU):
5426 """Return configuration values.
5430 _FIELDS_DYNAMIC = utils.FieldSet()
5431 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
5432 "watcher_pause", "volume_group_name")
5434 def CheckArguments(self):
5435 _CheckOutputFields(static=self._FIELDS_STATIC,
5436 dynamic=self._FIELDS_DYNAMIC,
5437 selected=self.op.output_fields)
5439 def ExpandNames(self):
5440 self.needed_locks = {}
5442 def Exec(self, feedback_fn):
5443 """Dump a representation of the cluster config to the standard output.
5447 for field in self.op.output_fields:
5448 if field == "cluster_name":
5449 entry = self.cfg.GetClusterName()
5450 elif field == "master_node":
5451 entry = self.cfg.GetMasterNode()
5452 elif field == "drain_flag":
5453 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
5454 elif field == "watcher_pause":
5455 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
5456 elif field == "volume_group_name":
5457 entry = self.cfg.GetVGName()
5459 raise errors.ParameterError(field)
5460 values.append(entry)
5464 class LUInstanceActivateDisks(NoHooksLU):
5465 """Bring up an instance's disks.
5470 def ExpandNames(self):
5471 self._ExpandAndLockInstance()
5472 self.needed_locks[locking.LEVEL_NODE] = []
5473 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5475 def DeclareLocks(self, level):
5476 if level == locking.LEVEL_NODE:
5477 self._LockInstancesNodes()
5479 def CheckPrereq(self):
5480 """Check prerequisites.
5482 This checks that the instance is in the cluster.
5485 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5486 assert self.instance is not None, \
5487 "Cannot retrieve locked instance %s" % self.op.instance_name
5488 _CheckNodeOnline(self, self.instance.primary_node)
5490 def Exec(self, feedback_fn):
5491 """Activate the disks.
5494 disks_ok, disks_info = \
5495 _AssembleInstanceDisks(self, self.instance,
5496 ignore_size=self.op.ignore_size)
5498 raise errors.OpExecError("Cannot activate block devices")
5503 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5505 """Prepare the block devices for an instance.
5507 This sets up the block devices on all nodes.
5509 @type lu: L{LogicalUnit}
5510 @param lu: the logical unit on whose behalf we execute
5511 @type instance: L{objects.Instance}
5512 @param instance: the instance for whose disks we assemble
5513 @type disks: list of L{objects.Disk} or None
5514 @param disks: which disks to assemble (or all, if None)
5515 @type ignore_secondaries: boolean
5516 @param ignore_secondaries: if true, errors on secondary nodes
5517 won't result in an error return from the function
5518 @type ignore_size: boolean
5519 @param ignore_size: if true, the current known size of the disk
5520 will not be used during the disk activation, useful for cases
5521 when the size is wrong
5522 @return: a tuple of (disks_ok, device_info); device_info is a list of
5523 (host, instance_visible_name, node_visible_name) tuples
5524 with the mapping from node devices to instance devices
5529 iname = instance.name
5530 disks = _ExpandCheckDisks(instance, disks)
5532 # With the two-pass mechanism we try to reduce the window of
5533 # opportunity for the race condition of switching DRBD to primary
5534 # before the handshake has occurred, but we do not eliminate it
5536 # The proper fix would be to wait (with some limits) until the
5537 # connection has been made and drbd transitions from WFConnection
5538 # into any other network-connected state (Connected, SyncTarget,
5541 # 1st pass, assemble on all nodes in secondary mode
5542 for idx, inst_disk in enumerate(disks):
5543 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5545 node_disk = node_disk.Copy()
5546 node_disk.UnsetSize()
5547 lu.cfg.SetDiskID(node_disk, node)
5548 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5549 msg = result.fail_msg
5551 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5552 " (is_primary=False, pass=1): %s",
5553 inst_disk.iv_name, node, msg)
5554 if not ignore_secondaries:
5557 # FIXME: race condition on drbd migration to primary
5559 # 2nd pass, do only the primary node
5560 for idx, inst_disk in enumerate(disks):
5563 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5564 if node != instance.primary_node:
5567 node_disk = node_disk.Copy()
5568 node_disk.UnsetSize()
5569 lu.cfg.SetDiskID(node_disk, node)
5570 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5571 msg = result.fail_msg
5573 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5574 " (is_primary=True, pass=2): %s",
5575 inst_disk.iv_name, node, msg)
5578 dev_path = result.payload
5580 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5582 # leave the disks configured for the primary node
5583 # this is a workaround that would be fixed better by
5584 # improving the logical/physical id handling
5586 lu.cfg.SetDiskID(disk, instance.primary_node)
5588 return disks_ok, device_info
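# Illustrative caller pattern (a sketch; node and device names are made up):
#   disks_ok, disks_info = _AssembleInstanceDisks(lu, instance)
#   # disks_info entries look like ("node1.example.com", "disk/0", "/dev/drbd0")
# as also used by LUInstanceActivateDisks.Exec above.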
5591 def _StartInstanceDisks(lu, instance, force):
5592 """Start the disks of an instance.
5595 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
5596 ignore_secondaries=force)
5598 _ShutdownInstanceDisks(lu, instance)
5599 if force is not None and not force:
5600 lu.proc.LogWarning("", hint="If the message above refers to a"
5602 " you can retry the operation using '--force'.")
5603 raise errors.OpExecError("Disk consistency error")
5606 class LUInstanceDeactivateDisks(NoHooksLU):
5607 """Shutdown an instance's disks.
5612 def ExpandNames(self):
5613 self._ExpandAndLockInstance()
5614 self.needed_locks[locking.LEVEL_NODE] = []
5615 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5617 def DeclareLocks(self, level):
5618 if level == locking.LEVEL_NODE:
5619 self._LockInstancesNodes()
5621 def CheckPrereq(self):
5622 """Check prerequisites.
5624 This checks that the instance is in the cluster.
5627 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5628 assert self.instance is not None, \
5629 "Cannot retrieve locked instance %s" % self.op.instance_name
5631 def Exec(self, feedback_fn):
5632 """Deactivate the disks
5635 instance = self.instance
5637 _ShutdownInstanceDisks(self, instance)
5639 _SafeShutdownInstanceDisks(self, instance)
5642 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
5643 """Shutdown block devices of an instance.
5645 This function checks if an instance is running, before calling
5646 _ShutdownInstanceDisks.
5649 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
5650 _ShutdownInstanceDisks(lu, instance, disks=disks)
5653 def _ExpandCheckDisks(instance, disks):
5654 """Return the instance disks selected by the disks list
5656 @type disks: list of L{objects.Disk} or None
5657 @param disks: selected disks
5658 @rtype: list of L{objects.Disk}
5659 @return: selected instance disks to act on
5663 return instance.disks
5665 if not set(disks).issubset(instance.disks):
5666 raise errors.ProgrammerError("Can only act on disks belonging to the"
5671 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
5672 """Shutdown block devices of an instance.
5674 This does the shutdown on all nodes of the instance.
5676  If ignore_primary is false, errors on the primary node are
5681 disks = _ExpandCheckDisks(instance, disks)
5684 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
5685 lu.cfg.SetDiskID(top_disk, node)
5686 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
5687 msg = result.fail_msg
5689 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
5690 disk.iv_name, node, msg)
5691 if ((node == instance.primary_node and not ignore_primary) or
5692 (node != instance.primary_node and not result.offline)):
5697 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5698 """Checks if a node has enough free memory.
5700  This function checks if a given node has the needed amount of free
5701  memory. In case the node has less memory or we cannot get the
5702  information from the node, this function raises an OpPrereqError
5705 @type lu: C{LogicalUnit}
5706 @param lu: a logical unit from which we get configuration data
5708 @param node: the node to check
5709 @type reason: C{str}
5710 @param reason: string to use in the error message
5711 @type requested: C{int}
5712 @param requested: the amount of memory in MiB to check for
5713 @type hypervisor_name: C{str}
5714 @param hypervisor_name: the hypervisor to ask for memory stats
5715 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
5716 we cannot check the node
5719 nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
5720 nodeinfo[node].Raise("Can't get data from node %s" % node,
5721 prereq=True, ecode=errors.ECODE_ENVIRON)
5722 free_mem = nodeinfo[node].payload.get("memory_free", None)
5723 if not isinstance(free_mem, int):
5724 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
5725 " was '%s'" % (node, free_mem),
5726 errors.ECODE_ENVIRON)
5727 if requested > free_mem:
5728 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
5729 " needed %s MiB, available %s MiB" %
5730 (node, reason, requested, free_mem),
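# Illustrative sketch only (mirrors the check LUInstanceStartup performs
# further below): verify that an instance's configured memory fits on its
# primary node before it is started; the "bep" lookup follows the FillBE
# pattern used elsewhere in this module.
def _ExampleCheckStartupMemory(lu, instance):
  """Sketch: free-memory check prior to starting an instance."""
  bep = lu.cfg.GetClusterInfo().FillBE(instance)
  _CheckNodeFreeMemory(lu, instance.primary_node,
                       "starting instance %s" % instance.name,
                       bep[constants.BE_MEMORY], instance.hypervisor)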
5734 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
5735 """Checks if nodes have enough free disk space in the all VGs.
5737  This function checks if all given nodes have the needed amount of
5738  free disk. In case any node has less disk or we cannot get the
5739  information from the node, this function raises an OpPrereqError
5742 @type lu: C{LogicalUnit}
5743 @param lu: a logical unit from which we get configuration data
5744 @type nodenames: C{list}
5745 @param nodenames: the list of node names to check
5746 @type req_sizes: C{dict}
5747 @param req_sizes: the hash of vg and corresponding amount of disk in
5749 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5750 or we cannot check the node
5753 for vg, req_size in req_sizes.items():
5754 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
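# Illustrative sketch only: the req_sizes argument above maps a VG name to
# the amount of space (in MiB) needed from it; the VG names and sizes below
# are hypothetical.
def _ExampleCheckTwoVGs(lu, nodenames):
  """Sketch: verify space for data and DRBD metadata in separate VGs."""
  req_sizes = {
    "xenvg": 10240,  # 10 GiB of instance data
    "metavg": 256,   # room for the DRBD metadata volumes
    }
  _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes)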
5757 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5758 """Checks if nodes have enough free disk space in the specified VG.
5760  This function checks if all given nodes have the needed amount of
5761  free disk. In case any node has less disk or we cannot get the
5762  information from the node, this function raises an OpPrereqError
5765 @type lu: C{LogicalUnit}
5766 @param lu: a logical unit from which we get configuration data
5767 @type nodenames: C{list}
5768 @param nodenames: the list of node names to check
5770 @param vg: the volume group to check
5771 @type requested: C{int}
5772 @param requested: the amount of disk in MiB to check for
5773 @raise errors.OpPrereqError: if the node doesn't have enough disk,
5774 or we cannot check the node
5777 nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5778 for node in nodenames:
5779 info = nodeinfo[node]
5780 info.Raise("Cannot get current information from node %s" % node,
5781 prereq=True, ecode=errors.ECODE_ENVIRON)
5782 vg_free = info.payload.get("vg_free", None)
5783 if not isinstance(vg_free, int):
5784 raise errors.OpPrereqError("Can't compute free disk space on node"
5785 " %s for vg %s, result was '%s'" %
5786 (node, vg, vg_free), errors.ECODE_ENVIRON)
5787 if requested > vg_free:
5788 raise errors.OpPrereqError("Not enough disk space on target node %s"
5789 " vg %s: required %d MiB, available %d MiB" %
5790 (node, vg, requested, vg_free),
5794 class LUInstanceStartup(LogicalUnit):
5795 """Starts an instance.
5798 HPATH = "instance-start"
5799 HTYPE = constants.HTYPE_INSTANCE
5802 def CheckArguments(self):
5804 if self.op.beparams:
5805 # fill the beparams dict
5806 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5808 def ExpandNames(self):
5809 self._ExpandAndLockInstance()
5811 def BuildHooksEnv(self):
5814 This runs on master, primary and secondary nodes of the instance.
5818 "FORCE": self.op.force,
5821 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5825 def BuildHooksNodes(self):
5826 """Build hooks nodes.
5829 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5832 def CheckPrereq(self):
5833 """Check prerequisites.
5835 This checks that the instance is in the cluster.
5838 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5839 assert self.instance is not None, \
5840 "Cannot retrieve locked instance %s" % self.op.instance_name
5843 if self.op.hvparams:
5844 # check hypervisor parameter syntax (locally)
5845 cluster = self.cfg.GetClusterInfo()
5846 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5847 filled_hvp = cluster.FillHV(instance)
5848 filled_hvp.update(self.op.hvparams)
5849 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5850 hv_type.CheckParameterSyntax(filled_hvp)
5851 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
5853 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5855 if self.primary_offline and self.op.ignore_offline_nodes:
5856 self.proc.LogWarning("Ignoring offline primary node")
5858 if self.op.hvparams or self.op.beparams:
5859 self.proc.LogWarning("Overridden parameters are ignored")
5861 _CheckNodeOnline(self, instance.primary_node)
5863 bep = self.cfg.GetClusterInfo().FillBE(instance)
5865 # check bridges existence
5866 _CheckInstanceBridgesExist(self, instance)
5868 remote_info = self.rpc.call_instance_info(instance.primary_node,
5870 instance.hypervisor)
5871 remote_info.Raise("Error checking node %s" % instance.primary_node,
5872 prereq=True, ecode=errors.ECODE_ENVIRON)
5873 if not remote_info.payload: # not running already
5874 _CheckNodeFreeMemory(self, instance.primary_node,
5875 "starting instance %s" % instance.name,
5876 bep[constants.BE_MEMORY], instance.hypervisor)
5878 def Exec(self, feedback_fn):
5879 """Start the instance.
5882 instance = self.instance
5883 force = self.op.force
5885 if not self.op.no_remember:
5886 self.cfg.MarkInstanceUp(instance.name)
5888 if self.primary_offline:
5889 assert self.op.ignore_offline_nodes
5890 self.proc.LogInfo("Primary node offline, marked instance as started")
5892 node_current = instance.primary_node
5894 _StartInstanceDisks(self, instance, force)
5896 result = self.rpc.call_instance_start(node_current, instance,
5897 self.op.hvparams, self.op.beparams,
5898 self.op.startup_paused)
5899 msg = result.fail_msg
5901 _ShutdownInstanceDisks(self, instance)
5902 raise errors.OpExecError("Could not start instance: %s" % msg)
5905 class LUInstanceReboot(LogicalUnit):
5906 """Reboot an instance.
5909 HPATH = "instance-reboot"
5910 HTYPE = constants.HTYPE_INSTANCE
5913 def ExpandNames(self):
5914 self._ExpandAndLockInstance()
5916 def BuildHooksEnv(self):
5919 This runs on master, primary and secondary nodes of the instance.
5923 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
5924 "REBOOT_TYPE": self.op.reboot_type,
5925 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5928 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5932 def BuildHooksNodes(self):
5933 """Build hooks nodes.
5936 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5939 def CheckPrereq(self):
5940 """Check prerequisites.
5942 This checks that the instance is in the cluster.
5945 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5946 assert self.instance is not None, \
5947 "Cannot retrieve locked instance %s" % self.op.instance_name
5949 _CheckNodeOnline(self, instance.primary_node)
5951 # check bridges existence
5952 _CheckInstanceBridgesExist(self, instance)
5954 def Exec(self, feedback_fn):
5955 """Reboot the instance.
5958 instance = self.instance
5959 ignore_secondaries = self.op.ignore_secondaries
5960 reboot_type = self.op.reboot_type
5962 remote_info = self.rpc.call_instance_info(instance.primary_node,
5964 instance.hypervisor)
5965 remote_info.Raise("Error checking node %s" % instance.primary_node)
5966 instance_running = bool(remote_info.payload)
5968 node_current = instance.primary_node
5970 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
5971 constants.INSTANCE_REBOOT_HARD]:
5972 for disk in instance.disks:
5973 self.cfg.SetDiskID(disk, node_current)
5974 result = self.rpc.call_instance_reboot(node_current, instance,
5976 self.op.shutdown_timeout)
5977 result.Raise("Could not reboot instance")
5979 if instance_running:
5980 result = self.rpc.call_instance_shutdown(node_current, instance,
5981 self.op.shutdown_timeout)
5982 result.Raise("Could not shutdown instance for full reboot")
5983 _ShutdownInstanceDisks(self, instance)
5985 self.LogInfo("Instance %s was already stopped, starting now",
5987 _StartInstanceDisks(self, instance, ignore_secondaries)
5988 result = self.rpc.call_instance_start(node_current, instance,
5990 msg = result.fail_msg
5992 _ShutdownInstanceDisks(self, instance)
5993 raise errors.OpExecError("Could not start instance for"
5994 " full reboot: %s" % msg)
5996 self.cfg.MarkInstanceUp(instance.name)
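# Illustrative sketch only: the decision taken in LUInstanceReboot.Exec
# above, extracted as a pure predicate; soft and hard reboots are delegated
# to the hypervisor, while anything else (or a stopped instance) is handled
# as a full stop/start cycle.
def _ExampleRebootNeedsStopStart(instance_running, reboot_type):
  """Sketch: True when the reboot must be emulated by stop + start."""
  return not (instance_running and
              reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                              constants.INSTANCE_REBOOT_HARD])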
5999 class LUInstanceShutdown(LogicalUnit):
6000 """Shutdown an instance.
6003 HPATH = "instance-stop"
6004 HTYPE = constants.HTYPE_INSTANCE
6007 def ExpandNames(self):
6008 self._ExpandAndLockInstance()
6010 def BuildHooksEnv(self):
6013 This runs on master, primary and secondary nodes of the instance.
6016 env = _BuildInstanceHookEnvByObject(self, self.instance)
6017 env["TIMEOUT"] = self.op.timeout
6020 def BuildHooksNodes(self):
6021 """Build hooks nodes.
6024 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6027 def CheckPrereq(self):
6028 """Check prerequisites.
6030 This checks that the instance is in the cluster.
6033 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6034 assert self.instance is not None, \
6035 "Cannot retrieve locked instance %s" % self.op.instance_name
6037 self.primary_offline = \
6038 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6040 if self.primary_offline and self.op.ignore_offline_nodes:
6041 self.proc.LogWarning("Ignoring offline primary node")
6043 _CheckNodeOnline(self, self.instance.primary_node)
6045 def Exec(self, feedback_fn):
6046 """Shutdown the instance.
6049 instance = self.instance
6050 node_current = instance.primary_node
6051 timeout = self.op.timeout
6053 if not self.op.no_remember:
6054 self.cfg.MarkInstanceDown(instance.name)
6056 if self.primary_offline:
6057 assert self.op.ignore_offline_nodes
6058 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6060 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6061 msg = result.fail_msg
6063 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6065 _ShutdownInstanceDisks(self, instance)
6068 class LUInstanceReinstall(LogicalUnit):
6069 """Reinstall an instance.
6072 HPATH = "instance-reinstall"
6073 HTYPE = constants.HTYPE_INSTANCE
6076 def ExpandNames(self):
6077 self._ExpandAndLockInstance()
6079 def BuildHooksEnv(self):
6082 This runs on master, primary and secondary nodes of the instance.
6085 return _BuildInstanceHookEnvByObject(self, self.instance)
6087 def BuildHooksNodes(self):
6088 """Build hooks nodes.
6091 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6094 def CheckPrereq(self):
6095 """Check prerequisites.
6097 This checks that the instance is in the cluster and is not running.
6100 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6101 assert instance is not None, \
6102 "Cannot retrieve locked instance %s" % self.op.instance_name
6103 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6104 " offline, cannot reinstall")
6105 for node in instance.secondary_nodes:
6106 _CheckNodeOnline(self, node, "Instance secondary node offline,"
6107 " cannot reinstall")
6109 if instance.disk_template == constants.DT_DISKLESS:
6110 raise errors.OpPrereqError("Instance '%s' has no disks" %
6111 self.op.instance_name,
6113 _CheckInstanceDown(self, instance, "cannot reinstall")
6115 if self.op.os_type is not None:
6117 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6118 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6119 instance_os = self.op.os_type
6121 instance_os = instance.os
6123 nodelist = list(instance.all_nodes)
6125 if self.op.osparams:
6126 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6127 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6128 self.os_inst = i_osdict # the new dict (without defaults)
6132 self.instance = instance
6134 def Exec(self, feedback_fn):
6135 """Reinstall the instance.
6138 inst = self.instance
6140 if self.op.os_type is not None:
6141 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6142 inst.os = self.op.os_type
6143 # Write to configuration
6144 self.cfg.Update(inst, feedback_fn)
6146 _StartInstanceDisks(self, inst, None)
6148 feedback_fn("Running the instance OS create scripts...")
6149 # FIXME: pass debug option from opcode to backend
6150 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
6151 self.op.debug_level,
6152 osparams=self.os_inst)
6153 result.Raise("Could not install OS for instance %s on node %s" %
6154 (inst.name, inst.primary_node))
6156 _ShutdownInstanceDisks(self, inst)
6159 class LUInstanceRecreateDisks(LogicalUnit):
6160 """Recreate an instance's missing disks.
6163 HPATH = "instance-recreate-disks"
6164 HTYPE = constants.HTYPE_INSTANCE
6167 def CheckArguments(self):
6168 # normalise the disk list
6169 self.op.disks = sorted(frozenset(self.op.disks))
6171 def ExpandNames(self):
6172 self._ExpandAndLockInstance()
6173 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6175 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6176 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6178 self.needed_locks[locking.LEVEL_NODE] = []
6180 def DeclareLocks(self, level):
6181 if level == locking.LEVEL_NODE:
6182 # if we replace the nodes, we only need to lock the old primary,
6183 # otherwise we need to lock all nodes for disk re-creation
6184 primary_only = bool(self.op.nodes)
6185 self._LockInstancesNodes(primary_only=primary_only)
6187 def BuildHooksEnv(self):
6190 This runs on master, primary and secondary nodes of the instance.
6193 return _BuildInstanceHookEnvByObject(self, self.instance)
6195 def BuildHooksNodes(self):
6196 """Build hooks nodes.
6199 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6202 def CheckPrereq(self):
6203 """Check prerequisites.
6205 This checks that the instance is in the cluster and is not running.
6208 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6209 assert instance is not None, \
6210 "Cannot retrieve locked instance %s" % self.op.instance_name
6212 if len(self.op.nodes) != len(instance.all_nodes):
6213 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6214 " %d replacement nodes were specified" %
6215 (instance.name, len(instance.all_nodes),
6216 len(self.op.nodes)),
6218 assert instance.disk_template != constants.DT_DRBD8 or \
6219 len(self.op.nodes) == 2
6220 assert instance.disk_template != constants.DT_PLAIN or \
6221 len(self.op.nodes) == 1
6222 primary_node = self.op.nodes[0]
6224 primary_node = instance.primary_node
6225 _CheckNodeOnline(self, primary_node)
6227 if instance.disk_template == constants.DT_DISKLESS:
6228 raise errors.OpPrereqError("Instance '%s' has no disks" %
6229 self.op.instance_name, errors.ECODE_INVAL)
6230 # if we replace nodes *and* the old primary is offline, we don't
6232 assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
6233 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6234 if not (self.op.nodes and old_pnode.offline):
6235 _CheckInstanceDown(self, instance, "cannot recreate disks")
6237 if not self.op.disks:
6238 self.op.disks = range(len(instance.disks))
6240 for idx in self.op.disks:
6241 if idx >= len(instance.disks):
6242 raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6244 if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6245 raise errors.OpPrereqError("Can't recreate disks partially and"
6246 " change the nodes at the same time",
6248 self.instance = instance
6250 def Exec(self, feedback_fn):
6251 """Recreate the disks.
6254 instance = self.instance
6257 mods = [] # keeps track of needed logical_id changes
6259 for idx, disk in enumerate(instance.disks):
6260 if idx not in self.op.disks: # disk idx has not been passed in
6263 # update secondaries for disks, if needed
6265 if disk.dev_type == constants.LD_DRBD8:
6266 # need to update the nodes and minors
6267 assert len(self.op.nodes) == 2
6268 assert len(disk.logical_id) == 6 # otherwise disk internals
6270 (_, _, old_port, _, _, old_secret) = disk.logical_id
6271 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6272 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6273 new_minors[0], new_minors[1], old_secret)
6274 assert len(disk.logical_id) == len(new_id)
6275 mods.append((idx, new_id))
6277 # now that we have passed all asserts above, we can apply the mods
6278 # in a single run (to avoid partial changes)
6279 for idx, new_id in mods:
6280 instance.disks[idx].logical_id = new_id
6282 # change primary node, if needed
6284 instance.primary_node = self.op.nodes[0]
6285 self.LogWarning("Changing the instance's nodes, you will have to"
6286 " remove any disks left on the older nodes manually")
6289 self.cfg.Update(instance, feedback_fn)
6291 _CreateDisks(self, instance, to_skip=to_skip)
6294 class LUInstanceRename(LogicalUnit):
6295 """Rename an instance.
6298 HPATH = "instance-rename"
6299 HTYPE = constants.HTYPE_INSTANCE
6301 def CheckArguments(self):
6305 if self.op.ip_check and not self.op.name_check:
6306 # TODO: make the ip check more flexible and not depend on the name check
6307 raise errors.OpPrereqError("IP address check requires a name check",
6310 def BuildHooksEnv(self):
6313 This runs on master, primary and secondary nodes of the instance.
6316 env = _BuildInstanceHookEnvByObject(self, self.instance)
6317 env["INSTANCE_NEW_NAME"] = self.op.new_name
6320 def BuildHooksNodes(self):
6321 """Build hooks nodes.
6324 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6327 def CheckPrereq(self):
6328 """Check prerequisites.
6330 This checks that the instance is in the cluster and is not running.
6333 self.op.instance_name = _ExpandInstanceName(self.cfg,
6334 self.op.instance_name)
6335 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6336 assert instance is not None
6337 _CheckNodeOnline(self, instance.primary_node)
6338 _CheckInstanceDown(self, instance, "cannot rename")
6339 self.instance = instance
6341 new_name = self.op.new_name
6342 if self.op.name_check:
6343 hostname = netutils.GetHostname(name=new_name)
6344 if hostname != new_name:
6345 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6347 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6348 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6349 " same as given hostname '%s'") %
6350 (hostname.name, self.op.new_name),
6352 new_name = self.op.new_name = hostname.name
6353 if (self.op.ip_check and
6354 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6355 raise errors.OpPrereqError("IP %s of instance %s already in use" %
6356 (hostname.ip, new_name),
6357 errors.ECODE_NOTUNIQUE)
6359 instance_list = self.cfg.GetInstanceList()
6360 if new_name in instance_list and new_name != instance.name:
6361 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6362 new_name, errors.ECODE_EXISTS)
6364 def Exec(self, feedback_fn):
6365 """Rename the instance.
6368 inst = self.instance
6369 old_name = inst.name
6371 rename_file_storage = False
6372 if (inst.disk_template in constants.DTS_FILEBASED and
6373 self.op.new_name != inst.name):
6374 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6375 rename_file_storage = True
6377 self.cfg.RenameInstance(inst.name, self.op.new_name)
6378 # Change the instance lock. This is definitely safe while we hold the BGL.
6379 # Otherwise the new lock would have to be added in acquired mode.
6381 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6382 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6384 # re-read the instance from the configuration after rename
6385 inst = self.cfg.GetInstanceInfo(self.op.new_name)
6387 if rename_file_storage:
6388 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6389 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6390 old_file_storage_dir,
6391 new_file_storage_dir)
6392 result.Raise("Could not rename on node %s directory '%s' to '%s'"
6393 " (but the instance has been renamed in Ganeti)" %
6394 (inst.primary_node, old_file_storage_dir,
6395 new_file_storage_dir))
6397 _StartInstanceDisks(self, inst, None)
6399 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6400 old_name, self.op.debug_level)
6401 msg = result.fail_msg
6403 msg = ("Could not run OS rename script for instance %s on node %s"
6404 " (but the instance has been renamed in Ganeti): %s" %
6405 (inst.name, inst.primary_node, msg))
6406 self.proc.LogWarning(msg)
6408 _ShutdownInstanceDisks(self, inst)
6413 class LUInstanceRemove(LogicalUnit):
6414 """Remove an instance.
6417 HPATH = "instance-remove"
6418 HTYPE = constants.HTYPE_INSTANCE
6421 def ExpandNames(self):
6422 self._ExpandAndLockInstance()
6423 self.needed_locks[locking.LEVEL_NODE] = []
6424 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6426 def DeclareLocks(self, level):
6427 if level == locking.LEVEL_NODE:
6428 self._LockInstancesNodes()
6430 def BuildHooksEnv(self):
6433 This runs on master, primary and secondary nodes of the instance.
6436 env = _BuildInstanceHookEnvByObject(self, self.instance)
6437 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6440 def BuildHooksNodes(self):
6441 """Build hooks nodes.
6444 nl = [self.cfg.GetMasterNode()]
6445 nl_post = list(self.instance.all_nodes) + nl
6446 return (nl, nl_post)
6448 def CheckPrereq(self):
6449 """Check prerequisites.
6451 This checks that the instance is in the cluster.
6454 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6455 assert self.instance is not None, \
6456 "Cannot retrieve locked instance %s" % self.op.instance_name
6458 def Exec(self, feedback_fn):
6459 """Remove the instance.
6462 instance = self.instance
6463 logging.info("Shutting down instance %s on node %s",
6464 instance.name, instance.primary_node)
6466 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6467 self.op.shutdown_timeout)
6468 msg = result.fail_msg
6470 if self.op.ignore_failures:
6471 feedback_fn("Warning: can't shutdown instance: %s" % msg)
6473 raise errors.OpExecError("Could not shutdown instance %s on"
6475 (instance.name, instance.primary_node, msg))
6477 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6480 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6481 """Utility function to remove an instance.
6484 logging.info("Removing block devices for instance %s", instance.name)
6486 if not _RemoveDisks(lu, instance):
6487 if not ignore_failures:
6488 raise errors.OpExecError("Can't remove instance's disks")
6489 feedback_fn("Warning: can't remove instance's disks")
6491 logging.info("Removing instance %s out of cluster config", instance.name)
6493 lu.cfg.RemoveInstance(instance.name)
6495 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6496 "Instance lock removal conflict"
6498 # Remove lock for the instance
6499 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6502 class LUInstanceQuery(NoHooksLU):
6503 """Logical unit for querying instances.
6506 # pylint: disable-msg=W0142
6509 def CheckArguments(self):
6510 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6511 self.op.output_fields, self.op.use_locking)
6513 def ExpandNames(self):
6514 self.iq.ExpandNames(self)
6516 def DeclareLocks(self, level):
6517 self.iq.DeclareLocks(self, level)
6519 def Exec(self, feedback_fn):
6520 return self.iq.OldStyleQuery(self)
6523 class LUInstanceFailover(LogicalUnit):
6524 """Failover an instance.
6527 HPATH = "instance-failover"
6528 HTYPE = constants.HTYPE_INSTANCE
6531 def CheckArguments(self):
6532 """Check the arguments.
6535 self.iallocator = getattr(self.op, "iallocator", None)
6536 self.target_node = getattr(self.op, "target_node", None)
6538 def ExpandNames(self):
6539 self._ExpandAndLockInstance()
6541 if self.op.target_node is not None:
6542 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6544 self.needed_locks[locking.LEVEL_NODE] = []
6545 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6547 ignore_consistency = self.op.ignore_consistency
6548 shutdown_timeout = self.op.shutdown_timeout
6549 self._migrater = TLMigrateInstance(self, self.op.instance_name,
6552 ignore_consistency=ignore_consistency,
6553 shutdown_timeout=shutdown_timeout)
6554 self.tasklets = [self._migrater]
6556 def DeclareLocks(self, level):
6557 if level == locking.LEVEL_NODE:
6558 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6559 if instance.disk_template in constants.DTS_EXT_MIRROR:
6560 if self.op.target_node is None:
6561 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6563 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6564 self.op.target_node]
6565 del self.recalculate_locks[locking.LEVEL_NODE]
6567 self._LockInstancesNodes()
6569 def BuildHooksEnv(self):
6572 This runs on master, primary and secondary nodes of the instance.
6575 instance = self._migrater.instance
6576 source_node = instance.primary_node
6577 target_node = self.op.target_node
6579 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6580 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6581 "OLD_PRIMARY": source_node,
6582 "NEW_PRIMARY": target_node,
6585 if instance.disk_template in constants.DTS_INT_MIRROR:
6586 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6587 env["NEW_SECONDARY"] = source_node
6589 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6591 env.update(_BuildInstanceHookEnvByObject(self, instance))
6595 def BuildHooksNodes(self):
6596 """Build hooks nodes.
6599 instance = self._migrater.instance
6600 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6601 return (nl, nl + [instance.primary_node])
6604 class LUInstanceMigrate(LogicalUnit):
6605 """Migrate an instance.
6607 This is migration without shutting down, compared to the failover,
6608 which is done with shutdown.
6611 HPATH = "instance-migrate"
6612 HTYPE = constants.HTYPE_INSTANCE
6615 def ExpandNames(self):
6616 self._ExpandAndLockInstance()
6618 if self.op.target_node is not None:
6619 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6621 self.needed_locks[locking.LEVEL_NODE] = []
6622 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6624 self._migrater = TLMigrateInstance(self, self.op.instance_name,
6625 cleanup=self.op.cleanup,
6627 fallback=self.op.allow_failover)
6628 self.tasklets = [self._migrater]
6630 def DeclareLocks(self, level):
6631 if level == locking.LEVEL_NODE:
6632 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6633 if instance.disk_template in constants.DTS_EXT_MIRROR:
6634 if self.op.target_node is None:
6635 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6637 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6638 self.op.target_node]
6639 del self.recalculate_locks[locking.LEVEL_NODE]
6641 self._LockInstancesNodes()
6643 def BuildHooksEnv(self):
6646 This runs on master, primary and secondary nodes of the instance.
6649 instance = self._migrater.instance
6650 source_node = instance.primary_node
6651 target_node = self.op.target_node
6652 env = _BuildInstanceHookEnvByObject(self, instance)
6654 "MIGRATE_LIVE": self._migrater.live,
6655 "MIGRATE_CLEANUP": self.op.cleanup,
6656 "OLD_PRIMARY": source_node,
6657 "NEW_PRIMARY": target_node,
6660 if instance.disk_template in constants.DTS_INT_MIRROR:
6661 env["OLD_SECONDARY"] = target_node
6662 env["NEW_SECONDARY"] = source_node
6664 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6668 def BuildHooksNodes(self):
6669 """Build hooks nodes.
6672 instance = self._migrater.instance
6673 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6674 return (nl, nl + [instance.primary_node])
6677 class LUInstanceMove(LogicalUnit):
6678 """Move an instance by data-copying.
6681 HPATH = "instance-move"
6682 HTYPE = constants.HTYPE_INSTANCE
6685 def ExpandNames(self):
6686 self._ExpandAndLockInstance()
6687 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6688 self.op.target_node = target_node
6689 self.needed_locks[locking.LEVEL_NODE] = [target_node]
6690 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6692 def DeclareLocks(self, level):
6693 if level == locking.LEVEL_NODE:
6694 self._LockInstancesNodes(primary_only=True)
6696 def BuildHooksEnv(self):
6699 This runs on master, primary and secondary nodes of the instance.
6703 "TARGET_NODE": self.op.target_node,
6704 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6706 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6709 def BuildHooksNodes(self):
6710 """Build hooks nodes.
6714 self.cfg.GetMasterNode(),
6715 self.instance.primary_node,
6716 self.op.target_node,
6720 def CheckPrereq(self):
6721 """Check prerequisites.
6723 This checks that the instance is in the cluster.
6726 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6727 assert self.instance is not None, \
6728 "Cannot retrieve locked instance %s" % self.op.instance_name
6730 node = self.cfg.GetNodeInfo(self.op.target_node)
6731 assert node is not None, \
6732 "Cannot retrieve locked node %s" % self.op.target_node
6734 self.target_node = target_node = node.name
6736 if target_node == instance.primary_node:
6737 raise errors.OpPrereqError("Instance %s is already on the node %s" %
6738 (instance.name, target_node),
6741 bep = self.cfg.GetClusterInfo().FillBE(instance)
6743 for idx, dsk in enumerate(instance.disks):
6744 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6745 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6746 " cannot copy" % idx, errors.ECODE_STATE)
6748 _CheckNodeOnline(self, target_node)
6749 _CheckNodeNotDrained(self, target_node)
6750 _CheckNodeVmCapable(self, target_node)
6752 if instance.admin_up:
6753 # check memory requirements on the secondary node
6754 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6755 instance.name, bep[constants.BE_MEMORY],
6756 instance.hypervisor)
6758 self.LogInfo("Not checking memory on the secondary node as"
6759 " instance will not be started")
6761    # check bridge existence
6762 _CheckInstanceBridgesExist(self, instance, node=target_node)
6764 def Exec(self, feedback_fn):
6765 """Move an instance.
6767 The move is done by shutting it down on its present node, copying
6768 the data over (slow) and starting it on the new node.
6771 instance = self.instance
6773 source_node = instance.primary_node
6774 target_node = self.target_node
6776 self.LogInfo("Shutting down instance %s on source node %s",
6777 instance.name, source_node)
6779 result = self.rpc.call_instance_shutdown(source_node, instance,
6780 self.op.shutdown_timeout)
6781 msg = result.fail_msg
6783 if self.op.ignore_consistency:
6784 self.proc.LogWarning("Could not shutdown instance %s on node %s."
6785 " Proceeding anyway. Please make sure node"
6786 " %s is down. Error details: %s",
6787 instance.name, source_node, source_node, msg)
6789 raise errors.OpExecError("Could not shutdown instance %s on"
6791 (instance.name, source_node, msg))
6793 # create the target disks
6795 _CreateDisks(self, instance, target_node=target_node)
6796 except errors.OpExecError:
6797 self.LogWarning("Device creation failed, reverting...")
6799 _RemoveDisks(self, instance, target_node=target_node)
6801 self.cfg.ReleaseDRBDMinors(instance.name)
6804 cluster_name = self.cfg.GetClusterInfo().cluster_name
6807 # activate, get path, copy the data over
6808 for idx, disk in enumerate(instance.disks):
6809 self.LogInfo("Copying data for disk %d", idx)
6810 result = self.rpc.call_blockdev_assemble(target_node, disk,
6811 instance.name, True, idx)
6813 self.LogWarning("Can't assemble newly created disk %d: %s",
6814 idx, result.fail_msg)
6815 errs.append(result.fail_msg)
6817 dev_path = result.payload
6818 result = self.rpc.call_blockdev_export(source_node, disk,
6819 target_node, dev_path,
6822 self.LogWarning("Can't copy data over for disk %d: %s",
6823 idx, result.fail_msg)
6824 errs.append(result.fail_msg)
6828 self.LogWarning("Some disks failed to copy, aborting")
6830 _RemoveDisks(self, instance, target_node=target_node)
6832 self.cfg.ReleaseDRBDMinors(instance.name)
6833 raise errors.OpExecError("Errors during disk copy: %s" %
6836 instance.primary_node = target_node
6837 self.cfg.Update(instance, feedback_fn)
6839 self.LogInfo("Removing the disks on the original node")
6840 _RemoveDisks(self, instance, target_node=source_node)
6842 # Only start the instance if it's marked as up
6843 if instance.admin_up:
6844 self.LogInfo("Starting instance %s on node %s",
6845 instance.name, target_node)
6847 disks_ok, _ = _AssembleInstanceDisks(self, instance,
6848 ignore_secondaries=True)
6850 _ShutdownInstanceDisks(self, instance)
6851 raise errors.OpExecError("Can't activate the instance's disks")
6853 result = self.rpc.call_instance_start(target_node, instance,
6855 msg = result.fail_msg
6857 _ShutdownInstanceDisks(self, instance)
6858 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6859 (instance.name, target_node, msg))
6862 class LUNodeMigrate(LogicalUnit):
6863 """Migrate all instances from a node.
6866 HPATH = "node-migrate"
6867 HTYPE = constants.HTYPE_NODE
6870 def CheckArguments(self):
6873 def ExpandNames(self):
6874 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6876 self.share_locks = _ShareAll()
6877 self.needed_locks = {
6878 locking.LEVEL_NODE: [self.op.node_name],
6881 def BuildHooksEnv(self):
6884 This runs on the master, the primary and all the secondaries.
6888 "NODE_NAME": self.op.node_name,
6891 def BuildHooksNodes(self):
6892 """Build hooks nodes.
6895 nl = [self.cfg.GetMasterNode()]
6898 def CheckPrereq(self):
6901 def Exec(self, feedback_fn):
6902 # Prepare jobs for migration instances
6904 [opcodes.OpInstanceMigrate(instance_name=inst.name,
6907 iallocator=self.op.iallocator,
6908 target_node=self.op.target_node)]
6909 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
6912 # TODO: Run iallocator in this opcode and pass correct placement options to
6913 # OpInstanceMigrate. Since other jobs can modify the cluster between
6914 # running the iallocator and the actual migration, a good consistency model
6915 # will have to be found.
6917 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
6918 frozenset([self.op.node_name]))
6920 return ResultWithJobs(jobs)
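# Illustrative sketch only (not used anywhere): the value returned above is
# a list of lists of opcodes, one inner list (i.e. one job) per primary
# instance of the node; the instance names below are hypothetical.
def _ExampleNodeMigrateJobs():
  """Sketch: one single-opcode job per instance, as in Exec above."""
  names = ["inst1.example.com", "inst2.example.com"]
  return ResultWithJobs([[opcodes.OpInstanceMigrate(instance_name=name)]
                         for name in names])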
6923 class TLMigrateInstance(Tasklet):
6924 """Tasklet class for instance migration.
6927 @ivar live: whether the migration will be done live or non-live;
6928      this variable is initialized only after CheckPrereq has run
6929 @type cleanup: boolean
6930  @ivar cleanup: Whether we clean up from a failed migration
6931 @type iallocator: string
6932 @ivar iallocator: The iallocator used to determine target_node
6933 @type target_node: string
6934 @ivar target_node: If given, the target_node to reallocate the instance to
6935 @type failover: boolean
6936 @ivar failover: Whether operation results in failover or migration
6937 @type fallback: boolean
6938 @ivar fallback: Whether fallback to failover is allowed if migration not
6940 @type ignore_consistency: boolean
6941  @ivar ignore_consistency: Whether we should ignore consistency between source
6943 @type shutdown_timeout: int
6944  @ivar shutdown_timeout: In case of failover, the timeout used for the shutdown
6947 def __init__(self, lu, instance_name, cleanup=False,
6948 failover=False, fallback=False,
6949 ignore_consistency=False,
6950 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
6951 """Initializes this class.
6954 Tasklet.__init__(self, lu)
6957 self.instance_name = instance_name
6958 self.cleanup = cleanup
6959 self.live = False # will be overridden later
6960 self.failover = failover
6961 self.fallback = fallback
6962 self.ignore_consistency = ignore_consistency
6963 self.shutdown_timeout = shutdown_timeout
6965 def CheckPrereq(self):
6966 """Check prerequisites.
6968 This checks that the instance is in the cluster.
6971 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
6972 instance = self.cfg.GetInstanceInfo(instance_name)
6973 assert instance is not None
6974 self.instance = instance
6976 if (not self.cleanup and not instance.admin_up and not self.failover and
6978 self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
6980 self.failover = True
6982 if instance.disk_template not in constants.DTS_MIRRORED:
6987 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
6988 " %s" % (instance.disk_template, text),
6991 if instance.disk_template in constants.DTS_EXT_MIRROR:
6992 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
6994 if self.lu.op.iallocator:
6995 self._RunAllocator()
6997        # We set self.target_node as it is required by
6999 self.target_node = self.lu.op.target_node
7001 # self.target_node is already populated, either directly or by the
7003 target_node = self.target_node
7004 if self.target_node == instance.primary_node:
7005 raise errors.OpPrereqError("Cannot migrate instance %s"
7006 " to its primary (%s)" %
7007 (instance.name, instance.primary_node))
7009 if len(self.lu.tasklets) == 1:
7010 # It is safe to release locks only when we're the only tasklet
7012 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7013 keep=[instance.primary_node, self.target_node])
7016 secondary_nodes = instance.secondary_nodes
7017 if not secondary_nodes:
7018 raise errors.ConfigurationError("No secondary node but using"
7019 " %s disk template" %
7020 instance.disk_template)
7021 target_node = secondary_nodes[0]
7022 if self.lu.op.iallocator or (self.lu.op.target_node and
7023 self.lu.op.target_node != target_node):
7025 text = "failed over"
7028 raise errors.OpPrereqError("Instances with disk template %s cannot"
7029 " be %s to arbitrary nodes"
7030 " (neither an iallocator nor a target"
7031 " node can be passed)" %
7032 (instance.disk_template, text),
7035 i_be = self.cfg.GetClusterInfo().FillBE(instance)
7037 # check memory requirements on the secondary node
7038 if not self.failover or instance.admin_up:
7039 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7040 instance.name, i_be[constants.BE_MEMORY],
7041 instance.hypervisor)
7043 self.lu.LogInfo("Not checking memory on the secondary node as"
7044 " instance will not be started")
7046    # check bridge existence
7047 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7049 if not self.cleanup:
7050 _CheckNodeNotDrained(self.lu, target_node)
7051 if not self.failover:
7052 result = self.rpc.call_instance_migratable(instance.primary_node,
7054 if result.fail_msg and self.fallback:
7055 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7057 self.failover = True
7059 result.Raise("Can't migrate, please use failover",
7060 prereq=True, ecode=errors.ECODE_STATE)
7062 assert not (self.failover and self.cleanup)
7064 if not self.failover:
7065 if self.lu.op.live is not None and self.lu.op.mode is not None:
7066 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7067 " parameters are accepted",
7069 if self.lu.op.live is not None:
7071 self.lu.op.mode = constants.HT_MIGRATION_LIVE
7073 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7074 # reset the 'live' parameter to None so that repeated
7075 # invocations of CheckPrereq do not raise an exception
7076 self.lu.op.live = None
7077 elif self.lu.op.mode is None:
7078 # read the default value from the hypervisor
7079 i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7081 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7083 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7085 # Failover is never live
7088 def _RunAllocator(self):
7089 """Run the allocator based on input opcode.
7092 ial = IAllocator(self.cfg, self.rpc,
7093 mode=constants.IALLOCATOR_MODE_RELOC,
7094 name=self.instance_name,
7095 # TODO See why hail breaks with a single node below
7096 relocate_from=[self.instance.primary_node,
7097 self.instance.primary_node],
7100 ial.Run(self.lu.op.iallocator)
7103 raise errors.OpPrereqError("Can't compute nodes using"
7104 " iallocator '%s': %s" %
7105 (self.lu.op.iallocator, ial.info),
7107 if len(ial.result) != ial.required_nodes:
7108 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7109 " of nodes (%s), required %s" %
7110 (self.lu.op.iallocator, len(ial.result),
7111 ial.required_nodes), errors.ECODE_FAULT)
7112 self.target_node = ial.result[0]
7113 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7114 self.instance_name, self.lu.op.iallocator,
7115 utils.CommaJoin(ial.result))
7117 def _WaitUntilSync(self):
7118 """Poll with custom rpc for disk sync.
7120 This uses our own step-based rpc call.
7123 self.feedback_fn("* wait until resync is done")
7127 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7129 self.instance.disks)
7131 for node, nres in result.items():
7132 nres.Raise("Cannot resync disks on node %s" % node)
7133 node_done, node_percent = nres.payload
7134 all_done = all_done and node_done
7135 if node_percent is not None:
7136 min_percent = min(min_percent, node_percent)
7138 if min_percent < 100:
7139 self.feedback_fn(" - progress: %.1f%%" % min_percent)
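    # Illustrative sketch only: the aggregation in the polling loop above
    # reduces the per-node (done, sync_percent) payloads to a single
    # (all_done, min_percent) pair, e.g.:
    #
    #   all_done = True
    #   min_percent = 100
    #   for node_done, node_percent in payloads:
    #     all_done = all_done and node_done
    #     if node_percent is not None:
    #       min_percent = min(min_percent, node_percent)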
7142 def _EnsureSecondary(self, node):
7143 """Demote a node to secondary.
7146 self.feedback_fn("* switching node %s to secondary mode" % node)
7148 for dev in self.instance.disks:
7149 self.cfg.SetDiskID(dev, node)
7151 result = self.rpc.call_blockdev_close(node, self.instance.name,
7152 self.instance.disks)
7153 result.Raise("Cannot change disk to secondary on node %s" % node)
7155 def _GoStandalone(self):
7156 """Disconnect from the network.
7159 self.feedback_fn("* changing into standalone mode")
7160 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7161 self.instance.disks)
7162 for node, nres in result.items():
7163 nres.Raise("Cannot disconnect disks node %s" % node)
7165 def _GoReconnect(self, multimaster):
7166 """Reconnect to the network.
7172 msg = "single-master"
7173 self.feedback_fn("* changing disks into %s mode" % msg)
7174 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7175 self.instance.disks,
7176 self.instance.name, multimaster)
7177 for node, nres in result.items():
7178 nres.Raise("Cannot change disks config on node %s" % node)
7180 def _ExecCleanup(self):
7181 """Try to cleanup after a failed migration.
7183 The cleanup is done by:
7184 - check that the instance is running only on one node
7185 (and update the config if needed)
7186 - change disks on its secondary node to secondary
7187 - wait until disks are fully synchronized
7188 - disconnect from the network
7189 - change disks into single-master mode
7190 - wait again until disks are fully synchronized
7193 instance = self.instance
7194 target_node = self.target_node
7195 source_node = self.source_node
7197 # check running on only one node
7198 self.feedback_fn("* checking where the instance actually runs"
7199 " (if this hangs, the hypervisor might be in"
7201 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7202 for node, result in ins_l.items():
7203 result.Raise("Can't contact node %s" % node)
7205 runningon_source = instance.name in ins_l[source_node].payload
7206 runningon_target = instance.name in ins_l[target_node].payload
7208 if runningon_source and runningon_target:
7209 raise errors.OpExecError("Instance seems to be running on two nodes,"
7210 " or the hypervisor is confused; you will have"
7211 " to ensure manually that it runs only on one"
7212 " and restart this operation")
7214 if not (runningon_source or runningon_target):
7215 raise errors.OpExecError("Instance does not seem to be running at all;"
7216 " in this case it's safer to repair by"
7217 " running 'gnt-instance stop' to ensure disk"
7218 " shutdown, and then restarting it")
7220 if runningon_target:
7221 # the migration has actually succeeded, we need to update the config
7222 self.feedback_fn("* instance running on secondary node (%s),"
7223 " updating config" % target_node)
7224 instance.primary_node = target_node
7225 self.cfg.Update(instance, self.feedback_fn)
7226 demoted_node = source_node
7228 self.feedback_fn("* instance confirmed to be running on its"
7229 " primary node (%s)" % source_node)
7230 demoted_node = target_node
7232 if instance.disk_template in constants.DTS_INT_MIRROR:
7233 self._EnsureSecondary(demoted_node)
7235 self._WaitUntilSync()
7236 except errors.OpExecError:
7237        # we ignore errors here, since if the device is standalone, it
7238 # won't be able to sync
7240 self._GoStandalone()
7241 self._GoReconnect(False)
7242 self._WaitUntilSync()
7244 self.feedback_fn("* done")
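    # Illustrative sketch only: the checks at the top of _ExecCleanup above
    # reduce the two "is it running here?" answers to one of three outcomes,
    # roughly:
    #
    #   if runningon_source and runningon_target:
    #     raise errors.OpExecError("running on both nodes, refusing to guess")
    #   if not (runningon_source or runningon_target):
    #     raise errors.OpExecError("not running anywhere, repair manually")
    #   new_primary = target_node if runningon_target else source_node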
7246 def _RevertDiskStatus(self):
7247 """Try to revert the disk status after a failed migration.
7250 target_node = self.target_node
7251 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7255 self._EnsureSecondary(target_node)
7256 self._GoStandalone()
7257 self._GoReconnect(False)
7258 self._WaitUntilSync()
7259 except errors.OpExecError, err:
7260 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7261 " please try to recover the instance manually;"
7262 " error '%s'" % str(err))
7264 def _AbortMigration(self):
7265 """Call the hypervisor code to abort a started migration.
7268 instance = self.instance
7269 target_node = self.target_node
7270 migration_info = self.migration_info
7272 abort_result = self.rpc.call_finalize_migration(target_node,
7276 abort_msg = abort_result.fail_msg
7278 logging.error("Aborting migration failed on target node %s: %s",
7279 target_node, abort_msg)
7280    # Don't raise an exception here, as we still have to try to revert the
7281 # disk status, even if this step failed.
7283 def _ExecMigration(self):
7284 """Migrate an instance.
7286 The migrate is done by:
7287 - change the disks into dual-master mode
7288 - wait until disks are fully synchronized again
7289 - migrate the instance
7290 - change disks on the new secondary node (the old primary) to secondary
7291 - wait until disks are fully synchronized
7292 - change disks into single-master mode
7295 instance = self.instance
7296 target_node = self.target_node
7297 source_node = self.source_node
7299 self.feedback_fn("* checking disk consistency between source and target")
7300 for dev in instance.disks:
7301 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7302 raise errors.OpExecError("Disk %s is degraded or not fully"
7303 " synchronized on target node,"
7304 " aborting migration" % dev.iv_name)
7306 # First get the migration information from the remote node
7307 result = self.rpc.call_migration_info(source_node, instance)
7308 msg = result.fail_msg
7310 log_err = ("Failed fetching source migration information from %s: %s" %
7312 logging.error(log_err)
7313 raise errors.OpExecError(log_err)
7315 self.migration_info = migration_info = result.payload
7317 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7318 # Then switch the disks to master/master mode
7319 self._EnsureSecondary(target_node)
7320 self._GoStandalone()
7321 self._GoReconnect(True)
7322 self._WaitUntilSync()
7324 self.feedback_fn("* preparing %s to accept the instance" % target_node)
7325 result = self.rpc.call_accept_instance(target_node,
7328 self.nodes_ip[target_node])
7330 msg = result.fail_msg
7332 logging.error("Instance pre-migration failed, trying to revert"
7333 " disk status: %s", msg)
7334 self.feedback_fn("Pre-migration failed, aborting")
7335 self._AbortMigration()
7336 self._RevertDiskStatus()
7337 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7338 (instance.name, msg))
7340 self.feedback_fn("* migrating instance to %s" % target_node)
7341 result = self.rpc.call_instance_migrate(source_node, instance,
7342 self.nodes_ip[target_node],
7344 msg = result.fail_msg
7346 logging.error("Instance migration failed, trying to revert"
7347 " disk status: %s", msg)
7348 self.feedback_fn("Migration failed, aborting")
7349 self._AbortMigration()
7350 self._RevertDiskStatus()
7351 raise errors.OpExecError("Could not migrate instance %s: %s" %
7352 (instance.name, msg))
7354 instance.primary_node = target_node
7355 # distribute new instance config to the other nodes
7356 self.cfg.Update(instance, self.feedback_fn)
7358 result = self.rpc.call_finalize_migration(target_node,
7362 msg = result.fail_msg
7364 logging.error("Instance migration succeeded, but finalization failed:"
7366 raise errors.OpExecError("Could not finalize instance migration: %s" %
7369 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7370 self._EnsureSecondary(source_node)
7371 self._WaitUntilSync()
7372 self._GoStandalone()
7373 self._GoReconnect(False)
7374 self._WaitUntilSync()
7376 self.feedback_fn("* done")
7378 def _ExecFailover(self):
7379 """Failover an instance.
7381 The failover is done by shutting it down on its present node and
7382 starting it on the secondary.
7385 instance = self.instance
7386 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7388 source_node = instance.primary_node
7389 target_node = self.target_node
7391 if instance.admin_up:
7392 self.feedback_fn("* checking disk consistency between source and target")
7393 for dev in instance.disks:
7394 # for drbd, these are drbd over lvm
7395 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7396 if primary_node.offline:
7397 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7399 (primary_node.name, dev.iv_name, target_node))
7400 elif not self.ignore_consistency:
7401 raise errors.OpExecError("Disk %s is degraded on target node,"
7402 " aborting failover" % dev.iv_name)
7404 self.feedback_fn("* not checking disk consistency as instance is not"
7407 self.feedback_fn("* shutting down instance on source node")
7408 logging.info("Shutting down instance %s on node %s",
7409 instance.name, source_node)
7411 result = self.rpc.call_instance_shutdown(source_node, instance,
7412 self.shutdown_timeout)
7413 msg = result.fail_msg
7415 if self.ignore_consistency or primary_node.offline:
7416 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7417 " proceeding anyway; please make sure node"
7418 " %s is down; error details: %s",
7419 instance.name, source_node, source_node, msg)
7421 raise errors.OpExecError("Could not shutdown instance %s on"
7423 (instance.name, source_node, msg))
7425 self.feedback_fn("* deactivating the instance's disks on source node")
7426 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
7427 raise errors.OpExecError("Can't shut down the instance's disks")
7429 instance.primary_node = target_node
7430 # distribute new instance config to the other nodes
7431 self.cfg.Update(instance, self.feedback_fn)
7433 # Only start the instance if it's marked as up
7434 if instance.admin_up:
7435 self.feedback_fn("* activating the instance's disks on target node %s" %
7437 logging.info("Starting instance %s on node %s",
7438 instance.name, target_node)
7440 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
7441 ignore_secondaries=True)
7443 _ShutdownInstanceDisks(self.lu, instance)
7444 raise errors.OpExecError("Can't activate the instance's disks")
7446 self.feedback_fn("* starting the instance on the target node %s" %
7448 result = self.rpc.call_instance_start(target_node, instance, None, None,
7450 msg = result.fail_msg
7452 _ShutdownInstanceDisks(self.lu, instance)
7453 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7454 (instance.name, target_node, msg))
7456 def Exec(self, feedback_fn):
7457 """Perform the migration.
7460 self.feedback_fn = feedback_fn
7461 self.source_node = self.instance.primary_node
7463 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7464 if self.instance.disk_template in constants.DTS_INT_MIRROR:
7465 self.target_node = self.instance.secondary_nodes[0]
7466 # Otherwise self.target_node has been populated either
7467 # directly, or through an iallocator.
7469 self.all_nodes = [self.source_node, self.target_node]
7470 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
7471 in self.cfg.GetMultiNodeInfo(self.all_nodes))
7474 feedback_fn("Failover instance %s" % self.instance.name)
7475 self._ExecFailover()
7477 feedback_fn("Migrating instance %s" % self.instance.name)
7480 return self._ExecCleanup()
7482 return self._ExecMigration()
7485 def _CreateBlockDev(lu, node, instance, device, force_create,
7487 """Create a tree of block devices on a given node.
7489 If this device type has to be created on secondaries, create it and
7492 If not, just recurse to children keeping the same 'force' value.
7494 @param lu: the lu on whose behalf we execute
7495 @param node: the node on which to create the device
7496 @type instance: L{objects.Instance}
7497 @param instance: the instance which owns the device
7498 @type device: L{objects.Disk}
7499 @param device: the device to create
7500 @type force_create: boolean
7501 @param force_create: whether to force creation of this device; this
7502      will be changed to True whenever we find a device which has the
7503      CreateOnSecondary() attribute
7504 @param info: the extra 'metadata' we should attach to the device
7505 (this will be represented as a LVM tag)
7506 @type force_open: boolean
7507  @param force_open: this parameter will be passed to the
7508 L{backend.BlockdevCreate} function where it specifies
7509 whether we run on primary or not, and it affects both
7510      the child assembly and the device's own Open() execution
7513 if device.CreateOnSecondary():
7517 for child in device.children:
7518 _CreateBlockDev(lu, node, instance, child, force_create,
7521 if not force_create:
7524 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
7527 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
7528 """Create a single block device on a given node.
7530 This will not recurse over children of the device, so they must be
7533 @param lu: the lu on whose behalf we execute
7534 @param node: the node on which to create the device
7535 @type instance: L{objects.Instance}
7536 @param instance: the instance which owns the device
7537 @type device: L{objects.Disk}
7538 @param device: the device to create
7539 @param info: the extra 'metadata' we should attach to the device
7540 (this will be represented as a LVM tag)
7541 @type force_open: boolean
7542  @param force_open: this parameter will be passed to the
7543 L{backend.BlockdevCreate} function where it specifies
7544 whether we run on primary or not, and it affects both
7545 the child assembly and the device own Open() execution
7548 lu.cfg.SetDiskID(device, node)
7549 result = lu.rpc.call_blockdev_create(node, device, device.size,
7550 instance.name, force_open, info)
7551 result.Raise("Can't create block device %s on"
7552 " node %s for instance %s" % (device, node, instance.name))
7553 if device.physical_id is None:
7554 device.physical_id = result.payload
7557 def _GenerateUniqueNames(lu, exts):
7558 """Generate a suitable LV name.
7560 This will generate a logical volume name for the given instance.
7565 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
7566 results.append("%s%s" % (new_id, val))
7570 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
7571 iv_name, p_minor, s_minor):
7572 """Generate a drbd8 device complete with its children.
7575 assert len(vgnames) == len(names) == 2
7576 port = lu.cfg.AllocatePort()
7577 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
7578 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
7579 logical_id=(vgnames[0], names[0]))
7580 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7581 logical_id=(vgnames[1], names[1]))
7582 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
7583 logical_id=(primary, secondary, port,
7586 children=[dev_data, dev_meta],
7591 def _GenerateDiskTemplate(lu, template_name,
7592 instance_name, primary_node,
7593 secondary_nodes, disk_info,
7594 file_storage_dir, file_driver,
7595 base_index, feedback_fn):
7596 """Generate the entire disk layout for a given template type.
7599 # TODO: compute space requirements
7601 vgname = lu.cfg.GetVGName()
7602 disk_count = len(disk_info)
7604 if template_name == constants.DT_DISKLESS:
7606 elif template_name == constants.DT_PLAIN:
7607 if len(secondary_nodes) != 0:
7608 raise errors.ProgrammerError("Wrong template configuration")
7610 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7611 for i in range(disk_count)])
7612 for idx, disk in enumerate(disk_info):
7613 disk_index = idx + base_index
7614 vg = disk.get(constants.IDISK_VG, vgname)
7615 feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7616 disk_dev = objects.Disk(dev_type=constants.LD_LV,
7617 size=disk[constants.IDISK_SIZE],
7618 logical_id=(vg, names[idx]),
7619 iv_name="disk/%d" % disk_index,
7620 mode=disk[constants.IDISK_MODE])
7621 disks.append(disk_dev)
7622 elif template_name == constants.DT_DRBD8:
7623 if len(secondary_nodes) != 1:
7624 raise errors.ProgrammerError("Wrong template configuration")
7625 remote_node = secondary_nodes[0]
7626 minors = lu.cfg.AllocateDRBDMinor(
7627 [primary_node, remote_node] * len(disk_info), instance_name)
7630 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7631 for i in range(disk_count)]):
7632 names.append(lv_prefix + "_data")
7633 names.append(lv_prefix + "_meta")
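# Illustration (the exact unique-id format is assumed): for two disks this
# allocates four DRBD minors, a (primary, secondary) pair per disk, and four
# LVs such as "<unique-id>.disk0_data"/"<unique-id>.disk0_meta"; each
# data/meta pair becomes the children of one DRBD8 device built by
# _GenerateDRBD8Branch above (the meta LV being 128 MB).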
7634 for idx, disk in enumerate(disk_info):
7635 disk_index = idx + base_index
7636 data_vg = disk.get(constants.IDISK_VG, vgname)
7637 meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
7638 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7639 disk[constants.IDISK_SIZE],
7641 names[idx * 2:idx * 2 + 2],
7642 "disk/%d" % disk_index,
7643 minors[idx * 2], minors[idx * 2 + 1])
7644 disk_dev.mode = disk[constants.IDISK_MODE]
7645 disks.append(disk_dev)
7646 elif template_name == constants.DT_FILE:
7647 if len(secondary_nodes) != 0:
7648 raise errors.ProgrammerError("Wrong template configuration")
7650 opcodes.RequireFileStorage()
7652 for idx, disk in enumerate(disk_info):
7653 disk_index = idx + base_index
7654 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7655 size=disk[constants.IDISK_SIZE],
7656 iv_name="disk/%d" % disk_index,
7657 logical_id=(file_driver,
7658 "%s/disk%d" % (file_storage_dir,
7660 mode=disk[constants.IDISK_MODE])
7661 disks.append(disk_dev)
7662 elif template_name == constants.DT_SHARED_FILE:
7663 if len(secondary_nodes) != 0:
7664 raise errors.ProgrammerError("Wrong template configuration")
7666 opcodes.RequireSharedFileStorage()
7668 for idx, disk in enumerate(disk_info):
7669 disk_index = idx + base_index
7670 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7671 size=disk[constants.IDISK_SIZE],
7672 iv_name="disk/%d" % disk_index,
7673 logical_id=(file_driver,
7674 "%s/disk%d" % (file_storage_dir,
7676 mode=disk[constants.IDISK_MODE])
7677 disks.append(disk_dev)
7678 elif template_name == constants.DT_BLOCK:
7679 if len(secondary_nodes) != 0:
7680 raise errors.ProgrammerError("Wrong template configuration")
7682 for idx, disk in enumerate(disk_info):
7683 disk_index = idx + base_index
7684 disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7685 size=disk[constants.IDISK_SIZE],
7686 logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7687 disk[constants.IDISK_ADOPT]),
7688 iv_name="disk/%d" % disk_index,
7689 mode=disk[constants.IDISK_MODE])
7690 disks.append(disk_dev)
7693 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
7697 def _GetInstanceInfoText(instance):
7698 """Compute that text that should be added to the disk's metadata.
7701 return "originstname+%s" % instance.name
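# Example: for an instance named "inst1.example.com" the resulting LVM tag is
# "originstname+inst1.example.com".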
7704 def _CalcEta(time_taken, written, total_size):
7705 """Calculates the ETA based on size written and total size.
7707 @param time_taken: The time taken so far
7708 @param written: amount written so far
7709 @param total_size: The total size of data to be written
7710 @return: The remaining time in seconds
7713 avg_time = time_taken / float(written)
7714 return (total_size - written) * avg_time
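# Worked example: _CalcEta(30.0, 512, 2048) computes avg_time = 30.0 / 512
# (roughly 0.059 seconds per unit written), so the remaining (2048 - 512)
# units yield an ETA of (2048 - 512) * 30.0 / 512 = 90 seconds.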
7717 def _WipeDisks(lu, instance):
7718 """Wipes instance disks.
7720 @type lu: L{LogicalUnit}
7721 @param lu: the logical unit on whose behalf we execute
7722 @type instance: L{objects.Instance}
7723 @param instance: the instance whose disks we should wipe
7724 @return: the success of the wipe
7727 node = instance.primary_node
7729 for device in instance.disks:
7730 lu.cfg.SetDiskID(device, node)
7732 logging.info("Pause sync of instance %s disks", instance.name)
7733 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
7735 for idx, success in enumerate(result.payload):
7737 logging.warn("pause-sync of instance %s for disk %d failed",
7741 for idx, device in enumerate(instance.disks):
7742 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk size,
7743 # but at most MAX_WIPE_CHUNK
7744 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
7745 constants.MIN_WIPE_CHUNK_PERCENT)
7746 # we _must_ make this an int, otherwise rounding errors will occur
7748 wipe_chunk_size = int(wipe_chunk_size)
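# Illustration (the constants' values are assumed here, not authoritative):
# with MIN_WIPE_CHUNK_PERCENT = 10 and MAX_WIPE_CHUNK = 1024 MiB, a 2048 MiB
# disk is wiped in chunks of min(1024, 2048 * 10 / 100.0) = 204.8 MiB,
# truncated to 204, while very large disks are capped at 1024 MiB per chunk.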
7750 lu.LogInfo("* Wiping disk %d", idx)
7751 logging.info("Wiping disk %d for instance %s, node %s using"
7752 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
7757 start_time = time.time()
7759 while offset < size:
7760 wipe_size = min(wipe_chunk_size, size - offset)
7761 logging.debug("Wiping disk %d, offset %s, chunk %s",
7762 idx, offset, wipe_size)
7763 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
7764 result.Raise("Could not wipe disk %d at offset %d for size %d" %
7765 (idx, offset, wipe_size))
7768 if now - last_output >= 60:
7769 eta = _CalcEta(now - start_time, offset, size)
7770 lu.LogInfo(" - done: %.1f%% ETA: %s" %
7771 (offset / float(size) * 100, utils.FormatSeconds(eta)))
7774 logging.info("Resume sync of instance %s disks", instance.name)
7776 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
7778 for idx, success in enumerate(result.payload):
7780 lu.LogWarning("Resume sync of disk %d failed, please have a"
7781 " look at the status and troubleshoot the issue", idx)
7782 logging.warn("resume-sync of instance %s for disk %d failed",
7786 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
7787 """Create all disks for an instance.
7789 This abstracts away some work from AddInstance.
7791 @type lu: L{LogicalUnit}
7792 @param lu: the logical unit on whose behalf we execute
7793 @type instance: L{objects.Instance}
7794 @param instance: the instance whose disks we should create
7796 @param to_skip: list of indices to skip
7797 @type target_node: string
7798 @param target_node: if passed, overrides the target node for creation
7800 @return: the success of the creation
7803 info = _GetInstanceInfoText(instance)
7804 if target_node is None:
7805 pnode = instance.primary_node
7806 all_nodes = instance.all_nodes
7811 if instance.disk_template in constants.DTS_FILEBASED:
7812 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7813 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
7815 result.Raise("Failed to create directory '%s' on"
7816 " node %s" % (file_storage_dir, pnode))
7818 # Note: this needs to be kept in sync with adding of disks in
7819 # LUInstanceSetParams
7820 for idx, device in enumerate(instance.disks):
7821 if to_skip and idx in to_skip:
7823 logging.info("Creating volume %s for instance %s",
7824 device.iv_name, instance.name)
7826 for node in all_nodes:
7827 f_create = node == pnode
7828 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
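# Note: f_create (and force_open) is True only on the primary node, so on
# secondary nodes only those devices get created that ask for it via
# CreateOnSecondary() (see _CreateBlockDev above).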
7831 def _RemoveDisks(lu, instance, target_node=None):
7832 """Remove all disks for an instance.
7834 This abstracts away some work from `AddInstance()` and
7835 `RemoveInstance()`. Note that in case some of the devices couldn't
7836 be removed, the removal will continue with the other ones (compare
7837 with `_CreateDisks()`).
7839 @type lu: L{LogicalUnit}
7840 @param lu: the logical unit on whose behalf we execute
7841 @type instance: L{objects.Instance}
7842 @param instance: the instance whose disks we should remove
7843 @type target_node: string
7844 @param target_node: used to override the node on which to remove the disks
7846 @return: the success of the removal
7849 logging.info("Removing block devices for instance %s", instance.name)
7852 for device in instance.disks:
7854 edata = [(target_node, device)]
7856 edata = device.ComputeNodeTree(instance.primary_node)
7857 for node, disk in edata:
7858 lu.cfg.SetDiskID(disk, node)
7859 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
7861 lu.LogWarning("Could not remove block device %s on node %s,"
7862 " continuing anyway: %s", device.iv_name, node, msg)
7865 if instance.disk_template == constants.DT_FILE:
7866 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7870 tgt = instance.primary_node
7871 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
7873 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
7874 file_storage_dir, instance.primary_node, result.fail_msg)
7880 def _ComputeDiskSizePerVG(disk_template, disks):
7881 """Compute disk size requirements in the volume group
7884 def _compute(disks, payload):
7885 """Universal algorithm.
7890 vgs[disk[constants.IDISK_VG]] = \
7891 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
7895 # Required free disk space as a function of disk and swap space
7897 constants.DT_DISKLESS: {},
7898 constants.DT_PLAIN: _compute(disks, 0),
7899 # 128 MB is added per disk for DRBD metadata
7900 constants.DT_DRBD8: _compute(disks, 128),
7901 constants.DT_FILE: {},
7902 constants.DT_SHARED_FILE: {},
7905 if disk_template not in req_size_dict:
7906 raise errors.ProgrammerError("Disk template '%s' size requirement"
7907 " is unknown" % disk_template)
7909 return req_size_dict[disk_template]
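# Illustrative example: for two DRBD8 disks of 1024 MiB and 2048 MiB in
# volume group "xenvg", the per-VG requirement is
# {"xenvg": (1024 + 128) + (2048 + 128)} = {"xenvg": 3328}.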
7912 def _ComputeDiskSize(disk_template, disks):
7913 """Compute disk size requirements in the volume group
7916 # Required free disk space as a function of disk and swap space
7918 constants.DT_DISKLESS: None,
7919 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
7920 # 128 MB is added per disk for DRBD metadata
7921 constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
7922 constants.DT_FILE: None,
7923 constants.DT_SHARED_FILE: 0,
7924 constants.DT_BLOCK: 0,
7927 if disk_template not in req_size_dict:
7928 raise errors.ProgrammerError("Disk template '%s' size requirement"
7929 " is unknown" % disk_template)
7931 return req_size_dict[disk_template]
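# Illustrative example: for the same two disks (1024 and 2048 MiB),
# _ComputeDiskSize returns 3072 for DT_PLAIN, 3328 for DT_DRBD8 (128 MiB of
# metadata per disk), None for DT_DISKLESS and DT_FILE, and 0 for
# DT_SHARED_FILE and DT_BLOCK.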
7934 def _FilterVmNodes(lu, nodenames):
7935 """Filters out non-vm_capable nodes from a list.
7937 @type lu: L{LogicalUnit}
7938 @param lu: the logical unit for which we check
7939 @type nodenames: list
7940 @param nodenames: the list of nodes on which we should check
7942 @return: the list of vm-capable nodes
7945 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
7946 return [name for name in nodenames if name not in vm_nodes]
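# Example: if "node2" is marked as not vm_capable, _FilterVmNodes(lu,
# ["node1", "node2"]) returns ["node1"].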
7949 def _CheckHVParams(lu, nodenames, hvname, hvparams):
7950 """Hypervisor parameter validation.
7952 This function abstracts the hypervisor parameter validation to be
7953 used in both instance create and instance modify.
7955 @type lu: L{LogicalUnit}
7956 @param lu: the logical unit for which we check
7957 @type nodenames: list
7958 @param nodenames: the list of nodes on which we should check
7959 @type hvname: string
7960 @param hvname: the name of the hypervisor we should use
7961 @type hvparams: dict
7962 @param hvparams: the parameters which we need to check
7963 @raise errors.OpPrereqError: if the parameters are not valid
7966 nodenames = _FilterVmNodes(lu, nodenames)
7967 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
7970 for node in nodenames:
7974 info.Raise("Hypervisor parameter validation failed on node %s" % node)
7977 def _CheckOSParams(lu, required, nodenames, osname, osparams):
7978 """OS parameters validation.
7980 @type lu: L{LogicalUnit}
7981 @param lu: the logical unit for which we check
7982 @type required: boolean
7983 @param required: whether the validation should fail if the OS is not found
7985 @type nodenames: list
7986 @param nodenames: the list of nodes on which we should check
7987 @type osname: string
7988 @param osname: the name of the OS we should use
7989 @type osparams: dict
7990 @param osparams: the parameters which we need to check
7991 @raise errors.OpPrereqError: if the parameters are not valid
7994 nodenames = _FilterVmNodes(lu, nodenames)
7995 result = lu.rpc.call_os_validate(required, nodenames, osname,
7996 [constants.OS_VALIDATE_PARAMETERS],
7998 for node, nres in result.items():
7999 # we don't check for offline cases since this should be run only
8000 # against the master node and/or an instance's nodes
8001 nres.Raise("OS Parameters validation failed on node %s" % node)
8002 if not nres.payload:
8003 lu.LogInfo("OS %s not found on node %s, validation skipped",
8007 class LUInstanceCreate(LogicalUnit):
8008 """Create an instance.
8011 HPATH = "instance-add"
8012 HTYPE = constants.HTYPE_INSTANCE
8015 def CheckArguments(self):
8019 # do not require name_check to ease forward/backward compatibility
8021 if self.op.no_install and self.op.start:
8022 self.LogInfo("No-installation mode selected, disabling startup")
8023 self.op.start = False
8024 # validate/normalize the instance name
8025 self.op.instance_name = \
8026 netutils.Hostname.GetNormalizedName(self.op.instance_name)
8028 if self.op.ip_check and not self.op.name_check:
8029 # TODO: make the ip check more flexible and not depend on the name check
8030 raise errors.OpPrereqError("Cannot do IP address check without a name"
8031 " check", errors.ECODE_INVAL)
8033 # check nics' parameter names
8034 for nic in self.op.nics:
8035 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8037 # check disks: parameter names and consistent adopt/no-adopt strategy
8038 has_adopt = has_no_adopt = False
8039 for disk in self.op.disks:
8040 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8041 if constants.IDISK_ADOPT in disk:
8045 if has_adopt and has_no_adopt:
8046 raise errors.OpPrereqError("Either all disks are adopted or none is",
8049 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8050 raise errors.OpPrereqError("Disk adoption is not supported for the"
8051 " '%s' disk template" %
8052 self.op.disk_template,
8054 if self.op.iallocator is not None:
8055 raise errors.OpPrereqError("Disk adoption not allowed with an"
8056 " iallocator script", errors.ECODE_INVAL)
8057 if self.op.mode == constants.INSTANCE_IMPORT:
8058 raise errors.OpPrereqError("Disk adoption not allowed for"
8059 " instance import", errors.ECODE_INVAL)
8061 if self.op.disk_template in constants.DTS_MUST_ADOPT:
8062 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8063 " but no 'adopt' parameter given" %
8064 self.op.disk_template,
8067 self.adopt_disks = has_adopt
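# Illustration (the literal values shown are assumed for clarity): a
# plain-template adoption request passes something like
#   disks=[{constants.IDISK_SIZE: 1024, constants.IDISK_ADOPT: "existing-lv"}]
# for every disk; mixing adopting and non-adopting disks is rejected above,
# as is adoption combined with an iallocator or an instance import.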
8069 # instance name verification
8070 if self.op.name_check:
8071 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8072 self.op.instance_name = self.hostname1.name
8073 # used in CheckPrereq for ip ping check
8074 self.check_ip = self.hostname1.ip
8076 self.check_ip = None
8078 # file storage checks
8079 if (self.op.file_driver and
8080 not self.op.file_driver in constants.FILE_DRIVER):
8081 raise errors.OpPrereqError("Invalid file driver name '%s'" %
8082 self.op.file_driver, errors.ECODE_INVAL)
8084 if self.op.disk_template == constants.DT_FILE:
8085 opcodes.RequireFileStorage()
8086 elif self.op.disk_template == constants.DT_SHARED_FILE:
8087 opcodes.RequireSharedFileStorage()
8089 ### Node/iallocator related checks
8090 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8092 if self.op.pnode is not None:
8093 if self.op.disk_template in constants.DTS_INT_MIRROR:
8094 if self.op.snode is None:
8095 raise errors.OpPrereqError("The networked disk templates need"
8096 " a mirror node", errors.ECODE_INVAL)
8098 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8100 self.op.snode = None
8102 self._cds = _GetClusterDomainSecret()
8104 if self.op.mode == constants.INSTANCE_IMPORT:
8105 # On import force_variant must be True, because if we forced it at
8106 # initial install, our only chance when importing it back is that it works again
8108 self.op.force_variant = True
8110 if self.op.no_install:
8111 self.LogInfo("No-installation mode has no effect during import")
8113 elif self.op.mode == constants.INSTANCE_CREATE:
8114 if self.op.os_type is None:
8115 raise errors.OpPrereqError("No guest OS specified",
8117 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
8118 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
8119 " installation" % self.op.os_type,
8121 if self.op.disk_template is None:
8122 raise errors.OpPrereqError("No disk template specified",
8125 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8126 # Check handshake to ensure both clusters have the same domain secret
8127 src_handshake = self.op.source_handshake
8128 if not src_handshake:
8129 raise errors.OpPrereqError("Missing source handshake",
8132 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
8135 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
8138 # Load and check source CA
8139 self.source_x509_ca_pem = self.op.source_x509_ca
8140 if not self.source_x509_ca_pem:
8141 raise errors.OpPrereqError("Missing source X509 CA",
8145 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
8147 except OpenSSL.crypto.Error, err:
8148 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
8149 (err, ), errors.ECODE_INVAL)
8151 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
8152 if errcode is not None:
8153 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
8156 self.source_x509_ca = cert
8158 src_instance_name = self.op.source_instance_name
8159 if not src_instance_name:
8160 raise errors.OpPrereqError("Missing source instance name",
8163 self.source_instance_name = \
8164 netutils.GetHostname(name=src_instance_name).name
8167 raise errors.OpPrereqError("Invalid instance creation mode %r" %
8168 self.op.mode, errors.ECODE_INVAL)
8170 def ExpandNames(self):
8171 """ExpandNames for CreateInstance.
8173 Figure out the right locks for instance creation.
8176 self.needed_locks = {}
8178 instance_name = self.op.instance_name
8179 # this is just a preventive check, but someone might still add this
8180 # instance in the meantime, and creation will fail at lock-add time
8181 if instance_name in self.cfg.GetInstanceList():
8182 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8183 instance_name, errors.ECODE_EXISTS)
8185 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
8187 if self.op.iallocator:
8188 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8190 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8191 nodelist = [self.op.pnode]
8192 if self.op.snode is not None:
8193 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8194 nodelist.append(self.op.snode)
8195 self.needed_locks[locking.LEVEL_NODE] = nodelist
8197 # in case of import, lock the source node too
8198 if self.op.mode == constants.INSTANCE_IMPORT:
8199 src_node = self.op.src_node
8200 src_path = self.op.src_path
8202 if src_path is None:
8203 self.op.src_path = src_path = self.op.instance_name
8205 if src_node is None:
8206 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8207 self.op.src_node = None
8208 if os.path.isabs(src_path):
8209 raise errors.OpPrereqError("Importing an instance from an absolute"
8210 " path requires a source node option",
8213 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8214 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8215 self.needed_locks[locking.LEVEL_NODE].append(src_node)
8216 if not os.path.isabs(src_path):
8217 self.op.src_path = src_path = \
8218 utils.PathJoin(constants.EXPORT_DIR, src_path)
8220 def _RunAllocator(self):
8221 """Run the allocator based on input opcode.
8224 nics = [n.ToDict() for n in self.nics]
8225 ial = IAllocator(self.cfg, self.rpc,
8226 mode=constants.IALLOCATOR_MODE_ALLOC,
8227 name=self.op.instance_name,
8228 disk_template=self.op.disk_template,
8231 vcpus=self.be_full[constants.BE_VCPUS],
8232 memory=self.be_full[constants.BE_MEMORY],
8235 hypervisor=self.op.hypervisor,
8238 ial.Run(self.op.iallocator)
8241 raise errors.OpPrereqError("Can't compute nodes using"
8242 " iallocator '%s': %s" %
8243 (self.op.iallocator, ial.info),
8245 if len(ial.result) != ial.required_nodes:
8246 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8247 " of nodes (%s), required %s" %
8248 (self.op.iallocator, len(ial.result),
8249 ial.required_nodes), errors.ECODE_FAULT)
8250 self.op.pnode = ial.result[0]
8251 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8252 self.op.instance_name, self.op.iallocator,
8253 utils.CommaJoin(ial.result))
8254 if ial.required_nodes == 2:
8255 self.op.snode = ial.result[1]
8257 def BuildHooksEnv(self):
8260 This runs on master, primary and secondary nodes of the instance.
8264 "ADD_MODE": self.op.mode,
8266 if self.op.mode == constants.INSTANCE_IMPORT:
8267 env["SRC_NODE"] = self.op.src_node
8268 env["SRC_PATH"] = self.op.src_path
8269 env["SRC_IMAGES"] = self.src_images
8271 env.update(_BuildInstanceHookEnv(
8272 name=self.op.instance_name,
8273 primary_node=self.op.pnode,
8274 secondary_nodes=self.secondaries,
8275 status=self.op.start,
8276 os_type=self.op.os_type,
8277 memory=self.be_full[constants.BE_MEMORY],
8278 vcpus=self.be_full[constants.BE_VCPUS],
8279 nics=_NICListToTuple(self, self.nics),
8280 disk_template=self.op.disk_template,
8281 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8282 for d in self.disks],
8285 hypervisor_name=self.op.hypervisor,
8291 def BuildHooksNodes(self):
8292 """Build hooks nodes.
8295 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8298 def _ReadExportInfo(self):
8299 """Reads the export information from disk.
8301 It will override the opcode source node and path with the actual
8302 information, if these two were not specified before.
8304 @return: the export information
8307 assert self.op.mode == constants.INSTANCE_IMPORT
8309 src_node = self.op.src_node
8310 src_path = self.op.src_path
8312 if src_node is None:
8313 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
8314 exp_list = self.rpc.call_export_list(locked_nodes)
8316 for node in exp_list:
8317 if exp_list[node].fail_msg:
8319 if src_path in exp_list[node].payload:
8321 self.op.src_node = src_node = node
8322 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
8326 raise errors.OpPrereqError("No export found for relative path %s" %
8327 src_path, errors.ECODE_INVAL)
8329 _CheckNodeOnline(self, src_node)
8330 result = self.rpc.call_export_info(src_node, src_path)
8331 result.Raise("No export or invalid export found in dir %s" % src_path)
8333 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8334 if not export_info.has_section(constants.INISECT_EXP):
8335 raise errors.ProgrammerError("Corrupted export config",
8336 errors.ECODE_ENVIRON)
8338 ei_version = export_info.get(constants.INISECT_EXP, "version")
8339 if (int(ei_version) != constants.EXPORT_VERSION):
8340 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8341 (ei_version, constants.EXPORT_VERSION),
8342 errors.ECODE_ENVIRON)
8345 def _ReadExportParams(self, einfo):
8346 """Use export parameters as defaults.
8348 In case the opcode doesn't specify (as in override) some instance
8349 parameters, then try to use them from the export information, if that declares them.
8353 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
8355 if self.op.disk_template is None:
8356 if einfo.has_option(constants.INISECT_INS, "disk_template"):
8357 self.op.disk_template = einfo.get(constants.INISECT_INS,
8360 raise errors.OpPrereqError("No disk template specified and the export"
8361 " is missing the disk_template information",
8364 if not self.op.disks:
8365 if einfo.has_option(constants.INISECT_INS, "disk_count"):
8367 # TODO: import the disk iv_name too
8368 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
8369 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
8370 disks.append({constants.IDISK_SIZE: disk_sz})
8371 self.op.disks = disks
8373 raise errors.OpPrereqError("No disk info specified and the export"
8374 " is missing the disk information",
8377 if (not self.op.nics and
8378 einfo.has_option(constants.INISECT_INS, "nic_count")):
8380 for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
8382 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
8383 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
8388 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
8389 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
8391 if (self.op.hypervisor is None and
8392 einfo.has_option(constants.INISECT_INS, "hypervisor")):
8393 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
8395 if einfo.has_section(constants.INISECT_HYP):
8396 # use the export parameters but do not override the ones
8397 # specified by the user
8398 for name, value in einfo.items(constants.INISECT_HYP):
8399 if name not in self.op.hvparams:
8400 self.op.hvparams[name] = value
8402 if einfo.has_section(constants.INISECT_BEP):
8403 # use the parameters, without overriding
8404 for name, value in einfo.items(constants.INISECT_BEP):
8405 if name not in self.op.beparams:
8406 self.op.beparams[name] = value
8408 # try to read the parameters old style, from the main section
8409 for name in constants.BES_PARAMETERS:
8410 if (name not in self.op.beparams and
8411 einfo.has_option(constants.INISECT_INS, name)):
8412 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
8414 if einfo.has_section(constants.INISECT_OSP):
8415 # use the parameters, without overriding
8416 for name, value in einfo.items(constants.INISECT_OSP):
8417 if name not in self.op.osparams:
8418 self.op.osparams[name] = value
8420 def _RevertToDefaults(self, cluster):
8421 """Revert the instance parameters to the default values.
8425 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
8426 for name in self.op.hvparams.keys():
8427 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
8428 del self.op.hvparams[name]
8430 be_defs = cluster.SimpleFillBE({})
8431 for name in self.op.beparams.keys():
8432 if name in be_defs and be_defs[name] == self.op.beparams[name]:
8433 del self.op.beparams[name]
8435 nic_defs = cluster.SimpleFillNIC({})
8436 for nic in self.op.nics:
8437 for name in constants.NICS_PARAMETERS:
8438 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
8441 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
8442 for name in self.op.osparams.keys():
8443 if name in os_defs and os_defs[name] == self.op.osparams[name]:
8444 del self.op.osparams[name]
8446 def _CalculateFileStorageDir(self):
8447 """Calculate final instance file storage dir.
8450 # file storage dir calculation/check
8451 self.instance_file_storage_dir = None
8452 if self.op.disk_template in constants.DTS_FILEBASED:
8453 # build the full file storage dir path
8456 if self.op.disk_template == constants.DT_SHARED_FILE:
8457 get_fsd_fn = self.cfg.GetSharedFileStorageDir
8459 get_fsd_fn = self.cfg.GetFileStorageDir
8461 cfg_storagedir = get_fsd_fn()
8462 if not cfg_storagedir:
8463 raise errors.OpPrereqError("Cluster file storage dir not defined")
8464 joinargs.append(cfg_storagedir)
8466 if self.op.file_storage_dir is not None:
8467 joinargs.append(self.op.file_storage_dir)
8469 joinargs.append(self.op.instance_name)
8471 # pylint: disable-msg=W0142
8472 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
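# Illustration (paths assumed): with a cluster file storage dir of
# "/srv/ganeti/file-storage", op.file_storage_dir "websrv" and an instance
# named "inst1.example.com", the resulting directory would be
# "/srv/ganeti/file-storage/websrv/inst1.example.com".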
8474 def CheckPrereq(self):
8475 """Check prerequisites.
8478 self._CalculateFileStorageDir()
8480 if self.op.mode == constants.INSTANCE_IMPORT:
8481 export_info = self._ReadExportInfo()
8482 self._ReadExportParams(export_info)
8484 if (not self.cfg.GetVGName() and
8485 self.op.disk_template not in constants.DTS_NOT_LVM):
8486 raise errors.OpPrereqError("Cluster does not support lvm-based"
8487 " instances", errors.ECODE_STATE)
8489 if self.op.hypervisor is None:
8490 self.op.hypervisor = self.cfg.GetHypervisorType()
8492 cluster = self.cfg.GetClusterInfo()
8493 enabled_hvs = cluster.enabled_hypervisors
8494 if self.op.hypervisor not in enabled_hvs:
8495 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
8496 " cluster (%s)" % (self.op.hypervisor,
8497 ",".join(enabled_hvs)),
8500 # Check tag validity
8501 for tag in self.op.tags:
8502 objects.TaggableObject.ValidateTag(tag)
8504 # check hypervisor parameter syntax (locally)
8505 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
8506 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
8508 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
8509 hv_type.CheckParameterSyntax(filled_hvp)
8510 self.hv_full = filled_hvp
8511 # check that we don't specify global parameters on an instance
8512 _CheckGlobalHvParams(self.op.hvparams)
8514 # fill and remember the beparams dict
8515 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
8516 self.be_full = cluster.SimpleFillBE(self.op.beparams)
8518 # build os parameters
8519 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
8521 # now that hvp/bep are in final format, let's reset to defaults, if told to do so
8523 if self.op.identify_defaults:
8524 self._RevertToDefaults(cluster)
8528 for idx, nic in enumerate(self.op.nics):
8529 nic_mode_req = nic.get(constants.INIC_MODE, None)
8530 nic_mode = nic_mode_req
8531 if nic_mode is None:
8532 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
8534 # in routed mode, for the first nic, the default ip is 'auto'
8535 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
8536 default_ip_mode = constants.VALUE_AUTO
8538 default_ip_mode = constants.VALUE_NONE
8540 # ip validity checks
8541 ip = nic.get(constants.INIC_IP, default_ip_mode)
8542 if ip is None or ip.lower() == constants.VALUE_NONE:
8544 elif ip.lower() == constants.VALUE_AUTO:
8545 if not self.op.name_check:
8546 raise errors.OpPrereqError("IP address set to auto but name checks"
8547 " have been skipped",
8549 nic_ip = self.hostname1.ip
8551 if not netutils.IPAddress.IsValid(ip):
8552 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
8556 # TODO: check the ip address for uniqueness
8557 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
8558 raise errors.OpPrereqError("Routed nic mode requires an ip address",
8561 # MAC address verification
8562 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
8563 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8564 mac = utils.NormalizeAndValidateMac(mac)
8567 self.cfg.ReserveMAC(mac, self.proc.GetECId())
8568 except errors.ReservationError:
8569 raise errors.OpPrereqError("MAC address %s already in use"
8570 " in cluster" % mac,
8571 errors.ECODE_NOTUNIQUE)
8573 # Build nic parameters
8574 link = nic.get(constants.INIC_LINK, None)
8577 nicparams[constants.NIC_MODE] = nic_mode_req
8579 nicparams[constants.NIC_LINK] = link
8581 check_params = cluster.SimpleFillNIC(nicparams)
8582 objects.NIC.CheckParameterSyntax(check_params)
8583 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
8585 # disk checks/pre-build
8586 default_vg = self.cfg.GetVGName()
8588 for disk in self.op.disks:
8589 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
8590 if mode not in constants.DISK_ACCESS_SET:
8591 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
8592 mode, errors.ECODE_INVAL)
8593 size = disk.get(constants.IDISK_SIZE, None)
8595 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
8598 except (TypeError, ValueError):
8599 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
8602 data_vg = disk.get(constants.IDISK_VG, default_vg)
8604 constants.IDISK_SIZE: size,
8605 constants.IDISK_MODE: mode,
8606 constants.IDISK_VG: data_vg,
8607 constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
8609 if constants.IDISK_ADOPT in disk:
8610 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
8611 self.disks.append(new_disk)
8613 if self.op.mode == constants.INSTANCE_IMPORT:
8615 # Check that the new instance doesn't have fewer disks than the export
8616 instance_disks = len(self.disks)
8617 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
8618 if instance_disks < export_disks:
8619 raise errors.OpPrereqError("Not enough disks to import."
8620 " (instance: %d, export: %d)" %
8621 (instance_disks, export_disks),
8625 for idx in range(export_disks):
8626 option = "disk%d_dump" % idx
8627 if export_info.has_option(constants.INISECT_INS, option):
8628 # FIXME: are the old os-es, disk sizes, etc. useful?
8629 export_name = export_info.get(constants.INISECT_INS, option)
8630 image = utils.PathJoin(self.op.src_path, export_name)
8631 disk_images.append(image)
8633 disk_images.append(False)
8635 self.src_images = disk_images
8637 old_name = export_info.get(constants.INISECT_INS, "name")
8639 exp_nic_count = export_info.getint(constants.INISECT_INS, "nic_count")
8640 except (TypeError, ValueError), err:
8641 raise errors.OpPrereqError("Invalid export file, nic_count is not"
8642 " an integer: %s" % str(err),
8644 if self.op.instance_name == old_name:
8645 for idx, nic in enumerate(self.nics):
8646 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
8647 nic_mac_ini = "nic%d_mac" % idx
8648 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8650 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8652 # ip ping checks (we use the same ip that was resolved in ExpandNames)
8653 if self.op.ip_check:
8654 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8655 raise errors.OpPrereqError("IP %s of instance %s already in use" %
8656 (self.check_ip, self.op.instance_name),
8657 errors.ECODE_NOTUNIQUE)
8659 #### mac address generation
8660 # By generating the MAC address here, both the allocator and the hooks get
8661 # the real, final MAC address rather than the 'auto' or 'generate' value.
8662 # There is a race condition between the generation and the instance object
8663 # creation, which means that we know the mac is valid now, but we're not
8664 # sure it will be when we actually add the instance. If things go bad
8665 # adding the instance will abort because of a duplicate mac, and the
8666 # creation job will fail.
8667 for nic in self.nics:
8668 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8669 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
8673 if self.op.iallocator is not None:
8674 self._RunAllocator()
8676 #### node related checks
8678 # check primary node
8679 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8680 assert self.pnode is not None, \
8681 "Cannot retrieve locked node %s" % self.op.pnode
8683 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8684 pnode.name, errors.ECODE_STATE)
8686 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8687 pnode.name, errors.ECODE_STATE)
8688 if not pnode.vm_capable:
8689 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8690 " '%s'" % pnode.name, errors.ECODE_STATE)
8692 self.secondaries = []
8694 # mirror node verification
8695 if self.op.disk_template in constants.DTS_INT_MIRROR:
8696 if self.op.snode == pnode.name:
8697 raise errors.OpPrereqError("The secondary node cannot be the"
8698 " primary node", errors.ECODE_INVAL)
8699 _CheckNodeOnline(self, self.op.snode)
8700 _CheckNodeNotDrained(self, self.op.snode)
8701 _CheckNodeVmCapable(self, self.op.snode)
8702 self.secondaries.append(self.op.snode)
8704 nodenames = [pnode.name] + self.secondaries
8706 if not self.adopt_disks:
8707 # Check lv size requirements, if not adopting
8708 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
8709 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
8711 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
8712 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
8713 disk[constants.IDISK_ADOPT])
8714 for disk in self.disks])
8715 if len(all_lvs) != len(self.disks):
8716 raise errors.OpPrereqError("Duplicate volume names given for adoption",
8718 for lv_name in all_lvs:
8720 # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
8721 # to ReserveLV use the same syntax
8722 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
8723 except errors.ReservationError:
8724 raise errors.OpPrereqError("LV named %s used by another instance" %
8725 lv_name, errors.ECODE_NOTUNIQUE)
8727 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
8728 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
8730 node_lvs = self.rpc.call_lv_list([pnode.name],
8731 vg_names.payload.keys())[pnode.name]
8732 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
8733 node_lvs = node_lvs.payload
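# node_lvs maps "vg/lv" names to per-LV data; index 0 is used below as the
# size (in MiB) and index 2 as the "online" (in use) flag.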
8735 delta = all_lvs.difference(node_lvs.keys())
8737 raise errors.OpPrereqError("Missing logical volume(s): %s" %
8738 utils.CommaJoin(delta),
8740 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
8742 raise errors.OpPrereqError("Online logical volumes found, cannot"
8743 " adopt: %s" % utils.CommaJoin(online_lvs),
8745 # update the size of disk based on what is found
8746 for dsk in self.disks:
8747 dsk[constants.IDISK_SIZE] = \
8748 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
8749 dsk[constants.IDISK_ADOPT])][0]))
8751 elif self.op.disk_template == constants.DT_BLOCK:
8752 # Normalize and de-duplicate device paths
8753 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
8754 for disk in self.disks])
8755 if len(all_disks) != len(self.disks):
8756 raise errors.OpPrereqError("Duplicate disk names given for adoption",
8758 baddisks = [d for d in all_disks
8759 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
8761 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
8762 " cannot be adopted" %
8763 (", ".join(baddisks),
8764 constants.ADOPTABLE_BLOCKDEV_ROOT),
8767 node_disks = self.rpc.call_bdev_sizes([pnode.name],
8768 list(all_disks))[pnode.name]
8769 node_disks.Raise("Cannot get block device information from node %s" %
8771 node_disks = node_disks.payload
8772 delta = all_disks.difference(node_disks.keys())
8774 raise errors.OpPrereqError("Missing block device(s): %s" %
8775 utils.CommaJoin(delta),
8777 for dsk in self.disks:
8778 dsk[constants.IDISK_SIZE] = \
8779 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
8781 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
8783 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
8784 # check OS parameters (remotely)
8785 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
8787 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
8789 # memory check on primary node
8791 _CheckNodeFreeMemory(self, self.pnode.name,
8792 "creating instance %s" % self.op.instance_name,
8793 self.be_full[constants.BE_MEMORY],
8796 self.dry_run_result = list(nodenames)
8798 def Exec(self, feedback_fn):
8799 """Create and add the instance to the cluster.
8802 instance = self.op.instance_name
8803 pnode_name = self.pnode.name
8805 ht_kind = self.op.hypervisor
8806 if ht_kind in constants.HTS_REQ_PORT:
8807 network_port = self.cfg.AllocatePort()
8811 disks = _GenerateDiskTemplate(self,
8812 self.op.disk_template,
8813 instance, pnode_name,
8816 self.instance_file_storage_dir,
8817 self.op.file_driver,
8821 iobj = objects.Instance(name=instance, os=self.op.os_type,
8822 primary_node=pnode_name,
8823 nics=self.nics, disks=disks,
8824 disk_template=self.op.disk_template,
8826 network_port=network_port,
8827 beparams=self.op.beparams,
8828 hvparams=self.op.hvparams,
8829 hypervisor=self.op.hypervisor,
8830 osparams=self.op.osparams,
8834 for tag in self.op.tags:
8837 if self.adopt_disks:
8838 if self.op.disk_template == constants.DT_PLAIN:
8839 # rename LVs to the newly-generated names; we need to construct
8840 # 'fake' LV disks with the old data, plus the new unique_id
8841 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
8843 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
8844 rename_to.append(t_dsk.logical_id)
8845 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
8846 self.cfg.SetDiskID(t_dsk, pnode_name)
8847 result = self.rpc.call_blockdev_rename(pnode_name,
8848 zip(tmp_disks, rename_to))
8849 result.Raise("Failed to rename adopted LVs")
8851 feedback_fn("* creating instance disks...")
8853 _CreateDisks(self, iobj)
8854 except errors.OpExecError:
8855 self.LogWarning("Device creation failed, reverting...")
8857 _RemoveDisks(self, iobj)
8859 self.cfg.ReleaseDRBDMinors(instance)
8862 feedback_fn("adding instance %s to cluster config" % instance)
8864 self.cfg.AddInstance(iobj, self.proc.GetECId())
8866 # Declare that we don't want to remove the instance lock anymore, as we've
8867 # added the instance to the config
8868 del self.remove_locks[locking.LEVEL_INSTANCE]
8870 if self.op.mode == constants.INSTANCE_IMPORT:
8871 # Release unused nodes
8872 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
8875 _ReleaseLocks(self, locking.LEVEL_NODE)
8878 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
8879 feedback_fn("* wiping instance disks...")
8881 _WipeDisks(self, iobj)
8882 except errors.OpExecError, err:
8883 logging.exception("Wiping disks failed")
8884 self.LogWarning("Wiping instance disks failed (%s)", err)
8888 # Something is already wrong with the disks, don't do anything else
8890 elif self.op.wait_for_sync:
8891 disk_abort = not _WaitForSync(self, iobj)
8892 elif iobj.disk_template in constants.DTS_INT_MIRROR:
8893 # make sure the disks are not degraded (still sync-ing is ok)
8894 feedback_fn("* checking mirrors status")
8895 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
8900 _RemoveDisks(self, iobj)
8901 self.cfg.RemoveInstance(iobj.name)
8902 # Make sure the instance lock gets removed
8903 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
8904 raise errors.OpExecError("There are some degraded disks for"
8907 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
8908 if self.op.mode == constants.INSTANCE_CREATE:
8909 if not self.op.no_install:
8910 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
8911 not self.op.wait_for_sync)
8913 feedback_fn("* pausing disk sync to install instance OS")
8914 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
8916 for idx, success in enumerate(result.payload):
8918 logging.warn("pause-sync of instance %s for disk %d failed",
8921 feedback_fn("* running the instance OS create scripts...")
8922 # FIXME: pass debug option from opcode to backend
8923 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
8924 self.op.debug_level)
8926 feedback_fn("* resuming disk sync")
8927 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
8929 for idx, success in enumerate(result.payload):
8931 logging.warn("resume-sync of instance %s for disk %d failed",
8934 result.Raise("Could not add os for instance %s"
8935 " on node %s" % (instance, pnode_name))
8937 elif self.op.mode == constants.INSTANCE_IMPORT:
8938 feedback_fn("* running the instance OS import scripts...")
8942 for idx, image in enumerate(self.src_images):
8946 # FIXME: pass debug option from opcode to backend
8947 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
8948 constants.IEIO_FILE, (image, ),
8949 constants.IEIO_SCRIPT,
8950 (iobj.disks[idx], idx),
8952 transfers.append(dt)
8955 masterd.instance.TransferInstanceData(self, feedback_fn,
8956 self.op.src_node, pnode_name,
8957 self.pnode.secondary_ip,
8959 if not compat.all(import_result):
8960 self.LogWarning("Some disks for instance %s on node %s were not"
8961 " imported successfully" % (instance, pnode_name))
8963 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8964 feedback_fn("* preparing remote import...")
8965 # The source cluster will stop the instance before attempting to make a
8966 # connection. In some cases stopping an instance can take a long time,
8967 # hence the shutdown timeout is added to the connection timeout.
8968 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
8969 self.op.source_shutdown_timeout)
8970 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
8972 assert iobj.primary_node == self.pnode.name
8974 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
8975 self.source_x509_ca,
8976 self._cds, timeouts)
8977 if not compat.all(disk_results):
8978 # TODO: Should the instance still be started, even if some disks
8979 # failed to import (valid for local imports, too)?
8980 self.LogWarning("Some disks for instance %s on node %s were not"
8981 " imported successfully" % (instance, pnode_name))
8983 # Run rename script on newly imported instance
8984 assert iobj.name == instance
8985 feedback_fn("Running rename script for %s" % instance)
8986 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
8987 self.source_instance_name,
8988 self.op.debug_level)
8990 self.LogWarning("Failed to run rename script for %s on node"
8991 " %s: %s" % (instance, pnode_name, result.fail_msg))
8994 # also checked in the prereq part
8995 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
8999 iobj.admin_up = True
9000 self.cfg.Update(iobj, feedback_fn)
9001 logging.info("Starting instance %s on node %s", instance, pnode_name)
9002 feedback_fn("* starting instance...")
9003 result = self.rpc.call_instance_start(pnode_name, iobj,
9005 result.Raise("Could not start instance")
9007 return list(iobj.all_nodes)
9010 class LUInstanceConsole(NoHooksLU):
9011 """Connect to an instance's console.
9013 This is somewhat special in that it returns the command line that
9014 you need to run on the master node in order to connect to the console.
9020 def ExpandNames(self):
9021 self._ExpandAndLockInstance()
9023 def CheckPrereq(self):
9024 """Check prerequisites.
9026 This checks that the instance is in the cluster.
9029 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9030 assert self.instance is not None, \
9031 "Cannot retrieve locked instance %s" % self.op.instance_name
9032 _CheckNodeOnline(self, self.instance.primary_node)
9034 def Exec(self, feedback_fn):
9035 """Connect to the console of an instance
9038 instance = self.instance
9039 node = instance.primary_node
9041 node_insts = self.rpc.call_instance_list([node],
9042 [instance.hypervisor])[node]
9043 node_insts.Raise("Can't get node information from %s" % node)
9045 if instance.name not in node_insts.payload:
9046 if instance.admin_up:
9047 state = constants.INSTST_ERRORDOWN
9049 state = constants.INSTST_ADMINDOWN
9050 raise errors.OpExecError("Instance %s is not running (state %s)" %
9051 (instance.name, state))
9053 logging.debug("Connecting to console of %s on %s", instance.name, node)
9055 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
9058 def _GetInstanceConsole(cluster, instance):
9059 """Returns console information for an instance.
9061 @type cluster: L{objects.Cluster}
9062 @type instance: L{objects.Instance}
9066 hyper = hypervisor.GetHypervisor(instance.hypervisor)
9067 # beparams and hvparams are passed separately, to avoid editing the
9068 # instance and then saving the defaults in the instance itself.
9069 hvparams = cluster.FillHV(instance)
9070 beparams = cluster.FillBE(instance)
9071 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
9073 assert console.instance == instance.name
9074 assert console.Validate()
9076 return console.ToDict()
9079 class LUInstanceReplaceDisks(LogicalUnit):
9080 """Replace the disks of an instance.
9083 HPATH = "mirrors-replace"
9084 HTYPE = constants.HTYPE_INSTANCE
9087 def CheckArguments(self):
9088 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
9091 def ExpandNames(self):
9092 self._ExpandAndLockInstance()
9094 assert locking.LEVEL_NODE not in self.needed_locks
9095 assert locking.LEVEL_NODEGROUP not in self.needed_locks
9097 assert self.op.iallocator is None or self.op.remote_node is None, \
9098 "Conflicting options"
9100 if self.op.remote_node is not None:
9101 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9103 # Warning: do not remove the locking of the new secondary here
9104 # unless DRBD8.AddChildren is changed to work in parallel;
9105 # currently it doesn't since parallel invocations of
9106 # FindUnusedMinor will conflict
9107 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
9108 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
9110 self.needed_locks[locking.LEVEL_NODE] = []
9111 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9113 if self.op.iallocator is not None:
9114 # iallocator will select a new node in the same group
9115 self.needed_locks[locking.LEVEL_NODEGROUP] = []
9117 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
9118 self.op.iallocator, self.op.remote_node,
9119 self.op.disks, False, self.op.early_release)
9121 self.tasklets = [self.replacer]
9123 def DeclareLocks(self, level):
9124 if level == locking.LEVEL_NODEGROUP:
9125 assert self.op.remote_node is None
9126 assert self.op.iallocator is not None
9127 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
9129 self.share_locks[locking.LEVEL_NODEGROUP] = 1
9130 self.needed_locks[locking.LEVEL_NODEGROUP] = \
9131 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9133 elif level == locking.LEVEL_NODE:
9134 if self.op.iallocator is not None:
9135 assert self.op.remote_node is None
9136 assert not self.needed_locks[locking.LEVEL_NODE]
9138 # Lock member nodes of all locked groups
9139 self.needed_locks[locking.LEVEL_NODE] = [node_name
9140 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
9141 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
9143 self._LockInstancesNodes()
9145 def BuildHooksEnv(self):
9148 This runs on the master, the primary and all the secondaries.
9151 instance = self.replacer.instance
9153 "MODE": self.op.mode,
9154 "NEW_SECONDARY": self.op.remote_node,
9155 "OLD_SECONDARY": instance.secondary_nodes[0],
9157 env.update(_BuildInstanceHookEnvByObject(self, instance))
9160 def BuildHooksNodes(self):
9161 """Build hooks nodes.
9164 instance = self.replacer.instance
9166 self.cfg.GetMasterNode(),
9167 instance.primary_node,
9169 if self.op.remote_node is not None:
9170 nl.append(self.op.remote_node)
9173 def CheckPrereq(self):
9174 """Check prerequisites.
9177 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
9178 self.op.iallocator is None)
9180 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
9182 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
9184 return LogicalUnit.CheckPrereq(self)
9187 class TLReplaceDisks(Tasklet):
9188 """Replaces disks for an instance.
9190 Note: Locking is not within the scope of this class.
9193 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9194 disks, delay_iallocator, early_release):
9195 """Initializes this class.
9198 Tasklet.__init__(self, lu)
9201 self.instance_name = instance_name
9203 self.iallocator_name = iallocator_name
9204 self.remote_node = remote_node
9206 self.delay_iallocator = delay_iallocator
9207 self.early_release = early_release
9210 self.instance = None
9211 self.new_node = None
9212 self.target_node = None
9213 self.other_node = None
9214 self.remote_node_info = None
9215 self.node_secondary_ip = None
9218 def CheckArguments(mode, remote_node, iallocator):
9219 """Helper function for users of this class.
9222 # check for valid parameter combination
9223 if mode == constants.REPLACE_DISK_CHG:
9224 if remote_node is None and iallocator is None:
9225 raise errors.OpPrereqError("When changing the secondary either an"
9226 " iallocator script must be used or the"
9227 " new node given", errors.ECODE_INVAL)
9229 if remote_node is not None and iallocator is not None:
9230 raise errors.OpPrereqError("Give either the iallocator or the new"
9231 " secondary, not both", errors.ECODE_INVAL)
9233 elif remote_node is not None or iallocator is not None:
9234 # Not replacing the secondary
9235 raise errors.OpPrereqError("The iallocator and new node options can"
9236 " only be used when changing the"
9237 " secondary node", errors.ECODE_INVAL)
9240 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9241 """Compute a new secondary node using an IAllocator.
9244 ial = IAllocator(lu.cfg, lu.rpc,
9245 mode=constants.IALLOCATOR_MODE_RELOC,
9247 relocate_from=list(relocate_from))
9249 ial.Run(iallocator_name)
9252 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9253 " %s" % (iallocator_name, ial.info),
9256 if len(ial.result) != ial.required_nodes:
9257 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9258 " of nodes (%s), required %s" %
9260 len(ial.result), ial.required_nodes),
9263 remote_node_name = ial.result[0]
9265 lu.LogInfo("Selected new secondary for instance '%s': %s",
9266 instance_name, remote_node_name)
9268 return remote_node_name
9270 def _FindFaultyDisks(self, node_name):
9271 """Wrapper for L{_FindFaultyInstanceDisks}.
9274 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
9277 def _CheckDisksActivated(self, instance):
9278 """Checks if the instance disks are activated.
9280 @param instance: The instance to check disks
9281 @return: True if they are activated, False otherwise
9284 nodes = instance.all_nodes
9286 for idx, dev in enumerate(instance.disks):
9288 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
9289 self.cfg.SetDiskID(dev, node)
9291 result = self.rpc.call_blockdev_find(node, dev)
9295 elif result.fail_msg or not result.payload:
9300 def CheckPrereq(self):
9301 """Check prerequisites.
9303 This checks that the instance is in the cluster.
9306 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9307 assert instance is not None, \
9308 "Cannot retrieve locked instance %s" % self.instance_name
9310 if instance.disk_template != constants.DT_DRBD8:
9311 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9312 " instances", errors.ECODE_INVAL)
9314 if len(instance.secondary_nodes) != 1:
9315 raise errors.OpPrereqError("The instance has a strange layout,"
9316 " expected one secondary but found %d" %
9317 len(instance.secondary_nodes),
9320 if not self.delay_iallocator:
9321 self._CheckPrereq2()
9323 def _CheckPrereq2(self):
9324 """Check prerequisites, second part.
9326 This function should always be part of CheckPrereq. It was separated and is
9327 now called from Exec because during node evacuation the iallocator was only
9328 called with an unmodified cluster model, not taking planned changes into account.
9332 instance = self.instance
9333 secondary_node = instance.secondary_nodes[0]
9335 if self.iallocator_name is None:
9336 remote_node = self.remote_node
9338 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
9339 instance.name, instance.secondary_nodes)
9341 if remote_node is None:
9342 self.remote_node_info = None
9344 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
9345 "Remote node '%s' is not locked" % remote_node
9347 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
9348 assert self.remote_node_info is not None, \
9349 "Cannot retrieve locked node %s" % remote_node
9351 if remote_node == self.instance.primary_node:
9352 raise errors.OpPrereqError("The specified node is the primary node of"
9353 " the instance", errors.ECODE_INVAL)
9355 if remote_node == secondary_node:
9356 raise errors.OpPrereqError("The specified node is already the"
9357 " secondary node of the instance",
9360 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
9361 constants.REPLACE_DISK_CHG):
9362 raise errors.OpPrereqError("Cannot specify disks to be replaced",
9365 if self.mode == constants.REPLACE_DISK_AUTO:
9366 if not self._CheckDisksActivated(instance):
9367 raise errors.OpPrereqError("Please run activate-disks on instance %s"
9368 " first" % self.instance_name,
9370 faulty_primary = self._FindFaultyDisks(instance.primary_node)
9371 faulty_secondary = self._FindFaultyDisks(secondary_node)
9373 if faulty_primary and faulty_secondary:
9374 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
9375 " one node and can not be repaired"
9376 " automatically" % self.instance_name,
9380 self.disks = faulty_primary
9381 self.target_node = instance.primary_node
9382 self.other_node = secondary_node
9383 check_nodes = [self.target_node, self.other_node]
9384 elif faulty_secondary:
9385 self.disks = faulty_secondary
9386 self.target_node = secondary_node
9387 self.other_node = instance.primary_node
9388 check_nodes = [self.target_node, self.other_node]
9394 # Non-automatic modes
9395 if self.mode == constants.REPLACE_DISK_PRI:
9396 self.target_node = instance.primary_node
9397 self.other_node = secondary_node
9398 check_nodes = [self.target_node, self.other_node]
9400 elif self.mode == constants.REPLACE_DISK_SEC:
9401 self.target_node = secondary_node
9402 self.other_node = instance.primary_node
9403 check_nodes = [self.target_node, self.other_node]
9405 elif self.mode == constants.REPLACE_DISK_CHG:
9406 self.new_node = remote_node
9407 self.other_node = instance.primary_node
9408 self.target_node = secondary_node
9409 check_nodes = [self.new_node, self.other_node]
9411 _CheckNodeNotDrained(self.lu, remote_node)
9412 _CheckNodeVmCapable(self.lu, remote_node)
9414 old_node_info = self.cfg.GetNodeInfo(secondary_node)
9415 assert old_node_info is not None
9416 if old_node_info.offline and not self.early_release:
9417 # doesn't make sense to delay the release
9418 self.early_release = True
9419 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
9420 " early-release mode", secondary_node)
9423 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
9426 # If not specified all disks should be replaced
9428 self.disks = range(len(self.instance.disks))
9430 for node in check_nodes:
9431 _CheckNodeOnline(self.lu, node)
9433 touched_nodes = frozenset(node_name for node_name in [self.new_node,
9436 if node_name is not None)
9438 # Release unneeded node locks
9439 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
9441 # Release any owned node group
9442 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
9443 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
9445 # Check whether disks are valid
9446 for disk_idx in self.disks:
9447 instance.FindDisk(disk_idx)
9449 # Get secondary node IP addresses
9450 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
9451 in self.cfg.GetMultiNodeInfo(touched_nodes))
9453 def Exec(self, feedback_fn):
9454 """Execute disk replacement.
9456 This dispatches the disk replacement to the appropriate handler.
9459 if self.delay_iallocator:
9460 self._CheckPrereq2()
9463 # Verify owned locks before starting operation
9464 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9465 assert set(owned_nodes) == set(self.node_secondary_ip), \
9466 ("Incorrect node locks, owning %s, expected %s" %
9467 (owned_nodes, self.node_secondary_ip.keys()))
9469 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
9470 assert list(owned_instances) == [self.instance_name], \
9471 "Instance '%s' not locked" % self.instance_name
9473 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
9474 "Should not own any node group lock at this point"
9477 feedback_fn("No disks need replacement")
9480 feedback_fn("Replacing disk(s) %s for %s" %
9481 (utils.CommaJoin(self.disks), self.instance.name))
9483 activate_disks = (not self.instance.admin_up)
9485 # Activate the instance disks if we're replacing them on a down instance
9487 _StartInstanceDisks(self.lu, self.instance, True)
9490 # Should we replace the secondary node?
9491 if self.new_node is not None:
9492 fn = self._ExecDrbd8Secondary
9494 fn = self._ExecDrbd8DiskOnly
9496 result = fn(feedback_fn)
9498 # Deactivate the instance disks if we're replacing them on a
9501 _SafeShutdownInstanceDisks(self.lu, self.instance)
9504 # Verify owned locks
9505 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9506 nodes = frozenset(self.node_secondary_ip)
9507 assert ((self.early_release and not owned_nodes) or
9508 (not self.early_release and not (set(owned_nodes) - nodes))), \
9509 ("Not owning the correct locks, early_release=%s, owned=%r,"
9510 " nodes=%r" % (self.early_release, owned_nodes, nodes))
9514 def _CheckVolumeGroup(self, nodes):
9515 self.lu.LogInfo("Checking volume groups")
9517 vgname = self.cfg.GetVGName()
9519 # Make sure volume group exists on all involved nodes
9520 results = self.rpc.call_vg_list(nodes)
9522 raise errors.OpExecError("Can't list volume groups on the nodes")
9526 res.Raise("Error checking node %s" % node)
9527 if vgname not in res.payload:
9528 raise errors.OpExecError("Volume group '%s' not found on node %s" %
9531 def _CheckDisksExistence(self, nodes):
9532 # Check disk existence
9533 for idx, dev in enumerate(self.instance.disks):
9534 if idx not in self.disks:
9538 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
9539 self.cfg.SetDiskID(dev, node)
9541 result = self.rpc.call_blockdev_find(node, dev)
9543 msg = result.fail_msg
9544 if msg or not result.payload:
9546 msg = "disk not found"
9547 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
9550 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
9551 for idx, dev in enumerate(self.instance.disks):
9552 if idx not in self.disks:
9555 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
9558 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
9560 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
9561 " replace disks for instance %s" %
9562 (node_name, self.instance.name))
9564 def _CreateNewStorage(self, node_name):
9565 """Create new storage on the primary or secondary node.
9567 This is only used for same-node replaces, not for changing the
9568 secondary node, hence we don't want to modify the existing disk.
9573 for idx, dev in enumerate(self.instance.disks):
9574 if idx not in self.disks:
9577 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
9579 self.cfg.SetDiskID(dev, node_name)
9581 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
9582 names = _GenerateUniqueNames(self.lu, lv_names)
9584 vg_data = dev.children[0].logical_id[0]
9585 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
9586 logical_id=(vg_data, names[0]))
9587 vg_meta = dev.children[1].logical_id[0]
9588 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
9589 logical_id=(vg_meta, names[1]))
9591 new_lvs = [lv_data, lv_meta]
9592 old_lvs = [child.Copy() for child in dev.children]
9593 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
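# Descriptive note: iv_names maps the DRBD device name (e.g. "disk/0") to a
# (drbd_dev, old_lvs, new_lvs) tuple; _CheckDevices and _RemoveOldStorage
# below consume exactly this mapping.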
9595 # we pass force_create=True to force the LVM creation
9596 for new_lv in new_lvs:
9597 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
9598 _GetInstanceInfoText(self.instance), False)
9602 def _CheckDevices(self, node_name, iv_names):
9603 for name, (dev, _, _) in iv_names.iteritems():
9604 self.cfg.SetDiskID(dev, node_name)
9606 result = self.rpc.call_blockdev_find(node_name, dev)
9608 msg = result.fail_msg
9609 if msg or not result.payload:
9611 msg = "disk not found"
9612 raise errors.OpExecError("Can't find DRBD device %s: %s" %
9615 if result.payload.is_degraded:
9616 raise errors.OpExecError("DRBD device %s is degraded!" % name)
9618 def _RemoveOldStorage(self, node_name, iv_names):
9619 for name, (_, old_lvs, _) in iv_names.iteritems():
9620 self.lu.LogInfo("Remove logical volumes for %s" % name)
9623 self.cfg.SetDiskID(lv, node_name)
9625 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
9627 self.lu.LogWarning("Can't remove old LV: %s" % msg,
9628 hint="remove unused LVs manually")
9630 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable-msg=W0613
9631 """Replace a disk on the primary or secondary for DRBD 8.
9633 The algorithm for replace is quite complicated:
9635 1. for each disk to be replaced:
9637 1. create new LVs on the target node with unique names
9638 1. detach old LVs from the drbd device
9639 1. rename old LVs to name_replaced.<time_t>
9640 1. rename new LVs to old LVs
9641 1. attach the new LVs (with the old names now) to the drbd device
9643 1. wait for sync across all devices
9645 1. for each modified disk:
9647 1. remove old LVs (which have the name name_replaced.<time_t>)
9649 Failures are not very well handled.
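Illustrative walk-through (LV names are hypothetical): for disk/0 backed by a
data LV "xenvg/abc.disk0_data", a fresh pair of LVs is created under unique
names, the original is renamed to "abc.disk0_data_replaced-<time_t>", the new
LV is renamed to the original name and re-attached to the DRBD device, and the
"_replaced" volumes are deleted once the resync has finished.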
9654 # Step: check device activation
9655 self.lu.LogStep(1, steps_total, "Check device existence")
9656 self._CheckDisksExistence([self.other_node, self.target_node])
9657 self._CheckVolumeGroup([self.target_node, self.other_node])
9659 # Step: check other node consistency
9660 self.lu.LogStep(2, steps_total, "Check peer consistency")
9661 self._CheckDisksConsistency(self.other_node,
9662 self.other_node == self.instance.primary_node,
9665 # Step: create new storage
9666 self.lu.LogStep(3, steps_total, "Allocate new storage")
9667 iv_names = self._CreateNewStorage(self.target_node)
9669 # Step: for each lv, detach+rename*2+attach
9670 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9671 for dev, old_lvs, new_lvs in iv_names.itervalues():
9672 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
9674 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
9675 old_lvs)
9676 result.Raise("Can't detach drbd from local storage on node"
9677 " %s for device %s" % (self.target_node, dev.iv_name))
9679 #cfg.Update(instance)
9681 # ok, we created the new LVs, so now we know we have the needed
9682 # storage; as such, we proceed on the target node to rename
9683 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
9684 # using the assumption that logical_id == physical_id (which in
9685 # turn is the unique_id on that node)
9687 # FIXME(iustin): use a better name for the replaced LVs
9688 temp_suffix = int(time.time())
9689 ren_fn = lambda d, suff: (d.physical_id[0],
9690 d.physical_id[1] + "_replaced-%s" % suff)
9692 # Build the rename list based on what LVs exist on the node
9693 rename_old_to_new = []
9694 for to_ren in old_lvs:
9695 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
9696 if not result.fail_msg and result.payload:
9698 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
9700 self.lu.LogInfo("Renaming the old LVs on the target node")
9701 result = self.rpc.call_blockdev_rename(self.target_node,
9702 rename_old_to_new)
9703 result.Raise("Can't rename old LVs on node %s" % self.target_node)
9705 # Now we rename the new LVs to the old LVs
9706 self.lu.LogInfo("Renaming the new LVs on the target node")
9707 rename_new_to_old = [(new, old.physical_id)
9708 for old, new in zip(old_lvs, new_lvs)]
9709 result = self.rpc.call_blockdev_rename(self.target_node,
9710 rename_new_to_old)
9711 result.Raise("Can't rename new LVs on node %s" % self.target_node)
9713 # Intermediate steps of in memory modifications
9714 for old, new in zip(old_lvs, new_lvs):
9715 new.logical_id = old.logical_id
9716 self.cfg.SetDiskID(new, self.target_node)
9718 # We need to modify old_lvs so that removal later removes the
9719 # right LVs, not the newly added ones; note that old_lvs is a
9721 for disk in old_lvs:
9722 disk.logical_id = ren_fn(disk, temp_suffix)
9723 self.cfg.SetDiskID(disk, self.target_node)
9725 # Now that the new lvs have the old name, we can add them to the device
9726 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
9727 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
9728 new_lvs)
9729 msg = result.fail_msg
9731 for new_lv in new_lvs:
9732 msg2 = self.rpc.call_blockdev_remove(self.target_node,
9733 new_lv).fail_msg
9734 if msg2:
9735 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
9736 hint=("cleanup manually the unused logical"
9738 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
9741 if self.early_release:
9742 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9744 self._RemoveOldStorage(self.target_node, iv_names)
9745 # WARNING: we release both node locks here, do not do other RPCs
9746 # than WaitForSync to the primary node
9747 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9748 names=[self.target_node, self.other_node])
9751 # This can fail as the old devices are degraded and _WaitForSync
9752 # does a combined result over all disks, so we don't check its return value
9753 self.lu.LogStep(cstep, steps_total, "Sync devices")
9755 _WaitForSync(self.lu, self.instance)
9757 # Check all devices manually
9758 self._CheckDevices(self.instance.primary_node, iv_names)
9760 # Step: remove old storage
9761 if not self.early_release:
9762 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9764 self._RemoveOldStorage(self.target_node, iv_names)
9766 def _ExecDrbd8Secondary(self, feedback_fn):
9767 """Replace the secondary node for DRBD 8.
9769 The algorithm for replace is quite complicated:
9770 - for all disks of the instance:
9771 - create new LVs on the new node with same names
9772 - shutdown the drbd device on the old secondary
9773 - disconnect the drbd network on the primary
9774 - create the drbd device on the new secondary
9775 - network attach the drbd on the primary, using an artifice:
9776 the drbd code for Attach() will connect to the network if it
9777 finds a device which is connected to the good local disks but
9778 not network enabled
9779 - wait for sync across all devices
9780 - remove all disks from the old secondary
9782 Failures are not very well handled.
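Implementation note (summarising the code below): the primary's DRBD devices
are first switched to standalone via call_drbd_disconnect_net, the disks'
logical_id entries are rewritten to name the new secondary, and
call_drbd_attach_net then reconnects them against the freshly created devices
on the new node.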
9787 # Step: check device activation
9788 self.lu.LogStep(1, steps_total, "Check device existence")
9789 self._CheckDisksExistence([self.instance.primary_node])
9790 self._CheckVolumeGroup([self.instance.primary_node])
9792 # Step: check other node consistency
9793 self.lu.LogStep(2, steps_total, "Check peer consistency")
9794 self._CheckDisksConsistency(self.instance.primary_node, True, True)
9796 # Step: create new storage
9797 self.lu.LogStep(3, steps_total, "Allocate new storage")
9798 for idx, dev in enumerate(self.instance.disks):
9799 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
9800 (self.new_node, idx))
9801 # we pass force_create=True to force LVM creation
9802 for new_lv in dev.children:
9803 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
9804 _GetInstanceInfoText(self.instance), False)
9806 # Step 4: drbd minors and drbd setup changes
9807 # after this, we must manually remove the drbd minors on both the
9808 # error and the success paths
9809 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9810 minors = self.cfg.AllocateDRBDMinor([self.new_node
9811 for dev in self.instance.disks],
9812 self.instance.name)
9813 logging.debug("Allocated minors %r", minors)
9816 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
9817 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
9818 (self.new_node, idx))
9819 # create new devices on new_node; note that we create two IDs:
9820 # one without port, so the drbd will be activated without
9821 # networking information on the new node at this stage, and one
9822 # with network, for the latter activation in step 4
9823 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
9824 if self.instance.primary_node == o_node1:
9827 assert self.instance.primary_node == o_node2, "Three-node instance?"
9830 new_alone_id = (self.instance.primary_node, self.new_node, None,
9831 p_minor, new_minor, o_secret)
9832 new_net_id = (self.instance.primary_node, self.new_node, o_port,
9833 p_minor, new_minor, o_secret)
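# Descriptive note: a DRBD8 logical_id is the six-tuple
# (node_A, node_B, port, minor_A, minor_B, secret); new_alone_id deliberately
# carries port=None so the device is brought up without networking, while
# new_net_id keeps the original port for the later network attach.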
9835 iv_names[idx] = (dev, dev.children, new_net_id)
9836 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
9838 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
9839 logical_id=new_alone_id,
9840 children=dev.children,
9843 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
9844 _GetInstanceInfoText(self.instance), False)
9845 except errors.GenericError:
9846 self.cfg.ReleaseDRBDMinors(self.instance.name)
9849 # We have new devices, shutdown the drbd on the old secondary
9850 for idx, dev in enumerate(self.instance.disks):
9851 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
9852 self.cfg.SetDiskID(dev, self.target_node)
9853 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
9855 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
9856 "node: %s" % (idx, msg),
9857 hint=("Please cleanup this device manually as"
9858 " soon as possible"))
9860 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
9861 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
9862 self.node_secondary_ip,
9863 self.instance.disks)\
9864 [self.instance.primary_node]
9866 msg = result.fail_msg
9868 # detaches didn't succeed (unlikely)
9869 self.cfg.ReleaseDRBDMinors(self.instance.name)
9870 raise errors.OpExecError("Can't detach the disks from the network on"
9871 " old node: %s" % (msg,))
9873 # if we managed to detach at least one, we update all the disks of
9874 # the instance to point to the new secondary
9875 self.lu.LogInfo("Updating instance configuration")
9876 for dev, _, new_logical_id in iv_names.itervalues():
9877 dev.logical_id = new_logical_id
9878 self.cfg.SetDiskID(dev, self.instance.primary_node)
9880 self.cfg.Update(self.instance, feedback_fn)
9882 # and now perform the drbd attach
9883 self.lu.LogInfo("Attaching primary drbds to new secondary"
9884 " (standalone => connected)")
9885 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
9887 self.node_secondary_ip,
9888 self.instance.disks,
9891 for to_node, to_result in result.items():
9892 msg = to_result.fail_msg
9894 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
9896 hint=("please do a gnt-instance info to see the"
9897 " status of disks"))
9899 if self.early_release:
9900 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9902 self._RemoveOldStorage(self.target_node, iv_names)
9903 # WARNING: we release all node locks here, do not do other RPCs
9904 # than WaitForSync to the primary node
9905 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9906 names=[self.instance.primary_node,
9911 # This can fail as the old devices are degraded and _WaitForSync
9912 # does a combined result over all disks, so we don't check its return value
9913 self.lu.LogStep(cstep, steps_total, "Sync devices")
9915 _WaitForSync(self.lu, self.instance)
9917 # Check all devices manually
9918 self._CheckDevices(self.instance.primary_node, iv_names)
9920 # Step: remove old storage
9921 if not self.early_release:
9922 self.lu.LogStep(cstep, steps_total, "Removing old storage")
9923 self._RemoveOldStorage(self.target_node, iv_names)
9926 class LURepairNodeStorage(NoHooksLU):
9927 """Repairs the volume group on a node.
9932 def CheckArguments(self):
9933 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
9935 storage_type = self.op.storage_type
9937 if (constants.SO_FIX_CONSISTENCY not in
9938 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
9939 raise errors.OpPrereqError("Storage units of type '%s' can not be"
9940 " repaired" % storage_type,
9943 def ExpandNames(self):
9944 self.needed_locks = {
9945 locking.LEVEL_NODE: [self.op.node_name],
9948 def _CheckFaultyDisks(self, instance, node_name):
9949 """Ensure faulty disks abort the opcode or at least warn."""
9951 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
9953 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
9954 " node '%s'" % (instance.name, node_name),
9956 except errors.OpPrereqError, err:
9957 if self.op.ignore_consistency:
9958 self.proc.LogWarning(str(err.args[0]))
9962 def CheckPrereq(self):
9963 """Check prerequisites.
9966 # Check whether any instance on this node has faulty disks
9967 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
9968 if not inst.admin_up:
9970 check_nodes = set(inst.all_nodes)
9971 check_nodes.discard(self.op.node_name)
9972 for inst_node_name in check_nodes:
9973 self._CheckFaultyDisks(inst, inst_node_name)
9975 def Exec(self, feedback_fn):
9976 feedback_fn("Repairing storage unit '%s' on %s ..." %
9977 (self.op.name, self.op.node_name))
9979 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
9980 result = self.rpc.call_storage_execute(self.op.node_name,
9981 self.op.storage_type, st_args,
9983 constants.SO_FIX_CONSISTENCY)
9984 result.Raise("Failed to repair storage unit '%s' on %s" %
9985 (self.op.name, self.op.node_name))
9988 class LUNodeEvacuate(NoHooksLU):
9989 """Evacuates instances off a list of nodes.
9994 def CheckArguments(self):
9995 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
9997 def ExpandNames(self):
9998 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10000 if self.op.remote_node is not None:
10001 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10002 assert self.op.remote_node
10004 if self.op.remote_node == self.op.node_name:
10005 raise errors.OpPrereqError("Can not use evacuated node as a new"
10006 " secondary node", errors.ECODE_INVAL)
10008 if self.op.mode != constants.IALLOCATOR_NEVAC_SEC:
10009 raise errors.OpPrereqError("Without the use of an iallocator only"
10010 " secondary instances can be evacuated",
10011 errors.ECODE_INVAL)
10014 self.share_locks = _ShareAll()
10015 self.needed_locks = {
10016 locking.LEVEL_INSTANCE: [],
10017 locking.LEVEL_NODEGROUP: [],
10018 locking.LEVEL_NODE: [],
10021 if self.op.remote_node is None:
10022 # Iallocator will choose any node(s) in the same group
10023 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
10025 group_nodes = frozenset([self.op.remote_node])
10027 # Determine nodes to be locked
10028 self.lock_nodes = set([self.op.node_name]) | group_nodes
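# Illustrative example (node names are hypothetical): evacuating "node1" with
# an iallocator locks node1 plus every other member of its node group(s),
# whereas with an explicit remote node only {"node1", remote_node} is locked.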
10030 def _DetermineInstances(self):
10031 """Builds list of instances to operate on.
10034 assert self.op.mode in constants.IALLOCATOR_NEVAC_MODES
10036 if self.op.mode == constants.IALLOCATOR_NEVAC_PRI:
10037 # Primary instances only
10038 inst_fn = _GetNodePrimaryInstances
10039 assert self.op.remote_node is None, \
10040 "Evacuating primary instances requires iallocator"
10041 elif self.op.mode == constants.IALLOCATOR_NEVAC_SEC:
10042 # Secondary instances only
10043 inst_fn = _GetNodeSecondaryInstances
10046 assert self.op.mode == constants.IALLOCATOR_NEVAC_ALL
10047 inst_fn = _GetNodeInstances
10049 return inst_fn(self.cfg, self.op.node_name)
10051 def DeclareLocks(self, level):
10052 if level == locking.LEVEL_INSTANCE:
10053 # Lock instances optimistically, needs verification once node and group
10054 # locks have been acquired
10055 self.needed_locks[locking.LEVEL_INSTANCE] = \
10056 set(i.name for i in self._DetermineInstances())
10058 elif level == locking.LEVEL_NODEGROUP:
10059 # Lock node groups optimistically, needs verification once nodes have
10061 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10062 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
10064 elif level == locking.LEVEL_NODE:
10065 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
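# Descriptive note: the optimistic instance and group locks declared above are
# re-checked in CheckPrereq below, which fails the opcode if the node's groups
# or instance list changed between lock acquisitions.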
10067 def CheckPrereq(self):
10069 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
10070 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
10071 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10073 assert owned_nodes == self.lock_nodes
10075 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
10076 if owned_groups != wanted_groups:
10077 raise errors.OpExecError("Node groups changed since locks were acquired,"
10078 " current groups are '%s', used to be '%s'" %
10079 (utils.CommaJoin(wanted_groups),
10080 utils.CommaJoin(owned_groups)))
10082 # Determine affected instances
10083 self.instances = self._DetermineInstances()
10084 self.instance_names = [i.name for i in self.instances]
10086 if set(self.instance_names) != owned_instances:
10087 raise errors.OpExecError("Instances on node '%s' changed since locks"
10088 " were acquired, current instances are '%s',"
10089 " used to be '%s'" %
10090 (self.op.node_name,
10091 utils.CommaJoin(self.instance_names),
10092 utils.CommaJoin(owned_instances)))
10094 if self.instance_names:
10095 self.LogInfo("Evacuating instances from node '%s': %s",
10097 utils.CommaJoin(utils.NiceSort(self.instance_names)))
10099 self.LogInfo("No instances to evacuate from node '%s'",
10102 if self.op.remote_node is not None:
10103 for i in self.instances:
10104 if i.primary_node == self.op.remote_node:
10105 raise errors.OpPrereqError("Node %s is the primary node of"
10106 " instance %s, cannot use it as"
10108 (self.op.remote_node, i.name),
10109 errors.ECODE_INVAL)
10111 def Exec(self, feedback_fn):
10112 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10114 if not self.instance_names:
10115 # No instances to evacuate
10118 elif self.op.iallocator is not None:
10119 # TODO: Implement relocation to other group
10120 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10121 evac_mode=self.op.mode,
10122 instances=list(self.instance_names))
10124 ial.Run(self.op.iallocator)
10126 if not ial.success:
10127 raise errors.OpPrereqError("Can't compute node evacuation using"
10128 " iallocator '%s': %s" %
10129 (self.op.iallocator, ial.info),
10130 errors.ECODE_NORES)
10132 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10134 elif self.op.remote_node is not None:
10135 assert self.op.mode == constants.IALLOCATOR_NEVAC_SEC
10137 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
10138 remote_node=self.op.remote_node,
10140 mode=constants.REPLACE_DISK_CHG,
10141 early_release=self.op.early_release)]
10142 for instance_name in self.instance_names
10146 raise errors.ProgrammerError("No iallocator or remote node")
10148 return ResultWithJobs(jobs)
10151 def _SetOpEarlyRelease(early_release, op):
10152 """Sets C{early_release} flag on opcodes if available.
10156 op.early_release = early_release
10157 except AttributeError:
10158 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
10160 return op
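# Usage sketch (as done in _LoadNodeEvacResult below): the flag is applied to
# every opcode of a generated job, e.g.
#
#   ops = map(compat.partial(_SetOpEarlyRelease, True), ops)
#
# opcodes without an early_release slot are simply left untouched.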
10163 def _NodeEvacDest(use_nodes, group, nodes):
10164 """Returns group or nodes depending on caller's choice.
10168 return utils.CommaJoin(nodes)
10173 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
10174 """Unpacks the result of change-group and node-evacuate iallocator requests.
10176 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
10177 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
10179 @type lu: L{LogicalUnit}
10180 @param lu: Logical unit instance
10181 @type alloc_result: tuple/list
10182 @param alloc_result: Result from iallocator
10183 @type early_release: bool
10184 @param early_release: Whether to release locks early if possible
10185 @type use_nodes: bool
10186 @param use_nodes: Whether to display node names instead of groups
10189 (moved, failed, jobs) = alloc_result
10192 lu.LogWarning("Unable to evacuate instances %s",
10193 utils.CommaJoin("%s (%s)" % (name, reason)
10194 for (name, reason) in failed))
10197 lu.LogInfo("Instances to be moved: %s",
10198 utils.CommaJoin("%s (to %s)" %
10199 (name, _NodeEvacDest(use_nodes, group, nodes))
10200 for (name, group, nodes) in moved))
10202 return [map(compat.partial(_SetOpEarlyRelease, early_release),
10203 map(opcodes.OpCode.LoadOpCode, ops))
10204 for ops in jobs]
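# Descriptive example of the payload unpacked above (all names hypothetical):
#
#   alloc_result = (
#     [("inst1", "group2", ["node3", "node4"])],        # moved
#     [("inst2", "no space on target group")],          # failed
#     [[{"OP_ID": "OP_INSTANCE_REPLACE_DISKS", ...}]],  # jobs (serialized ops)
#   )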
10207 class LUInstanceGrowDisk(LogicalUnit):
10208 """Grow a disk of an instance.
10211 HPATH = "disk-grow"
10212 HTYPE = constants.HTYPE_INSTANCE
10215 def ExpandNames(self):
10216 self._ExpandAndLockInstance()
10217 self.needed_locks[locking.LEVEL_NODE] = []
10218 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10220 def DeclareLocks(self, level):
10221 if level == locking.LEVEL_NODE:
10222 self._LockInstancesNodes()
10224 def BuildHooksEnv(self):
10225 """Build hooks env.
10227 This runs on the master, the primary and all the secondaries.
10231 "DISK": self.op.disk,
10232 "AMOUNT": self.op.amount,
10234 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10237 def BuildHooksNodes(self):
10238 """Build hooks nodes.
10241 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10244 def CheckPrereq(self):
10245 """Check prerequisites.
10247 This checks that the instance is in the cluster.
10250 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10251 assert instance is not None, \
10252 "Cannot retrieve locked instance %s" % self.op.instance_name
10253 nodenames = list(instance.all_nodes)
10254 for node in nodenames:
10255 _CheckNodeOnline(self, node)
10257 self.instance = instance
10259 if instance.disk_template not in constants.DTS_GROWABLE:
10260 raise errors.OpPrereqError("Instance's disk layout does not support"
10261 " growing", errors.ECODE_INVAL)
10263 self.disk = instance.FindDisk(self.op.disk)
10265 if instance.disk_template not in (constants.DT_FILE,
10266 constants.DT_SHARED_FILE):
10267 # TODO: check the free disk space for file, when that feature will be
10269 _CheckNodesFreeDiskPerVG(self, nodenames,
10270 self.disk.ComputeGrowth(self.op.amount))
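# Descriptive note (assumption): ComputeGrowth() is expected to return the
# additional space needed per volume group, e.g. {"xenvg": self.op.amount},
# which _CheckNodesFreeDiskPerVG then verifies on every node of the instance.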
10272 def Exec(self, feedback_fn):
10273 """Execute disk grow.
10276 instance = self.instance
10278 disk = self.disk
10279 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
10280 if not disks_ok:
10281 raise errors.OpExecError("Cannot activate block device to grow")
10283 # First run all grow ops in dry-run mode
10284 for node in instance.all_nodes:
10285 self.cfg.SetDiskID(disk, node)
10286 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
10287 result.Raise("Grow request failed to node %s" % node)
10289 # We know that (as far as we can test) operations across different
10290 # nodes will succeed, time to run it for real
10291 for node in instance.all_nodes:
10292 self.cfg.SetDiskID(disk, node)
10293 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
10294 result.Raise("Grow request failed to node %s" % node)
10296 # TODO: Rewrite code to work properly
10297 # DRBD goes into sync mode for a short amount of time after executing the
10298 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
10299 # calling "resize" in sync mode fails. Sleeping for a short amount of
10300 # time is a work-around.
10303 disk.RecordGrow(self.op.amount)
10304 self.cfg.Update(instance, feedback_fn)
10305 if self.op.wait_for_sync:
10306 disk_abort = not _WaitForSync(self, instance, disks=[disk])
10307 if disk_abort:
10308 self.proc.LogWarning("Disk sync-ing has not returned a good"
10309 " status; please check the instance")
10310 if not instance.admin_up:
10311 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
10312 elif not instance.admin_up:
10313 self.proc.LogWarning("Not shutting down the disk even if the instance is"
10314 " not supposed to be running because no wait for"
10315 " sync mode was requested")
10318 class LUInstanceQueryData(NoHooksLU):
10319 """Query runtime instance data.
10324 def ExpandNames(self):
10325 self.needed_locks = {}
10327 # Use locking if requested or when non-static information is wanted
10328 if not (self.op.static or self.op.use_locking):
10329 self.LogWarning("Non-static data requested, locks need to be acquired")
10330 self.op.use_locking = True
10332 if self.op.instances or not self.op.use_locking:
10333 # Expand instance names right here
10334 self.wanted_names = _GetWantedInstances(self, self.op.instances)
10336 # Will use acquired locks
10337 self.wanted_names = None
10339 if self.op.use_locking:
10340 self.share_locks = _ShareAll()
10342 if self.wanted_names is None:
10343 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
10345 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
10347 self.needed_locks[locking.LEVEL_NODE] = []
10348 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10350 def DeclareLocks(self, level):
10351 if self.op.use_locking and level == locking.LEVEL_NODE:
10352 self._LockInstancesNodes()
10354 def CheckPrereq(self):
10355 """Check prerequisites.
10357 This only checks the optional instance list against the existing names.
10360 if self.wanted_names is None:
10361 assert self.op.use_locking, "Locking was not used"
10362 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
10364 self.wanted_instances = \
10365 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
10367 def _ComputeBlockdevStatus(self, node, instance_name, dev):
10368 """Returns the status of a block device
10371 if self.op.static or not node:
10374 self.cfg.SetDiskID(dev, node)
10376 result = self.rpc.call_blockdev_find(node, dev)
10380 result.Raise("Can't compute disk status for %s" % instance_name)
10382 status = result.payload
10386 return (status.dev_path, status.major, status.minor,
10387 status.sync_percent, status.estimated_time,
10388 status.is_degraded, status.ldisk_status)
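# Descriptive note: the tuple above is positional and is what ends up in the
# "pstatus"/"sstatus" fields built by _ComputeDiskStatus below:
# (dev_path, major, minor, sync_percent, estimated_time, is_degraded,
#  ldisk_status).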
10390 def _ComputeDiskStatus(self, instance, snode, dev):
10391 """Compute block device status.
10394 if dev.dev_type in constants.LDS_DRBD:
10395 # we change the snode then (otherwise we use the one passed in)
10396 if dev.logical_id[0] == instance.primary_node:
10397 snode = dev.logical_id[1]
10399 snode = dev.logical_id[0]
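# Descriptive note: for DRBD the first two logical_id entries are the two node
# names (see the six-tuple unpacked in _ExecDrbd8Secondary), so whichever of
# them is not the primary must be the secondary whose status is queried below.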
10401 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
10402 instance.name, dev)
10403 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
10406 dev_children = map(compat.partial(self._ComputeDiskStatus,
10413 "iv_name": dev.iv_name,
10414 "dev_type": dev.dev_type,
10415 "logical_id": dev.logical_id,
10416 "physical_id": dev.physical_id,
10417 "pstatus": dev_pstatus,
10418 "sstatus": dev_sstatus,
10419 "children": dev_children,
10424 def Exec(self, feedback_fn):
10425 """Gather and return data"""
10428 cluster = self.cfg.GetClusterInfo()
10430 pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
10431 for i in self.wanted_instances)
10432 for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
10433 if self.op.static or pnode.offline:
10434 remote_state = None
10435 if pnode.offline:
10436 self.LogWarning("Primary node %s is marked offline, returning static"
10437 " information only for instance %s" %
10438 (pnode.name, instance.name))
10439 else:
10440 remote_info = self.rpc.call_instance_info(instance.primary_node,
10442 instance.hypervisor)
10443 remote_info.Raise("Error checking node %s" % instance.primary_node)
10444 remote_info = remote_info.payload
10445 if remote_info and "state" in remote_info:
10446 remote_state = "up"
10448 remote_state = "down"
10450 if instance.admin_up:
10451 config_state = "up"
10453 config_state = "down"
10455 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
10458 result[instance.name] = {
10459 "name": instance.name,
10460 "config_state": config_state,
10461 "run_state": remote_state,
10462 "pnode": instance.primary_node,
10463 "snodes": instance.secondary_nodes,
10465 # this happens to be the same format used for hooks
10466 "nics": _NICListToTuple(self, instance.nics),
10467 "disk_template": instance.disk_template,
10469 "hypervisor": instance.hypervisor,
10470 "network_port": instance.network_port,
10471 "hv_instance": instance.hvparams,
10472 "hv_actual": cluster.FillHV(instance, skip_globals=True),
10473 "be_instance": instance.beparams,
10474 "be_actual": cluster.FillBE(instance),
10475 "os_instance": instance.osparams,
10476 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
10477 "serial_no": instance.serial_no,
10478 "mtime": instance.mtime,
10479 "ctime": instance.ctime,
10480 "uuid": instance.uuid,
10486 class LUInstanceSetParams(LogicalUnit):
10487 """Modifies an instances's parameters.
10490 HPATH = "instance-modify"
10491 HTYPE = constants.HTYPE_INSTANCE
10494 def CheckArguments(self):
10495 if not (self.op.nics or self.op.disks or self.op.disk_template or
10496 self.op.hvparams or self.op.beparams or self.op.os_name):
10497 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
10499 if self.op.hvparams:
10500 _CheckGlobalHvParams(self.op.hvparams)
10504 for disk_op, disk_dict in self.op.disks:
10505 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
10506 if disk_op == constants.DDM_REMOVE:
10507 disk_addremove += 1
10509 elif disk_op == constants.DDM_ADD:
10510 disk_addremove += 1
10512 if not isinstance(disk_op, int):
10513 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
10514 if not isinstance(disk_dict, dict):
10515 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
10516 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10518 if disk_op == constants.DDM_ADD:
10519 mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
10520 if mode not in constants.DISK_ACCESS_SET:
10521 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
10522 errors.ECODE_INVAL)
10523 size = disk_dict.get(constants.IDISK_SIZE, None)
10524 if size is None:
10525 raise errors.OpPrereqError("Required disk parameter size missing",
10526 errors.ECODE_INVAL)
10527 try:
10528 size = int(size)
10529 except (TypeError, ValueError), err:
10530 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
10531 str(err), errors.ECODE_INVAL)
10532 disk_dict[constants.IDISK_SIZE] = size
10533 else:
10534 # modification of disk
10535 if constants.IDISK_SIZE in disk_dict:
10536 raise errors.OpPrereqError("Disk size change not possible, use"
10537 " grow-disk", errors.ECODE_INVAL)
10539 if disk_addremove > 1:
10540 raise errors.OpPrereqError("Only one disk add or remove operation"
10541 " supported at a time", errors.ECODE_INVAL)
10543 if self.op.disks and self.op.disk_template is not None:
10544 raise errors.OpPrereqError("Disk template conversion and other disk"
10545 " changes not supported at the same time",
10546 errors.ECODE_INVAL)
10548 if (self.op.disk_template and
10549 self.op.disk_template in constants.DTS_INT_MIRROR and
10550 self.op.remote_node is None):
10551 raise errors.OpPrereqError("Changing the disk template to a mirrored"
10552 " one requires specifying a secondary node",
10553 errors.ECODE_INVAL)
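# Illustrative example of the disk modifications validated above (sizes and
# indices are hypothetical); each entry is (DDM_ADD | DDM_REMOVE | <index>,
# parameter dict):
#
#   op.disks = [(constants.DDM_ADD, {constants.IDISK_SIZE: 1024,
#                                    constants.IDISK_MODE: constants.DISK_RDWR})]
#   op.disks = [(2, {constants.IDISK_MODE: constants.DISK_RDWR})]  # modify disk 2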
10557 for nic_op, nic_dict in self.op.nics:
10558 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
10559 if nic_op == constants.DDM_REMOVE:
10562 elif nic_op == constants.DDM_ADD:
10565 if not isinstance(nic_op, int):
10566 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
10567 if not isinstance(nic_dict, dict):
10568 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
10569 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10571 # nic_dict should be a dict
10572 nic_ip = nic_dict.get(constants.INIC_IP, None)
10573 if nic_ip is not None:
10574 if nic_ip.lower() == constants.VALUE_NONE:
10575 nic_dict[constants.INIC_IP] = None
10577 if not netutils.IPAddress.IsValid(nic_ip):
10578 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
10579 errors.ECODE_INVAL)
10581 nic_bridge = nic_dict.get("bridge", None)
10582 nic_link = nic_dict.get(constants.INIC_LINK, None)
10583 if nic_bridge and nic_link:
10584 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
10585 " at the same time", errors.ECODE_INVAL)
10586 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
10587 nic_dict["bridge"] = None
10588 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
10589 nic_dict[constants.INIC_LINK] = None
10591 if nic_op == constants.DDM_ADD:
10592 nic_mac = nic_dict.get(constants.INIC_MAC, None)
10593 if nic_mac is None:
10594 nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
10596 if constants.INIC_MAC in nic_dict:
10597 nic_mac = nic_dict[constants.INIC_MAC]
10598 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10599 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
10601 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
10602 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
10603 " modifying an existing nic",
10604 errors.ECODE_INVAL)
10606 if nic_addremove > 1:
10607 raise errors.OpPrereqError("Only one NIC add or remove operation"
10608 " supported at a time", errors.ECODE_INVAL)
10610 def ExpandNames(self):
10611 self._ExpandAndLockInstance()
10612 self.needed_locks[locking.LEVEL_NODE] = []
10613 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10615 def DeclareLocks(self, level):
10616 if level == locking.LEVEL_NODE:
10617 self._LockInstancesNodes()
10618 if self.op.disk_template and self.op.remote_node:
10619 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10620 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
10622 def BuildHooksEnv(self):
10623 """Build hooks env.
10625 This runs on the master, primary and secondaries.
10629 if constants.BE_MEMORY in self.be_new:
10630 args["memory"] = self.be_new[constants.BE_MEMORY]
10631 if constants.BE_VCPUS in self.be_new:
10632 args["vcpus"] = self.be_new[constants.BE_VCPUS]
10633 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
10634 # information at all.
10637 nic_override = dict(self.op.nics)
10638 for idx, nic in enumerate(self.instance.nics):
10639 if idx in nic_override:
10640 this_nic_override = nic_override[idx]
10642 this_nic_override = {}
10643 if constants.INIC_IP in this_nic_override:
10644 ip = this_nic_override[constants.INIC_IP]
10647 if constants.INIC_MAC in this_nic_override:
10648 mac = this_nic_override[constants.INIC_MAC]
10651 if idx in self.nic_pnew:
10652 nicparams = self.nic_pnew[idx]
10654 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
10655 mode = nicparams[constants.NIC_MODE]
10656 link = nicparams[constants.NIC_LINK]
10657 args["nics"].append((ip, mac, mode, link))
10658 if constants.DDM_ADD in nic_override:
10659 ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
10660 mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
10661 nicparams = self.nic_pnew[constants.DDM_ADD]
10662 mode = nicparams[constants.NIC_MODE]
10663 link = nicparams[constants.NIC_LINK]
10664 args["nics"].append((ip, mac, mode, link))
10665 elif constants.DDM_REMOVE in nic_override:
10666 del args["nics"][-1]
10668 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
10669 if self.op.disk_template:
10670 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
10674 def BuildHooksNodes(self):
10675 """Build hooks nodes.
10678 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10681 def CheckPrereq(self):
10682 """Check prerequisites.
10684 This only checks the instance list against the existing names.
10687 # checking the new params on the primary/secondary nodes
10689 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10690 cluster = self.cluster = self.cfg.GetClusterInfo()
10691 assert self.instance is not None, \
10692 "Cannot retrieve locked instance %s" % self.op.instance_name
10693 pnode = instance.primary_node
10694 nodelist = list(instance.all_nodes)
10697 if self.op.os_name and not self.op.force:
10698 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
10699 self.op.force_variant)
10700 instance_os = self.op.os_name
10702 instance_os = instance.os
10704 if self.op.disk_template:
10705 if instance.disk_template == self.op.disk_template:
10706 raise errors.OpPrereqError("Instance already has disk template %s" %
10707 instance.disk_template, errors.ECODE_INVAL)
10709 if (instance.disk_template,
10710 self.op.disk_template) not in self._DISK_CONVERSIONS:
10711 raise errors.OpPrereqError("Unsupported disk template conversion from"
10712 " %s to %s" % (instance.disk_template,
10713 self.op.disk_template),
10714 errors.ECODE_INVAL)
10715 _CheckInstanceDown(self, instance, "cannot change disk template")
10716 if self.op.disk_template in constants.DTS_INT_MIRROR:
10717 if self.op.remote_node == pnode:
10718 raise errors.OpPrereqError("Given new secondary node %s is the same"
10719 " as the primary node of the instance" %
10720 self.op.remote_node, errors.ECODE_STATE)
10721 _CheckNodeOnline(self, self.op.remote_node)
10722 _CheckNodeNotDrained(self, self.op.remote_node)
10723 # FIXME: here we assume that the old instance type is DT_PLAIN
10724 assert instance.disk_template == constants.DT_PLAIN
10725 disks = [{constants.IDISK_SIZE: d.size,
10726 constants.IDISK_VG: d.logical_id[0]}
10727 for d in instance.disks]
10728 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
10729 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
10731 # hvparams processing
10732 if self.op.hvparams:
10733 hv_type = instance.hypervisor
10734 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
10735 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
10736 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
10739 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
10740 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
10741 self.hv_new = hv_new # the new actual values
10742 self.hv_inst = i_hvdict # the new dict (without defaults)
10744 self.hv_new = self.hv_inst = {}
10746 # beparams processing
10747 if self.op.beparams:
10748 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
10750 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
10751 be_new = cluster.SimpleFillBE(i_bedict)
10752 self.be_new = be_new # the new actual values
10753 self.be_inst = i_bedict # the new dict (without defaults)
10755 self.be_new = self.be_inst = {}
10756 be_old = cluster.FillBE(instance)
10758 # osparams processing
10759 if self.op.osparams:
10760 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
10761 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
10762 self.os_inst = i_osdict # the new dict (without defaults)
10768 if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
10769 be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
10770 mem_check_list = [pnode]
10771 if be_new[constants.BE_AUTO_BALANCE]:
10772 # either we changed auto_balance to yes or it was from before
10773 mem_check_list.extend(instance.secondary_nodes)
10774 instance_info = self.rpc.call_instance_info(pnode, instance.name,
10775 instance.hypervisor)
10776 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
10777 instance.hypervisor)
10778 pninfo = nodeinfo[pnode]
10779 msg = pninfo.fail_msg
10781 # Assume the primary node is unreachable and go ahead
10782 self.warn.append("Can't get info from primary node %s: %s" %
10784 elif not isinstance(pninfo.payload.get("memory_free", None), int):
10785 self.warn.append("Node data from primary node %s doesn't contain"
10786 " free memory information" % pnode)
10787 elif instance_info.fail_msg:
10788 self.warn.append("Can't get instance runtime information: %s" %
10789 instance_info.fail_msg)
10791 if instance_info.payload:
10792 current_mem = int(instance_info.payload["memory"])
10794 # Assume instance not running
10795 # (there is a slight race condition here, but it's not very probable,
10796 # and we have no other way to check)
10798 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
10799 pninfo.payload["memory_free"])
10801 raise errors.OpPrereqError("This change will prevent the instance"
10802 " from starting, due to %d MB of memory"
10803 " missing on its primary node" % miss_mem,
10804 errors.ECODE_NORES)
10806 if be_new[constants.BE_AUTO_BALANCE]:
10807 for node, nres in nodeinfo.items():
10808 if node not in instance.secondary_nodes:
10810 nres.Raise("Can't get info from secondary node %s" % node,
10811 prereq=True, ecode=errors.ECODE_STATE)
10812 if not isinstance(nres.payload.get("memory_free", None), int):
10813 raise errors.OpPrereqError("Secondary node %s didn't return free"
10814 " memory information" % node,
10815 errors.ECODE_STATE)
10816 elif be_new[constants.BE_MEMORY] > nres.payload["memory_free"]:
10817 raise errors.OpPrereqError("This change will prevent the instance"
10818 " from failover to its secondary node"
10819 " %s, due to not enough memory" % node,
10820 errors.ECODE_STATE)
10824 self.nic_pinst = {}
10825 for nic_op, nic_dict in self.op.nics:
10826 if nic_op == constants.DDM_REMOVE:
10827 if not instance.nics:
10828 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
10829 errors.ECODE_INVAL)
10831 if nic_op != constants.DDM_ADD:
10833 if not instance.nics:
10834 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
10835 " no NICs" % nic_op,
10836 errors.ECODE_INVAL)
10837 if nic_op < 0 or nic_op >= len(instance.nics):
10838 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
10840 (nic_op, len(instance.nics) - 1),
10841 errors.ECODE_INVAL)
10842 old_nic_params = instance.nics[nic_op].nicparams
10843 old_nic_ip = instance.nics[nic_op].ip
10845 old_nic_params = {}
10848 update_params_dict = dict([(key, nic_dict[key])
10849 for key in constants.NICS_PARAMETERS
10850 if key in nic_dict])
10852 if "bridge" in nic_dict:
10853 update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
10855 new_nic_params = _GetUpdatedParams(old_nic_params,
10856 update_params_dict)
10857 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
10858 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
10859 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
10860 self.nic_pinst[nic_op] = new_nic_params
10861 self.nic_pnew[nic_op] = new_filled_nic_params
10862 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
10864 if new_nic_mode == constants.NIC_MODE_BRIDGED:
10865 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
10866 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
10868 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
10870 self.warn.append(msg)
10872 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
10873 if new_nic_mode == constants.NIC_MODE_ROUTED:
10874 if constants.INIC_IP in nic_dict:
10875 nic_ip = nic_dict[constants.INIC_IP]
10877 nic_ip = old_nic_ip
10879 raise errors.OpPrereqError("Cannot set the nic ip to None"
10880 " on a routed nic", errors.ECODE_INVAL)
10881 if constants.INIC_MAC in nic_dict:
10882 nic_mac = nic_dict[constants.INIC_MAC]
10883 if nic_mac is None:
10884 raise errors.OpPrereqError("Cannot set the nic mac to None",
10885 errors.ECODE_INVAL)
10886 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10887 # otherwise generate the mac
10888 nic_dict[constants.INIC_MAC] = \
10889 self.cfg.GenerateMAC(self.proc.GetECId())
10891 # or validate/reserve the current one
10893 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
10894 except errors.ReservationError:
10895 raise errors.OpPrereqError("MAC address %s already in use"
10896 " in cluster" % nic_mac,
10897 errors.ECODE_NOTUNIQUE)
10900 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
10901 raise errors.OpPrereqError("Disk operations not supported for"
10902 " diskless instances",
10903 errors.ECODE_INVAL)
10904 for disk_op, _ in self.op.disks:
10905 if disk_op == constants.DDM_REMOVE:
10906 if len(instance.disks) == 1:
10907 raise errors.OpPrereqError("Cannot remove the last disk of"
10908 " an instance", errors.ECODE_INVAL)
10909 _CheckInstanceDown(self, instance, "cannot remove disks")
10911 if (disk_op == constants.DDM_ADD and
10912 len(instance.disks) >= constants.MAX_DISKS):
10913 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
10914 " add more" % constants.MAX_DISKS,
10915 errors.ECODE_STATE)
10916 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
10918 if disk_op < 0 or disk_op >= len(instance.disks):
10919 raise errors.OpPrereqError("Invalid disk index %s, valid values"
10921 (disk_op, len(instance.disks)),
10922 errors.ECODE_INVAL)
10926 def _ConvertPlainToDrbd(self, feedback_fn):
10927 """Converts an instance from plain to drbd.
10930 feedback_fn("Converting template to drbd")
10931 instance = self.instance
10932 pnode = instance.primary_node
10933 snode = self.op.remote_node
10935 # create a fake disk info for _GenerateDiskTemplate
10936 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
10937 constants.IDISK_VG: d.logical_id[0]}
10938 for d in instance.disks]
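# Descriptive example (values are hypothetical): for a single 10 GiB plain disk
# in volume group "xenvg" this yields
#   disk_info = [{constants.IDISK_SIZE: 10240,
#                 constants.IDISK_MODE: constants.DISK_RDWR,
#                 constants.IDISK_VG: "xenvg"}]
# which is all _GenerateDiskTemplate needs to lay out the matching DRBD disks.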
10939 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
10940 instance.name, pnode, [snode],
10941 disk_info, None, None, 0, feedback_fn)
10942 info = _GetInstanceInfoText(instance)
10943 feedback_fn("Creating aditional volumes...")
10944 # first, create the missing data and meta devices
10945 for disk in new_disks:
10946 # unfortunately this is... not too nice
10947 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
10949 for child in disk.children:
10950 _CreateSingleBlockDev(self, snode, instance, child, info, True)
10951 # at this stage, all new LVs have been created, we can rename the
10953 feedback_fn("Renaming original volumes...")
10954 rename_list = [(o, n.children[0].logical_id)
10955 for (o, n) in zip(instance.disks, new_disks)]
10956 result = self.rpc.call_blockdev_rename(pnode, rename_list)
10957 result.Raise("Failed to rename original LVs")
10959 feedback_fn("Initializing DRBD devices...")
10960 # all child devices are in place, we can now create the DRBD devices
10961 for disk in new_disks:
10962 for node in [pnode, snode]:
10963 f_create = node == pnode
10964 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
10966 # at this point, the instance has been modified
10967 instance.disk_template = constants.DT_DRBD8
10968 instance.disks = new_disks
10969 self.cfg.Update(instance, feedback_fn)
10971 # disks are created, waiting for sync
10972 disk_abort = not _WaitForSync(self, instance,
10973 oneshot=not self.op.wait_for_sync)
10974 if disk_abort:
10975 raise errors.OpExecError("There are some degraded disks for"
10976 " this instance, please cleanup manually")
10978 def _ConvertDrbdToPlain(self, feedback_fn):
10979 """Converts an instance from drbd to plain.
10982 instance = self.instance
10983 assert len(instance.secondary_nodes) == 1
10984 pnode = instance.primary_node
10985 snode = instance.secondary_nodes[0]
10986 feedback_fn("Converting template to plain")
10988 old_disks = instance.disks
10989 new_disks = [d.children[0] for d in old_disks]
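# Descriptive note: a DRBD8 disk has two LV children, data first and metadata
# second (see _CreateNewStorage above), so keeping children[0] turns the data
# LV into the new plain disk while the metadata LVs are removed further down.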
10991 # copy over size and mode
10992 for parent, child in zip(old_disks, new_disks):
10993 child.size = parent.size
10994 child.mode = parent.mode
10996 # update instance structure
10997 instance.disks = new_disks
10998 instance.disk_template = constants.DT_PLAIN
10999 self.cfg.Update(instance, feedback_fn)
11001 feedback_fn("Removing volumes on the secondary node...")
11002 for disk in old_disks:
11003 self.cfg.SetDiskID(disk, snode)
11004 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
11006 self.LogWarning("Could not remove block device %s on node %s,"
11007 " continuing anyway: %s", disk.iv_name, snode, msg)
11009 feedback_fn("Removing unneeded volumes on the primary node...")
11010 for idx, disk in enumerate(old_disks):
11011 meta = disk.children[1]
11012 self.cfg.SetDiskID(meta, pnode)
11013 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
11015 self.LogWarning("Could not remove metadata for disk %d on node %s,"
11016 " continuing anyway: %s", idx, pnode, msg)
11018 def Exec(self, feedback_fn):
11019 """Modifies an instance.
11021 All parameters take effect only at the next restart of the instance.
11024 # Process here the warnings from CheckPrereq, as we don't have a
11025 # feedback_fn there.
11026 for warn in self.warn:
11027 feedback_fn("WARNING: %s" % warn)
11030 instance = self.instance
11032 for disk_op, disk_dict in self.op.disks:
11033 if disk_op == constants.DDM_REMOVE:
11034 # remove the last disk
11035 device = instance.disks.pop()
11036 device_idx = len(instance.disks)
11037 for node, disk in device.ComputeNodeTree(instance.primary_node):
11038 self.cfg.SetDiskID(disk, node)
11039 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
11041 self.LogWarning("Could not remove disk/%d on node %s: %s,"
11042 " continuing anyway", device_idx, node, msg)
11043 result.append(("disk/%d" % device_idx, "remove"))
11044 elif disk_op == constants.DDM_ADD:
11046 if instance.disk_template in (constants.DT_FILE,
11047 constants.DT_SHARED_FILE):
11048 file_driver, file_path = instance.disks[0].logical_id
11049 file_path = os.path.dirname(file_path)
11051 file_driver = file_path = None
11052 disk_idx_base = len(instance.disks)
11053 new_disk = _GenerateDiskTemplate(self,
11054 instance.disk_template,
11055 instance.name, instance.primary_node,
11056 instance.secondary_nodes,
11060 disk_idx_base, feedback_fn)[0]
11061 instance.disks.append(new_disk)
11062 info = _GetInstanceInfoText(instance)
11064 logging.info("Creating volume %s for instance %s",
11065 new_disk.iv_name, instance.name)
11066 # Note: this needs to be kept in sync with _CreateDisks
11068 for node in instance.all_nodes:
11069 f_create = node == instance.primary_node
11071 _CreateBlockDev(self, node, instance, new_disk,
11072 f_create, info, f_create)
11073 except errors.OpExecError, err:
11074 self.LogWarning("Failed to create volume %s (%s) on"
11076 new_disk.iv_name, new_disk, node, err)
11077 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
11078 (new_disk.size, new_disk.mode)))
11080 # change a given disk
11081 instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
11082 result.append(("disk.mode/%d" % disk_op,
11083 disk_dict[constants.IDISK_MODE]))
11085 if self.op.disk_template:
11086 r_shut = _ShutdownInstanceDisks(self, instance)
11088 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
11089 " proceed with disk template conversion")
11090 mode = (instance.disk_template, self.op.disk_template)
11092 self._DISK_CONVERSIONS[mode](self, feedback_fn)
11094 self.cfg.ReleaseDRBDMinors(instance.name)
11096 result.append(("disk_template", self.op.disk_template))
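# NIC changes below use the same (operation, parameters) format, e.g.
#   [(constants.DDM_ADD, {constants.INIC_MAC: "aa:00:00:35:a7:01",
#                         constants.INIC_IP: None})]
# (the MAC shown is purely an example value)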
11099 for nic_op, nic_dict in self.op.nics:
11100 if nic_op == constants.DDM_REMOVE:
11101 # remove the last nic
11102 del instance.nics[-1]
11103 result.append(("nic.%d" % len(instance.nics), "remove"))
11104 elif nic_op == constants.DDM_ADD:
11105 # mac and bridge should be set by now
11106 mac = nic_dict[constants.INIC_MAC]
11107 ip = nic_dict.get(constants.INIC_IP, None)
11108 nicparams = self.nic_pinst[constants.DDM_ADD]
11109 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
11110 instance.nics.append(new_nic)
11111 result.append(("nic.%d" % (len(instance.nics) - 1),
11112 "add:mac=%s,ip=%s,mode=%s,link=%s" %
11113 (new_nic.mac, new_nic.ip,
11114 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
11115 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
11118 for key in (constants.INIC_MAC, constants.INIC_IP):
11119 if key in nic_dict:
11120 setattr(instance.nics[nic_op], key, nic_dict[key])
11121 if nic_op in self.nic_pinst:
11122 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
11123 for key, val in nic_dict.iteritems():
11124 result.append(("nic.%s/%d" % (key, nic_op), val))
11127 if self.op.hvparams:
11128 instance.hvparams = self.hv_inst
11129 for key, val in self.op.hvparams.iteritems():
11130 result.append(("hv/%s" % key, val))
11133 if self.op.beparams:
11134 instance.beparams = self.be_inst
11135 for key, val in self.op.beparams.iteritems():
11136 result.append(("be/%s" % key, val))
11139 if self.op.os_name:
11140 instance.os = self.op.os_name
11143 if self.op.osparams:
11144 instance.osparams = self.os_inst
11145 for key, val in self.op.osparams.iteritems():
11146 result.append(("os/%s" % key, val))
11148 self.cfg.Update(instance, feedback_fn)
11152 _DISK_CONVERSIONS = {
11153 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
11154 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
11158 class LUInstanceChangeGroup(LogicalUnit):
11159 HPATH = "instance-change-group"
11160 HTYPE = constants.HTYPE_INSTANCE
11163 def ExpandNames(self):
11164 self.share_locks = _ShareAll()
11165 self.needed_locks = {
11166 locking.LEVEL_NODEGROUP: [],
11167 locking.LEVEL_NODE: [],
11170 self._ExpandAndLockInstance()
11172 if self.op.target_groups:
11173 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
11174 self.op.target_groups)
11176 self.req_target_uuids = None
11178 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
11180 def DeclareLocks(self, level):
11181 if level == locking.LEVEL_NODEGROUP:
11182 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11184 if self.req_target_uuids:
11185 lock_groups = set(self.req_target_uuids)
11187 # Lock all groups used by instance optimistically; this requires going
11188 # via the node before it's locked, requiring verification later on
11189 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11190 lock_groups.update(instance_groups)
11192 # No target groups, need to lock all of them
11193 lock_groups = locking.ALL_SET
11195 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
11197 elif level == locking.LEVEL_NODE:
11198 if self.req_target_uuids:
11199 # Lock all nodes used by instances
11200 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11201 self._LockInstancesNodes()
11203 # Lock all nodes in all potential target groups
11204 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
11205 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
11206 member_nodes = [node_name
11207 for group in lock_groups
11208 for node_name in self.cfg.GetNodeGroup(group).members]
11209 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
11211 # Lock all nodes as all groups are potential targets
11212 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11214 def CheckPrereq(self):
11215 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11216 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11217 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11219 assert (self.req_target_uuids is None or
11220 owned_groups.issuperset(self.req_target_uuids))
11221 assert owned_instances == set([self.op.instance_name])
11223 # Get instance information
11224 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11226 # Check if node groups for locked instance are still correct
11227 assert owned_nodes.issuperset(self.instance.all_nodes), \
11228 ("Instance %s's nodes changed while we kept the lock" %
11229 self.op.instance_name)
11231 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
11234 if self.req_target_uuids:
11235 # User requested specific target groups
11236 self.target_uuids = self.req_target_uuids
11238 # All groups except those used by the instance are potential targets
11239 self.target_uuids = owned_groups - inst_groups
11241 conflicting_groups = self.target_uuids & inst_groups
11242 if conflicting_groups:
11243 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
11244 " used by the instance '%s'" %
11245 (utils.CommaJoin(conflicting_groups),
11246 self.op.instance_name),
11247 errors.ECODE_INVAL)
11249 if not self.target_uuids:
11250 raise errors.OpPrereqError("There are no possible target groups",
11251 errors.ECODE_INVAL)
11253 def BuildHooksEnv(self):
11254 """Build hooks env.
11257 assert self.target_uuids
11260 "TARGET_GROUPS": " ".join(self.target_uuids),
11263 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11267 def BuildHooksNodes(self):
11268 """Build hooks nodes.
11271 mn = self.cfg.GetMasterNode()
11272 return ([mn], [mn])
11274 def Exec(self, feedback_fn):
11275 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
11277 assert instances == [self.op.instance_name], "Instance not locked"
11279 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
11280 instances=instances, target_groups=list(self.target_uuids))
11282 ial.Run(self.op.iallocator)
11284 if not ial.success:
11285 raise errors.OpPrereqError("Can't compute solution for changing group of"
11286 " instance '%s' using iallocator '%s': %s" %
11287 (self.op.instance_name, self.op.iallocator,
11289 errors.ECODE_NORES)
11291 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
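# jobs is a list of job definitions, each itself a list of opcodes (e.g. a
# failover/migrate or replace-disks opcode per affected instance); wrapping
# them in ResultWithJobs lets the master processor submit them as new jobs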
11293 self.LogInfo("Iallocator returned %s job(s) for changing group of"
11294 " instance '%s'", len(jobs), self.op.instance_name)
11296 return ResultWithJobs(jobs)
11299 class LUBackupQuery(NoHooksLU):
11300 """Query the exports list
11305 def ExpandNames(self):
11306 self.needed_locks = {}
11307 self.share_locks[locking.LEVEL_NODE] = 1
11308 if not self.op.nodes:
11309 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11311 self.needed_locks[locking.LEVEL_NODE] = \
11312 _GetWantedNodes(self, self.op.nodes)
11314 def Exec(self, feedback_fn):
11315 """Compute the list of all the exported system images.
11318 @return: a dictionary with the structure node->(export-list)
11319 where export-list is a list of the instances exported on
11320 that node.
11323 self.nodes = self.owned_locks(locking.LEVEL_NODE)
11324 rpcresult = self.rpc.call_export_list(self.nodes)
11326 for node in rpcresult:
11327 if rpcresult[node].fail_msg:
11328 result[node] = False
11330 result[node] = rpcresult[node].payload
11335 class LUBackupPrepare(NoHooksLU):
11336 """Prepares an instance for an export and returns useful information.
11341 def ExpandNames(self):
11342 self._ExpandAndLockInstance()
11344 def CheckPrereq(self):
11345 """Check prerequisites.
11348 instance_name = self.op.instance_name
11350 self.instance = self.cfg.GetInstanceInfo(instance_name)
11351 assert self.instance is not None, \
11352 "Cannot retrieve locked instance %s" % self.op.instance_name
11353 _CheckNodeOnline(self, self.instance.primary_node)
11355 self._cds = _GetClusterDomainSecret()
11357 def Exec(self, feedback_fn):
11358 """Prepares an instance for an export.
11361 instance = self.instance
11363 if self.op.mode == constants.EXPORT_MODE_REMOTE:
11364 salt = utils.GenerateSecret(8)
11366 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
11367 result = self.rpc.call_x509_cert_create(instance.primary_node,
11368 constants.RIE_CERT_VALIDITY)
11369 result.Raise("Can't create X509 key and certificate on %s" % result.node)
11371 (name, cert_pem) = result.payload
11373 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
11377 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
11378 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
11380 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
11386 class LUBackupExport(LogicalUnit):
11387 """Export an instance to an image in the cluster.
11390 HPATH = "instance-export"
11391 HTYPE = constants.HTYPE_INSTANCE
11394 def CheckArguments(self):
11395 """Check the arguments.
11398 self.x509_key_name = self.op.x509_key_name
11399 self.dest_x509_ca_pem = self.op.destination_x509_ca
11401 if self.op.mode == constants.EXPORT_MODE_REMOTE:
11402 if not self.x509_key_name:
11403 raise errors.OpPrereqError("Missing X509 key name for encryption",
11404 errors.ECODE_INVAL)
11406 if not self.dest_x509_ca_pem:
11407 raise errors.OpPrereqError("Missing destination X509 CA",
11408 errors.ECODE_INVAL)
11410 def ExpandNames(self):
11411 self._ExpandAndLockInstance()
11413 # Lock all nodes for local exports
11414 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11415 # FIXME: lock only instance primary and destination node
11417 # Sad but true, for now we have to lock all nodes, as we don't know where
11418 # the previous export might be, and in this LU we search for it and
11419 # remove it from its current node. In the future we could fix this by:
11420 # - making a tasklet to search (share-lock all), then create the
11421 # new one, then one to remove, after
11422 # - removing the removal operation altogether
11423 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11425 def DeclareLocks(self, level):
11426 """Last minute lock declaration."""
11427 # All nodes are locked anyway, so nothing to do here.
11429 def BuildHooksEnv(self):
11430 """Build hooks env.
11432 This will run on the master, primary node and target node.
11436 "EXPORT_MODE": self.op.mode,
11437 "EXPORT_NODE": self.op.target_node,
11438 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
11439 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
11440 # TODO: Generic function for boolean env variables
11441 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
11444 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
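# (hook scripts receive these variables with the GANETI_ prefix, e.g.
# GANETI_EXPORT_NODE)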
11448 def BuildHooksNodes(self):
11449 """Build hooks nodes.
11452 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
11454 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11455 nl.append(self.op.target_node)
11459 def CheckPrereq(self):
11460 """Check prerequisites.
11462 This checks that the instance and node names are valid.
11465 instance_name = self.op.instance_name
11467 self.instance = self.cfg.GetInstanceInfo(instance_name)
11468 assert self.instance is not None, \
11469 "Cannot retrieve locked instance %s" % self.op.instance_name
11470 _CheckNodeOnline(self, self.instance.primary_node)
11472 if (self.op.remove_instance and self.instance.admin_up and
11473 not self.op.shutdown):
11474 raise errors.OpPrereqError("Can not remove instance without shutting it"
11477 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11478 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
11479 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
11480 assert self.dst_node is not None
11482 _CheckNodeOnline(self, self.dst_node.name)
11483 _CheckNodeNotDrained(self, self.dst_node.name)
11486 self.dest_disk_info = None
11487 self.dest_x509_ca = None
11489 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11490 self.dst_node = None
11492 if len(self.op.target_node) != len(self.instance.disks):
11493 raise errors.OpPrereqError(("Received destination information for %s"
11494 " disks, but instance %s has %s disks") %
11495 (len(self.op.target_node), instance_name,
11496 len(self.instance.disks)),
11497 errors.ECODE_INVAL)
11499 cds = _GetClusterDomainSecret()
11501 # Check X509 key name
11503 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
11504 except (TypeError, ValueError), err:
11505 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
11507 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
11508 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
11509 errors.ECODE_INVAL)
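# the key name tuple comes from LUBackupPrepare and looks roughly like
#   ("<key-uuid>", utils.Sha1Hmac(cds, "<key-uuid>", salt=salt), salt)
# so only a party knowing the cluster domain secret can have generated it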
11511 # Load and verify CA
11513 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
11514 except OpenSSL.crypto.Error, err:
11515 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
11516 (err, ), errors.ECODE_INVAL)
11518 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
11519 if errcode is not None:
11520 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
11521 (msg, ), errors.ECODE_INVAL)
11523 self.dest_x509_ca = cert
11525 # Verify target information
11527 for idx, disk_data in enumerate(self.op.target_node):
11529 (host, port, magic) = \
11530 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
11531 except errors.GenericError, err:
11532 raise errors.OpPrereqError("Target info for disk %s: %s" %
11533 (idx, err), errors.ECODE_INVAL)
11535 disk_info.append((host, port, magic))
11537 assert len(disk_info) == len(self.op.target_node)
11538 self.dest_disk_info = disk_info
11541 raise errors.ProgrammerError("Unhandled export mode %r" %
11544 # instance disk type verification
11545 # TODO: Implement export support for file-based disks
11546 for disk in self.instance.disks:
11547 if disk.dev_type == constants.LD_FILE:
11548 raise errors.OpPrereqError("Export not supported for instances with"
11549 " file-based disks", errors.ECODE_INVAL)
11551 def _CleanupExports(self, feedback_fn):
11552 """Removes exports of current instance from all other nodes.
11554 If an instance in a cluster with nodes A..D was exported to node C, its
11555 exports will be removed from the nodes A, B and D.
11558 assert self.op.mode != constants.EXPORT_MODE_REMOTE
11560 nodelist = self.cfg.GetNodeList()
11561 nodelist.remove(self.dst_node.name)
11563 # on one-node clusters nodelist will be empty after the removal; if we
11564 # proceeded, the backup would be removed because OpBackupQuery
11565 # substitutes an empty list with the full cluster node list.
11566 iname = self.instance.name
11568 feedback_fn("Removing old exports for instance %s" % iname)
11569 exportlist = self.rpc.call_export_list(nodelist)
11570 for node in exportlist:
11571 if exportlist[node].fail_msg:
11573 if iname in exportlist[node].payload:
11574 msg = self.rpc.call_export_remove(node, iname).fail_msg
11576 self.LogWarning("Could not remove older export for instance %s"
11577 " on node %s: %s", iname, node, msg)
11579 def Exec(self, feedback_fn):
11580 """Export an instance to an image in the cluster.
11583 assert self.op.mode in constants.EXPORT_MODES
11585 instance = self.instance
11586 src_node = instance.primary_node
11588 if self.op.shutdown:
11589 # shutdown the instance, but not the disks
11590 feedback_fn("Shutting down instance %s" % instance.name)
11591 result = self.rpc.call_instance_shutdown(src_node, instance,
11592 self.op.shutdown_timeout)
11593 # TODO: Maybe ignore failures if ignore_remove_failures is set
11594 result.Raise("Could not shutdown instance %s on"
11595 " node %s" % (instance.name, src_node))
11597 # set the disks ID correctly since call_instance_start needs the
11598 # correct drbd minor to create the symlinks
11599 for disk in instance.disks:
11600 self.cfg.SetDiskID(disk, src_node)
11602 activate_disks = (not instance.admin_up)
11605 # Activate the instance disks if we're exporting a stopped instance
11606 feedback_fn("Activating disks for %s" % instance.name)
11607 _StartInstanceDisks(self, instance, None)
11610 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
11613 helper.CreateSnapshots()
11615 if (self.op.shutdown and instance.admin_up and
11616 not self.op.remove_instance):
11617 assert not activate_disks
11618 feedback_fn("Starting instance %s" % instance.name)
11619 result = self.rpc.call_instance_start(src_node, instance,
11621 msg = result.fail_msg
11623 feedback_fn("Failed to start instance: %s" % msg)
11624 _ShutdownInstanceDisks(self, instance)
11625 raise errors.OpExecError("Could not start instance: %s" % msg)
11627 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11628 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
11629 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11630 connect_timeout = constants.RIE_CONNECT_TIMEOUT
11631 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11633 (key_name, _, _) = self.x509_key_name
11636 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
11639 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
11640 key_name, dest_ca_pem,
11645 # Check for backwards compatibility
11646 assert len(dresults) == len(instance.disks)
11647 assert compat.all(isinstance(i, bool) for i in dresults), \
11648 "Not all results are boolean: %r" % dresults
11652 feedback_fn("Deactivating disks for %s" % instance.name)
11653 _ShutdownInstanceDisks(self, instance)
11655 if not (compat.all(dresults) and fin_resu):
11658 failures.append("export finalization")
11659 if not compat.all(dresults):
11660 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
11662 failures.append("disk export: disk(s) %s" % fdsk)
11664 raise errors.OpExecError("Export failed, errors in %s" %
11665 utils.CommaJoin(failures))
11667 # At this point, the export was successful, we can cleanup/finish
11669 # Remove instance if requested
11670 if self.op.remove_instance:
11671 feedback_fn("Removing instance %s" % instance.name)
11672 _RemoveInstance(self, feedback_fn, instance,
11673 self.op.ignore_remove_failures)
11675 if self.op.mode == constants.EXPORT_MODE_LOCAL:
11676 self._CleanupExports(feedback_fn)
11678 return fin_resu, dresults
11681 class LUBackupRemove(NoHooksLU):
11682 """Remove exports related to the named instance.
11687 def ExpandNames(self):
11688 self.needed_locks = {}
11689 # We need all nodes to be locked in order for RemoveExport to work, but we
11690 # don't need to lock the instance itself, as nothing will happen to it (and
11691 # we can remove exports also for a removed instance)
11692 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11694 def Exec(self, feedback_fn):
11695 """Remove any export.
11698 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
11699 # If the instance was not found we'll try with the name that was passed in.
11700 # This will only work if it was an FQDN, though.
11702 if not instance_name:
11704 instance_name = self.op.instance_name
11706 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
11707 exportlist = self.rpc.call_export_list(locked_nodes)
11709 for node in exportlist:
11710 msg = exportlist[node].fail_msg
11712 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
11714 if instance_name in exportlist[node].payload:
11716 result = self.rpc.call_export_remove(node, instance_name)
11717 msg = result.fail_msg
11719 logging.error("Could not remove export for instance %s"
11720 " on node %s: %s", instance_name, node, msg)
11722 if fqdn_warn and not found:
11723 feedback_fn("Export not found. If trying to remove an export belonging"
11724 " to a deleted instance please use its Fully Qualified"
11728 class LUGroupAdd(LogicalUnit):
11729 """Logical unit for creating node groups.
11732 HPATH = "group-add"
11733 HTYPE = constants.HTYPE_GROUP
11736 def ExpandNames(self):
11737 # We need the new group's UUID here so that we can create and acquire the
11738 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
11739 # that it should not check whether the UUID exists in the configuration.
11740 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
11741 self.needed_locks = {}
11742 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11744 def CheckPrereq(self):
11745 """Check prerequisites.
11747 This checks that the given group name is not an existing node group
11752 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11753 except errors.OpPrereqError:
11756 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
11757 " node group (UUID: %s)" %
11758 (self.op.group_name, existing_uuid),
11759 errors.ECODE_EXISTS)
11761 if self.op.ndparams:
11762 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
11764 def BuildHooksEnv(self):
11765 """Build hooks env.
11769 "GROUP_NAME": self.op.group_name,
11772 def BuildHooksNodes(self):
11773 """Build hooks nodes.
11776 mn = self.cfg.GetMasterNode()
11777 return ([mn], [mn])
11779 def Exec(self, feedback_fn):
11780 """Add the node group to the cluster.
11783 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
11784 uuid=self.group_uuid,
11785 alloc_policy=self.op.alloc_policy,
11786 ndparams=self.op.ndparams)
11788 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
11789 del self.remove_locks[locking.LEVEL_NODEGROUP]
11792 class LUGroupAssignNodes(NoHooksLU):
11793 """Logical unit for assigning nodes to groups.
11798 def ExpandNames(self):
11799 # These raise errors.OpPrereqError on their own:
11800 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11801 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
11803 # We want to lock all the affected nodes and groups. We have readily
11804 # available the list of nodes, and the *destination* group. To gather the
11805 # list of "source" groups, we need to fetch node information later on.
11806 self.needed_locks = {
11807 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
11808 locking.LEVEL_NODE: self.op.nodes,
11811 def DeclareLocks(self, level):
11812 if level == locking.LEVEL_NODEGROUP:
11813 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
11815 # Try to get all affected nodes' groups without having the group or node
11816 # lock yet. Needs verification later in the code flow.
11817 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
11819 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
11821 def CheckPrereq(self):
11822 """Check prerequisites.
11825 assert self.needed_locks[locking.LEVEL_NODEGROUP]
11826 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
11827 frozenset(self.op.nodes))
11829 expected_locks = (set([self.group_uuid]) |
11830 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
11831 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
11832 if actual_locks != expected_locks:
11833 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
11834 " current groups are '%s', used to be '%s'" %
11835 (utils.CommaJoin(expected_locks),
11836 utils.CommaJoin(actual_locks)))
11838 self.node_data = self.cfg.GetAllNodesInfo()
11839 self.group = self.cfg.GetNodeGroup(self.group_uuid)
11840 instance_data = self.cfg.GetAllInstancesInfo()
11842 if self.group is None:
11843 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11844 (self.op.group_name, self.group_uuid))
11846 (new_splits, previous_splits) = \
11847 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
11848 for node in self.op.nodes],
11849 self.node_data, instance_data)
11852 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
11854 if not self.op.force:
11855 raise errors.OpExecError("The following instances get split by this"
11856 " change and --force was not given: %s" %
11859 self.LogWarning("This operation will split the following instances: %s",
11862 if previous_splits:
11863 self.LogWarning("In addition, these already-split instances continue"
11864 " to be split across groups: %s",
11865 utils.CommaJoin(utils.NiceSort(previous_splits)))
11867 def Exec(self, feedback_fn):
11868 """Assign nodes to a new group.
11871 for node in self.op.nodes:
11872 self.node_data[node].group = self.group_uuid
11874 # FIXME: Depends on side-effects of modifying the result of
11875 # C{cfg.GetAllNodesInfo}
11877 self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
11880 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
11881 """Check for split instances after a node assignment.
11883 This method considers a series of node assignments as an atomic operation,
11884 and returns information about split instances after applying the set of
11887 In particular, it returns information about newly split instances, and
11888 instances that were already split, and remain so after the change.
11890 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
11893 @type changes: list of (node_name, new_group_uuid) pairs.
11894 @param changes: list of node assignments to consider.
11895 @param node_data: a dict with data for all nodes
11896 @param instance_data: a dict with all instances to consider
11897 @rtype: a two-tuple
11898 @return: a list of instances that were previously okay and become split as a
11899 consequence of this change, and a list of instances that were previously
11900 split and that this change does not fix.
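For example, moving a node that hosts the primary of a DRBD instance into a
new group, while its secondary stays behind, makes that instance newly
split; an instance already spanning two groups that the move does not
reunite is reported in the second list.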
11903 changed_nodes = dict((node, group) for node, group in changes
11904 if node_data[node].group != group)
11906 all_split_instances = set()
11907 previously_split_instances = set()
11909 def InstanceNodes(instance):
11910 return [instance.primary_node] + list(instance.secondary_nodes)
11912 for inst in instance_data.values():
11913 if inst.disk_template not in constants.DTS_INT_MIRROR:
11916 instance_nodes = InstanceNodes(inst)
11918 if len(set(node_data[node].group for node in instance_nodes)) > 1:
11919 previously_split_instances.add(inst.name)
11921 if len(set(changed_nodes.get(node, node_data[node].group)
11922 for node in instance_nodes)) > 1:
11923 all_split_instances.add(inst.name)
11925 return (list(all_split_instances - previously_split_instances),
11926 list(previously_split_instances & all_split_instances))
11929 class _GroupQuery(_QueryBase):
11930 FIELDS = query.GROUP_FIELDS
11932 def ExpandNames(self, lu):
11933 lu.needed_locks = {}
11935 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
11936 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
11939 self.wanted = [name_to_uuid[name]
11940 for name in utils.NiceSort(name_to_uuid.keys())]
11942 # Accept names to be either names or UUIDs.
11945 all_uuid = frozenset(self._all_groups.keys())
11947 for name in self.names:
11948 if name in all_uuid:
11949 self.wanted.append(name)
11950 elif name in name_to_uuid:
11951 self.wanted.append(name_to_uuid[name])
11953 missing.append(name)
11956 raise errors.OpPrereqError("Some groups do not exist: %s" %
11957 utils.CommaJoin(missing),
11958 errors.ECODE_NOENT)
11960 def DeclareLocks(self, lu, level):
11963 def _GetQueryData(self, lu):
11964 """Computes the list of node groups and their attributes.
11967 do_nodes = query.GQ_NODE in self.requested_data
11968 do_instances = query.GQ_INST in self.requested_data
11970 group_to_nodes = None
11971 group_to_instances = None
11973 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
11974 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
11975 # latter GetAllInstancesInfo() is not enough, for we have to go through
11976 # instance->node. Hence, we will need to process nodes even if we only need
11977 # instance information.
11978 if do_nodes or do_instances:
11979 all_nodes = lu.cfg.GetAllNodesInfo()
11980 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
11983 for node in all_nodes.values():
11984 if node.group in group_to_nodes:
11985 group_to_nodes[node.group].append(node.name)
11986 node_to_group[node.name] = node.group
11989 all_instances = lu.cfg.GetAllInstancesInfo()
11990 group_to_instances = dict((uuid, []) for uuid in self.wanted)
11992 for instance in all_instances.values():
11993 node = instance.primary_node
11994 if node in node_to_group:
11995 group_to_instances[node_to_group[node]].append(instance.name)
11998 # Do not pass on node information if it was not requested.
11999 group_to_nodes = None
12001 return query.GroupQueryData([self._all_groups[uuid]
12002 for uuid in self.wanted],
12003 group_to_nodes, group_to_instances)
12006 class LUGroupQuery(NoHooksLU):
12007 """Logical unit for querying node groups.
12012 def CheckArguments(self):
12013 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
12014 self.op.output_fields, False)
12016 def ExpandNames(self):
12017 self.gq.ExpandNames(self)
12019 def Exec(self, feedback_fn):
12020 return self.gq.OldStyleQuery(self)
12023 class LUGroupSetParams(LogicalUnit):
12024 """Modifies the parameters of a node group.
12027 HPATH = "group-modify"
12028 HTYPE = constants.HTYPE_GROUP
12031 def CheckArguments(self):
12034 self.op.alloc_policy,
12037 if all_changes.count(None) == len(all_changes):
12038 raise errors.OpPrereqError("Please pass at least one modification",
12039 errors.ECODE_INVAL)
12041 def ExpandNames(self):
12042 # This raises errors.OpPrereqError on its own:
12043 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12045 self.needed_locks = {
12046 locking.LEVEL_NODEGROUP: [self.group_uuid],
12049 def CheckPrereq(self):
12050 """Check prerequisites.
12053 self.group = self.cfg.GetNodeGroup(self.group_uuid)
12055 if self.group is None:
12056 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12057 (self.op.group_name, self.group_uuid))
12059 if self.op.ndparams:
12060 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
12061 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12062 self.new_ndparams = new_ndparams
12064 def BuildHooksEnv(self):
12065 """Build hooks env.
12069 "GROUP_NAME": self.op.group_name,
12070 "NEW_ALLOC_POLICY": self.op.alloc_policy,
12073 def BuildHooksNodes(self):
12074 """Build hooks nodes.
12077 mn = self.cfg.GetMasterNode()
12078 return ([mn], [mn])
12080 def Exec(self, feedback_fn):
12081 """Modifies the node group.
12086 if self.op.ndparams:
12087 self.group.ndparams = self.new_ndparams
12088 result.append(("ndparams", str(self.group.ndparams)))
12090 if self.op.alloc_policy:
12091 self.group.alloc_policy = self.op.alloc_policy
12093 self.cfg.Update(self.group, feedback_fn)
12098 class LUGroupRemove(LogicalUnit):
12099 HPATH = "group-remove"
12100 HTYPE = constants.HTYPE_GROUP
12103 def ExpandNames(self):
12104 # This will raise errors.OpPrereqError on its own:
12105 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12106 self.needed_locks = {
12107 locking.LEVEL_NODEGROUP: [self.group_uuid],
12110 def CheckPrereq(self):
12111 """Check prerequisites.
12113 This checks that the given group name exists as a node group, that it is
12114 empty (i.e., contains no nodes), and that it is not the last group of the
12115 cluster.
12118 # Verify that the group is empty.
12119 group_nodes = [node.name
12120 for node in self.cfg.GetAllNodesInfo().values()
12121 if node.group == self.group_uuid]
12124 raise errors.OpPrereqError("Group '%s' not empty, has the following"
12126 (self.op.group_name,
12127 utils.CommaJoin(utils.NiceSort(group_nodes))),
12128 errors.ECODE_STATE)
12130 # Verify the cluster would not be left group-less.
12131 if len(self.cfg.GetNodeGroupList()) == 1:
12132 raise errors.OpPrereqError("Group '%s' is the only group,"
12133 " cannot be removed" %
12134 self.op.group_name,
12135 errors.ECODE_STATE)
12137 def BuildHooksEnv(self):
12138 """Build hooks env.
12142 "GROUP_NAME": self.op.group_name,
12145 def BuildHooksNodes(self):
12146 """Build hooks nodes.
12149 mn = self.cfg.GetMasterNode()
12150 return ([mn], [mn])
12152 def Exec(self, feedback_fn):
12153 """Remove the node group.
12157 self.cfg.RemoveNodeGroup(self.group_uuid)
12158 except errors.ConfigurationError:
12159 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
12160 (self.op.group_name, self.group_uuid))
12162 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12165 class LUGroupRename(LogicalUnit):
12166 HPATH = "group-rename"
12167 HTYPE = constants.HTYPE_GROUP
12170 def ExpandNames(self):
12171 # This raises errors.OpPrereqError on its own:
12172 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12174 self.needed_locks = {
12175 locking.LEVEL_NODEGROUP: [self.group_uuid],
12178 def CheckPrereq(self):
12179 """Check prerequisites.
12181 Ensures requested new name is not yet used.
12185 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
12186 except errors.OpPrereqError:
12189 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
12190 " node group (UUID: %s)" %
12191 (self.op.new_name, new_name_uuid),
12192 errors.ECODE_EXISTS)
12194 def BuildHooksEnv(self):
12195 """Build hooks env.
12199 "OLD_NAME": self.op.group_name,
12200 "NEW_NAME": self.op.new_name,
12203 def BuildHooksNodes(self):
12204 """Build hooks nodes.
12207 mn = self.cfg.GetMasterNode()
12209 all_nodes = self.cfg.GetAllNodesInfo()
12210 all_nodes.pop(mn, None)
12213 run_nodes.extend(node.name for node in all_nodes.values()
12214 if node.group == self.group_uuid)
12216 return (run_nodes, run_nodes)
12218 def Exec(self, feedback_fn):
12219 """Rename the node group.
12222 group = self.cfg.GetNodeGroup(self.group_uuid)
12225 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12226 (self.op.group_name, self.group_uuid))
12228 group.name = self.op.new_name
12229 self.cfg.Update(group, feedback_fn)
12231 return self.op.new_name
12234 class LUGroupEvacuate(LogicalUnit):
12235 HPATH = "group-evacuate"
12236 HTYPE = constants.HTYPE_GROUP
12239 def ExpandNames(self):
12240 # This raises errors.OpPrereqError on its own:
12241 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12243 if self.op.target_groups:
12244 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12245 self.op.target_groups)
12247 self.req_target_uuids = []
12249 if self.group_uuid in self.req_target_uuids:
12250 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
12251 " as a target group (targets are %s)" %
12253 utils.CommaJoin(self.req_target_uuids)),
12254 errors.ECODE_INVAL)
12256 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12258 self.share_locks = _ShareAll()
12259 self.needed_locks = {
12260 locking.LEVEL_INSTANCE: [],
12261 locking.LEVEL_NODEGROUP: [],
12262 locking.LEVEL_NODE: [],
12265 def DeclareLocks(self, level):
12266 if level == locking.LEVEL_INSTANCE:
12267 assert not self.needed_locks[locking.LEVEL_INSTANCE]
12269 # Lock instances optimistically, needs verification once node and group
12270 # locks have been acquired
12271 self.needed_locks[locking.LEVEL_INSTANCE] = \
12272 self.cfg.GetNodeGroupInstances(self.group_uuid)
12274 elif level == locking.LEVEL_NODEGROUP:
12275 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12277 if self.req_target_uuids:
12278 lock_groups = set([self.group_uuid] + self.req_target_uuids)
12280 # Lock all groups used by instances optimistically; this requires going
12281 # via the node before it's locked, requiring verification later on
12282 lock_groups.update(group_uuid
12283 for instance_name in
12284 self.owned_locks(locking.LEVEL_INSTANCE)
12286 self.cfg.GetInstanceNodeGroups(instance_name))
12288 # No target groups, need to lock all of them
12289 lock_groups = locking.ALL_SET
12291 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12293 elif level == locking.LEVEL_NODE:
12294 # This will only lock the nodes in the group to be evacuated which
12295 # contain actual instances
12296 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12297 self._LockInstancesNodes()
12299 # Lock all nodes in group to be evacuated and target groups
12300 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12301 assert self.group_uuid in owned_groups
12302 member_nodes = [node_name
12303 for group in owned_groups
12304 for node_name in self.cfg.GetNodeGroup(group).members]
12305 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12307 def CheckPrereq(self):
12308 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12309 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12310 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12312 assert owned_groups.issuperset(self.req_target_uuids)
12313 assert self.group_uuid in owned_groups
12315 # Check if locked instances are still correct
12316 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
12318 # Get instance information
12319 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
12321 # Check if node groups for locked instances are still correct
12322 for instance_name in owned_instances:
12323 inst = self.instances[instance_name]
12324 assert owned_nodes.issuperset(inst.all_nodes), \
12325 "Instance %s's nodes changed while we kept the lock" % instance_name
12327 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
12330 assert self.group_uuid in inst_groups, \
12331 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
12333 if self.req_target_uuids:
12334 # User requested specific target groups
12335 self.target_uuids = self.req_target_uuids
12337 # All groups except the one to be evacuated are potential targets
12338 self.target_uuids = [group_uuid for group_uuid in owned_groups
12339 if group_uuid != self.group_uuid]
12341 if not self.target_uuids:
12342 raise errors.OpPrereqError("There are no possible target groups",
12343 errors.ECODE_INVAL)
12345 def BuildHooksEnv(self):
12346 """Build hooks env.
12350 "GROUP_NAME": self.op.group_name,
12351 "TARGET_GROUPS": " ".join(self.target_uuids),
12354 def BuildHooksNodes(self):
12355 """Build hooks nodes.
12358 mn = self.cfg.GetMasterNode()
12360 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
12362 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
12364 return (run_nodes, run_nodes)
12366 def Exec(self, feedback_fn):
12367 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12369 assert self.group_uuid not in self.target_uuids
12371 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12372 instances=instances, target_groups=self.target_uuids)
12374 ial.Run(self.op.iallocator)
12376 if not ial.success:
12377 raise errors.OpPrereqError("Can't compute group evacuation using"
12378 " iallocator '%s': %s" %
12379 (self.op.iallocator, ial.info),
12380 errors.ECODE_NORES)
12382 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12384 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
12385 len(jobs), self.op.group_name)
12387 return ResultWithJobs(jobs)
12390 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
12391 """Generic tags LU.
12393 This is an abstract class which is the parent of all the other tags LUs.
12396 def ExpandNames(self):
12397 self.group_uuid = None
12398 self.needed_locks = {}
12399 if self.op.kind == constants.TAG_NODE:
12400 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
12401 self.needed_locks[locking.LEVEL_NODE] = self.op.name
12402 elif self.op.kind == constants.TAG_INSTANCE:
12403 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
12404 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
12405 elif self.op.kind == constants.TAG_NODEGROUP:
12406 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
12408 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
12409 # not possible to acquire the BGL based on opcode parameters)
12411 def CheckPrereq(self):
12412 """Check prerequisites.
12415 if self.op.kind == constants.TAG_CLUSTER:
12416 self.target = self.cfg.GetClusterInfo()
12417 elif self.op.kind == constants.TAG_NODE:
12418 self.target = self.cfg.GetNodeInfo(self.op.name)
12419 elif self.op.kind == constants.TAG_INSTANCE:
12420 self.target = self.cfg.GetInstanceInfo(self.op.name)
12421 elif self.op.kind == constants.TAG_NODEGROUP:
12422 self.target = self.cfg.GetNodeGroup(self.group_uuid)
12424 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
12425 str(self.op.kind), errors.ECODE_INVAL)
12428 class LUTagsGet(TagsLU):
12429 """Returns the tags of a given object.
12434 def ExpandNames(self):
12435 TagsLU.ExpandNames(self)
12437 # Share locks as this is only a read operation
12438 self.share_locks = _ShareAll()
12440 def Exec(self, feedback_fn):
12441 """Returns the tag list.
12444 return list(self.target.GetTags())
12447 class LUTagsSearch(NoHooksLU):
12448 """Searches the tags for a given pattern.
12453 def ExpandNames(self):
12454 self.needed_locks = {}
12456 def CheckPrereq(self):
12457 """Check prerequisites.
12459 This checks the pattern passed for validity by compiling it.
12463 self.re = re.compile(self.op.pattern)
12464 except re.error, err:
12465 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
12466 (self.op.pattern, err), errors.ECODE_INVAL)
12468 def Exec(self, feedback_fn):
12469 """Returns the tag list.
12473 tgts = [("/cluster", cfg.GetClusterInfo())]
12474 ilist = cfg.GetAllInstancesInfo().values()
12475 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
12476 nlist = cfg.GetAllNodesInfo().values()
12477 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
12478 tgts.extend(("/nodegroup/%s" % n.name, n)
12479 for n in cfg.GetAllNodeGroupsInfo().values())
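# each entry in tgts is a (path, taggable object) pair, e.g.
# ("/instances/inst1.example.com", <Instance object>); any tag matching the
# pattern is collected as a (path, tag) tuple ("inst1.example.com" is just an
# example name)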
12481 for path, target in tgts:
12482 for tag in target.GetTags():
12483 if self.re.search(tag):
12484 results.append((path, tag))
12488 class LUTagsSet(TagsLU):
12489 """Sets a tag on a given object.
12494 def CheckPrereq(self):
12495 """Check prerequisites.
12497 This checks the type and length of the tag name and value.
12500 TagsLU.CheckPrereq(self)
12501 for tag in self.op.tags:
12502 objects.TaggableObject.ValidateTag(tag)
12504 def Exec(self, feedback_fn):
12509 for tag in self.op.tags:
12510 self.target.AddTag(tag)
12511 except errors.TagError, err:
12512 raise errors.OpExecError("Error while setting tag: %s" % str(err))
12513 self.cfg.Update(self.target, feedback_fn)
12516 class LUTagsDel(TagsLU):
12517 """Delete a list of tags from a given object.
12522 def CheckPrereq(self):
12523 """Check prerequisites.
12525 This checks that we have the given tag.
12528 TagsLU.CheckPrereq(self)
12529 for tag in self.op.tags:
12530 objects.TaggableObject.ValidateTag(tag)
12531 del_tags = frozenset(self.op.tags)
12532 cur_tags = self.target.GetTags()
12534 diff_tags = del_tags - cur_tags
12536 diff_names = ("'%s'" % i for i in sorted(diff_tags))
12537 raise errors.OpPrereqError("Tag(s) %s not found" %
12538 (utils.CommaJoin(diff_names), ),
12539 errors.ECODE_NOENT)
12541 def Exec(self, feedback_fn):
12542 """Remove the tag from the object.
12545 for tag in self.op.tags:
12546 self.target.RemoveTag(tag)
12547 self.cfg.Update(self.target, feedback_fn)
12550 class LUTestDelay(NoHooksLU):
12551 """Sleep for a specified amount of time.
12553 This LU sleeps on the master and/or nodes for a specified amount of
12554 time.
12559 def ExpandNames(self):
12560 """Expand names and set required locks.
12562 This expands the node list, if any.
12565 self.needed_locks = {}
12566 if self.op.on_nodes:
12567 # _GetWantedNodes can be used here, but is not always appropriate to use
12568 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
12569 # more information.
12570 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
12571 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
12573 def _TestDelay(self):
12574 """Do the actual sleep.
12577 if self.op.on_master:
12578 if not utils.TestDelay(self.op.duration):
12579 raise errors.OpExecError("Error during master delay test")
12580 if self.op.on_nodes:
12581 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
12582 for node, node_result in result.items():
12583 node_result.Raise("Failure during rpc call to node %s" % node)
12585 def Exec(self, feedback_fn):
12586 """Execute the test delay opcode, with the wanted repetitions.
12589 if self.op.repeat == 0:
12592 top_value = self.op.repeat - 1
12593 for i in range(self.op.repeat):
12594 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
12598 class LUTestJqueue(NoHooksLU):
12599 """Utility LU to test some aspects of the job queue.
12604 # Must be lower than default timeout for WaitForJobChange to see whether it
12605 # notices changed jobs
12606 _CLIENT_CONNECT_TIMEOUT = 20.0
12607 _CLIENT_CONFIRM_TIMEOUT = 60.0
12610 def _NotifyUsingSocket(cls, cb, errcls):
12611 """Opens a Unix socket and waits for another program to connect.
12614 @param cb: Callback to send socket name to client
12615 @type errcls: class
12616 @param errcls: Exception class to use for errors
12619 # Using a temporary directory as there's no easy way to create temporary
12620 # sockets without writing a custom loop around tempfile.mktemp and
12622 tmpdir = tempfile.mkdtemp()
12624 tmpsock = utils.PathJoin(tmpdir, "sock")
12626 logging.debug("Creating temporary socket at %s", tmpsock)
12627 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
12632 # Send details to client
12635 # Wait for client to connect before continuing
12636 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
12638 (conn, _) = sock.accept()
12639 except socket.error, err:
12640 raise errcls("Client didn't connect in time (%s)" % err)
12644 # Remove as soon as client is connected
12645 shutil.rmtree(tmpdir)
12647 # Wait for client to close
12650 # pylint: disable-msg=E1101
12651 # Instance of '_socketobject' has no ... member
12652 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
12654 except socket.error, err:
12655 raise errcls("Client failed to confirm notification (%s)" % err)
12659 def _SendNotification(self, test, arg, sockname):
12660 """Sends a notification to the client.
12663 @param test: Test name
12664 @param arg: Test argument (depends on test)
12665 @type sockname: string
12666 @param sockname: Socket path
12669 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
12671 def _Notify(self, prereq, test, arg):
12672 """Notifies the client of a test.
12675 @param prereq: Whether this is a prereq-phase test
12677 @param test: Test name
12678 @param arg: Test argument (depends on test)
12682 errcls = errors.OpPrereqError
12684 errcls = errors.OpExecError
12686 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
12690 def CheckArguments(self):
12691 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
12692 self.expandnames_calls = 0
12694 def ExpandNames(self):
12695 checkargs_calls = getattr(self, "checkargs_calls", 0)
12696 if checkargs_calls < 1:
12697 raise errors.ProgrammerError("CheckArguments was not called")
12699 self.expandnames_calls += 1
12701 if self.op.notify_waitlock:
12702 self._Notify(True, constants.JQT_EXPANDNAMES, None)
12704 self.LogInfo("Expanding names")
12706 # Get lock on master node (just to get a lock, not for a particular reason)
12707 self.needed_locks = {
12708 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
12711 def Exec(self, feedback_fn):
12712 if self.expandnames_calls < 1:
12713 raise errors.ProgrammerError("ExpandNames was not called")
12715 if self.op.notify_exec:
12716 self._Notify(False, constants.JQT_EXEC, None)
12718 self.LogInfo("Executing")
12720 if self.op.log_messages:
12721 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
12722 for idx, msg in enumerate(self.op.log_messages):
12723 self.LogInfo("Sending log message %s", idx + 1)
12724 feedback_fn(constants.JQT_MSGPREFIX + msg)
12725 # Report how many test messages have been sent
12726 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
12729 raise errors.OpExecError("Opcode failure was requested")
12734 class IAllocator(object):
12735 """IAllocator framework.
12737 An IAllocator instance has four sets of attributes:
12738 - cfg that is needed to query the cluster
12739 - input data (all members of the _KEYS class attribute are required)
12740 - four buffer attributes (in|out_data|text), that represent the
12741 input (to the external script) in text and data structure format,
12742 and the output from it, again in two formats
12743 - the result variables from the script (success, info, nodes) for
12747 # pylint: disable-msg=R0902
12748 # lots of instance attributes
12750 def __init__(self, cfg, rpc, mode, **kwargs):
12753 # init buffer variables
12754 self.in_text = self.out_text = self.in_data = self.out_data = None
12755 # init all input fields so that pylint is happy
12757 self.memory = self.disks = self.disk_template = None
12758 self.os = self.tags = self.nics = self.vcpus = None
12759 self.hypervisor = None
12760 self.relocate_from = None
12762 self.instances = None
12763 self.evac_mode = None
12764 self.target_groups = []
12766 self.required_nodes = None
12767 # init result fields
12768 self.success = self.info = self.result = None
12771 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
12773 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
12774 " IAllocator" % self.mode)
12776 keyset = [n for (n, _) in keydata]
12779 if key not in keyset:
12780 raise errors.ProgrammerError("Invalid input parameter '%s' to"
12781 " IAllocator" % key)
12782 setattr(self, key, kwargs[key])
12785 if key not in kwargs:
12786 raise errors.ProgrammerError("Missing input parameter '%s' to"
12787 " IAllocator" % key)
12788 self._BuildInputData(compat.partial(fn, self), keydata)
12790 def _ComputeClusterData(self):
12791 """Compute the generic allocator input data.
12793 This is the data that is independent of the actual operation.
12797 cluster_info = cfg.GetClusterInfo()
12800 "version": constants.IALLOCATOR_VERSION,
12801 "cluster_name": cfg.GetClusterName(),
12802 "cluster_tags": list(cluster_info.GetTags()),
12803 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
12804 # we don't have job IDs
12806 ninfo = cfg.GetAllNodesInfo()
12807 iinfo = cfg.GetAllInstancesInfo().values()
12808 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
12811 node_list = [n.name for n in ninfo.values() if n.vm_capable]
12813 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
12814 hypervisor_name = self.hypervisor
12815 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
12816 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
12818 hypervisor_name = cluster_info.enabled_hypervisors[0]
12820 node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
12823 self.rpc.call_all_instances_info(node_list,
12824 cluster_info.enabled_hypervisors)
12826 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
12828 config_ndata = self._ComputeBasicNodeData(ninfo)
12829 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
12830 i_list, config_ndata)
12831 assert len(data["nodes"]) == len(ninfo), \
12832 "Incomplete node data computed"
12834 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
12836 self.in_data = data
12839 def _ComputeNodeGroupData(cfg):
12840 """Compute node groups data.
12843 ng = dict((guuid, {
12844 "name": gdata.name,
12845 "alloc_policy": gdata.alloc_policy,
12847 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
12852 def _ComputeBasicNodeData(node_cfg):
12853 """Compute global node data.
12856 @returns: a dict of name: (node dict, node config)
12859 # fill in static (config-based) values
12860 node_results = dict((ninfo.name, {
12861 "tags": list(ninfo.GetTags()),
12862 "primary_ip": ninfo.primary_ip,
12863 "secondary_ip": ninfo.secondary_ip,
12864 "offline": ninfo.offline,
12865 "drained": ninfo.drained,
12866 "master_candidate": ninfo.master_candidate,
12867 "group": ninfo.group,
12868 "master_capable": ninfo.master_capable,
12869 "vm_capable": ninfo.vm_capable,
12871 for ninfo in node_cfg.values())
12873 return node_results
12876 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
12878 """Compute global node data.
12880 @param node_results: the basic node structures as filled from the config
12883 # make a copy of the current dict
12884 node_results = dict(node_results)
12885 for nname, nresult in node_data.items():
12886 assert nname in node_results, "Missing basic data for node %s" % nname
12887 ninfo = node_cfg[nname]
12889 if not (ninfo.offline or ninfo.drained):
12890 nresult.Raise("Can't get data for node %s" % nname)
12891 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
12893 remote_info = nresult.payload
12895 for attr in ["memory_total", "memory_free", "memory_dom0",
12896 "vg_size", "vg_free", "cpu_total"]:
12897 if attr not in remote_info:
12898 raise errors.OpExecError("Node '%s' didn't return attribute"
12899 " '%s'" % (nname, attr))
12900 if not isinstance(remote_info[attr], int):
12901 raise errors.OpExecError("Node '%s' returned invalid value"
12903 (nname, attr, remote_info[attr]))
12904 # compute memory used by primary instances
12905 i_p_mem = i_p_up_mem = 0
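# i_p_mem sums the configured memory of all instances whose primary node is
# this node; i_p_up_mem counts only those that are expected to be up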
12906 for iinfo, beinfo in i_list:
12907 if iinfo.primary_node == nname:
12908 i_p_mem += beinfo[constants.BE_MEMORY]
12909 if iinfo.name not in node_iinfo[nname].payload:
12912 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
12913 i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
12914 remote_info["memory_free"] -= max(0, i_mem_diff)
12917 i_p_up_mem += beinfo[constants.BE_MEMORY]
12919 # compute memory used by instances
12921 "total_memory": remote_info["memory_total"],
12922 "reserved_memory": remote_info["memory_dom0"],
12923 "free_memory": remote_info["memory_free"],
12924 "total_disk": remote_info["vg_size"],
12925 "free_disk": remote_info["vg_free"],
12926 "total_cpus": remote_info["cpu_total"],
12927 "i_pri_memory": i_p_mem,
12928 "i_pri_up_memory": i_p_up_mem,
12930 pnr_dyn.update(node_results[nname])
12931 node_results[nname] = pnr_dyn
12933 return node_results

  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.

    """
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {
          "mac": nic.mac,
          "ip": nic.ip,
          "mode": filled_params[constants.NIC_MODE],
          "link": filled_params[constants.NIC_LINK],
          }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{constants.IDISK_SIZE: dsk.size,
                   constants.IDISK_MODE: dsk.mode}
                  for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data
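
  # Sketch of one instance entry (all values are hypothetical examples); the
  # keys mirror the dict built above:
  #   instance_data["inst1.example.com"] = {
  #     "tags": [], "admin_up": True, "vcpus": 1, "memory": 512,
  #     "os": "debootstrap+default", "nodes": ["node1", "node2"],
  #     "nics": [{"mac": "aa:00:00:36:33:f1", "ip": None, "mode": "bridged",
  #               "link": "xen-br0", "bridge": "xen-br0"}],
  #     "disks": [{"size": 1024, "mode": "rw"}], "disk_template": "drbd",
  #     "hypervisor": "xen-pvm", "disk_space_total": ...,
  #     }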

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_INT_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1

    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.memory,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      "hypervisor": self.hypervisor,
      }
    return request
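
  # A hedged example of the allocation request assembled here (all values are
  # hypothetical); once _BuildInputData adds the "type" key it would look
  # roughly like
  #   {"type": "allocate", "name": "inst1.example.com",
  #    "disk_template": "drbd", "tags": [], "os": "debootstrap+default",
  #    "vcpus": 1, "memory": 512, "disks": [{"size": 1024, "mode": "rw"}],
  #    "disk_space_total": ..., "nics": [{...}], "required_nodes": 2,
  #    "hypervisor": "xen-pvm"}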

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_MIRRORED:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if instance.disk_template in constants.DTS_INT_MIRROR and \
        len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

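  # Relocation requests are much smaller; a hypothetical example (made-up
  # values) once the "type" key has been added by _BuildInputData:
  #   {"type": "relocate", "name": "inst1.example.com",
  #    "disk_space_total": 1024, "required_nodes": 1,
  #    "relocate_from": ["node2.example.com"]}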

  def _AddNodeEvacuate(self):
    """Get data for node-evacuate requests.

    """
    return {
      "instances": self.instances,
      "evac_mode": self.evac_mode,
      }

  def _AddChangeGroup(self):
    """Get data for change-group requests.

    """
    return {
      "instances": self.instances,
      "target_groups": self.target_groups,
      }

  def _BuildInputData(self, fn, keydata):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    for keyname, keytype in keydata:
      if keyname not in request:
        raise errors.ProgrammerError("Request parameter %s is missing" %
                                     keyname)
      val = request[keyname]
      if not keytype(val):
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
                                     " validation, value %s, expected"
                                     " type %s" % (keyname, val, keytype))
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)
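
  # The serialized text handed to the iallocator script thus contains the
  # cluster data gathered by _ComputeClusterData() plus the per-mode "request"
  # dict; a condensed, hypothetical sketch of self.in_data:
  #   {"nodes": {...}, "nodegroups": {...}, "instances": {...},
  #    "request": {"type": "allocate", ...}}
  # ("nodes" and "nodegroups" are also what _ValidateResult consults below.)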

  _STRING_LIST = ht.TListOf(ht.TString)
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
    # pylint: disable-msg=E1101
    # Class '...' has no 'OP_ID' member
    "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
                         opcodes.OpInstanceMigrate.OP_ID,
                         opcodes.OpInstanceReplaceDisks.OP_ID])
    })))

  _NEVAC_MOVED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TNonEmptyString,
                                  ht.TListOf(ht.TNonEmptyString),
                                  ])))
  _NEVAC_FAILED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TMaybeString,
                                  ])))
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))

  _MODE_DATA = {
    constants.IALLOCATOR_MODE_ALLOC:
      (_AddNewInstance, [
        ("name", ht.TString),
        ("memory", ht.TInt),
        ("disks", ht.TListOf(ht.TDict)),
        ("disk_template", ht.TString),
        ("os", ht.TString),
        ("tags", _STRING_LIST),
        ("nics", ht.TListOf(ht.TDict)),
        ("vcpus", ht.TInt),
        ("hypervisor", ht.TString),
        ], ht.TList),
    constants.IALLOCATOR_MODE_RELOC:
      (_AddRelocateInstance,
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
       ht.TList),
    constants.IALLOCATOR_MODE_NODE_EVAC:
      (_AddNodeEvacuate, [
        ("instances", _STRING_LIST),
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
        ], _NEVAC_RESULT),
    constants.IALLOCATOR_MODE_CHG_GROUP:
      (_AddChangeGroup, [
        ("instances", _STRING_LIST),
        ("target_groups", _STRING_LIST),
        ], _NEVAC_RESULT),
    }
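
  # Each _MODE_DATA entry is a (request builder, expected request keys/types,
  # result check) triple: the builder and key list are consumed by
  # _BuildInputData above, and the check corresponds to the self._result_check
  # that _ValidateResult applies to the allocator's "result" field below.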

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not self._result_check(self.result):
      raise errors.OpExecError("Iallocator returned invalid result,"
                               " expected %s, got %s" %
                               (self._result_check, self.result),
                               errors.ECODE_INVAL)

    if self.mode == constants.IALLOCATOR_MODE_RELOC:
      assert self.relocate_from is not None
      assert self.required_nodes == 1

      node2group = dict((name, ndata["group"])
                        for (name, ndata) in self.in_data["nodes"].items())

      fn = compat.partial(self._NodesToGroups, node2group,
                          self.in_data["nodegroups"])

      request_groups = fn(self.relocate_from)
      result_groups = fn(rdict["result"])

      if self.success and result_groups != request_groups:
        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
                                 " differ from original groups (%s)" %
                                 (utils.CommaJoin(result_groups),
                                  utils.CommaJoin(request_groups)))

    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES

    self.out_data = rdict
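
  # A well-formed allocator reply, as enforced above, is a dict with at least
  # the "success", "info" and "result" keys; a minimal hypothetical example:
  #   {"success": true, "info": "allocation successful",
  #    "result": ["node1.example.com", "node2.example.com"]}
  # (shown as JSON; serializer.Load() turns it into the Python dict rdict)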

  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
    """Returns a list of unique group names for a list of nodes.

    @type node2group: dict
    @param node2group: Map from node name to group UUID
    @type groups: dict
    @param groups: Group information
    @type nodes: list
    @param nodes: Node names

    """
    result = set()

    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        pass
      else:
        try:
          group = groups[group_uuid]
        except KeyError:
          # Can't find group, let's use UUID
          group_name = group_uuid
        else:
          group_name = group["name"]

        result.add(group_name)

    return sorted(result)
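
  # Example behaviour of the helper above (hypothetical inputs):
  #   _NodesToGroups({"node1": "uuid-a", "node2": "uuid-b"},
  #                  {"uuid-a": {"name": "default"}},
  #                  ["node1", "node2", "unknown"])
  # returns ["default", "uuid-b"]: unknown nodes are skipped, and groups
  # without an entry in the group dict fall back to their UUID.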


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the test direction and mode.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
        list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result
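
  # In other words: with direction IALLOCATOR_DIR_IN the LU only returns the
  # generated allocator input (ial.in_text), while any other direction (i.e.
  # IALLOCATOR_DIR_OUT, per CheckPrereq) runs the named allocator script and
  # returns its raw, unvalidated output.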


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
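
# Usage sketch: _GetQueryImplementation(constants.QR_NODE) returns the
# _NodeQuery class from the mapping above, while an unknown resource name
# raises OpPrereqError instead of leaking a KeyError to the caller.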