4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
45 from ganeti import ssh
46 from ganeti import utils
47 from ganeti import errors
48 from ganeti import hypervisor
49 from ganeti import locking
50 from ganeti import constants
51 from ganeti import objects
52 from ganeti import serializer
53 from ganeti import ssconf
54 from ganeti import uidpool
55 from ganeti import compat
56 from ganeti import masterd
57 from ganeti import netutils
58 from ganeti import query
59 from ganeti import qlang
60 from ganeti import opcodes
62 from ganeti import rpc
64 import ganeti.masterd.instance # pylint: disable=W0611
67 #: Size of DRBD meta block device
71 INSTANCE_UP = [constants.ADMINST_UP]
72 INSTANCE_DOWN = [constants.ADMINST_DOWN]
73 INSTANCE_OFFLINE = [constants.ADMINST_OFFLINE]
74 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
75 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
79 """Data container for LU results with jobs.
81 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
82 by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
83 contained in the C{jobs} attribute and include the job IDs in the opcode
87 def __init__(self, jobs, **kwargs):
88 """Initializes this class.
90 Additional return values can be specified as keyword arguments.
92 @type jobs: list of lists of L{opcodes.OpCode}
93 @param jobs: A list of lists of opcode objects
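# Illustrative sketch (not part of the original module): an LU's Exec method
# could hand follow-up work back to the job queue like this, using
# opcodes.OpTestDelay purely as a placeholder opcode and "done" as an
# arbitrary extra return value:
#
#   def Exec(self, feedback_fn):
#     jobs = [[opcodes.OpTestDelay(duration=1.0)]]
#     return ResultWithJobs(jobs, done="initial part finished")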
100 class LogicalUnit(object):
101 """Logical Unit base class.
103 Subclasses must follow these rules:
104 - implement ExpandNames
105 - implement CheckPrereq (except when tasklets are used)
106 - implement Exec (except when tasklets are used)
107 - implement BuildHooksEnv
108 - implement BuildHooksNodes
109 - redefine HPATH and HTYPE
110 - optionally redefine their run requirements:
111 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
113 Note that all commands require root permissions.
115 @ivar dry_run_result: the value (if any) that will be returned to the caller
116 in dry-run mode (signalled by opcode dry_run parameter)
123 def __init__(self, processor, op, context, rpc_runner):
124 """Constructor for LogicalUnit.
126 This needs to be overridden in derived classes in order to check op
130 self.proc = processor
132 self.cfg = context.cfg
133 self.glm = context.glm
135 self.owned_locks = context.glm.list_owned
136 self.context = context
137 self.rpc = rpc_runner
138 # Dicts used to declare locking needs to mcpu
139 self.needed_locks = None
140 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
142 self.remove_locks = {}
143 # Used to force good behavior when calling helper functions
144 self.recalculate_locks = {}
146 self.Log = processor.Log # pylint: disable=C0103
147 self.LogWarning = processor.LogWarning # pylint: disable=C0103
148 self.LogInfo = processor.LogInfo # pylint: disable=C0103
149 self.LogStep = processor.LogStep # pylint: disable=C0103
150 # support for dry-run
151 self.dry_run_result = None
152 # support for generic debug attribute
153 if (not hasattr(self.op, "debug_level") or
154 not isinstance(self.op.debug_level, int)):
155 self.op.debug_level = 0
160 # Validate opcode parameters and set defaults
161 self.op.Validate(True)
163 self.CheckArguments()
165 def CheckArguments(self):
166 """Check syntactic validity for the opcode arguments.
168 This method is for doing a simple syntactic check and ensuring the
169 validity of opcode parameters, without any cluster-related
170 checks. While the same can be accomplished in ExpandNames and/or
171 CheckPrereq, doing these separately is better because:
173 - ExpandNames is left as a purely lock-related function
174 - CheckPrereq is run after we have acquired locks (and possibly
177 The function is allowed to change the self.op attribute so that
178 later methods no longer need to worry about missing parameters.
183 def ExpandNames(self):
184 """Expand names for this LU.
186 This method is called before starting to execute the opcode, and it should
187 update all the parameters of the opcode to their canonical form (e.g. a
188 short node name must be fully expanded after this method has successfully
189 completed). This way locking, hooks, logging, etc. can work correctly.
191 LUs which implement this method must also populate the self.needed_locks
192 member, as a dict with lock levels as keys, and a list of needed lock names
195 - use an empty dict if you don't need any lock
196 - if you don't need any lock at a particular level omit that level
197 - don't put anything for the BGL level
198 - if you want all locks at a level use locking.ALL_SET as a value
200 If you need to share locks (rather than acquire them exclusively) at one
201 level you can modify self.share_locks, setting a true value (usually 1) for
202 that level. By default locks are not shared.
204 This function can also define a list of tasklets, which then will be
205 executed in order instead of the usual LU-level CheckPrereq and Exec
206 functions, if those are not defined by the LU.
210 # Acquire all nodes and one instance
211 self.needed_locks = {
212 locking.LEVEL_NODE: locking.ALL_SET,
213 locking.LEVEL_INSTANCE: ['instance1.example.com'],
215 # Acquire just two nodes
216 self.needed_locks = {
217 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
220 self.needed_locks = {} # No, you can't leave it to the default value None
223 # The implementation of this method is mandatory only if the new LU is
224 # concurrent, so that old LUs don't need to be changed all at the same
227 self.needed_locks = {} # Exclusive LUs don't need locks.
229 raise NotImplementedError
231 def DeclareLocks(self, level):
232 """Declare LU locking needs for a level
234 While most LUs can just declare their locking needs at ExpandNames time,
235 sometimes there's the need to calculate some locks after having acquired
236 the ones before. This function is called just before acquiring locks at a
237 particular level, but after acquiring the ones at lower levels, and permits
238 such calculations. It can be used to modify self.needed_locks, and by
239 default it does nothing.
241 This function is only called if you have something already set in
242 self.needed_locks for the level.
244 @param level: Locking level which is going to be locked
245 @type level: member of ganeti.locking.LEVELS
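# Illustrative sketch (not from the original code): a typical override
# recalculates node locks from the instance locks acquired at the previous
# level, e.g.:
#
#   def DeclareLocks(self, level):
#     if level == locking.LEVEL_NODE:
#       self._LockInstancesNodes(primary_only=True)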
249 def CheckPrereq(self):
250 """Check prerequisites for this LU.
252 This method should check that the prerequisites for the execution
253 of this LU are fulfilled. It can do internode communication, but
254 it should be idempotent - no cluster or system changes are allowed.
257 The method should raise errors.OpPrereqError in case something is
258 not fulfilled. Its return value is ignored.
260 This method should also update all the parameters of the opcode to
261 their canonical form if it hasn't been done by ExpandNames before.
264 if self.tasklets is not None:
265 for (idx, tl) in enumerate(self.tasklets):
266 logging.debug("Checking prerequisites for tasklet %s/%s",
267 idx + 1, len(self.tasklets))
272 def Exec(self, feedback_fn):
275 This method should implement the actual work. It should raise
276 errors.OpExecError for failures that are somewhat dealt with in
280 if self.tasklets is not None:
281 for (idx, tl) in enumerate(self.tasklets):
282 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
285 raise NotImplementedError
287 def BuildHooksEnv(self):
288 """Build hooks environment for this LU.
291 @return: Dictionary containing the environment that will be used for
292 running the hooks for this LU. The keys of the dict must not be prefixed
293 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
294 will extend the environment with additional variables. If no environment
295 should be defined, an empty dictionary should be returned (not C{None}).
296 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
300 raise NotImplementedError
302 def BuildHooksNodes(self):
303 """Build list of nodes to run LU's hooks.
305 @rtype: tuple; (list, list)
306 @return: Tuple containing a list of node names on which the hook
307 should run before the execution and a list of node names on which the
308 hook should run after the execution. If no nodes are needed, an
309 empty list should be returned (and not None).
310 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
314 raise NotImplementedError
316 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
317 """Notify the LU about the results of its hooks.
319 This method is called every time a hooks phase is executed, and notifies
320 the Logical Unit about the hooks' result. The LU can then use it to alter
321 its result based on the hooks. By default the method does nothing and the
322 previous result is passed back unchanged but any LU can define it if it
323 wants to use the local cluster hook-scripts somehow.
325 @param phase: one of L{constants.HOOKS_PHASE_POST} or
326 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
327 @param hook_results: the results of the multi-node hooks rpc call
328 @param feedback_fn: function used to send feedback back to the caller
329 @param lu_result: the previous Exec result this LU had, or None
331 @return: the new Exec result, based on the previous result
335 # The API must be kept, thus we ignore the unused-argument and
336 # could-be-a-function warnings
337 # pylint: disable=W0613,R0201
340 def _ExpandAndLockInstance(self):
341 """Helper function to expand and lock an instance.
343 Many LUs that work on an instance take its name in self.op.instance_name
344 and need to expand it and then declare the expanded name for locking. This
345 function does it, and then updates self.op.instance_name to the expanded
346 name. It also initializes needed_locks as a dict, if this hasn't been done before.
350 if self.needed_locks is None:
351 self.needed_locks = {}
353 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
354 "_ExpandAndLockInstance called with instance-level locks set"
355 self.op.instance_name = _ExpandInstanceName(self.cfg,
356 self.op.instance_name)
357 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
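# Illustrative usage (not from the original source): an instance-level LU can
# usually reduce its ExpandNames to something like:
#
#   def ExpandNames(self):
#     self._ExpandAndLockInstance()
#     self.needed_locks[locking.LEVEL_NODE] = []
#     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE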
359 def _LockInstancesNodes(self, primary_only=False,
360 level=locking.LEVEL_NODE):
361 """Helper function to declare instances' nodes for locking.
363 This function should be called after locking one or more instances to lock
364 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
365 with all primary or secondary nodes for instances already locked and
366 present in self.needed_locks[locking.LEVEL_INSTANCE].
368 It should be called from DeclareLocks, and for safety only works if
369 self.recalculate_locks[locking.LEVEL_NODE] is set.
371 In the future it may grow parameters to just lock some instance's nodes, or
372 to just lock primaries or secondary nodes, if needed.
374 It should be called in DeclareLocks in a way similar to::
376 if level == locking.LEVEL_NODE:
377 self._LockInstancesNodes()
379 @type primary_only: boolean
380 @param primary_only: only lock primary nodes of locked instances
381 @param level: Which lock level to use for locking nodes
384 assert level in self.recalculate_locks, \
385 "_LockInstancesNodes helper function called with no nodes to recalculate"
387 # TODO: check if we've really been called with the instance locks held
389 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
390 # future we might want to have different behaviors depending on the value
391 # of self.recalculate_locks[locking.LEVEL_NODE]
393 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
394 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
395 wanted_nodes.append(instance.primary_node)
397 wanted_nodes.extend(instance.secondary_nodes)
399 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
400 self.needed_locks[level] = wanted_nodes
401 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
402 self.needed_locks[level].extend(wanted_nodes)
404 raise errors.ProgrammerError("Unknown recalculation mode")
406 del self.recalculate_locks[level]
409 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
410 """Simple LU which runs no hooks.
412 This LU is intended as a parent for other LogicalUnits which will
413 run no hooks, in order to reduce duplicate code.
419 def BuildHooksEnv(self):
420 """Empty BuildHooksEnv for NoHooksLu.
422 This just raises an error.
425 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
427 def BuildHooksNodes(self):
428 """Empty BuildHooksNodes for NoHooksLU.
431 raise AssertionError("BuildHooksNodes called for NoHooksLU")
435 """Tasklet base class.
437 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
438 they can mix legacy code with tasklets. Locking needs to be done in the LU,
439 tasklets know nothing about locks.
441 Subclasses must follow these rules:
442 - Implement CheckPrereq
446 def __init__(self, lu):
453 def CheckPrereq(self):
454 """Check prerequisites for this tasklets.
456 This method should check whether the prerequisites for the execution of
457 this tasklet are fulfilled. It can do internode communication, but it
458 should be idempotent - no cluster or system changes are allowed.
460 The method should raise errors.OpPrereqError in case something is not
461 fulfilled. Its return value is ignored.
463 This method should also update all parameters to their canonical form if it
464 hasn't been done before.
469 def Exec(self, feedback_fn):
470 """Execute the tasklet.
472 This method should implement the actual work. It should raise
473 errors.OpExecError for failures that are somewhat dealt with in code, or
477 raise NotImplementedError
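# Illustrative sketch (hypothetical class, not in the original module): a
# minimal tasklet only needs CheckPrereq and Exec; the owning LU handles all
# locking on its behalf:
#
#   class _NoopTasklet(Tasklet):
#     def CheckPrereq(self):
#       pass
#
#     def Exec(self, feedback_fn):
#       feedback_fn("nothing to do")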
481 """Base for query utility classes.
484 #: Attribute holding field definitions
487 def __init__(self, qfilter, fields, use_locking):
488 """Initializes this class.
491 self.use_locking = use_locking
493 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
495 self.requested_data = self.query.RequestedData()
496 self.names = self.query.RequestedNames()
498 # Sort only if no names were requested
499 self.sort_by_name = not self.names
501 self.do_locking = None
504 def _GetNames(self, lu, all_names, lock_level):
505 """Helper function to determine names asked for in the query.
509 names = lu.owned_locks(lock_level)
513 if self.wanted == locking.ALL_SET:
514 assert not self.names
515 # caller didn't specify names, so ordering is not important
516 return utils.NiceSort(names)
518 # caller specified names and we must keep the same order
520 assert not self.do_locking or lu.glm.is_owned(lock_level)
522 missing = set(self.wanted).difference(names)
524 raise errors.OpExecError("Some items were removed before retrieving"
525 " their data: %s" % missing)
527 # Return expanded names
530 def ExpandNames(self, lu):
531 """Expand names for this query.
533 See L{LogicalUnit.ExpandNames}.
536 raise NotImplementedError()
538 def DeclareLocks(self, lu, level):
539 """Declare locks for this query.
541 See L{LogicalUnit.DeclareLocks}.
544 raise NotImplementedError()
546 def _GetQueryData(self, lu):
547 """Collects all data for this query.
549 @return: Query data object
552 raise NotImplementedError()
554 def NewStyleQuery(self, lu):
555 """Collect data and execute query.
558 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
559 sort_by_name=self.sort_by_name)
561 def OldStyleQuery(self, lu):
562 """Collect data and execute query.
565 return self.query.OldStyleQuery(self._GetQueryData(lu),
566 sort_by_name=self.sort_by_name)
570 """Returns a dict declaring all lock levels shared.
573 return dict.fromkeys(locking.LEVELS, 1)
576 def _MakeLegacyNodeInfo(data):
577 """Formats the data returned by L{rpc.RpcRunner.call_node_info}.
579 Converts the data into a single dictionary. This is fine for most use cases,
580 but some require information from more than one volume group or hypervisor.
583 (bootid, (vg_info, ), (hv_info, )) = data
585 return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
590 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
591 """Checks if the owned node groups are still correct for an instance.
593 @type cfg: L{config.ConfigWriter}
594 @param cfg: The cluster configuration
595 @type instance_name: string
596 @param instance_name: Instance name
597 @type owned_groups: set or frozenset
598 @param owned_groups: List of currently owned node groups
601 inst_groups = cfg.GetInstanceNodeGroups(instance_name)
603 if not owned_groups.issuperset(inst_groups):
604 raise errors.OpPrereqError("Instance %s's node groups changed since"
605 " locks were acquired, current groups are"
606 " are '%s', owning groups '%s'; retry the"
609 utils.CommaJoin(inst_groups),
610 utils.CommaJoin(owned_groups)),
616 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
617 """Checks if the instances in a node group are still correct.
619 @type cfg: L{config.ConfigWriter}
620 @param cfg: The cluster configuration
621 @type group_uuid: string
622 @param group_uuid: Node group UUID
623 @type owned_instances: set or frozenset
624 @param owned_instances: List of currently owned instances
627 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
628 if owned_instances != wanted_instances:
629 raise errors.OpPrereqError("Instances in node group '%s' changed since"
630 " locks were acquired, wanted '%s', have '%s';"
631 " retry the operation" %
633 utils.CommaJoin(wanted_instances),
634 utils.CommaJoin(owned_instances)),
637 return wanted_instances
640 def _SupportsOob(cfg, node):
641 """Tells if node supports OOB.
643 @type cfg: L{config.ConfigWriter}
644 @param cfg: The cluster configuration
645 @type node: L{objects.Node}
646 @param node: The node
647 @return: The OOB script if supported or an empty string otherwise
650 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
653 def _GetWantedNodes(lu, nodes):
654 """Returns list of checked and expanded node names.
656 @type lu: L{LogicalUnit}
657 @param lu: the logical unit on whose behalf we execute
659 @param nodes: list of node names or None for all nodes
661 @return: the list of nodes, sorted
662 @raise errors.ProgrammerError: if the nodes parameter is of a wrong type
666 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
668 return utils.NiceSort(lu.cfg.GetNodeList())
671 def _GetWantedInstances(lu, instances):
672 """Returns list of checked and expanded instance names.
674 @type lu: L{LogicalUnit}
675 @param lu: the logical unit on whose behalf we execute
676 @type instances: list
677 @param instances: list of instance names or None for all instances
679 @return: the list of instances, sorted
680 @raise errors.OpPrereqError: if the instances parameter is wrong type
681 @raise errors.OpPrereqError: if any of the passed instances is not found
685 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
687 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
691 def _GetUpdatedParams(old_params, update_dict,
692 use_default=True, use_none=False):
693 """Return the new version of a parameter dictionary.
695 @type old_params: dict
696 @param old_params: old parameters
697 @type update_dict: dict
698 @param update_dict: dict containing new parameter values, or
699 constants.VALUE_DEFAULT to reset the parameter to its default
701 @type use_default: boolean
702 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
703 values as 'to be deleted' values
704 @type use_none: boolean
705 @param use_none: whether to recognise C{None} values as 'to be
708 @return: the new parameter dictionary
711 params_copy = copy.deepcopy(old_params)
712 for key, val in update_dict.iteritems():
713 if ((use_default and val == constants.VALUE_DEFAULT) or
714 (use_none and val is None)):
720 params_copy[key] = val
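# Illustrative example (hypothetical parameter names and values): resetting
# one parameter to its default while overriding another:
#
#   _GetUpdatedParams({"kernel_path": "/boot/vmlinuz", "root_path": "/dev/sda"},
#                     {"kernel_path": constants.VALUE_DEFAULT,
#                      "root_path": "/dev/vda"})
#   # => {"root_path": "/dev/vda"}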
724 def _UpdateAndVerifySubDict(base, updates, type_check):
725 """Updates and verifies a dict with sub dicts of the same type.
727 @param base: The dict with the old data
728 @param updates: The dict with the new data
729 @param type_check: Dict suitable to ForceDictType to verify correct types
730 @returns: A new dict with updated and verified values
734 new = _GetUpdatedParams(old, value)
735 utils.ForceDictType(new, type_check)
738 ret = copy.deepcopy(base)
739 ret.update(dict((key, fn(base.get(key, {}), value))
740 for key, value in updates.items()))
744 def _MergeAndVerifyHvState(op_input, obj_input):
745 """Combines the hv state from an opcode with the one of the object
747 @param op_input: The input dict from the opcode
748 @param obj_input: The input dict from the objects
749 @return: The verified and updated dict
753 invalid_hvs = set(op_input) - constants.HYPER_TYPES
755 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
756 " %s" % utils.CommaJoin(invalid_hvs),
758 if obj_input is None:
760 type_check = constants.HVSTS_PARAMETER_TYPES
761 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
766 def _MergeAndVerifyDiskState(op_input, obj_input):
767 """Combines the disk state from an opcode with the one of the object
769 @param op_input: The input dict from the opcode
770 @param obj_input: The input dict from the objects
771 @return: The verified and updated dict
774 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
776 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
777 utils.CommaJoin(invalid_dst),
779 type_check = constants.DSS_PARAMETER_TYPES
780 if obj_input is None:
782 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
784 for key, value in op_input.items())
789 def _ReleaseLocks(lu, level, names=None, keep=None):
790 """Releases locks owned by an LU.
792 @type lu: L{LogicalUnit}
793 @param level: Lock level
794 @type names: list or None
795 @param names: Names of locks to release
796 @type keep: list or None
797 @param keep: Names of locks to retain
800 assert not (keep is not None and names is not None), \
801 "Only one of the 'names' and the 'keep' parameters can be given"
803 if names is not None:
804 should_release = names.__contains__
806 should_release = lambda name: name not in keep
808 should_release = None
810 owned = lu.owned_locks(level)
812 # Not owning any lock at this level, do nothing
819 # Determine which locks to release
821 if should_release(name):
826 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
828 # Release just some locks
829 lu.glm.release(level, names=release)
831 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
834 lu.glm.release(level)
836 assert not lu.glm.is_owned(level), "No locks should be owned"
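# Illustrative usage (not from the original source): after narrowing down the
# target node, an LU could drop all other node locks it acquired earlier:
#
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.node_name])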
839 def _MapInstanceDisksToNodes(instances):
840 """Creates a map from (node, volume) to instance name.
842 @type instances: list of L{objects.Instance}
843 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
846 return dict(((node, vol), inst.name)
847 for inst in instances
848 for (node, vols) in inst.MapLVsByNode().items()
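# Illustrative example (hypothetical names): an instance "inst1" with an LV
# "xenvg/disk0" on node "node1" would contribute the entry
#   {("node1", "xenvg/disk0"): "inst1"}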
852 def _RunPostHook(lu, node_name):
853 """Runs the post-hook for an opcode on a single node.
856 hm = lu.proc.BuildHooksManager(lu)
858 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
860 # pylint: disable=W0702
861 lu.LogWarning("Errors occurred running hooks on %s" % node_name)
864 def _CheckOutputFields(static, dynamic, selected):
865 """Checks whether all selected fields are valid.
867 @type static: L{utils.FieldSet}
868 @param static: static fields set
869 @type dynamic: L{utils.FieldSet}
870 @param dynamic: dynamic fields set
877 delta = f.NonMatching(selected)
879 raise errors.OpPrereqError("Unknown output fields selected: %s"
880 % ",".join(delta), errors.ECODE_INVAL)
883 def _CheckGlobalHvParams(params):
884 """Validates that given hypervisor params are not global ones.
886 This will ensure that instances don't get customised versions of
890 used_globals = constants.HVC_GLOBALS.intersection(params)
892 msg = ("The following hypervisor parameters are global and cannot"
893 " be customized at instance level, please modify them at"
894 " cluster level: %s" % utils.CommaJoin(used_globals))
895 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
898 def _CheckNodeOnline(lu, node, msg=None):
899 """Ensure that a given node is online.
901 @param lu: the LU on behalf of which we make the check
902 @param node: the node to check
903 @param msg: if passed, should be a message to replace the default one
904 @raise errors.OpPrereqError: if the node is offline
908 msg = "Can't use offline node"
909 if lu.cfg.GetNodeInfo(node).offline:
910 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
913 def _CheckNodeNotDrained(lu, node):
914 """Ensure that a given node is not drained.
916 @param lu: the LU on behalf of which we make the check
917 @param node: the node to check
918 @raise errors.OpPrereqError: if the node is drained
921 if lu.cfg.GetNodeInfo(node).drained:
922 raise errors.OpPrereqError("Can't use drained node %s" % node,
926 def _CheckNodeVmCapable(lu, node):
927 """Ensure that a given node is vm capable.
929 @param lu: the LU on behalf of which we make the check
930 @param node: the node to check
931 @raise errors.OpPrereqError: if the node is not vm capable
934 if not lu.cfg.GetNodeInfo(node).vm_capable:
935 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
939 def _CheckNodeHasOS(lu, node, os_name, force_variant):
940 """Ensure that a node supports a given OS.
942 @param lu: the LU on behalf of which we make the check
943 @param node: the node to check
944 @param os_name: the OS to query about
945 @param force_variant: whether to ignore variant errors
946 @raise errors.OpPrereqError: if the node is not supporting the OS
949 result = lu.rpc.call_os_get(node, os_name)
950 result.Raise("OS '%s' not in supported OS list for node %s" %
952 prereq=True, ecode=errors.ECODE_INVAL)
953 if not force_variant:
954 _CheckOSVariant(result.payload, os_name)
957 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
958 """Ensure that a node has the given secondary ip.
960 @type lu: L{LogicalUnit}
961 @param lu: the LU on behalf of which we make the check
963 @param node: the node to check
964 @type secondary_ip: string
965 @param secondary_ip: the ip to check
966 @type prereq: boolean
967 @param prereq: whether to throw a prerequisite or an execute error
968 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
969 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
972 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
973 result.Raise("Failure checking secondary ip on node %s" % node,
974 prereq=prereq, ecode=errors.ECODE_ENVIRON)
975 if not result.payload:
976 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
977 " please fix and re-run this command" % secondary_ip)
979 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
981 raise errors.OpExecError(msg)
984 def _GetClusterDomainSecret():
985 """Reads the cluster domain secret.
988 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
992 def _CheckInstanceState(lu, instance, req_states, msg=None):
993 """Ensure that an instance is in one of the required states.
995 @param lu: the LU on behalf of which we make the check
996 @param instance: the instance to check
997 @param msg: if passed, should be a message to replace the default one
998 @raise errors.OpPrereqError: if the instance is not in the required state
1002 msg = "can't use instance from outside %s states" % ", ".join(req_states)
1003 if instance.admin_state not in req_states:
1004 raise errors.OpPrereqError("Instance %s is marked to be %s, %s" %
1005 (instance, instance.admin_state, msg),
1008 if constants.ADMINST_UP not in req_states:
1009 pnode = instance.primary_node
1010 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1011 ins_l.Raise("Can't contact node %s for instance information" % pnode,
1012 prereq=True, ecode=errors.ECODE_ENVIRON)
1014 if instance.name in ins_l.payload:
1015 raise errors.OpPrereqError("Instance %s is running, %s" %
1016 (instance.name, msg), errors.ECODE_STATE)
1019 def _ExpandItemName(fn, name, kind):
1020 """Expand an item name.
1022 @param fn: the function to use for expansion
1023 @param name: requested item name
1024 @param kind: text description ('Node' or 'Instance')
1025 @return: the resolved (full) name
1026 @raise errors.OpPrereqError: if the item is not found
1029 full_name = fn(name)
1030 if full_name is None:
1031 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1036 def _ExpandNodeName(cfg, name):
1037 """Wrapper over L{_ExpandItemName} for nodes."""
1038 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1041 def _ExpandInstanceName(cfg, name):
1042 """Wrapper over L{_ExpandItemName} for instance."""
1043 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
1046 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1047 minmem, maxmem, vcpus, nics, disk_template, disks,
1048 bep, hvp, hypervisor_name, tags):
1049 """Builds instance related env variables for hooks
1051 This builds the hook environment from individual variables.
1054 @param name: the name of the instance
1055 @type primary_node: string
1056 @param primary_node: the name of the instance's primary node
1057 @type secondary_nodes: list
1058 @param secondary_nodes: list of secondary nodes as strings
1059 @type os_type: string
1060 @param os_type: the name of the instance's OS
1061 @type status: string
1062 @param status: the desired status of the instance
1063 @type minmem: string
1064 @param minmem: the minimum memory size of the instance
1065 @type maxmem: string
1066 @param maxmem: the maximum memory size of the instance
1068 @param vcpus: the count of VCPUs the instance has
1070 @param nics: list of tuples (ip, mac, mode, link) representing
1071 the NICs the instance has
1072 @type disk_template: string
1073 @param disk_template: the disk template of the instance
1075 @param disks: the list of (size, mode) pairs
1077 @param bep: the backend parameters for the instance
1079 @param hvp: the hypervisor parameters for the instance
1080 @type hypervisor_name: string
1081 @param hypervisor_name: the hypervisor for the instance
1083 @param tags: list of instance tags as strings
1085 @return: the hook environment for this instance
1090 "INSTANCE_NAME": name,
1091 "INSTANCE_PRIMARY": primary_node,
1092 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1093 "INSTANCE_OS_TYPE": os_type,
1094 "INSTANCE_STATUS": status,
1095 "INSTANCE_MINMEM": minmem,
1096 "INSTANCE_MAXMEM": maxmem,
1097 # TODO(2.7) remove deprecated "memory" value
1098 "INSTANCE_MEMORY": maxmem,
1099 "INSTANCE_VCPUS": vcpus,
1100 "INSTANCE_DISK_TEMPLATE": disk_template,
1101 "INSTANCE_HYPERVISOR": hypervisor_name,
1104 nic_count = len(nics)
1105 for idx, (ip, mac, mode, link) in enumerate(nics):
1108 env["INSTANCE_NIC%d_IP" % idx] = ip
1109 env["INSTANCE_NIC%d_MAC" % idx] = mac
1110 env["INSTANCE_NIC%d_MODE" % idx] = mode
1111 env["INSTANCE_NIC%d_LINK" % idx] = link
1112 if mode == constants.NIC_MODE_BRIDGED:
1113 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1117 env["INSTANCE_NIC_COUNT"] = nic_count
1120 disk_count = len(disks)
1121 for idx, (size, mode) in enumerate(disks):
1122 env["INSTANCE_DISK%d_SIZE" % idx] = size
1123 env["INSTANCE_DISK%d_MODE" % idx] = mode
1127 env["INSTANCE_DISK_COUNT"] = disk_count
1132 env["INSTANCE_TAGS"] = " ".join(tags)
1134 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1135 for key, value in source.items():
1136 env["INSTANCE_%s_%s" % (kind, key)] = value
1141 def _NICListToTuple(lu, nics):
1142 """Build a list of nic information tuples.
1144 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1145 value in LUInstanceQueryData.
1147 @type lu: L{LogicalUnit}
1148 @param lu: the logical unit on whose behalf we execute
1149 @type nics: list of L{objects.NIC}
1150 @param nics: list of nics to convert to hooks tuples
1154 cluster = lu.cfg.GetClusterInfo()
1158 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1159 mode = filled_params[constants.NIC_MODE]
1160 link = filled_params[constants.NIC_LINK]
1161 hooks_nics.append((ip, mac, mode, link))
1165 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1166 """Builds instance related env variables for hooks from an object.
1168 @type lu: L{LogicalUnit}
1169 @param lu: the logical unit on whose behalf we execute
1170 @type instance: L{objects.Instance}
1171 @param instance: the instance for which we should build the
1173 @type override: dict
1174 @param override: dictionary with key/values that will override
1177 @return: the hook environment dictionary
1180 cluster = lu.cfg.GetClusterInfo()
1181 bep = cluster.FillBE(instance)
1182 hvp = cluster.FillHV(instance)
1184 "name": instance.name,
1185 "primary_node": instance.primary_node,
1186 "secondary_nodes": instance.secondary_nodes,
1187 "os_type": instance.os,
1188 "status": instance.admin_state,
1189 "maxmem": bep[constants.BE_MAXMEM],
1190 "minmem": bep[constants.BE_MINMEM],
1191 "vcpus": bep[constants.BE_VCPUS],
1192 "nics": _NICListToTuple(lu, instance.nics),
1193 "disk_template": instance.disk_template,
1194 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1197 "hypervisor_name": instance.hypervisor,
1198 "tags": instance.tags,
1201 args.update(override)
1202 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1205 def _AdjustCandidatePool(lu, exceptions):
1206 """Adjust the candidate pool after node operations.
1209 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1211 lu.LogInfo("Promoted nodes to master candidate role: %s",
1212 utils.CommaJoin(node.name for node in mod_list))
1213 for name in mod_list:
1214 lu.context.ReaddNode(name)
1215 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1217 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1221 def _DecideSelfPromotion(lu, exceptions=None):
1222 """Decide whether I should promote myself as a master candidate.
1225 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1226 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1227 # the new node will increase mc_max by one, so:
1228 mc_should = min(mc_should + 1, cp_size)
1229 return mc_now < mc_should
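# Illustrative example (hypothetical numbers): with cp_size = 10 and
# GetMasterCandidateStats reporting mc_now = 3 and mc_should = 3, the new node
# raises the target to min(3 + 1, 10) = 4; since 3 < 4, it promotes itself.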
1232 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1233 """Check that the brigdes needed by a list of nics exist.
1236 cluster = lu.cfg.GetClusterInfo()
1237 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1238 brlist = [params[constants.NIC_LINK] for params in paramslist
1239 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1241 result = lu.rpc.call_bridges_exist(target_node, brlist)
1242 result.Raise("Error checking bridges on destination node '%s'" %
1243 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1246 def _CheckInstanceBridgesExist(lu, instance, node=None):
1247 """Check that the brigdes needed by an instance exist.
1251 node = instance.primary_node
1252 _CheckNicsBridgesExist(lu, instance.nics, node)
1255 def _CheckOSVariant(os_obj, name):
1256 """Check whether an OS name conforms to the os variants specification.
1258 @type os_obj: L{objects.OS}
1259 @param os_obj: OS object to check
1261 @param name: OS name passed by the user, to check for validity
1264 variant = objects.OS.GetVariant(name)
1265 if not os_obj.supported_variants:
1267 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1268 " passed)" % (os_obj.name, variant),
1272 raise errors.OpPrereqError("OS name must include a variant",
1275 if variant not in os_obj.supported_variants:
1276 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1279 def _GetNodeInstancesInner(cfg, fn):
1280 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1283 def _GetNodeInstances(cfg, node_name):
1284 """Returns a list of all primary and secondary instances on a node.
1288 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1291 def _GetNodePrimaryInstances(cfg, node_name):
1292 """Returns primary instances on a node.
1295 return _GetNodeInstancesInner(cfg,
1296 lambda inst: node_name == inst.primary_node)
1299 def _GetNodeSecondaryInstances(cfg, node_name):
1300 """Returns secondary instances on a node.
1303 return _GetNodeInstancesInner(cfg,
1304 lambda inst: node_name in inst.secondary_nodes)
1307 def _GetStorageTypeArgs(cfg, storage_type):
1308 """Returns the arguments for a storage type.
1311 # Special case for file storage
1312 if storage_type == constants.ST_FILE:
1313 # storage.FileStorage wants a list of storage directories
1314 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1319 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1322 for dev in instance.disks:
1323 cfg.SetDiskID(dev, node_name)
1325 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
1326 result.Raise("Failed to get disk status from node %s" % node_name,
1327 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1329 for idx, bdev_status in enumerate(result.payload):
1330 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1336 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1337 """Check the sanity of iallocator and node arguments and use the
1338 cluster-wide iallocator if appropriate.
1340 Check that at most one of (iallocator, node) is specified. If none is
1341 specified, then the LU's opcode's iallocator slot is filled with the
1342 cluster-wide default iallocator.
1344 @type iallocator_slot: string
1345 @param iallocator_slot: the name of the opcode iallocator slot
1346 @type node_slot: string
1347 @param node_slot: the name of the opcode target node slot
1350 node = getattr(lu.op, node_slot, None)
1351 iallocator = getattr(lu.op, iallocator_slot, None)
1353 if node is not None and iallocator is not None:
1354 raise errors.OpPrereqError("Do not specify both an iallocator and a node",
1356 elif node is None and iallocator is None:
1357 default_iallocator = lu.cfg.GetDefaultIAllocator()
1358 if default_iallocator:
1359 setattr(lu.op, iallocator_slot, default_iallocator)
1361 raise errors.OpPrereqError("No iallocator or node given and no"
1362 " cluster-wide default iallocator found;"
1363 " please specify either an iallocator or a"
1364 " node, or set a cluster-wide default"
1368 def _GetDefaultIAllocator(cfg, iallocator):
1369 """Decides on which iallocator to use.
1371 @type cfg: L{config.ConfigWriter}
1372 @param cfg: Cluster configuration object
1373 @type iallocator: string or None
1374 @param iallocator: Iallocator specified in opcode
1376 @return: Iallocator name
1380 # Use default iallocator
1381 iallocator = cfg.GetDefaultIAllocator()
1384 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1385 " opcode nor as a cluster-wide default",
1391 class LUClusterPostInit(LogicalUnit):
1392 """Logical unit for running hooks after cluster initialization.
1395 HPATH = "cluster-init"
1396 HTYPE = constants.HTYPE_CLUSTER
1398 def BuildHooksEnv(self):
1403 "OP_TARGET": self.cfg.GetClusterName(),
1406 def BuildHooksNodes(self):
1407 """Build hooks nodes.
1410 return ([], [self.cfg.GetMasterNode()])
1412 def Exec(self, feedback_fn):
1419 class LUClusterDestroy(LogicalUnit):
1420 """Logical unit for destroying the cluster.
1423 HPATH = "cluster-destroy"
1424 HTYPE = constants.HTYPE_CLUSTER
1426 def BuildHooksEnv(self):
1431 "OP_TARGET": self.cfg.GetClusterName(),
1434 def BuildHooksNodes(self):
1435 """Build hooks nodes.
1440 def CheckPrereq(self):
1441 """Check prerequisites.
1443 This checks whether the cluster is empty.
1445 Any errors are signaled by raising errors.OpPrereqError.
1448 master = self.cfg.GetMasterNode()
1450 nodelist = self.cfg.GetNodeList()
1451 if len(nodelist) != 1 or nodelist[0] != master:
1452 raise errors.OpPrereqError("There are still %d node(s) in"
1453 " this cluster." % (len(nodelist) - 1),
1455 instancelist = self.cfg.GetInstanceList()
1457 raise errors.OpPrereqError("There are still %d instance(s) in"
1458 " this cluster." % len(instancelist),
1461 def Exec(self, feedback_fn):
1462 """Destroys the cluster.
1465 master_params = self.cfg.GetMasterNetworkParameters()
1467 # Run post hooks on master node before it's removed
1468 _RunPostHook(self, master_params.name)
1470 ems = self.cfg.GetUseExternalMipScript()
1471 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1473 result.Raise("Could not disable the master role")
1475 return master_params.name
1478 def _VerifyCertificate(filename):
1479 """Verifies a certificate for L{LUClusterVerifyConfig}.
1481 @type filename: string
1482 @param filename: Path to PEM file
1486 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1487 utils.ReadFile(filename))
1488 except Exception, err: # pylint: disable=W0703
1489 return (LUClusterVerifyConfig.ETYPE_ERROR,
1490 "Failed to load X509 certificate %s: %s" % (filename, err))
1493 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1494 constants.SSL_CERT_EXPIRATION_ERROR)
1497 fnamemsg = "While verifying %s: %s" % (filename, msg)
1502 return (None, fnamemsg)
1503 elif errcode == utils.CERT_WARNING:
1504 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1505 elif errcode == utils.CERT_ERROR:
1506 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1508 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1511 def _GetAllHypervisorParameters(cluster, instances):
1512 """Compute the set of all hypervisor parameters.
1514 @type cluster: L{objects.Cluster}
1515 @param cluster: the cluster object
1516 @type instances: list of L{objects.Instance}
1517 @param instances: additional instances from which to obtain parameters
1518 @rtype: list of (origin, hypervisor, parameters)
1519 @return: a list with all parameters found, indicating the hypervisor they
1520 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1525 for hv_name in cluster.enabled_hypervisors:
1526 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1528 for os_name, os_hvp in cluster.os_hvp.items():
1529 for hv_name, hv_params in os_hvp.items():
1531 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1532 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1534 # TODO: collapse identical parameter values in a single one
1535 for instance in instances:
1536 if instance.hvparams:
1537 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1538 cluster.FillHV(instance)))
1543 class _VerifyErrors(object):
1544 """Mix-in for cluster/group verify LUs.
1546 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1547 self.op and self._feedback_fn to be available.)
1551 ETYPE_FIELD = "code"
1552 ETYPE_ERROR = "ERROR"
1553 ETYPE_WARNING = "WARNING"
1555 def _Error(self, ecode, item, msg, *args, **kwargs):
1556 """Format an error message.
1558 Based on the opcode's error_codes parameter, either format a
1559 parseable error code, or a simpler error string.
1561 This must be called only from Exec and functions called from Exec.
1564 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1565 itype, etxt, _ = ecode
1566 # first complete the msg
1569 # then format the whole message
1570 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1571 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1577 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1578 # and finally report it via the feedback_fn
1579 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
1581 def _ErrorIf(self, cond, ecode, *args, **kwargs):
1582 """Log an error message if the passed condition is True.
1586 or self.op.debug_simulate_errors) # pylint: disable=E1101
1588 # If the error code is in the list of ignored errors, demote the error to a
1590 (_, etxt, _) = ecode
1591 if etxt in self.op.ignore_errors: # pylint: disable=E1101
1592 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1595 self._Error(ecode, *args, **kwargs)
1597 # do not mark the operation as failed for WARN cases only
1598 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1599 self.bad = self.bad or cond
1602 class LUClusterVerify(NoHooksLU):
1603 """Submits all jobs necessary to verify the cluster.
1608 def ExpandNames(self):
1609 self.needed_locks = {}
1611 def Exec(self, feedback_fn):
1614 if self.op.group_name:
1615 groups = [self.op.group_name]
1616 depends_fn = lambda: None
1618 groups = self.cfg.GetNodeGroupList()
1620 # Verify global configuration
1622 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1625 # Always depend on global verification
1626 depends_fn = lambda: [(-len(jobs), [])]
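# Note (based on the job-queue dependency format): (-len(jobs), []) is a
# relative job dependency; negative indices count back from the job being
# submitted, so each per-group verification job depends on the
# OpClusterVerifyConfig job submitted just before it.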
1628 jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1629 ignore_errors=self.op.ignore_errors,
1630 depends=depends_fn())]
1631 for group in groups)
1633 # Fix up all parameters
1634 for op in itertools.chain(*jobs): # pylint: disable=W0142
1635 op.debug_simulate_errors = self.op.debug_simulate_errors
1636 op.verbose = self.op.verbose
1637 op.error_codes = self.op.error_codes
1639 op.skip_checks = self.op.skip_checks
1640 except AttributeError:
1641 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1643 return ResultWithJobs(jobs)
1646 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1647 """Verifies the cluster config.
1652 def _VerifyHVP(self, hvp_data):
1653 """Verifies locally the syntax of the hypervisor parameters.
1656 for item, hv_name, hv_params in hvp_data:
1657 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1660 hv_class = hypervisor.GetHypervisor(hv_name)
1661 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1662 hv_class.CheckParameterSyntax(hv_params)
1663 except errors.GenericError, err:
1664 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1666 def ExpandNames(self):
1667 # Information can be safely retrieved as the BGL is acquired in exclusive
1669 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
1670 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1671 self.all_node_info = self.cfg.GetAllNodesInfo()
1672 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1673 self.needed_locks = {}
1675 def Exec(self, feedback_fn):
1676 """Verify integrity of cluster, performing various test on nodes.
1680 self._feedback_fn = feedback_fn
1682 feedback_fn("* Verifying cluster config")
1684 for msg in self.cfg.VerifyConfig():
1685 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1687 feedback_fn("* Verifying cluster certificate files")
1689 for cert_filename in constants.ALL_CERT_FILES:
1690 (errcode, msg) = _VerifyCertificate(cert_filename)
1691 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1693 feedback_fn("* Verifying hypervisor parameters")
1695 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1696 self.all_inst_info.values()))
1698 feedback_fn("* Verifying all nodes belong to an existing group")
1700 # We do this verification here because, should this bogus circumstance
1701 # occur, it would never be caught by VerifyGroup, which only acts on
1702 # nodes/instances reachable from existing node groups.
1704 dangling_nodes = set(node.name for node in self.all_node_info.values()
1705 if node.group not in self.all_group_info)
1707 dangling_instances = {}
1708 no_node_instances = []
1710 for inst in self.all_inst_info.values():
1711 if inst.primary_node in dangling_nodes:
1712 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1713 elif inst.primary_node not in self.all_node_info:
1714 no_node_instances.append(inst.name)
1719 utils.CommaJoin(dangling_instances.get(node.name,
1721 for node in dangling_nodes]
1723 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
1725 "the following nodes (and their instances) belong to a non"
1726 " existing group: %s", utils.CommaJoin(pretty_dangling))
1728 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
1730 "the following instances have a non-existing primary-node:"
1731 " %s", utils.CommaJoin(no_node_instances))
1736 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1737 """Verifies the status of a node group.
1740 HPATH = "cluster-verify"
1741 HTYPE = constants.HTYPE_CLUSTER
1744 _HOOKS_INDENT_RE = re.compile("^", re.M)
1746 class NodeImage(object):
1747 """A class representing the logical and physical status of a node.
1750 @ivar name: the node name to which this object refers
1751 @ivar volumes: a structure as returned from
1752 L{ganeti.backend.GetVolumeList} (runtime)
1753 @ivar instances: a list of running instances (runtime)
1754 @ivar pinst: list of configured primary instances (config)
1755 @ivar sinst: list of configured secondary instances (config)
1756 @ivar sbp: dictionary of {primary-node: list of instances} for all
1757 instances for which this node is secondary (config)
1758 @ivar mfree: free memory, as reported by hypervisor (runtime)
1759 @ivar dfree: free disk, as reported by the node (runtime)
1760 @ivar offline: the offline status (config)
1761 @type rpc_fail: boolean
1762 @ivar rpc_fail: whether the RPC verify call was successful (overall,
1763 not whether the individual keys were correct) (runtime)
1764 @type lvm_fail: boolean
1765 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1766 @type hyp_fail: boolean
1767 @ivar hyp_fail: whether the RPC call didn't return the instance list
1768 @type ghost: boolean
1769 @ivar ghost: whether this is a known node or not (config)
1770 @type os_fail: boolean
1771 @ivar os_fail: whether the RPC call didn't return valid OS data
1773 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1774 @type vm_capable: boolean
1775 @ivar vm_capable: whether the node can host instances
1778 def __init__(self, offline=False, name=None, vm_capable=True):
1787 self.offline = offline
1788 self.vm_capable = vm_capable
1789 self.rpc_fail = False
1790 self.lvm_fail = False
1791 self.hyp_fail = False
1793 self.os_fail = False
1796 def ExpandNames(self):
1797 # This raises errors.OpPrereqError on its own:
1798 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1800 # Get instances in node group; this is unsafe and needs verification later
1801 inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
1803 self.needed_locks = {
1804 locking.LEVEL_INSTANCE: inst_names,
1805 locking.LEVEL_NODEGROUP: [self.group_uuid],
1806 locking.LEVEL_NODE: [],
1809 self.share_locks = _ShareAll()
1811 def DeclareLocks(self, level):
1812 if level == locking.LEVEL_NODE:
1813 # Get members of node group; this is unsafe and needs verification later
1814 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1816 all_inst_info = self.cfg.GetAllInstancesInfo()
1818 # In Exec(), we warn about mirrored instances that have primary and
1819 # secondary living in separate node groups. To fully verify that
1820 # volumes for these instances are healthy, we will need to do an
1821 # extra call to their secondaries. We ensure here those nodes will
1823 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
1824 # Important: access only the instances whose lock is owned
1825 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1826 nodes.update(all_inst_info[inst].secondary_nodes)
1828 self.needed_locks[locking.LEVEL_NODE] = nodes
1830 def CheckPrereq(self):
1831 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
1832 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
1834 group_nodes = set(self.group_info.members)
1835 group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
1838 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1840 unlocked_instances = \
1841 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
1844 raise errors.OpPrereqError("Missing lock for nodes: %s" %
1845 utils.CommaJoin(unlocked_nodes))
1847 if unlocked_instances:
1848 raise errors.OpPrereqError("Missing lock for instances: %s" %
1849 utils.CommaJoin(unlocked_instances))
1851 self.all_node_info = self.cfg.GetAllNodesInfo()
1852 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1854 self.my_node_names = utils.NiceSort(group_nodes)
1855 self.my_inst_names = utils.NiceSort(group_instances)
1857 self.my_node_info = dict((name, self.all_node_info[name])
1858 for name in self.my_node_names)
1860 self.my_inst_info = dict((name, self.all_inst_info[name])
1861 for name in self.my_inst_names)
1863 # We detect here the nodes that will need the extra RPC calls for verifying
1864 # split LV volumes; they should be locked.
1865 extra_lv_nodes = set()
1867 for inst in self.my_inst_info.values():
1868 if inst.disk_template in constants.DTS_INT_MIRROR:
1869 group = self.my_node_info[inst.primary_node].group
1870 for nname in inst.secondary_nodes:
1871 if self.all_node_info[nname].group != group:
1872 extra_lv_nodes.add(nname)
1874 unlocked_lv_nodes = \
1875 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1877 if unlocked_lv_nodes:
1878 raise errors.OpPrereqError("these nodes should be locked: %s" %
1879 utils.CommaJoin(unlocked_lv_nodes))
1880 self.extra_lv_nodes = list(extra_lv_nodes)
1882 def _VerifyNode(self, ninfo, nresult):
1883 """Perform some basic validation on data returned from a node.
1885 - check the result data structure is well formed and has all the
1887 - check ganeti version
1889 @type ninfo: L{objects.Node}
1890 @param ninfo: the node to check
1891 @param nresult: the results from the node
1893 @return: whether overall this call was successful (and we can expect
1894 reasonable values in the response)
1898 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1900 # main result, nresult should be a non-empty dict
1901 test = not nresult or not isinstance(nresult, dict)
1902 _ErrorIf(test, constants.CV_ENODERPC, node,
1903 "unable to verify node: no data returned")
1907 # compares ganeti version
1908 local_version = constants.PROTOCOL_VERSION
1909 remote_version = nresult.get("version", None)
1910 test = not (remote_version and
1911 isinstance(remote_version, (list, tuple)) and
1912 len(remote_version) == 2)
1913 _ErrorIf(test, constants.CV_ENODERPC, node,
1914 "connection to node returned invalid data")
1918 test = local_version != remote_version[0]
1919 _ErrorIf(test, constants.CV_ENODEVERSION, node,
1920 "incompatible protocol versions: master %s,"
1921 " node %s", local_version, remote_version[0])
1925 # node seems compatible, we can actually try to look into its results
1927 # full package version
1928 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1929 constants.CV_ENODEVERSION, node,
1930 "software version mismatch: master %s, node %s",
1931 constants.RELEASE_VERSION, remote_version[1],
1932 code=self.ETYPE_WARNING)
1934 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1935 if ninfo.vm_capable and isinstance(hyp_result, dict):
1936 for hv_name, hv_result in hyp_result.iteritems():
1937 test = hv_result is not None
1938 _ErrorIf(test, constants.CV_ENODEHV, node,
1939 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1941 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1942 if ninfo.vm_capable and isinstance(hvp_result, list):
1943 for item, hv_name, hv_result in hvp_result:
1944 _ErrorIf(True, constants.CV_ENODEHV, node,
1945 "hypervisor %s parameter verify failure (source %s): %s",
1946 hv_name, item, hv_result)
1948 test = nresult.get(constants.NV_NODESETUP,
1949 ["Missing NODESETUP results"])
1950 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
1955 def _VerifyNodeTime(self, ninfo, nresult,
1956 nvinfo_starttime, nvinfo_endtime):
1957 """Check the node time.
1959 @type ninfo: L{objects.Node}
1960 @param ninfo: the node to check
1961 @param nresult: the remote results for the node
1962 @param nvinfo_starttime: the start time of the RPC call
1963 @param nvinfo_endtime: the end time of the RPC call
1967 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1969 ntime = nresult.get(constants.NV_TIME, None)
1971 ntime_merged = utils.MergeTime(ntime)
1972 except (ValueError, TypeError):
1973 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
1976 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1977 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1978 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1979 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1983 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
1984 "Node time diverges by at least %s from master node time",
1987 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1988 """Check the node LVM results.
1990 @type ninfo: L{objects.Node}
1991 @param ninfo: the node to check
1992 @param nresult: the remote results for the node
1993 @param vg_name: the configured VG name
2000 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2002 # checks vg existence and size > 20G
2003 vglist = nresult.get(constants.NV_VGLIST, None)
2005 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2007 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2008 constants.MIN_VG_SIZE)
2009 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2012 pvlist = nresult.get(constants.NV_PVLIST, None)
2013 test = pvlist is None
2014 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2016 # check that ':' is not present in PV names, since it's a
2017 # special character for lvcreate (denotes the range of PEs to
2018 # allocate on)
2019 for _, pvname, owner_vg in pvlist:
2020 test = ":" in pvname
2021 _ErrorIf(test, constants.CV_ENODELVM, node,
2022 "Invalid character ':' in PV '%s' of VG '%s'",
2025 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2026 """Check the node bridges.
2028 @type ninfo: L{objects.Node}
2029 @param ninfo: the node to check
2030 @param nresult: the remote results for the node
2031 @param bridges: the expected list of bridges
2038 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2040 missing = nresult.get(constants.NV_BRIDGES, None)
2041 test = not isinstance(missing, list)
2042 _ErrorIf(test, constants.CV_ENODENET, node,
2043 "did not return valid bridge information")
2045 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2046 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2048 def _VerifyNodeUserScripts(self, ninfo, nresult):
2049 """Check the results of user scripts presence and executability on the node
2051 @type ninfo: L{objects.Node}
2052 @param ninfo: the node to check
2053 @param nresult: the remote results for the node
2058 test = constants.NV_USERSCRIPTS not in nresult
2059 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2060 "did not return user scripts information")
2062 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2064 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2065 "user scripts not present or not executable: %s" %
2066 utils.CommaJoin(sorted(broken_scripts)))
2068 def _VerifyNodeNetwork(self, ninfo, nresult):
2069 """Check the node network connectivity results.
2071 @type ninfo: L{objects.Node}
2072 @param ninfo: the node to check
2073 @param nresult: the remote results for the node
2077 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2079 test = constants.NV_NODELIST not in nresult
2080 _ErrorIf(test, constants.CV_ENODESSH, node,
2081 "node hasn't returned node ssh connectivity data")
2083 if nresult[constants.NV_NODELIST]:
2084 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2085 _ErrorIf(True, constants.CV_ENODESSH, node,
2086 "ssh communication with node '%s': %s", a_node, a_msg)
2088 test = constants.NV_NODENETTEST not in nresult
2089 _ErrorIf(test, constants.CV_ENODENET, node,
2090 "node hasn't returned node tcp connectivity data")
2092 if nresult[constants.NV_NODENETTEST]:
2093 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2095 _ErrorIf(True, constants.CV_ENODENET, node,
2096 "tcp communication with node '%s': %s",
2097 anode, nresult[constants.NV_NODENETTEST][anode])
2099 test = constants.NV_MASTERIP not in nresult
2100 _ErrorIf(test, constants.CV_ENODENET, node,
2101 "node hasn't returned node master IP reachability data")
2103 if not nresult[constants.NV_MASTERIP]:
2104 if node == self.master_node:
2105 msg = "the master node cannot reach the master IP (not configured?)"
2107 msg = "cannot reach the master IP"
2108 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2110 def _VerifyInstance(self, instance, instanceconfig, node_image,
2112 """Verify an instance.
2114 This function checks to see if the required block devices are
2115 available on the instance's node.
2118 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2119 node_current = instanceconfig.primary_node
2121 node_vol_should = {}
2122 instanceconfig.MapLVsByNode(node_vol_should)
2124 for node in node_vol_should:
2125 n_img = node_image[node]
2126 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2127 # ignore missing volumes on offline or broken nodes
2129 for volume in node_vol_should[node]:
2130 test = volume not in n_img.volumes
2131 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2132 "volume %s missing on node %s", volume, node)
2134 if instanceconfig.admin_state == constants.ADMINST_UP:
2135 pri_img = node_image[node_current]
2136 test = instance not in pri_img.instances and not pri_img.offline
2137 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2138 "instance not running on its primary node %s",
2141 diskdata = [(nname, success, status, idx)
2142 for (nname, disks) in diskstatus.items()
2143 for idx, (success, status) in enumerate(disks)]
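# A hypothetical diskdata entry: ("node1.example.com", True, bdev_status, 0),
# i.e. disk/0 of this instance was queried successfully on node1 and
# bdev_status carries the returned block device status.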
2145 for nname, success, bdev_status, idx in diskdata:
2146 # the 'ghost node' construction in Exec() ensures that we have a
2148 snode = node_image[nname]
2149 bad_snode = snode.ghost or snode.offline
2150 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2151 not success and not bad_snode,
2152 constants.CV_EINSTANCEFAULTYDISK, instance,
2153 "couldn't retrieve status for disk/%s on %s: %s",
2154 idx, nname, bdev_status)
2155 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2156 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2157 constants.CV_EINSTANCEFAULTYDISK, instance,
2158 "disk/%s on %s is faulty", idx, nname)
2160 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2161 """Verify if there are any unknown volumes in the cluster.
2163 The .os, .swap and backup volumes are ignored. All other volumes are
2164 reported as unknown.
2166 @type reserved: L{ganeti.utils.FieldSet}
2167 @param reserved: a FieldSet of reserved volume names
2170 for node, n_img in node_image.items():
2171 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2172 # skip non-healthy nodes
2174 for volume in n_img.volumes:
2175 test = ((node not in node_vol_should or
2176 volume not in node_vol_should[node]) and
2177 not reserved.Matches(volume))
2178 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2179 "volume %s is unknown", volume)
2181 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2182 """Verify N+1 Memory Resilience.
2184 Check that if one single node dies we can still start all the
2185 instances it was primary for.
2188 cluster_info = self.cfg.GetClusterInfo()
2189 for node, n_img in node_image.items():
2190 # This code checks that every node which is now listed as
2191 # secondary has enough memory to host all instances it is
2192 # supposed to, should a single other node in the cluster fail.
2193 # FIXME: not ready for failover to an arbitrary node
2194 # FIXME: does not support file-backed instances
2195 # WARNING: we currently take into account down instances as well
2196 # as up ones, considering that even if they're down someone
2197 # might want to start them even in the event of a node failure.
2199 # we're skipping offline nodes from the N+1 warning, since
2200 # most likely we don't have good memory information from them;
2201 # we already list instances living on such nodes, and that's
2202 # enough warning
2204 #TODO(dynmem): use MINMEM for checking
2205 #TODO(dynmem): also consider ballooning out other instances
2206 for prinode, instances in n_img.sbp.items():
2208 for instance in instances:
2209 bep = cluster_info.FillBE(instance_cfg[instance])
2210 if bep[constants.BE_AUTO_BALANCE]:
2211 needed_mem += bep[constants.BE_MAXMEM]
2212 test = n_img.mfree < needed_mem
2213 self._ErrorIf(test, constants.CV_ENODEN1, node,
2214 "not enough memory to accomodate instance failovers"
2215 " should node %s fail (%dMiB needed, %dMiB available)",
2216 prinode, needed_mem, n_img.mfree)
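# Worked example (hypothetical numbers): if this node is secondary for two
# auto-balanced instances with BE_MAXMEM of 2048 and 4096 MiB sharing the
# same primary node, needed_mem is 6144 MiB; an mfree below that raises a
# CV_ENODEN1 error for this node.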
2219 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2220 (files_all, files_opt, files_mc, files_vm)):
2221 """Verifies file checksums collected from all nodes.
2223 @param errorif: Callback for reporting errors
2224 @param nodeinfo: List of L{objects.Node} objects
2225 @param master_node: Name of master node
2226 @param all_nvinfo: RPC results
2229 # Define functions determining which nodes to consider for a file
2232 (files_mc, lambda node: (node.master_candidate or
2233 node.name == master_node)),
2234 (files_vm, lambda node: node.vm_capable),
2237 # Build mapping from filename to list of nodes which should have the file
2239 for (files, fn) in files2nodefn:
2241 filenodes = nodeinfo
2243 filenodes = filter(fn, nodeinfo)
2244 nodefiles.update((filename,
2245 frozenset(map(operator.attrgetter("name"), filenodes)))
2246 for filename in files)
2248 assert set(nodefiles) == (files_all | files_mc | files_vm)
2250 fileinfo = dict((filename, {}) for filename in nodefiles)
2251 ignore_nodes = set()
2253 for node in nodeinfo:
2255 ignore_nodes.add(node.name)
2258 nresult = all_nvinfo[node.name]
2260 if nresult.fail_msg or not nresult.payload:
2263 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2265 test = not (node_files and isinstance(node_files, dict))
2266 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2267 "Node did not return file checksum data")
2269 ignore_nodes.add(node.name)
2272 # Build per-checksum mapping from filename to nodes having it
2273 for (filename, checksum) in node_files.items():
2274 assert filename in nodefiles
2275 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2277 for (filename, checksums) in fileinfo.items():
2278 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2280 # Nodes having the file
2281 with_file = frozenset(node_name
2282 for nodes in fileinfo[filename].values()
2283 for node_name in nodes) - ignore_nodes
2285 expected_nodes = nodefiles[filename] - ignore_nodes
2287 # Nodes missing file
2288 missing_file = expected_nodes - with_file
2290 if filename in files_opt:
2292 errorif(missing_file and missing_file != expected_nodes,
2293 constants.CV_ECLUSTERFILECHECK, None,
2294 "File %s is optional, but it must exist on all or no"
2295 " nodes (not found on %s)",
2296 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2298 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2299 "File %s is missing from node(s) %s", filename,
2300 utils.CommaJoin(utils.NiceSort(missing_file)))
2302 # Warn if a node has a file it shouldn't
2303 unexpected = with_file - expected_nodes
2305 constants.CV_ECLUSTERFILECHECK, None,
2306 "File %s should not exist on node(s) %s",
2307 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2309 # See if there are multiple versions of the file
2310 test = len(checksums) > 1
2312 variants = ["variant %s on %s" %
2313 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2314 for (idx, (checksum, nodes)) in
2315 enumerate(sorted(checksums.items()))]
2319 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2320 "File %s found with %s different checksums (%s)",
2321 filename, len(checksums), "; ".join(variants))
2323 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2325 """Verifies and the node DRBD status.
2327 @type ninfo: L{objects.Node}
2328 @param ninfo: the node to check
2329 @param nresult: the remote results for the node
2330 @param instanceinfo: the dict of instances
2331 @param drbd_helper: the configured DRBD usermode helper
2332 @param drbd_map: the DRBD map as returned by
2333 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2337 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2340 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2341 test = (helper_result is None)
2342 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2343 "no drbd usermode helper returned")
2345 status, payload = helper_result
2347 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2348 "drbd usermode helper check unsuccessful: %s", payload)
2349 test = status and (payload != drbd_helper)
2350 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2351 "wrong drbd usermode helper: %s", payload)
2353 # compute the DRBD minors
2355 for minor, instance in drbd_map[node].items():
2356 test = instance not in instanceinfo
2357 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2358 "ghost instance '%s' in temporary DRBD map", instance)
2359 # ghost instance should not be running, but otherwise we
2360 # don't give double warnings (both ghost instance and
2361 # unallocated minor in use)
2363 node_drbd[minor] = (instance, False)
2365 instance = instanceinfo[instance]
2366 node_drbd[minor] = (instance.name,
2367 instance.admin_state == constants.ADMINST_UP)
2369 # and now check them
2370 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2371 test = not isinstance(used_minors, (tuple, list))
2372 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2373 "cannot parse drbd status file: %s", str(used_minors))
2375 # we cannot check drbd status
2378 for minor, (iname, must_exist) in node_drbd.items():
2379 test = minor not in used_minors and must_exist
2380 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2381 "drbd minor %d of instance %s is not active", minor, iname)
2382 for minor in used_minors:
2383 test = minor not in node_drbd
2384 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2385 "unallocated drbd minor %d is in use", minor)
2387 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2388 """Builds the node OS structures.
2390 @type ninfo: L{objects.Node}
2391 @param ninfo: the node to check
2392 @param nresult: the remote results for the node
2393 @param nimg: the node image object
2397 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2399 remote_os = nresult.get(constants.NV_OSLIST, None)
2400 test = (not isinstance(remote_os, list) or
2401 not compat.all(isinstance(v, list) and len(v) == 7
2402 for v in remote_os))
2404 _ErrorIf(test, constants.CV_ENODEOS, node,
2405 "node hasn't returned valid OS data")
2414 for (name, os_path, status, diagnose,
2415 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2417 if name not in os_dict:
2420 # parameters is a list of lists instead of list of tuples due to
2421 # JSON lacking a real tuple type, fix it:
2422 parameters = [tuple(v) for v in parameters]
2423 os_dict[name].append((os_path, status, diagnose,
2424 set(variants), set(parameters), set(api_ver)))
2426 nimg.oslist = os_dict
2428 def _VerifyNodeOS(self, ninfo, nimg, base):
2429 """Verifies the node OS list.
2431 @type ninfo: L{objects.Node}
2432 @param ninfo: the node to check
2433 @param nimg: the node image object
2434 @param base: the 'template' node we match against (e.g. from the master)
2438 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2440 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2442 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2443 for os_name, os_data in nimg.oslist.items():
2444 assert os_data, "Empty OS status for OS %s?!" % os_name
2445 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2446 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2447 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2448 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2449 "OS '%s' has multiple entries (first one shadows the rest): %s",
2450 os_name, utils.CommaJoin([v[0] for v in os_data]))
2451 # comparisons with the 'base' image
2452 test = os_name not in base.oslist
2453 _ErrorIf(test, constants.CV_ENODEOS, node,
2454 "Extra OS %s not present on reference node (%s)",
2458 assert base.oslist[os_name], "Base node has empty OS status?"
2459 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2461 # base OS is invalid, skipping
2463 for kind, a, b in [("API version", f_api, b_api),
2464 ("variants list", f_var, b_var),
2465 ("parameters", beautify_params(f_param),
2466 beautify_params(b_param))]:
2467 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2468 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2469 kind, os_name, base.name,
2470 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2472 # check any missing OSes
2473 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2474 _ErrorIf(missing, constants.CV_ENODEOS, node,
2475 "OSes present on reference node %s but missing on this node: %s",
2476 base.name, utils.CommaJoin(missing))
2478 def _VerifyOob(self, ninfo, nresult):
2479 """Verifies out of band functionality of a node.
2481 @type ninfo: L{objects.Node}
2482 @param ninfo: the node to check
2483 @param nresult: the remote results for the node
2487 # We just have to verify the paths on master and/or master candidates
2488 # as the oob helper is invoked on the master
2489 if ((ninfo.master_candidate or ninfo.master_capable) and
2490 constants.NV_OOB_PATHS in nresult):
2491 for path_result in nresult[constants.NV_OOB_PATHS]:
2492 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2494 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2495 """Verifies and updates the node volume data.
2497 This function will update a L{NodeImage}'s internal structures
2498 with data from the remote call.
2500 @type ninfo: L{objects.Node}
2501 @param ninfo: the node to check
2502 @param nresult: the remote results for the node
2503 @param nimg: the node image object
2504 @param vg_name: the configured VG name
2508 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2510 nimg.lvm_fail = True
2511 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2514 elif isinstance(lvdata, basestring):
2515 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2516 utils.SafeEncode(lvdata))
2517 elif not isinstance(lvdata, dict):
2518 _ErrorIf(True, constants.CV_ENODELVM, node,
2519 "rpc call to node failed (lvlist)")
2521 nimg.volumes = lvdata
2522 nimg.lvm_fail = False
2524 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2525 """Verifies and updates the node instance list.
2527 If the listing was successful, then updates this node's instance
2528 list. Otherwise, it marks the RPC call as failed for the instance
2529 list.
2531 @type ninfo: L{objects.Node}
2532 @param ninfo: the node to check
2533 @param nresult: the remote results for the node
2534 @param nimg: the node image object
2537 idata = nresult.get(constants.NV_INSTANCELIST, None)
2538 test = not isinstance(idata, list)
2539 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2540 "rpc call to node failed (instancelist): %s",
2541 utils.SafeEncode(str(idata)))
2543 nimg.hyp_fail = True
2545 nimg.instances = idata
2547 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2548 """Verifies and computes a node information map
2550 @type ninfo: L{objects.Node}
2551 @param ninfo: the node to check
2552 @param nresult: the remote results for the node
2553 @param nimg: the node image object
2554 @param vg_name: the configured VG name
2558 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2560 # try to read free memory (from the hypervisor)
2561 hv_info = nresult.get(constants.NV_HVINFO, None)
2562 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2563 _ErrorIf(test, constants.CV_ENODEHV, node,
2564 "rpc call to node failed (hvinfo)")
2567 nimg.mfree = int(hv_info["memory_free"])
2568 except (ValueError, TypeError):
2569 _ErrorIf(True, constants.CV_ENODERPC, node,
2570 "node returned invalid nodeinfo, check hypervisor")
2572 # FIXME: devise a free space model for file based instances as well
2573 if vg_name is not None:
2574 test = (constants.NV_VGLIST not in nresult or
2575 vg_name not in nresult[constants.NV_VGLIST])
2576 _ErrorIf(test, constants.CV_ENODELVM, node,
2577 "node didn't return data for the volume group '%s'"
2578 " - it is either missing or broken", vg_name)
2581 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2582 except (ValueError, TypeError):
2583 _ErrorIf(True, constants.CV_ENODERPC, node,
2584 "node returned invalid LVM info, check LVM status")
2586 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2587 """Gets per-disk status information for all instances.
2589 @type nodelist: list of strings
2590 @param nodelist: Node names
2591 @type node_image: dict of (name, L{objects.Node})
2592 @param node_image: Node objects
2593 @type instanceinfo: dict of (name, L{objects.Instance})
2594 @param instanceinfo: Instance objects
2595 @rtype: {instance: {node: [(success, payload)]}}
2596 @return: a dictionary of per-instance dictionaries with nodes as
2597 keys and disk information as values; the disk information is a
2598 list of tuples (success, payload)
2601 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2604 node_disks_devonly = {}
2605 diskless_instances = set()
2606 diskless = constants.DT_DISKLESS
2608 for nname in nodelist:
2609 node_instances = list(itertools.chain(node_image[nname].pinst,
2610 node_image[nname].sinst))
2611 diskless_instances.update(inst for inst in node_instances
2612 if instanceinfo[inst].disk_template == diskless)
2613 disks = [(inst, disk)
2614 for inst in node_instances
2615 for disk in instanceinfo[inst].disks]
2618 # No need to collect data
2621 node_disks[nname] = disks
2623 # Creating copies as SetDiskID below will modify the objects and that can
2624 # lead to incorrect data returned from nodes
2625 devonly = [dev.Copy() for (_, dev) in disks]
2628 self.cfg.SetDiskID(dev, nname)
2630 node_disks_devonly[nname] = devonly
2632 assert len(node_disks) == len(node_disks_devonly)
2634 # Collect data from all nodes with disks
2635 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2638 assert len(result) == len(node_disks)
2642 for (nname, nres) in result.items():
2643 disks = node_disks[nname]
2646 # No data from this node
2647 data = len(disks) * [(False, "node offline")]
2650 _ErrorIf(msg, constants.CV_ENODERPC, nname,
2651 "while getting disk information: %s", msg)
2653 # No data from this node
2654 data = len(disks) * [(False, msg)]
2657 for idx, i in enumerate(nres.payload):
2658 if isinstance(i, (tuple, list)) and len(i) == 2:
2661 logging.warning("Invalid result from node %s, entry %d: %s",
2663 data.append((False, "Invalid result from the remote node"))
2665 for ((inst, _), status) in zip(disks, data):
2666 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2668 # Add empty entries for diskless instances.
2669 for inst in diskless_instances:
2670 assert inst not in instdisk
2673 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2674 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2675 compat.all(isinstance(s, (tuple, list)) and
2676 len(s) == 2 for s in statuses)
2677 for inst, nnames in instdisk.items()
2678 for nname, statuses in nnames.items())
2679 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
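# A hypothetical instdisk layout, for clarity:
#   {"inst1": {"node1": [(True, status_disk0), (True, status_disk1)]}}
# with one (success, payload) tuple per disk, and an empty dict added above
# for every diskless instance.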
2684 def _SshNodeSelector(group_uuid, all_nodes):
2685 """Create endless iterators for all potential SSH check hosts.
2688 nodes = [node for node in all_nodes
2689 if (node.group != group_uuid and
2691 keyfunc = operator.attrgetter("group")
2693 return map(itertools.cycle,
2694 [sorted(map(operator.attrgetter("name"), names))
2695 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2699 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2700 """Choose which nodes should talk to which other nodes.
2702 We will make nodes contact all nodes in their group, and one node from
2703 every other group.
2705 @warning: This algorithm has a known issue if one node group is much
2706 smaller than others (e.g. just one node). In such a case all other
2707 nodes will talk to the single node.
2710 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2711 sel = cls._SshNodeSelector(group_uuid, all_nodes)
2713 return (online_nodes,
2714 dict((name, sorted([i.next() for i in sel]))
2715 for name in online_nodes))
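# Hypothetical example: with groups G1 = {A, B} and G2 = {C, D}, verifying
# G1 hands each online node of G1 one node from every other group, drawn
# round-robin from the per-group cycles, so A might be told to check C and
# B to check D.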
2717 def BuildHooksEnv(self):
2720 Cluster-Verify hooks are only run in the post phase; if they fail, their
2721 output is logged in the verify output and the verification fails.
2725 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2728 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2729 for node in self.my_node_info.values())
2733 def BuildHooksNodes(self):
2734 """Build hooks nodes.
2737 return ([], self.my_node_names)
2739 def Exec(self, feedback_fn):
2740 """Verify integrity of the node group, performing various test on nodes.
2743 # This method has too many local variables. pylint: disable=R0914
2744 feedback_fn("* Verifying group '%s'" % self.group_info.name)
2746 if not self.my_node_names:
2748 feedback_fn("* Empty node group, skipping verification")
2752 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2753 verbose = self.op.verbose
2754 self._feedback_fn = feedback_fn
2756 vg_name = self.cfg.GetVGName()
2757 drbd_helper = self.cfg.GetDRBDHelper()
2758 cluster = self.cfg.GetClusterInfo()
2759 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2760 hypervisors = cluster.enabled_hypervisors
2761 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2763 i_non_redundant = [] # Non redundant instances
2764 i_non_a_balanced = [] # Non auto-balanced instances
2765 i_offline = 0 # Count of offline instances
2766 n_offline = 0 # Count of offline nodes
2767 n_drained = 0 # Count of nodes being drained
2768 node_vol_should = {}
2770 # FIXME: verify OS list
2773 filemap = _ComputeAncillaryFiles(cluster, False)
2775 # do local checksums
2776 master_node = self.master_node = self.cfg.GetMasterNode()
2777 master_ip = self.cfg.GetMasterIP()
2779 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2782 if self.cfg.GetUseExternalMipScript():
2783 user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
2785 node_verify_param = {
2786 constants.NV_FILELIST:
2787 utils.UniqueSequence(filename
2788 for files in filemap
2789 for filename in files),
2790 constants.NV_NODELIST:
2791 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
2792 self.all_node_info.values()),
2793 constants.NV_HYPERVISOR: hypervisors,
2794 constants.NV_HVPARAMS:
2795 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2796 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2797 for node in node_data_list
2798 if not node.offline],
2799 constants.NV_INSTANCELIST: hypervisors,
2800 constants.NV_VERSION: None,
2801 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2802 constants.NV_NODESETUP: None,
2803 constants.NV_TIME: None,
2804 constants.NV_MASTERIP: (master_node, master_ip),
2805 constants.NV_OSLIST: None,
2806 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2807 constants.NV_USERSCRIPTS: user_scripts,
2810 if vg_name is not None:
2811 node_verify_param[constants.NV_VGLIST] = None
2812 node_verify_param[constants.NV_LVLIST] = vg_name
2813 node_verify_param[constants.NV_PVLIST] = [vg_name]
2814 node_verify_param[constants.NV_DRBDLIST] = None
2817 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2820 # FIXME: this needs to be changed per node-group, not cluster-wide
2822 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2823 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2824 bridges.add(default_nicpp[constants.NIC_LINK])
2825 for instance in self.my_inst_info.values():
2826 for nic in instance.nics:
2827 full_nic = cluster.SimpleFillNIC(nic.nicparams)
2828 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2829 bridges.add(full_nic[constants.NIC_LINK])
2832 node_verify_param[constants.NV_BRIDGES] = list(bridges)
2834 # Build our expected cluster state
2835 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2837 vm_capable=node.vm_capable))
2838 for node in node_data_list)
2842 for node in self.all_node_info.values():
2843 path = _SupportsOob(self.cfg, node)
2844 if path and path not in oob_paths:
2845 oob_paths.append(path)
2848 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2850 for instance in self.my_inst_names:
2851 inst_config = self.my_inst_info[instance]
2853 for nname in inst_config.all_nodes:
2854 if nname not in node_image:
2855 gnode = self.NodeImage(name=nname)
2856 gnode.ghost = (nname not in self.all_node_info)
2857 node_image[nname] = gnode
2859 inst_config.MapLVsByNode(node_vol_should)
2861 pnode = inst_config.primary_node
2862 node_image[pnode].pinst.append(instance)
2864 for snode in inst_config.secondary_nodes:
2865 nimg = node_image[snode]
2866 nimg.sinst.append(instance)
2867 if pnode not in nimg.sbp:
2868 nimg.sbp[pnode] = []
2869 nimg.sbp[pnode].append(instance)
2871 # At this point, we have the in-memory data structures complete,
2872 # except for the runtime information, which we'll gather next
2874 # Due to the way our RPC system works, exact response times cannot be
2875 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2876 # time before and after executing the request, we can at least have a time
2877 # window.
2878 nvinfo_starttime = time.time()
2879 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2881 self.cfg.GetClusterName())
2882 nvinfo_endtime = time.time()
2884 if self.extra_lv_nodes and vg_name is not None:
2886 self.rpc.call_node_verify(self.extra_lv_nodes,
2887 {constants.NV_LVLIST: vg_name},
2888 self.cfg.GetClusterName())
2890 extra_lv_nvinfo = {}
2892 all_drbd_map = self.cfg.ComputeDRBDMap()
2894 feedback_fn("* Gathering disk information (%s nodes)" %
2895 len(self.my_node_names))
2896 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2899 feedback_fn("* Verifying configuration file consistency")
2901 # If not all nodes are being checked, we need to make sure the master node
2902 # and a non-checked vm_capable node are in the list.
2903 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2905 vf_nvinfo = all_nvinfo.copy()
2906 vf_node_info = list(self.my_node_info.values())
2907 additional_nodes = []
2908 if master_node not in self.my_node_info:
2909 additional_nodes.append(master_node)
2910 vf_node_info.append(self.all_node_info[master_node])
2911 # Add the first vm_capable node we find which is not included
2912 for node in absent_nodes:
2913 nodeinfo = self.all_node_info[node]
2914 if nodeinfo.vm_capable and not nodeinfo.offline:
2915 additional_nodes.append(node)
2916 vf_node_info.append(self.all_node_info[node])
2918 key = constants.NV_FILELIST
2919 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2920 {key: node_verify_param[key]},
2921 self.cfg.GetClusterName()))
2923 vf_nvinfo = all_nvinfo
2924 vf_node_info = self.my_node_info.values()
2926 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2928 feedback_fn("* Verifying node status")
2932 for node_i in node_data_list:
2934 nimg = node_image[node]
2938 feedback_fn("* Skipping offline node %s" % (node,))
2942 if node == master_node:
2944 elif node_i.master_candidate:
2945 ntype = "master candidate"
2946 elif node_i.drained:
2952 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2954 msg = all_nvinfo[node].fail_msg
2955 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
2958 nimg.rpc_fail = True
2961 nresult = all_nvinfo[node].payload
2963 nimg.call_ok = self._VerifyNode(node_i, nresult)
2964 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2965 self._VerifyNodeNetwork(node_i, nresult)
2966 self._VerifyNodeUserScripts(node_i, nresult)
2967 self._VerifyOob(node_i, nresult)
2970 self._VerifyNodeLVM(node_i, nresult, vg_name)
2971 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2974 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2975 self._UpdateNodeInstances(node_i, nresult, nimg)
2976 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2977 self._UpdateNodeOS(node_i, nresult, nimg)
2979 if not nimg.os_fail:
2980 if refos_img is None:
2982 self._VerifyNodeOS(node_i, nimg, refos_img)
2983 self._VerifyNodeBridges(node_i, nresult, bridges)
2985 # Check whether all running instances are primary for the node. (This
2986 # can no longer be done from _VerifyInstance below, since some of the
2987 # wrong instances could be from other node groups.)
2988 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2990 for inst in non_primary_inst:
2991 # FIXME: investigate best way to handle offline insts
2992 if inst.admin_state == constants.ADMINST_OFFLINE:
2994 feedback_fn("* Skipping offline instance %s" % inst.name)
2997 test = inst in self.all_inst_info
2998 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
2999 "instance should not run on node %s", node_i.name)
3000 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3001 "node is running unknown instance %s", inst)
3003 for node, result in extra_lv_nvinfo.items():
3004 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3005 node_image[node], vg_name)
3007 feedback_fn("* Verifying instance status")
3008 for instance in self.my_inst_names:
3010 feedback_fn("* Verifying instance %s" % instance)
3011 inst_config = self.my_inst_info[instance]
3012 self._VerifyInstance(instance, inst_config, node_image,
3014 inst_nodes_offline = []
3016 pnode = inst_config.primary_node
3017 pnode_img = node_image[pnode]
3018 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3019 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3020 " primary node failed", instance)
3022 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3024 constants.CV_EINSTANCEBADNODE, instance,
3025 "instance is marked as running and lives on offline node %s",
3026 inst_config.primary_node)
3028 # If the instance is non-redundant we cannot survive losing its primary
3029 # node, so we are not N+1 compliant. On the other hand we have no disk
3030 # templates with more than one secondary so that situation is not well
3031 # supported either.
3032 # FIXME: does not support file-backed instances
3033 if not inst_config.secondary_nodes:
3034 i_non_redundant.append(instance)
3036 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3037 constants.CV_EINSTANCELAYOUT,
3038 instance, "instance has multiple secondary nodes: %s",
3039 utils.CommaJoin(inst_config.secondary_nodes),
3040 code=self.ETYPE_WARNING)
3042 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3043 pnode = inst_config.primary_node
3044 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3045 instance_groups = {}
3047 for node in instance_nodes:
3048 instance_groups.setdefault(self.all_node_info[node].group,
3052 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3053 # Sort so that we always list the primary node first.
3054 for group, nodes in sorted(instance_groups.items(),
3055 key=lambda (_, nodes): pnode in nodes,
3058 self._ErrorIf(len(instance_groups) > 1,
3059 constants.CV_EINSTANCESPLITGROUPS,
3060 instance, "instance has primary and secondary nodes in"
3061 " different groups: %s", utils.CommaJoin(pretty_list),
3062 code=self.ETYPE_WARNING)
3064 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3065 i_non_a_balanced.append(instance)
3067 for snode in inst_config.secondary_nodes:
3068 s_img = node_image[snode]
3069 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3070 snode, "instance %s, connection to secondary node failed",
3074 inst_nodes_offline.append(snode)
3076 # warn that the instance lives on offline nodes
3077 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3078 "instance has offline secondary node(s) %s",
3079 utils.CommaJoin(inst_nodes_offline))
3080 # ... or ghost/non-vm_capable nodes
3081 for node in inst_config.all_nodes:
3082 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3083 instance, "instance lives on ghost node %s", node)
3084 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3085 instance, "instance lives on non-vm_capable node %s", node)
3087 feedback_fn("* Verifying orphan volumes")
3088 reserved = utils.FieldSet(*cluster.reserved_lvs)
3090 # We will get spurious "unknown volume" warnings if any node of this group
3091 # is secondary for an instance whose primary is in another group. To avoid
3092 # them, we find these instances and add their volumes to node_vol_should.
3093 for inst in self.all_inst_info.values():
3094 for secondary in inst.secondary_nodes:
3095 if (secondary in self.my_node_info
3096 and inst.name not in self.my_inst_info):
3097 inst.MapLVsByNode(node_vol_should)
3100 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3102 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3103 feedback_fn("* Verifying N+1 Memory redundancy")
3104 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3106 feedback_fn("* Other Notes")
3108 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3109 % len(i_non_redundant))
3111 if i_non_a_balanced:
3112 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3113 % len(i_non_a_balanced))
3116 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3119 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3122 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3126 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3127 """Analyze the post-hooks' result
3129 This method analyses the hook result, handles it, and sends some
3130 nicely-formatted feedback back to the user.
3132 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3133 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3134 @param hooks_results: the results of the multi-node hooks rpc call
3135 @param feedback_fn: function used to send feedback back to the caller
3136 @param lu_result: previous Exec result
3137 @return: the new Exec result, based on the previous result
3141 # We only really run POST phase hooks, only for non-empty groups,
3142 # and are only interested in their results
3143 if not self.my_node_names:
3146 elif phase == constants.HOOKS_PHASE_POST:
3147 # Used to change hooks' output to proper indentation
3148 feedback_fn("* Hooks Results")
3149 assert hooks_results, "invalid result from hooks"
3151 for node_name in hooks_results:
3152 res = hooks_results[node_name]
3154 test = msg and not res.offline
3155 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3156 "Communication failure in hooks execution: %s", msg)
3157 if res.offline or msg:
3158 # No need to investigate payload if node is offline or gave
3159 # an error
3161 for script, hkr, output in res.payload:
3162 test = hkr == constants.HKR_FAIL
3163 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3164 "Script %s failed, output:", script)
3166 output = self._HOOKS_INDENT_RE.sub(" ", output)
3167 feedback_fn("%s" % output)
3173 class LUClusterVerifyDisks(NoHooksLU):
3174 """Verifies the cluster disks status.
3179 def ExpandNames(self):
3180 self.share_locks = _ShareAll()
3181 self.needed_locks = {
3182 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3185 def Exec(self, feedback_fn):
3186 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3188 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3189 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3190 for group in group_names])
3193 class LUGroupVerifyDisks(NoHooksLU):
3194 """Verifies the status of all disks in a node group.
3199 def ExpandNames(self):
3200 # Raises errors.OpPrereqError on its own if group can't be found
3201 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3203 self.share_locks = _ShareAll()
3204 self.needed_locks = {
3205 locking.LEVEL_INSTANCE: [],
3206 locking.LEVEL_NODEGROUP: [],
3207 locking.LEVEL_NODE: [],
3210 def DeclareLocks(self, level):
3211 if level == locking.LEVEL_INSTANCE:
3212 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3214 # Lock instances optimistically, needs verification once node and group
3215 # locks have been acquired
3216 self.needed_locks[locking.LEVEL_INSTANCE] = \
3217 self.cfg.GetNodeGroupInstances(self.group_uuid)
3219 elif level == locking.LEVEL_NODEGROUP:
3220 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3222 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3223 set([self.group_uuid] +
3224 # Lock all groups used by instances optimistically; this requires
3225 # going via the node before it's locked, requiring verification
3226 # later on
3228 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3229 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3231 elif level == locking.LEVEL_NODE:
3232 # This will only lock the nodes in the group to be verified which contain
3233 # actual instances
3234 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3235 self._LockInstancesNodes()
3237 # Lock all nodes in group to be verified
3238 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3239 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3240 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3242 def CheckPrereq(self):
3243 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3244 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3245 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3247 assert self.group_uuid in owned_groups
3249 # Check if locked instances are still correct
3250 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3252 # Get instance information
3253 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3255 # Check if node groups for locked instances are still correct
3256 for (instance_name, inst) in self.instances.items():
3257 assert owned_nodes.issuperset(inst.all_nodes), \
3258 "Instance %s's nodes changed while we kept the lock" % instance_name
3260 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3263 assert self.group_uuid in inst_groups, \
3264 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3266 def Exec(self, feedback_fn):
3267 """Verify integrity of cluster disks.
3269 @rtype: tuple of three items
3270 @return: a tuple of (dict of node-to-node_error, list of instances
3271 which need activate-disks, dict of instance: (node, volume) for
3276 res_instances = set()
3279 nv_dict = _MapInstanceDisksToNodes([inst
3280 for inst in self.instances.values()
3281 if inst.admin_state == constants.ADMINST_UP])
3284 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3285 set(self.cfg.GetVmCapableNodeList()))
3287 node_lvs = self.rpc.call_lv_list(nodes, [])
3289 for (node, node_res) in node_lvs.items():
3290 if node_res.offline:
3293 msg = node_res.fail_msg
3295 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3296 res_nodes[node] = msg
3299 for lv_name, (_, _, lv_online) in node_res.payload.items():
3300 inst = nv_dict.pop((node, lv_name), None)
3301 if not (lv_online or inst is None):
3302 res_instances.add(inst)
3304 # any leftover items in nv_dict are missing LVs, let's arrange the data
3305 # better
3306 for key, inst in nv_dict.iteritems():
3307 res_missing.setdefault(inst, []).append(list(key))
3309 return (res_nodes, list(res_instances), res_missing)
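# A hypothetical return value, matching the docstring above:
#   ({"node1": "rpc failure"}, ["inst2"],
#    {"inst3": [["node2", "xenvg/disk0_data"]]})
# i.e. per-node errors, instances needing disk activation, and missing LVs
# keyed by instance.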
3312 class LUClusterRepairDiskSizes(NoHooksLU):
3313 """Verifies the cluster disks sizes.
3318 def ExpandNames(self):
3319 if self.op.instances:
3320 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3321 self.needed_locks = {
3322 locking.LEVEL_NODE_RES: [],
3323 locking.LEVEL_INSTANCE: self.wanted_names,
3325 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3327 self.wanted_names = None
3328 self.needed_locks = {
3329 locking.LEVEL_NODE_RES: locking.ALL_SET,
3330 locking.LEVEL_INSTANCE: locking.ALL_SET,
3332 self.share_locks = {
3333 locking.LEVEL_NODE_RES: 1,
3334 locking.LEVEL_INSTANCE: 0,
3337 def DeclareLocks(self, level):
3338 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3339 self._LockInstancesNodes(primary_only=True, level=level)
3341 def CheckPrereq(self):
3342 """Check prerequisites.
3344 This only checks the optional instance list against the existing names.
3347 if self.wanted_names is None:
3348 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3350 self.wanted_instances = \
3351 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3353 def _EnsureChildSizes(self, disk):
3354 """Ensure children of the disk have the needed disk size.
3356 This is valid mainly for DRBD8 and fixes an issue where the
3357 children have a smaller disk size.
3359 @param disk: an L{ganeti.objects.Disk} object
3362 if disk.dev_type == constants.LD_DRBD8:
3363 assert disk.children, "Empty children for DRBD8?"
3364 fchild = disk.children[0]
3365 mismatch = fchild.size < disk.size
3367 self.LogInfo("Child disk has size %d, parent %d, fixing",
3368 fchild.size, disk.size)
3369 fchild.size = disk.size
3371 # and we recurse on this child only, not on the metadev
3372 return self._EnsureChildSizes(fchild) or mismatch
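# Hypothetical example: a DRBD8 disk recorded at 10240 MiB whose data LV
# child only records 10112 MiB gets the child size bumped to 10240 in the
# configuration; the caller then persists the change via cfg.Update().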
3376 def Exec(self, feedback_fn):
3377 """Verify the size of cluster disks.
3380 # TODO: check child disks too
3381 # TODO: check differences in size between primary/secondary nodes
3383 for instance in self.wanted_instances:
3384 pnode = instance.primary_node
3385 if pnode not in per_node_disks:
3386 per_node_disks[pnode] = []
3387 for idx, disk in enumerate(instance.disks):
3388 per_node_disks[pnode].append((instance, idx, disk))
3390 assert not (frozenset(per_node_disks.keys()) -
3391 self.owned_locks(locking.LEVEL_NODE_RES)), \
3392 "Not owning correct locks"
3393 assert not self.owned_locks(locking.LEVEL_NODE)
3396 for node, dskl in per_node_disks.items():
3397 newl = [v[2].Copy() for v in dskl]
3399 self.cfg.SetDiskID(dsk, node)
3400 result = self.rpc.call_blockdev_getsize(node, newl)
3402 self.LogWarning("Failure in blockdev_getsize call to node"
3403 " %s, ignoring", node)
3405 if len(result.payload) != len(dskl):
3406 logging.warning("Invalid result from node %s: len(dksl)=%d,"
3407 " result.payload=%s", node, len(dskl), result.payload)
3408 self.LogWarning("Invalid result from node %s, ignoring node results",
3411 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3413 self.LogWarning("Disk %d of instance %s did not return size"
3414 " information, ignoring", idx, instance.name)
3416 if not isinstance(size, (int, long)):
3417 self.LogWarning("Disk %d of instance %s did not return valid"
3418 " size information, ignoring", idx, instance.name)
3421 if size != disk.size:
3422 self.LogInfo("Disk %d of instance %s has mismatched size,"
3423 " correcting: recorded %d, actual %d", idx,
3424 instance.name, disk.size, size)
3426 self.cfg.Update(instance, feedback_fn)
3427 changed.append((instance.name, idx, size))
3428 if self._EnsureChildSizes(disk):
3429 self.cfg.Update(instance, feedback_fn)
3430 changed.append((instance.name, idx, disk.size))
3434 class LUClusterRename(LogicalUnit):
3435 """Rename the cluster.
3438 HPATH = "cluster-rename"
3439 HTYPE = constants.HTYPE_CLUSTER
3441 def BuildHooksEnv(self):
3446 "OP_TARGET": self.cfg.GetClusterName(),
3447 "NEW_NAME": self.op.name,
3450 def BuildHooksNodes(self):
3451 """Build hooks nodes.
3454 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3456 def CheckPrereq(self):
3457 """Verify that the passed name is a valid one.
3460 hostname = netutils.GetHostname(name=self.op.name,
3461 family=self.cfg.GetPrimaryIPFamily())
3463 new_name = hostname.name
3464 self.ip = new_ip = hostname.ip
3465 old_name = self.cfg.GetClusterName()
3466 old_ip = self.cfg.GetMasterIP()
3467 if new_name == old_name and new_ip == old_ip:
3468 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3469 " cluster has changed",
3471 if new_ip != old_ip:
3472 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3473 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3474 " reachable on the network" %
3475 new_ip, errors.ECODE_NOTUNIQUE)
3477 self.op.name = new_name
3479 def Exec(self, feedback_fn):
3480 """Rename the cluster.
3483 clustername = self.op.name
3486 # shutdown the master IP
3487 master_params = self.cfg.GetMasterNetworkParameters()
3488 ems = self.cfg.GetUseExternalMipScript()
3489 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3491 result.Raise("Could not disable the master role")
3494 cluster = self.cfg.GetClusterInfo()
3495 cluster.cluster_name = clustername
3496 cluster.master_ip = new_ip
3497 self.cfg.Update(cluster, feedback_fn)
3499 # update the known hosts file
3500 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3501 node_list = self.cfg.GetOnlineNodeList()
3503 node_list.remove(master_params.name)
3506 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3508 master_params.ip = new_ip
3509 result = self.rpc.call_node_activate_master_ip(master_params.name,
3511 msg = result.fail_msg
3513 self.LogWarning("Could not re-enable the master role on"
3514 " the master, please restart manually: %s", msg)
3519 def _ValidateNetmask(cfg, netmask):
3520 """Checks if a netmask is valid.
3522 @type cfg: L{config.ConfigWriter}
3523 @param cfg: The cluster configuration
3525 @param netmask: the netmask to be verified
3526 @raise errors.OpPrereqError: if the validation fails
3529 ip_family = cfg.GetPrimaryIPFamily()
3531 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3532 except errors.ProgrammerError:
3533 raise errors.OpPrereqError("Invalid primary ip family: %s." %
3535 if not ipcls.ValidateNetmask(netmask):
3536 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3540 class LUClusterSetParams(LogicalUnit):
3541 """Change the parameters of the cluster.
3544 HPATH = "cluster-modify"
3545 HTYPE = constants.HTYPE_CLUSTER
3548 def CheckArguments(self):
3552 if self.op.uid_pool:
3553 uidpool.CheckUidPool(self.op.uid_pool)
3555 if self.op.add_uids:
3556 uidpool.CheckUidPool(self.op.add_uids)
3558 if self.op.remove_uids:
3559 uidpool.CheckUidPool(self.op.remove_uids)
3561 if self.op.master_netmask is not None:
3562 _ValidateNetmask(self.cfg, self.op.master_netmask)
3564 if self.op.diskparams:
3565 for dt_params in self.op.diskparams.values():
3566 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3568 def ExpandNames(self):
3569 # FIXME: in the future maybe other cluster params won't require checking on
3570 # all nodes to be modified.
3571 self.needed_locks = {
3572 locking.LEVEL_NODE: locking.ALL_SET,
3574 self.share_locks[locking.LEVEL_NODE] = 1
3576 def BuildHooksEnv(self):
3581 "OP_TARGET": self.cfg.GetClusterName(),
3582 "NEW_VG_NAME": self.op.vg_name,
3585 def BuildHooksNodes(self):
3586 """Build hooks nodes.
3589 mn = self.cfg.GetMasterNode()
3592 def CheckPrereq(self):
3593 """Check prerequisites.
3595 This checks that the given parameters do not conflict and
3596 that the given volume group is valid.
3599 if self.op.vg_name is not None and not self.op.vg_name:
3600 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3601 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3602 " instances exist", errors.ECODE_INVAL)
3604 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3605 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3606 raise errors.OpPrereqError("Cannot disable drbd helper while"
3607 " drbd-based instances exist",
3610 node_list = self.owned_locks(locking.LEVEL_NODE)
3612 # if vg_name not None, checks given volume group on all nodes
3614 vglist = self.rpc.call_vg_list(node_list)
3615 for node in node_list:
3616 msg = vglist[node].fail_msg
3618 # ignoring down node
3619 self.LogWarning("Error while gathering data on node %s"
3620 " (ignoring node): %s", node, msg)
3622 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3624 constants.MIN_VG_SIZE)
3626 raise errors.OpPrereqError("Error on node '%s': %s" %
3627 (node, vgstatus), errors.ECODE_ENVIRON)
3629 if self.op.drbd_helper:
3630 # checks given drbd helper on all nodes
3631 helpers = self.rpc.call_drbd_helper(node_list)
3632 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3634 self.LogInfo("Not checking drbd helper on offline node %s", node)
3636 msg = helpers[node].fail_msg
3638 raise errors.OpPrereqError("Error checking drbd helper on node"
3639 " '%s': %s" % (node, msg),
3640 errors.ECODE_ENVIRON)
3641 node_helper = helpers[node].payload
3642 if node_helper != self.op.drbd_helper:
3643 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3644 (node, node_helper), errors.ECODE_ENVIRON)
3646 self.cluster = cluster = self.cfg.GetClusterInfo()
3647 # validate params changes
3648 if self.op.beparams:
3649 objects.UpgradeBeParams(self.op.beparams)
3650 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3651 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3653 if self.op.ndparams:
3654 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3655 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3657 # TODO: we need a more general way to handle resetting
3658 # cluster-level parameters to default values
3659 if self.new_ndparams["oob_program"] == "":
3660 self.new_ndparams["oob_program"] = \
3661 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3663 if self.op.hv_state:
3664 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3665 self.cluster.hv_state_static)
3666 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3667 for hv, values in new_hv_state.items())
3669 if self.op.disk_state:
3670 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3671 self.cluster.disk_state_static)
3672 self.new_disk_state = \
3673 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3674 for name, values in svalues.items()))
3675 for storage, svalues in new_disk_state.items())
3677 if self.op.nicparams:
3678 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3679 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3680 objects.NIC.CheckParameterSyntax(self.new_nicparams)
3683 # check all instances for consistency
3684 for instance in self.cfg.GetAllInstancesInfo().values():
3685 for nic_idx, nic in enumerate(instance.nics):
3686 params_copy = copy.deepcopy(nic.nicparams)
3687 params_filled = objects.FillDict(self.new_nicparams, params_copy)
3689 # check parameter syntax
3691 objects.NIC.CheckParameterSyntax(params_filled)
3692 except errors.ConfigurationError, err:
3693 nic_errors.append("Instance %s, nic/%d: %s" %
3694 (instance.name, nic_idx, err))
3696 # if we're moving instances to routed, check that they have an ip
3697 target_mode = params_filled[constants.NIC_MODE]
3698 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3699 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3700 " address" % (instance.name, nic_idx))
3702 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3703 "\n".join(nic_errors))
3705 # hypervisor list/parameters
3706 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3707 if self.op.hvparams:
3708 for hv_name, hv_dict in self.op.hvparams.items():
3709 if hv_name not in self.new_hvparams:
3710 self.new_hvparams[hv_name] = hv_dict
3712 self.new_hvparams[hv_name].update(hv_dict)
3714 # disk template parameters
3715 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
3716 if self.op.diskparams:
3717 for dt_name, dt_params in self.op.diskparams.items():
3718 if dt_name not in self.new_diskparams:
3719 self.new_diskparams[dt_name] = dt_params
3721 self.new_diskparams[dt_name].update(dt_params)
3723 # os hypervisor parameters
3724 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3726 for os_name, hvs in self.op.os_hvp.items():
3727 if os_name not in self.new_os_hvp:
3728 self.new_os_hvp[os_name] = hvs
3730 for hv_name, hv_dict in hvs.items():
3731 if hv_name not in self.new_os_hvp[os_name]:
3732 self.new_os_hvp[os_name][hv_name] = hv_dict
3734 self.new_os_hvp[os_name][hv_name].update(hv_dict)
3737 self.new_osp = objects.FillDict(cluster.osparams, {})
3738 if self.op.osparams:
3739 for os_name, osp in self.op.osparams.items():
3740 if os_name not in self.new_osp:
3741 self.new_osp[os_name] = {}
3743 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3746 if not self.new_osp[os_name]:
3747 # we removed all parameters
3748 del self.new_osp[os_name]
3750 # check the parameter validity (remote check)
3751 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3752 os_name, self.new_osp[os_name])
3754 # changes to the hypervisor list
3755 if self.op.enabled_hypervisors is not None:
3756 self.hv_list = self.op.enabled_hypervisors
3757 for hv in self.hv_list:
3758 # if the hypervisor doesn't already exist in the cluster
3759 # hvparams, we initialize it to empty, and then (in both
3760 # cases) we make sure to fill the defaults, as we might not
3761 # have a complete defaults list if the hypervisor wasn't
3762 # enabled before
3763 if hv not in new_hvp:
3764 new_hvp[hv] = {}
3765 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3766 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3768 self.hv_list = cluster.enabled_hypervisors
3770 if self.op.hvparams or self.op.enabled_hypervisors is not None:
3771 # either the enabled list has changed, or the parameters have, validate
3772 for hv_name, hv_params in self.new_hvparams.items():
3773 if ((self.op.hvparams and hv_name in self.op.hvparams) or
3774 (self.op.enabled_hypervisors and
3775 hv_name in self.op.enabled_hypervisors)):
3776 # either this is a new hypervisor, or its parameters have changed
3777 hv_class = hypervisor.GetHypervisor(hv_name)
3778 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3779 hv_class.CheckParameterSyntax(hv_params)
3780 _CheckHVParams(self, node_list, hv_name, hv_params)
3783 # no need to check any newly-enabled hypervisors, since the
3784 # defaults have already been checked in the above code-block
3785 for os_name, os_hvp in self.new_os_hvp.items():
3786 for hv_name, hv_params in os_hvp.items():
3787 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3788 # we need to fill in the new os_hvp on top of the actual hv_p
3789 cluster_defaults = self.new_hvparams.get(hv_name, {})
3790 new_osp = objects.FillDict(cluster_defaults, hv_params)
3791 hv_class = hypervisor.GetHypervisor(hv_name)
3792 hv_class.CheckParameterSyntax(new_osp)
3793 _CheckHVParams(self, node_list, hv_name, new_osp)
3795 if self.op.default_iallocator:
3796 alloc_script = utils.FindFile(self.op.default_iallocator,
3797 constants.IALLOCATOR_SEARCH_PATH,
3798 os.path.isfile)
3799 if alloc_script is None:
3800 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3801 " specified" % self.op.default_iallocator,
3802 errors.ECODE_INVAL)
3804 def Exec(self, feedback_fn):
3805 """Change the parameters of the cluster.
3808 if self.op.vg_name is not None:
3809 new_volume = self.op.vg_name
3812 if new_volume != self.cfg.GetVGName():
3813 self.cfg.SetVGName(new_volume)
3815 feedback_fn("Cluster LVM configuration already in desired"
3816 " state, not changing")
3817 if self.op.drbd_helper is not None:
3818 new_helper = self.op.drbd_helper
3821 if new_helper != self.cfg.GetDRBDHelper():
3822 self.cfg.SetDRBDHelper(new_helper)
3824 feedback_fn("Cluster DRBD helper already in desired state,"
3826 if self.op.hvparams:
3827 self.cluster.hvparams = self.new_hvparams
3828 if self.op.os_hvp:
3829 self.cluster.os_hvp = self.new_os_hvp
3830 if self.op.enabled_hypervisors is not None:
3831 self.cluster.hvparams = self.new_hvparams
3832 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3833 if self.op.beparams:
3834 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3835 if self.op.nicparams:
3836 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3837 if self.op.osparams:
3838 self.cluster.osparams = self.new_osp
3839 if self.op.ndparams:
3840 self.cluster.ndparams = self.new_ndparams
3841 if self.op.diskparams:
3842 self.cluster.diskparams = self.new_diskparams
3843 if self.op.hv_state:
3844 self.cluster.hv_state_static = self.new_hv_state
3845 if self.op.disk_state:
3846 self.cluster.disk_state_static = self.new_disk_state
3848 if self.op.candidate_pool_size is not None:
3849 self.cluster.candidate_pool_size = self.op.candidate_pool_size
3850 # we need to update the pool size here, otherwise the save will fail
3851 _AdjustCandidatePool(self, [])
3853 if self.op.maintain_node_health is not None:
3854 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
3855 feedback_fn("Note: CONFD was disabled at build time, node health"
3856 " maintenance is not useful (still enabling it)")
3857 self.cluster.maintain_node_health = self.op.maintain_node_health
3859 if self.op.prealloc_wipe_disks is not None:
3860 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3862 if self.op.add_uids is not None:
3863 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3865 if self.op.remove_uids is not None:
3866 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3868 if self.op.uid_pool is not None:
3869 self.cluster.uid_pool = self.op.uid_pool
3871 if self.op.default_iallocator is not None:
3872 self.cluster.default_iallocator = self.op.default_iallocator
3874 if self.op.reserved_lvs is not None:
3875 self.cluster.reserved_lvs = self.op.reserved_lvs
3877 if self.op.use_external_mip_script is not None:
3878 self.cluster.use_external_mip_script = self.op.use_external_mip_script
3880 def helper_os(aname, mods, desc):
3882 lst = getattr(self.cluster, aname)
3883 for key, val in mods:
3884 if key == constants.DDM_ADD:
3885 if val in lst:
3886 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3887 else:
3888 lst.append(val)
3889 elif key == constants.DDM_REMOVE:
3890 if val in lst:
3891 lst.remove(val)
3892 else:
3893 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3894 else:
3895 raise errors.ProgrammerError("Invalid modification '%s'" % key)
3897 if self.op.hidden_os:
3898 helper_os("hidden_os", self.op.hidden_os, "hidden")
3900 if self.op.blacklisted_os:
3901 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3903 if self.op.master_netdev:
3904 master_params = self.cfg.GetMasterNetworkParameters()
3905 ems = self.cfg.GetUseExternalMipScript()
3906 feedback_fn("Shutting down master ip on the current netdev (%s)" %
3907 self.cluster.master_netdev)
3908 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3909 master_params, ems)
3910 result.Raise("Could not disable the master ip")
3911 feedback_fn("Changing master_netdev from %s to %s" %
3912 (master_params.netdev, self.op.master_netdev))
3913 self.cluster.master_netdev = self.op.master_netdev
3915 if self.op.master_netmask:
3916 master_params = self.cfg.GetMasterNetworkParameters()
3917 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
3918 result = self.rpc.call_node_change_master_netmask(master_params.name,
3919 master_params.netmask,
3920 self.op.master_netmask,
3921 master_params.ip,
3922 master_params.netdev)
3924 msg = "Could not change the master IP netmask: %s" % result.fail_msg
3927 self.cluster.master_netmask = self.op.master_netmask
3929 self.cfg.Update(self.cluster, feedback_fn)
3931 if self.op.master_netdev:
3932 master_params = self.cfg.GetMasterNetworkParameters()
3933 feedback_fn("Starting the master ip on the new master netdev (%s)" %
3934 self.op.master_netdev)
3935 ems = self.cfg.GetUseExternalMipScript()
3936 result = self.rpc.call_node_activate_master_ip(master_params.name,
3937 master_params, ems)
3938 if result.fail_msg:
3939 self.LogWarning("Could not re-enable the master ip on"
3940 " the master, please restart manually: %s",
3941 result.fail_msg)
3944 def _UploadHelper(lu, nodes, fname):
3945 """Helper for uploading a file and showing warnings.
3948 if os.path.exists(fname):
3949 result = lu.rpc.call_upload_file(nodes, fname)
3950 for to_node, to_result in result.items():
3951 msg = to_result.fail_msg
3953 msg = ("Copy of file %s to node %s failed: %s" %
3954 (fname, to_node, msg))
3955 lu.proc.LogWarning(msg)
3958 def _ComputeAncillaryFiles(cluster, redist):
3959 """Compute files external to Ganeti which need to be consistent.
3961 @type redist: boolean
3962 @param redist: Whether to include files which need to be redistributed
3965 # Compute files for all nodes
3967 constants.SSH_KNOWN_HOSTS_FILE,
3968 constants.CONFD_HMAC_KEY,
3969 constants.CLUSTER_DOMAIN_SECRET_FILE,
3970 constants.SPICE_CERT_FILE,
3971 constants.SPICE_CACERT_FILE,
3972 constants.RAPI_USERS_FILE,
3976 files_all.update(constants.ALL_CERT_FILES)
3977 files_all.update(ssconf.SimpleStore().GetFileList())
3979 # we need to ship at least the RAPI certificate
3980 files_all.add(constants.RAPI_CERT_FILE)
3982 if cluster.modify_etc_hosts:
3983 files_all.add(constants.ETC_HOSTS)
3985 # Files which are optional, these must:
3986 # - be present in one other category as well
3987 # - either exist or not exist on all nodes of that category (mc, vm all)
3989 constants.RAPI_USERS_FILE,
3992 # Files which should only be on master candidates
3996 files_mc.add(constants.CLUSTER_CONF_FILE)
3998 # FIXME: this should also be replicated but Ganeti doesn't support files_mc
3999 # replication
4000 files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)
4002 # Files which should only be on VM-capable nodes
4003 files_vm = set(filename
4004 for hv_name in cluster.enabled_hypervisors
4005 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4007 files_opt |= set(filename
4008 for hv_name in cluster.enabled_hypervisors
4009 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4011 # Filenames in each category must be unique
4012 all_files_set = files_all | files_mc | files_vm
4013 assert (len(all_files_set) ==
4014 sum(map(len, [files_all, files_mc, files_vm]))), \
4015 "Found file listed in more than one file list"
4017 # Optional files must be present in one other category
4018 assert all_files_set.issuperset(files_opt), \
4019 "Optional file not in a different required list"
4021 return (files_all, files_opt, files_mc, files_vm)
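# A short usage sketch for the helper above (illustrative only, never
# called; the returned "plan" structure is hypothetical):
def _ExampleAncillaryFilePlan(cluster):
  """Illustrative only: shows how the four categories relate."""
  (files_all, files_opt, files_mc, files_vm) = \
    _ComputeAncillaryFiles(cluster, True)
  # optional files never form a category of their own
  assert files_opt.issubset(files_all | files_mc | files_vm)
  return {
    "all-nodes": files_all,            # distributed everywhere
    "master-candidates": files_mc,     # config-carrying nodes only
    "vm-capable": files_vm,            # hypervisor ancillary files
  }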
4024 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4025 """Distribute additional files which are part of the cluster configuration.
4027 ConfigWriter takes care of distributing the config and ssconf files, but
4028 there are more files which should be distributed to all nodes. This function
4029 makes sure those are copied.
4031 @param lu: calling logical unit
4032 @param additional_nodes: list of nodes not in the config to distribute to
4033 @type additional_vm: boolean
4034 @param additional_vm: whether the additional nodes are vm-capable or not
4037 # Gather target nodes
4038 cluster = lu.cfg.GetClusterInfo()
4039 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4041 online_nodes = lu.cfg.GetOnlineNodeList()
4042 vm_nodes = lu.cfg.GetVmCapableNodeList()
4044 if additional_nodes is not None:
4045 online_nodes.extend(additional_nodes)
4047 vm_nodes.extend(additional_nodes)
4049 # Never distribute to master node
4050 for nodelist in [online_nodes, vm_nodes]:
4051 if master_info.name in nodelist:
4052 nodelist.remove(master_info.name)
4055 (files_all, _, files_mc, files_vm) = \
4056 _ComputeAncillaryFiles(cluster, True)
4058 # Never re-distribute configuration file from here
4059 assert not (constants.CLUSTER_CONF_FILE in files_all or
4060 constants.CLUSTER_CONF_FILE in files_vm)
4061 assert not files_mc, "Master candidates not handled in this function"
4064 (online_nodes, files_all),
4065 (vm_nodes, files_vm),
4069 for (node_list, files) in filemap:
4071 _UploadHelper(lu, node_list, fname)
4074 class LUClusterRedistConf(NoHooksLU):
4075 """Force the redistribution of cluster configuration.
4077 This is a very simple LU.
4082 def ExpandNames(self):
4083 self.needed_locks = {
4084 locking.LEVEL_NODE: locking.ALL_SET,
4086 self.share_locks[locking.LEVEL_NODE] = 1
4088 def Exec(self, feedback_fn):
4089 """Redistribute the configuration.
4092 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4093 _RedistributeAncillaryFiles(self)
4096 class LUClusterActivateMasterIp(NoHooksLU):
4097 """Activate the master IP on the master node.
4100 def Exec(self, feedback_fn):
4101 """Activate the master IP.
4104 master_params = self.cfg.GetMasterNetworkParameters()
4105 ems = self.cfg.GetUseExternalMipScript()
4106 result = self.rpc.call_node_activate_master_ip(master_params.name,
4107 master_params, ems)
4108 result.Raise("Could not activate the master IP")
4111 class LUClusterDeactivateMasterIp(NoHooksLU):
4112 """Deactivate the master IP on the master node.
4115 def Exec(self, feedback_fn):
4116 """Deactivate the master IP.
4119 master_params = self.cfg.GetMasterNetworkParameters()
4120 ems = self.cfg.GetUseExternalMipScript()
4121 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4122 master_params, ems)
4123 result.Raise("Could not deactivate the master IP")
4126 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4127 """Sleep and poll for an instance's disk to sync.
4130 if not instance.disks or disks is not None and not disks:
4133 disks = _ExpandCheckDisks(instance, disks)
4136 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4138 node = instance.primary_node
4141 lu.cfg.SetDiskID(dev, node)
4143 # TODO: Convert to utils.Retry
4146 degr_retries = 10 # in seconds, as we sleep 1 second each time
4150 cumul_degraded = False
4151 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
4152 msg = rstats.fail_msg
4154 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4157 raise errors.RemoteError("Can't contact node %s for mirror data,"
4158 " aborting." % node)
4161 rstats = rstats.payload
4163 for i, mstat in enumerate(rstats):
4165 lu.LogWarning("Can't compute data for node %s/%s",
4166 node, disks[i].iv_name)
4169 cumul_degraded = (cumul_degraded or
4170 (mstat.is_degraded and mstat.sync_percent is None))
4171 if mstat.sync_percent is not None:
4173 if mstat.estimated_time is not None:
4174 rem_time = ("%s remaining (estimated)" %
4175 utils.FormatSeconds(mstat.estimated_time))
4176 max_time = mstat.estimated_time
4178 rem_time = "no time estimate"
4179 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4180 (disks[i].iv_name, mstat.sync_percent, rem_time))
4182 # if we're done but degraded, let's do a few small retries, to
4183 # make sure we see a stable and not transient situation; therefore
4184 # we force restart of the loop
4185 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4186 logging.info("Degraded disks found, %d retries left", degr_retries)
4194 time.sleep(min(60, max_time))
4197 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4198 return not cumul_degraded
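# The loop above allows a short grace period when the mirrors claim to be
# done but still look degraded. A self-contained sketch of that pattern
# (poll_fn is a hypothetical callable returning a (done, degraded) pair;
# this helper is not called anywhere):
def _ExampleRetryWhileDegraded(poll_fn, retries=10, wait=1):
  """Illustrative only: retry a few times if the state looks transient."""
  while True:
    (done, degraded) = poll_fn()
    if done and degraded and retries > 0:
      retries -= 1
      time.sleep(wait)
      continue
    return done and not degraded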
4201 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
4202 """Check that mirrors are not degraded.
4204 The ldisk parameter, if True, will change the test from the
4205 is_degraded attribute (which represents overall non-ok status for
4206 the device(s)) to the ldisk (representing the local storage status).
4209 lu.cfg.SetDiskID(dev, node)
4213 if on_primary or dev.AssembleOnSecondary():
4214 rstats = lu.rpc.call_blockdev_find(node, dev)
4215 msg = rstats.fail_msg
4217 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4219 elif not rstats.payload:
4220 lu.LogWarning("Can't find disk on node %s", node)
4224 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4226 result = result and not rstats.payload.is_degraded
4229 for child in dev.children:
4230 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
4235 class LUOobCommand(NoHooksLU):
4236 """Logical unit for OOB handling.
4240 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4242 def ExpandNames(self):
4243 """Gather locks we need.
4246 if self.op.node_names:
4247 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4248 lock_names = self.op.node_names
4250 lock_names = locking.ALL_SET
4252 self.needed_locks = {
4253 locking.LEVEL_NODE: lock_names,
4256 def CheckPrereq(self):
4257 """Check prerequisites.
4260 - the node exists in the configuration
4263 Any errors are signaled by raising errors.OpPrereqError.
4267 self.master_node = self.cfg.GetMasterNode()
4269 assert self.op.power_delay >= 0.0
4271 if self.op.node_names:
4272 if (self.op.command in self._SKIP_MASTER and
4273 self.master_node in self.op.node_names):
4274 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4275 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4277 if master_oob_handler:
4278 additional_text = ("run '%s %s %s' if you want to operate on the"
4279 " master regardless") % (master_oob_handler,
4280 self.op.command,
4281 self.master_node)
4282 else:
4283 additional_text = "it does not support out-of-band operations"
4285 raise errors.OpPrereqError(("Operating on the master node %s is not"
4286 " allowed for %s; %s") %
4287 (self.master_node, self.op.command,
4288 additional_text), errors.ECODE_INVAL)
4290 self.op.node_names = self.cfg.GetNodeList()
4291 if self.op.command in self._SKIP_MASTER:
4292 self.op.node_names.remove(self.master_node)
4294 if self.op.command in self._SKIP_MASTER:
4295 assert self.master_node not in self.op.node_names
4297 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4299 raise errors.OpPrereqError("Node %s not found" % node_name,
4302 self.nodes.append(node)
4304 if (not self.op.ignore_status and
4305 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4306 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4307 " not marked offline") % node_name,
4310 def Exec(self, feedback_fn):
4311 """Execute OOB and return result if we expect any.
4314 master_node = self.master_node
4317 for idx, node in enumerate(utils.NiceSort(self.nodes,
4318 key=lambda node: node.name)):
4319 node_entry = [(constants.RS_NORMAL, node.name)]
4320 ret.append(node_entry)
4322 oob_program = _SupportsOob(self.cfg, node)
4325 node_entry.append((constants.RS_UNAVAIL, None))
4328 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4329 self.op.command, oob_program, node.name)
4330 result = self.rpc.call_run_oob(master_node, oob_program,
4331 self.op.command, node.name,
4335 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4336 node.name, result.fail_msg)
4337 node_entry.append((constants.RS_NODATA, None))
4340 self._CheckPayload(result)
4341 except errors.OpExecError, err:
4342 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4344 node_entry.append((constants.RS_NODATA, None))
4346 if self.op.command == constants.OOB_HEALTH:
4347 # For health we should log important events
4348 for item, status in result.payload:
4349 if status in [constants.OOB_STATUS_WARNING,
4350 constants.OOB_STATUS_CRITICAL]:
4351 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4352 item, node.name, status)
4354 if self.op.command == constants.OOB_POWER_ON:
4356 elif self.op.command == constants.OOB_POWER_OFF:
4357 node.powered = False
4358 elif self.op.command == constants.OOB_POWER_STATUS:
4359 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4360 if powered != node.powered:
4361 logging.warning(("Recorded power state (%s) of node '%s' does not"
4362 " match actual power state (%s)"), node.powered,
4365 # For configuration changing commands we should update the node
4366 if self.op.command in (constants.OOB_POWER_ON,
4367 constants.OOB_POWER_OFF):
4368 self.cfg.Update(node, feedback_fn)
4370 node_entry.append((constants.RS_NORMAL, result.payload))
4372 if (self.op.command == constants.OOB_POWER_ON and
4373 idx < len(self.nodes) - 1):
4374 time.sleep(self.op.power_delay)
4378 def _CheckPayload(self, result):
4379 """Checks if the payload is valid.
4381 @param result: RPC result
4382 @raises errors.OpExecError: If payload is not valid
4386 if self.op.command == constants.OOB_HEALTH:
4387 if not isinstance(result.payload, list):
4388 errs.append("command 'health' is expected to return a list but got %s" %
4389 type(result.payload))
4391 for item, status in result.payload:
4392 if status not in constants.OOB_STATUSES:
4393 errs.append("health item '%s' has invalid status '%s'" %
4396 if self.op.command == constants.OOB_POWER_STATUS:
4397 if not isinstance(result.payload, dict):
4398 errs.append("power-status is expected to return a dict but got %s" %
4399 type(result.payload))
4401 if self.op.command in [
4402 constants.OOB_POWER_ON,
4403 constants.OOB_POWER_OFF,
4404 constants.OOB_POWER_CYCLE,
4406 if result.payload is not None:
4407 errs.append("%s is expected to not return payload but got '%s'" %
4408 (self.op.command, result.payload))
4411 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4412 utils.CommaJoin(errs))
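# Example payloads matching the checks above (illustrative values only;
# the helper is never called):
def _ExampleOobPayloads():
  """Illustrative only: one well-formed payload per command family."""
  return {
    # health: a list of (item, status) pairs
    constants.OOB_HEALTH: [("disk0", constants.OOB_STATUS_WARNING)],
    # power status: a dict with the "powered" flag
    constants.OOB_POWER_STATUS: {constants.OOB_POWER_STATUS_POWERED: True},
    # power on/off/cycle: no payload at all
    constants.OOB_POWER_ON: None,
  }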
4415 class _OsQuery(_QueryBase):
4416 FIELDS = query.OS_FIELDS
4418 def ExpandNames(self, lu):
4419 # Lock all nodes in shared mode
4420 # Temporary removal of locks, should be reverted later
4421 # TODO: reintroduce locks when they are lighter-weight
4422 lu.needed_locks = {}
4423 #self.share_locks[locking.LEVEL_NODE] = 1
4424 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4426 # The following variables interact with _QueryBase._GetNames
4428 self.wanted = self.names
4430 self.wanted = locking.ALL_SET
4432 self.do_locking = self.use_locking
4434 def DeclareLocks(self, lu, level):
4438 def _DiagnoseByOS(rlist):
4439 """Remaps a per-node return list into an a per-os per-node dictionary
4441 @param rlist: a map with node names as keys and OS objects as values
4444 @return: a dictionary with osnames as keys and as value another
4445 map, with nodes as keys and tuples of (path, status, diagnose,
4446 variants, parameters, api_versions) as values, eg::
4448 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4449 (/srv/..., False, "invalid api")],
4450 "node2": [(/srv/..., True, "", [], [])]}
4455 # we build here the list of nodes that didn't fail the RPC (at RPC
4456 # level), so that nodes with a non-responding node daemon don't
4457 # make all OSes invalid
4458 good_nodes = [node_name for node_name in rlist
4459 if not rlist[node_name].fail_msg]
4460 for node_name, nr in rlist.items():
4461 if nr.fail_msg or not nr.payload:
4463 for (name, path, status, diagnose, variants,
4464 params, api_versions) in nr.payload:
4465 if name not in all_os:
4466 # build a list of nodes for this os containing empty lists
4467 # for each node in node_list
4469 for nname in good_nodes:
4470 all_os[name][nname] = []
4471 # convert params from [name, help] to (name, help)
4472 params = [tuple(v) for v in params]
4473 all_os[name][node_name].append((path, status, diagnose,
4474 variants, params, api_versions))
4477 def _GetQueryData(self, lu):
4478 """Computes the list of nodes and their attributes.
4481 # Locking is not used
4482 assert not (compat.any(lu.glm.is_owned(level)
4483 for level in locking.LEVELS
4484 if level != locking.LEVEL_CLUSTER) or
4485 self.do_locking or self.use_locking)
4487 valid_nodes = [node.name
4488 for node in lu.cfg.GetAllNodesInfo().values()
4489 if not node.offline and node.vm_capable]
4490 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4491 cluster = lu.cfg.GetClusterInfo()
4495 for (os_name, os_data) in pol.items():
4496 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4497 hidden=(os_name in cluster.hidden_os),
4498 blacklisted=(os_name in cluster.blacklisted_os))
4502 api_versions = set()
4504 for idx, osl in enumerate(os_data.values()):
4505 info.valid = bool(info.valid and osl and osl[0][1])
4509 (node_variants, node_params, node_api) = osl[0][3:6]
4512 variants.update(node_variants)
4513 parameters.update(node_params)
4514 api_versions.update(node_api)
4516 # Filter out inconsistent values
4517 variants.intersection_update(node_variants)
4518 parameters.intersection_update(node_params)
4519 api_versions.intersection_update(node_api)
4521 info.variants = list(variants)
4522 info.parameters = list(parameters)
4523 info.api_versions = list(api_versions)
4525 data[os_name] = info
4527 # Prepare data in requested order
4528 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4532 class LUOsDiagnose(NoHooksLU):
4533 """Logical unit for OS diagnose/query.
4539 def _BuildFilter(fields, names):
4540 """Builds a filter for querying OSes.
4543 name_filter = qlang.MakeSimpleFilter("name", names)
4545 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4546 # respective field is not requested
4547 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4548 for fname in ["hidden", "blacklisted"]
4549 if fname not in fields]
4550 if "valid" not in fields:
4551 status_filter.append([qlang.OP_TRUE, "valid"])
4554 status_filter.insert(0, qlang.OP_AND)
4556 status_filter = None
4558 if name_filter and status_filter:
4559 return [qlang.OP_AND, name_filter, status_filter]
4563 return status_filter
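# For example (hypothetical input, and assuming MakeSimpleFilter returns no
# filter for an empty name list), _BuildFilter(["name", "variants"], [])
# yields roughly:
#   [qlang.OP_AND,
#    [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
#    [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
#    [qlang.OP_TRUE, "valid"]]
# i.e. hidden, blacklisted and invalid OSes are filtered out unless those
# fields were explicitly requested.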
4565 def CheckArguments(self):
4566 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4567 self.op.output_fields, False)
4569 def ExpandNames(self):
4570 self.oq.ExpandNames(self)
4572 def Exec(self, feedback_fn):
4573 return self.oq.OldStyleQuery(self)
4576 class LUNodeRemove(LogicalUnit):
4577 """Logical unit for removing a node.
4580 HPATH = "node-remove"
4581 HTYPE = constants.HTYPE_NODE
4583 def BuildHooksEnv(self):
4586 This doesn't run on the target node in the pre phase as a failed
4587 node would then be impossible to remove.
4591 "OP_TARGET": self.op.node_name,
4592 "NODE_NAME": self.op.node_name,
4595 def BuildHooksNodes(self):
4596 """Build hooks nodes.
4599 all_nodes = self.cfg.GetNodeList()
4601 all_nodes.remove(self.op.node_name)
4603 logging.warning("Node '%s', which is about to be removed, was not found"
4604 " in the list of all nodes", self.op.node_name)
4605 return (all_nodes, all_nodes)
4607 def CheckPrereq(self):
4608 """Check prerequisites.
4611 - the node exists in the configuration
4612 - it does not have primary or secondary instances
4613 - it's not the master
4615 Any errors are signaled by raising errors.OpPrereqError.
4618 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4619 node = self.cfg.GetNodeInfo(self.op.node_name)
4620 assert node is not None
4622 masternode = self.cfg.GetMasterNode()
4623 if node.name == masternode:
4624 raise errors.OpPrereqError("Node is the master node, failover to another"
4625 " node is required", errors.ECODE_INVAL)
4627 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4628 if node.name in instance.all_nodes:
4629 raise errors.OpPrereqError("Instance %s is still running on the node,"
4630 " please remove first" % instance_name,
4632 self.op.node_name = node.name
4635 def Exec(self, feedback_fn):
4636 """Removes the node from the cluster.
4640 logging.info("Stopping the node daemon and removing configs from node %s",
4643 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4645 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
4648 # Promote nodes to master candidate as needed
4649 _AdjustCandidatePool(self, exceptions=[node.name])
4650 self.context.RemoveNode(node.name)
4652 # Run post hooks on the node before it's removed
4653 _RunPostHook(self, node.name)
4655 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4656 msg = result.fail_msg
4658 self.LogWarning("Errors encountered on the remote node while leaving"
4659 " the cluster: %s", msg)
4661 # Remove node from our /etc/hosts
4662 if self.cfg.GetClusterInfo().modify_etc_hosts:
4663 master_node = self.cfg.GetMasterNode()
4664 result = self.rpc.call_etc_hosts_modify(master_node,
4665 constants.ETC_HOSTS_REMOVE,
4667 result.Raise("Can't update hosts file with new host data")
4668 _RedistributeAncillaryFiles(self)
4671 class _NodeQuery(_QueryBase):
4672 FIELDS = query.NODE_FIELDS
4674 def ExpandNames(self, lu):
4675 lu.needed_locks = {}
4676 lu.share_locks = _ShareAll()
4679 self.wanted = _GetWantedNodes(lu, self.names)
4681 self.wanted = locking.ALL_SET
4683 self.do_locking = (self.use_locking and
4684 query.NQ_LIVE in self.requested_data)
4687 # If any non-static field is requested we need to lock the nodes
4688 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4690 def DeclareLocks(self, lu, level):
4693 def _GetQueryData(self, lu):
4694 """Computes the list of nodes and their attributes.
4697 all_info = lu.cfg.GetAllNodesInfo()
4699 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4701 # Gather data as requested
4702 if query.NQ_LIVE in self.requested_data:
4703 # filter out non-vm_capable nodes
4704 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4706 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
4707 [lu.cfg.GetHypervisorType()])
4708 live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
4709 for (name, nresult) in node_data.items()
4710 if not nresult.fail_msg and nresult.payload)
4714 if query.NQ_INST in self.requested_data:
4715 node_to_primary = dict([(name, set()) for name in nodenames])
4716 node_to_secondary = dict([(name, set()) for name in nodenames])
4718 inst_data = lu.cfg.GetAllInstancesInfo()
4720 for inst in inst_data.values():
4721 if inst.primary_node in node_to_primary:
4722 node_to_primary[inst.primary_node].add(inst.name)
4723 for secnode in inst.secondary_nodes:
4724 if secnode in node_to_secondary:
4725 node_to_secondary[secnode].add(inst.name)
4727 node_to_primary = None
4728 node_to_secondary = None
4730 if query.NQ_OOB in self.requested_data:
4731 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4732 for name, node in all_info.iteritems())
4736 if query.NQ_GROUP in self.requested_data:
4737 groups = lu.cfg.GetAllNodeGroupsInfo()
4741 return query.NodeQueryData([all_info[name] for name in nodenames],
4742 live_data, lu.cfg.GetMasterNode(),
4743 node_to_primary, node_to_secondary, groups,
4744 oob_support, lu.cfg.GetClusterInfo())
4747 class LUNodeQuery(NoHooksLU):
4748 """Logical unit for querying nodes.
4751 # pylint: disable=W0142
4754 def CheckArguments(self):
4755 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4756 self.op.output_fields, self.op.use_locking)
4758 def ExpandNames(self):
4759 self.nq.ExpandNames(self)
4761 def DeclareLocks(self, level):
4762 self.nq.DeclareLocks(self, level)
4764 def Exec(self, feedback_fn):
4765 return self.nq.OldStyleQuery(self)
4768 class LUNodeQueryvols(NoHooksLU):
4769 """Logical unit for getting volumes on node(s).
4773 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4774 _FIELDS_STATIC = utils.FieldSet("node")
4776 def CheckArguments(self):
4777 _CheckOutputFields(static=self._FIELDS_STATIC,
4778 dynamic=self._FIELDS_DYNAMIC,
4779 selected=self.op.output_fields)
4781 def ExpandNames(self):
4782 self.share_locks = _ShareAll()
4783 self.needed_locks = {}
4785 if not self.op.nodes:
4786 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4788 self.needed_locks[locking.LEVEL_NODE] = \
4789 _GetWantedNodes(self, self.op.nodes)
4791 def Exec(self, feedback_fn):
4792 """Computes the list of nodes and their attributes.
4795 nodenames = self.owned_locks(locking.LEVEL_NODE)
4796 volumes = self.rpc.call_node_volumes(nodenames)
4798 ilist = self.cfg.GetAllInstancesInfo()
4799 vol2inst = _MapInstanceDisksToNodes(ilist.values())
4802 for node in nodenames:
4803 nresult = volumes[node]
4806 msg = nresult.fail_msg
4808 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
4811 node_vols = sorted(nresult.payload,
4812 key=operator.itemgetter("dev"))
4814 for vol in node_vols:
4815 node_output = []
4816 for field in self.op.output_fields:
4817 if field == "node":
4818 val = node
4819 elif field == "phys":
4820 val = vol["dev"]
4821 elif field == "vg":
4822 val = vol["vg"]
4823 elif field == "name":
4824 val = vol["name"]
4825 elif field == "size":
4826 val = int(float(vol["size"]))
4827 elif field == "instance":
4828 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
4829 else:
4830 raise errors.ParameterError(field)
4831 node_output.append(str(val))
4833 output.append(node_output)
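# Each row of the call_node_volumes payload is expected to look roughly
# like (hypothetical values):
#   {"dev": "/dev/xenvg/disk0", "vg": "xenvg", "name": "disk0", "size": 10240.0}
# the "instance" column is not part of the payload; it is resolved through
# vol2inst using the "<vg>/<name>" key built above.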
4838 class LUNodeQueryStorage(NoHooksLU):
4839 """Logical unit for getting information on storage units on node(s).
4842 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4845 def CheckArguments(self):
4846 _CheckOutputFields(static=self._FIELDS_STATIC,
4847 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4848 selected=self.op.output_fields)
4850 def ExpandNames(self):
4851 self.share_locks = _ShareAll()
4852 self.needed_locks = {}
4855 self.needed_locks[locking.LEVEL_NODE] = \
4856 _GetWantedNodes(self, self.op.nodes)
4858 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4860 def Exec(self, feedback_fn):
4861 """Computes the list of nodes and their attributes.
4864 self.nodes = self.owned_locks(locking.LEVEL_NODE)
4866 # Always get name to sort by
4867 if constants.SF_NAME in self.op.output_fields:
4868 fields = self.op.output_fields[:]
4870 fields = [constants.SF_NAME] + self.op.output_fields
4872 # Never ask for node or type as it's only known to the LU
4873 for extra in [constants.SF_NODE, constants.SF_TYPE]:
4874 while extra in fields:
4875 fields.remove(extra)
4877 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4878 name_idx = field_idx[constants.SF_NAME]
4880 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4881 data = self.rpc.call_storage_list(self.nodes,
4882 self.op.storage_type, st_args,
4883 self.op.name, fields)
4887 for node in utils.NiceSort(self.nodes):
4888 nresult = data[node]
4892 msg = nresult.fail_msg
4894 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4897 rows = dict([(row[name_idx], row) for row in nresult.payload])
4899 for name in utils.NiceSort(rows.keys()):
4904 for field in self.op.output_fields:
4905 if field == constants.SF_NODE:
4906 val = node
4907 elif field == constants.SF_TYPE:
4908 val = self.op.storage_type
4909 elif field in field_idx:
4910 val = row[field_idx[field]]
4911 else:
4912 raise errors.ParameterError(field)
4921 class _InstanceQuery(_QueryBase):
4922 FIELDS = query.INSTANCE_FIELDS
4924 def ExpandNames(self, lu):
4925 lu.needed_locks = {}
4926 lu.share_locks = _ShareAll()
4929 self.wanted = _GetWantedInstances(lu, self.names)
4931 self.wanted = locking.ALL_SET
4933 self.do_locking = (self.use_locking and
4934 query.IQ_LIVE in self.requested_data)
4936 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4937 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
4938 lu.needed_locks[locking.LEVEL_NODE] = []
4939 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4941 self.do_grouplocks = (self.do_locking and
4942 query.IQ_NODES in self.requested_data)
4944 def DeclareLocks(self, lu, level):
4946 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
4947 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
4949 # Lock all groups used by instances optimistically; this requires going
4950 # via the node before it's locked, requiring verification later on
4951 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
4953 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
4954 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
4955 elif level == locking.LEVEL_NODE:
4956 lu._LockInstancesNodes() # pylint: disable=W0212
4959 def _CheckGroupLocks(lu):
4960 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
4961 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
4963 # Check if node groups for locked instances are still correct
4964 for instance_name in owned_instances:
4965 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
4967 def _GetQueryData(self, lu):
4968 """Computes the list of instances and their attributes.
4971 if self.do_grouplocks:
4972 self._CheckGroupLocks(lu)
4974 cluster = lu.cfg.GetClusterInfo()
4975 all_info = lu.cfg.GetAllInstancesInfo()
4977 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4979 instance_list = [all_info[name] for name in instance_names]
4980 nodes = frozenset(itertools.chain(*(inst.all_nodes
4981 for inst in instance_list)))
4982 hv_list = list(set([inst.hypervisor for inst in instance_list]))
4985 wrongnode_inst = set()
4987 # Gather data as requested
4988 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4990 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4992 result = node_data[name]
4994 # offline nodes will be in both lists
4995 assert result.fail_msg
4996 offline_nodes.append(name)
4998 bad_nodes.append(name)
4999 elif result.payload:
5000 for inst in result.payload:
5001 if inst in all_info:
5002 if all_info[inst].primary_node == name:
5003 live_data.update(result.payload)
5005 wrongnode_inst.add(inst)
5007 # orphan instance; we don't list it here as we don't
5008 # handle this case yet in the output of instance listing
5009 logging.warning("Orphan instance '%s' found on node %s",
5011 # else no instance is alive
5015 if query.IQ_DISKUSAGE in self.requested_data:
5016 disk_usage = dict((inst.name,
5017 _ComputeDiskSize(inst.disk_template,
5018 [{constants.IDISK_SIZE: disk.size}
5019 for disk in inst.disks]))
5020 for inst in instance_list)
5024 if query.IQ_CONSOLE in self.requested_data:
5026 for inst in instance_list:
5027 if inst.name in live_data:
5028 # Instance is running
5029 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5031 consinfo[inst.name] = None
5032 assert set(consinfo.keys()) == set(instance_names)
5036 if query.IQ_NODES in self.requested_data:
5037 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5039 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5040 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5041 for uuid in set(map(operator.attrgetter("group"),
5047 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5048 disk_usage, offline_nodes, bad_nodes,
5049 live_data, wrongnode_inst, consinfo,
5053 class LUQuery(NoHooksLU):
5054 """Query for resources/items of a certain kind.
5057 # pylint: disable=W0142
5060 def CheckArguments(self):
5061 qcls = _GetQueryImplementation(self.op.what)
5063 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5065 def ExpandNames(self):
5066 self.impl.ExpandNames(self)
5068 def DeclareLocks(self, level):
5069 self.impl.DeclareLocks(self, level)
5071 def Exec(self, feedback_fn):
5072 return self.impl.NewStyleQuery(self)
5075 class LUQueryFields(NoHooksLU):
5076 """Query for resources/items of a certain kind.
5079 # pylint: disable=W0142
5082 def CheckArguments(self):
5083 self.qcls = _GetQueryImplementation(self.op.what)
5085 def ExpandNames(self):
5086 self.needed_locks = {}
5088 def Exec(self, feedback_fn):
5089 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5092 class LUNodeModifyStorage(NoHooksLU):
5093 """Logical unit for modifying a storage volume on a node.
5098 def CheckArguments(self):
5099 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5101 storage_type = self.op.storage_type
5104 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5106 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5107 " modified" % storage_type,
5110 diff = set(self.op.changes.keys()) - modifiable
5112 raise errors.OpPrereqError("The following fields can not be modified for"
5113 " storage units of type '%s': %r" %
5114 (storage_type, list(diff)),
5117 def ExpandNames(self):
5118 self.needed_locks = {
5119 locking.LEVEL_NODE: self.op.node_name,
5122 def Exec(self, feedback_fn):
5123 """Computes the list of nodes and their attributes.
5126 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5127 result = self.rpc.call_storage_modify(self.op.node_name,
5128 self.op.storage_type, st_args,
5129 self.op.name, self.op.changes)
5130 result.Raise("Failed to modify storage unit '%s' on %s" %
5131 (self.op.name, self.op.node_name))
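# For illustration (storage type and field names are hypothetical): with
# MODIFIABLE_STORAGE_FIELDS = {"lvm-pv": set(["allocatable"])}, a request
# changing {"allocatable": False} on an "lvm-pv" unit passes CheckArguments,
# while {"size": 2048} is rejected because "size" is not in the modifiable
# set for that storage type.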
5134 class LUNodeAdd(LogicalUnit):
5135 """Logical unit for adding node to the cluster.
5139 HTYPE = constants.HTYPE_NODE
5140 _NFLAGS = ["master_capable", "vm_capable"]
5142 def CheckArguments(self):
5143 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5144 # validate/normalize the node name
5145 self.hostname = netutils.GetHostname(name=self.op.node_name,
5146 family=self.primary_ip_family)
5147 self.op.node_name = self.hostname.name
5149 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5150 raise errors.OpPrereqError("Cannot readd the master node",
5153 if self.op.readd and self.op.group:
5154 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5155 " being readded", errors.ECODE_INVAL)
5157 def BuildHooksEnv(self):
5160 This will run on all nodes before, and on all nodes + the new node after.
5164 "OP_TARGET": self.op.node_name,
5165 "NODE_NAME": self.op.node_name,
5166 "NODE_PIP": self.op.primary_ip,
5167 "NODE_SIP": self.op.secondary_ip,
5168 "MASTER_CAPABLE": str(self.op.master_capable),
5169 "VM_CAPABLE": str(self.op.vm_capable),
5172 def BuildHooksNodes(self):
5173 """Build hooks nodes.
5176 # Exclude added node
5177 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5178 post_nodes = pre_nodes + [self.op.node_name, ]
5180 return (pre_nodes, post_nodes)
5182 def CheckPrereq(self):
5183 """Check prerequisites.
5186 - the new node is not already in the config
5188 - its parameters (single/dual homed) match the cluster
5190 Any errors are signaled by raising errors.OpPrereqError.
5194 hostname = self.hostname
5195 node = hostname.name
5196 primary_ip = self.op.primary_ip = hostname.ip
5197 if self.op.secondary_ip is None:
5198 if self.primary_ip_family == netutils.IP6Address.family:
5199 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5200 " IPv4 address must be given as secondary",
5202 self.op.secondary_ip = primary_ip
5204 secondary_ip = self.op.secondary_ip
5205 if not netutils.IP4Address.IsValid(secondary_ip):
5206 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5207 " address" % secondary_ip, errors.ECODE_INVAL)
5209 node_list = cfg.GetNodeList()
5210 if not self.op.readd and node in node_list:
5211 raise errors.OpPrereqError("Node %s is already in the configuration" %
5212 node, errors.ECODE_EXISTS)
5213 elif self.op.readd and node not in node_list:
5214 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5217 self.changed_primary_ip = False
5219 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5220 if self.op.readd and node == existing_node_name:
5221 if existing_node.secondary_ip != secondary_ip:
5222 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5223 " address configuration as before",
5225 if existing_node.primary_ip != primary_ip:
5226 self.changed_primary_ip = True
5230 if (existing_node.primary_ip == primary_ip or
5231 existing_node.secondary_ip == primary_ip or
5232 existing_node.primary_ip == secondary_ip or
5233 existing_node.secondary_ip == secondary_ip):
5234 raise errors.OpPrereqError("New node ip address(es) conflict with"
5235 " existing node %s" % existing_node.name,
5236 errors.ECODE_NOTUNIQUE)
5238 # After this 'if' block, None is no longer a valid value for the
5239 # _capable op attributes
5241 old_node = self.cfg.GetNodeInfo(node)
5242 assert old_node is not None, "Can't retrieve locked node %s" % node
5243 for attr in self._NFLAGS:
5244 if getattr(self.op, attr) is None:
5245 setattr(self.op, attr, getattr(old_node, attr))
5247 for attr in self._NFLAGS:
5248 if getattr(self.op, attr) is None:
5249 setattr(self.op, attr, True)
5251 if self.op.readd and not self.op.vm_capable:
5252 pri, sec = cfg.GetNodeInstances(node)
5254 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5255 " flag set to false, but it already holds"
5256 " instances" % node,
5259 # check that the type of the node (single versus dual homed) is the
5260 # same as for the master
5261 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5262 master_singlehomed = myself.secondary_ip == myself.primary_ip
5263 newbie_singlehomed = secondary_ip == primary_ip
5264 if master_singlehomed != newbie_singlehomed:
5265 if master_singlehomed:
5266 raise errors.OpPrereqError("The master has no secondary ip but the"
5267 " new node has one",
5270 raise errors.OpPrereqError("The master has a secondary ip but the"
5271 " new node doesn't have one",
5274 # checks reachability
5275 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5276 raise errors.OpPrereqError("Node not reachable by ping",
5277 errors.ECODE_ENVIRON)
5279 if not newbie_singlehomed:
5280 # check reachability from my secondary ip to newbie's secondary ip
5281 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5282 source=myself.secondary_ip):
5283 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5284 " based ping to node daemon port",
5285 errors.ECODE_ENVIRON)
5292 if self.op.master_capable:
5293 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5295 self.master_candidate = False
5298 self.new_node = old_node
5300 node_group = cfg.LookupNodeGroup(self.op.group)
5301 self.new_node = objects.Node(name=node,
5302 primary_ip=primary_ip,
5303 secondary_ip=secondary_ip,
5304 master_candidate=self.master_candidate,
5305 offline=False, drained=False,
5308 if self.op.ndparams:
5309 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5311 def Exec(self, feedback_fn):
5312 """Adds the new node to the cluster.
5315 new_node = self.new_node
5316 node = new_node.name
5318 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5322 # We are adding a new node, so we assume it's powered
5322 new_node.powered = True
5324 # for re-adds, reset the offline/drained/master-candidate flags;
5325 # we need to reset here, otherwise offline would prevent RPC calls
5326 # later in the procedure; this also means that if the re-add
5327 # fails, we are left with a non-offlined, broken node
5329 new_node.drained = new_node.offline = False # pylint: disable=W0201
5330 self.LogInfo("Readding a node, the offline/drained flags were reset")
5331 # if we demote the node, we do cleanup later in the procedure
5332 new_node.master_candidate = self.master_candidate
5333 if self.changed_primary_ip:
5334 new_node.primary_ip = self.op.primary_ip
5336 # copy the master/vm_capable flags
5337 for attr in self._NFLAGS:
5338 setattr(new_node, attr, getattr(self.op, attr))
5340 # notify the user about any possible mc promotion
5341 if new_node.master_candidate:
5342 self.LogInfo("Node will be a master candidate")
5344 if self.op.ndparams:
5345 new_node.ndparams = self.op.ndparams
5347 new_node.ndparams = {}
5349 # check connectivity
5350 result = self.rpc.call_version([node])[node]
5351 result.Raise("Can't get version information from node %s" % node)
5352 if constants.PROTOCOL_VERSION == result.payload:
5353 logging.info("Communication to node %s fine, sw version %s match",
5354 node, result.payload)
5356 raise errors.OpExecError("Version mismatch master version %s,"
5357 " node version %s" %
5358 (constants.PROTOCOL_VERSION, result.payload))
5360 # Add node to our /etc/hosts, and add key to known_hosts
5361 if self.cfg.GetClusterInfo().modify_etc_hosts:
5362 master_node = self.cfg.GetMasterNode()
5363 result = self.rpc.call_etc_hosts_modify(master_node,
5364 constants.ETC_HOSTS_ADD,
5367 result.Raise("Can't update hosts file with new host data")
5369 if new_node.secondary_ip != new_node.primary_ip:
5370 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5373 node_verify_list = [self.cfg.GetMasterNode()]
5374 node_verify_param = {
5375 constants.NV_NODELIST: ([node], {}),
5376 # TODO: do a node-net-test as well?
5379 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5380 self.cfg.GetClusterName())
5381 for verifier in node_verify_list:
5382 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5383 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5385 for failed in nl_payload:
5386 feedback_fn("ssh/hostname verification failed"
5387 " (checking from %s): %s" %
5388 (verifier, nl_payload[failed]))
5389 raise errors.OpExecError("ssh/hostname verification failed")
5392 _RedistributeAncillaryFiles(self)
5393 self.context.ReaddNode(new_node)
5394 # make sure we redistribute the config
5395 self.cfg.Update(new_node, feedback_fn)
5396 # and make sure the new node will not have old files around
5397 if not new_node.master_candidate:
5398 result = self.rpc.call_node_demote_from_mc(new_node.name)
5399 msg = result.fail_msg
5401 self.LogWarning("Node failed to demote itself from master"
5402 " candidate status: %s" % msg)
5404 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5405 additional_vm=self.op.vm_capable)
5406 self.context.AddNode(new_node, self.proc.GetECId())
5409 class LUNodeSetParams(LogicalUnit):
5410 """Modifies the parameters of a node.
5412 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5413 to the node role (as _ROLE_*)
5414 @cvar _R2F: a dictionary from node role to tuples of flags
5415 @cvar _FLAGS: a list of attribute names corresponding to the flags
5418 HPATH = "node-modify"
5419 HTYPE = constants.HTYPE_NODE
5421 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5423 (True, False, False): _ROLE_CANDIDATE,
5424 (False, True, False): _ROLE_DRAINED,
5425 (False, False, True): _ROLE_OFFLINE,
5426 (False, False, False): _ROLE_REGULAR,
5428 _R2F = dict((v, k) for k, v in _F2R.items())
5429 _FLAGS = ["master_candidate", "drained", "offline"]
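# Example of the role mapping defined above: the flag tuple
# (mc, drained, offline) == (True, False, False) maps to _ROLE_CANDIDATE,
# and _R2F[_ROLE_OFFLINE] gives back (False, False, True); at most one of
# the three flags may be set, and the all-False tuple is _ROLE_REGULAR.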
5431 def CheckArguments(self):
5432 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5433 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5434 self.op.master_capable, self.op.vm_capable,
5435 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5437 if all_mods.count(None) == len(all_mods):
5438 raise errors.OpPrereqError("Please pass at least one modification",
5440 if all_mods.count(True) > 1:
5441 raise errors.OpPrereqError("Can't set the node into more than one"
5442 " state at the same time",
5445 # Boolean value that tells us whether we might be demoting from MC
5446 self.might_demote = (self.op.master_candidate == False or
5447 self.op.offline == True or
5448 self.op.drained == True or
5449 self.op.master_capable == False)
5451 if self.op.secondary_ip:
5452 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5453 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5454 " address" % self.op.secondary_ip,
5457 self.lock_all = self.op.auto_promote and self.might_demote
5458 self.lock_instances = self.op.secondary_ip is not None
5460 def _InstanceFilter(self, instance):
5461 """Filter for getting affected instances.
5464 return (instance.disk_template in constants.DTS_INT_MIRROR and
5465 self.op.node_name in instance.all_nodes)
5467 def ExpandNames(self):
5469 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5471 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5473 # Since modifying a node can have severe effects on currently running
5474 # operations the resource lock is at least acquired in shared mode
5475 self.needed_locks[locking.LEVEL_NODE_RES] = \
5476 self.needed_locks[locking.LEVEL_NODE]
5478 # Get node resource and instance locks in shared mode; they are not used
5479 # for anything but read-only access
5480 self.share_locks[locking.LEVEL_NODE_RES] = 1
5481 self.share_locks[locking.LEVEL_INSTANCE] = 1
5483 if self.lock_instances:
5484 self.needed_locks[locking.LEVEL_INSTANCE] = \
5485 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5487 def BuildHooksEnv(self):
5490 This runs on the master node.
5494 "OP_TARGET": self.op.node_name,
5495 "MASTER_CANDIDATE": str(self.op.master_candidate),
5496 "OFFLINE": str(self.op.offline),
5497 "DRAINED": str(self.op.drained),
5498 "MASTER_CAPABLE": str(self.op.master_capable),
5499 "VM_CAPABLE": str(self.op.vm_capable),
5502 def BuildHooksNodes(self):
5503 """Build hooks nodes.
5506 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5509 def CheckPrereq(self):
5510 """Check prerequisites.
5512 This only checks the instance list against the existing names.
5515 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5517 if self.lock_instances:
5518 affected_instances = \
5519 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5521 # Verify instance locks
5522 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5523 wanted_instances = frozenset(affected_instances.keys())
5524 if wanted_instances - owned_instances:
5525 raise errors.OpPrereqError("Instances affected by changing node %s's"
5526 " secondary IP address have changed since"
5527 " locks were acquired, wanted '%s', have"
5528 " '%s'; retry the operation" %
5530 utils.CommaJoin(wanted_instances),
5531 utils.CommaJoin(owned_instances)),
5534 affected_instances = None
5536 if (self.op.master_candidate is not None or
5537 self.op.drained is not None or
5538 self.op.offline is not None):
5539 # we can't change the master's node flags
5540 if self.op.node_name == self.cfg.GetMasterNode():
5541 raise errors.OpPrereqError("The master role can be changed"
5542 " only via master-failover",
5545 if self.op.master_candidate and not node.master_capable:
5546 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5547 " it a master candidate" % node.name,
5550 if self.op.vm_capable == False:
5551 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5553 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5554 " the vm_capable flag" % node.name,
5557 if node.master_candidate and self.might_demote and not self.lock_all:
5558 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5559 # check if after removing the current node, we're missing master
5561 (mc_remaining, mc_should, _) = \
5562 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5563 if mc_remaining < mc_should:
5564 raise errors.OpPrereqError("Not enough master candidates, please"
5565 " pass auto promote option to allow"
5566 " promotion", errors.ECODE_STATE)
5568 self.old_flags = old_flags = (node.master_candidate,
5569 node.drained, node.offline)
5570 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5571 self.old_role = old_role = self._F2R[old_flags]
5573 # Check for ineffective changes
5574 for attr in self._FLAGS:
5575 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5576 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5577 setattr(self.op, attr, None)
5579 # Past this point, any flag change to False means a transition
5580 # away from the respective state, as only real changes are kept
5582 # TODO: We might query the real power state if it supports OOB
5583 if _SupportsOob(self.cfg, node):
5584 if self.op.offline is False and not (node.powered or
5585 self.op.powered == True):
5586 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5587 " offline status can be reset") %
5589 elif self.op.powered is not None:
5590 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5591 " as it does not support out-of-band"
5592 " handling") % self.op.node_name)
5594 # If we're being deofflined/drained, we'll MC ourself if needed
5595 if (self.op.drained == False or self.op.offline == False or
5596 (self.op.master_capable and not node.master_capable)):
5597 if _DecideSelfPromotion(self):
5598 self.op.master_candidate = True
5599 self.LogInfo("Auto-promoting node to master candidate")
5601 # If we're no longer master capable, we'll demote ourselves from MC
5602 if self.op.master_capable == False and node.master_candidate:
5603 self.LogInfo("Demoting from master candidate")
5604 self.op.master_candidate = False
5607 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5608 if self.op.master_candidate:
5609 new_role = self._ROLE_CANDIDATE
5610 elif self.op.drained:
5611 new_role = self._ROLE_DRAINED
5612 elif self.op.offline:
5613 new_role = self._ROLE_OFFLINE
5614 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5615 # False is still in new flags, which means we're un-setting (the
5616 # offline/drained/master-candidate etc. flag)
5617 new_role = self._ROLE_REGULAR
5618 else: # no new flags, nothing, keep old role
5621 self.new_role = new_role
5623 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5624 # Trying to transition out of offline status
5625 # TODO: Use standard RPC runner, but make sure it works when the node is
5626 # still marked offline
5627 result = rpc.BootstrapRunner().call_version([node.name])[node.name]
5629 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5630 " to report its version: %s" %
5631 (node.name, result.fail_msg),
5634 self.LogWarning("Transitioning node from offline to online state"
5635 " without using re-add. Please make sure the node"
5638 if self.op.secondary_ip:
5639 # Ok even without locking, because this can't be changed by any LU
5640 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5641 master_singlehomed = master.secondary_ip == master.primary_ip
5642 if master_singlehomed and self.op.secondary_ip:
5643 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5644 " homed cluster", errors.ECODE_INVAL)
5646 assert not (frozenset(affected_instances) -
5647 self.owned_locks(locking.LEVEL_INSTANCE))
5649 if node.offline:
5650 if affected_instances:
5651 raise errors.OpPrereqError("Cannot change secondary IP address:"
5652 " offline node has instances (%s)"
5653 " configured to use it" %
5654 utils.CommaJoin(affected_instances.keys()))
5655 else:
5656 # On online nodes, check that no instances are running, and that
5657 # the node has the new ip and we can reach it.
5658 for instance in affected_instances.values():
5659 _CheckInstanceState(self, instance, INSTANCE_DOWN,
5660 msg="cannot change secondary ip")
5662 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5663 if master.name != node.name:
5664 # check reachability from master secondary ip to new secondary ip
5665 if not netutils.TcpPing(self.op.secondary_ip,
5666 constants.DEFAULT_NODED_PORT,
5667 source=master.secondary_ip):
5668 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5669 " based ping to node daemon port",
5670 errors.ECODE_ENVIRON)
5672 if self.op.ndparams:
5673 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5674 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5675 self.new_ndparams = new_ndparams
5677 if self.op.hv_state:
5678 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
5679 self.node.hv_state_static)
5681 if self.op.disk_state:
5682 self.new_disk_state = \
5683 _MergeAndVerifyDiskState(self.op.disk_state,
5684 self.node.disk_state_static)
5686 def Exec(self, feedback_fn):
5687 """Modifies a node.
5689 """
5690 node = self.node
5691 old_role = self.old_role
5692 new_role = self.new_role
5694 result = []
5696 if self.op.ndparams:
5697 node.ndparams = self.new_ndparams
5699 if self.op.powered is not None:
5700 node.powered = self.op.powered
5702 if self.op.hv_state:
5703 node.hv_state_static = self.new_hv_state
5705 if self.op.disk_state:
5706 node.disk_state_static = self.new_disk_state
5708 for attr in ["master_capable", "vm_capable"]:
5709 val = getattr(self.op, attr)
5710 if val is not None:
5711 setattr(node, attr, val)
5712 result.append((attr, str(val)))
5714 if new_role != old_role:
5715 # Tell the node to demote itself, if no longer MC and not offline
5716 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5717 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5718 if msg:
5719 self.LogWarning("Node failed to demote itself: %s", msg)
5721 new_flags = self._R2F[new_role]
5722 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5723 if of != nf:
5724 result.append((desc, str(nf)))
5725 (node.master_candidate, node.drained, node.offline) = new_flags
5727 # we locked all nodes, we adjust the CP before updating this node
5728 if self.lock_all:
5729 _AdjustCandidatePool(self, [node.name])
5731 if self.op.secondary_ip:
5732 node.secondary_ip = self.op.secondary_ip
5733 result.append(("secondary_ip", self.op.secondary_ip))
5735 # this will trigger configuration file update, if needed
5736 self.cfg.Update(node, feedback_fn)
5738 # this will trigger job queue propagation or cleanup if the mc
5739 # flag changed
5740 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5741 self.context.ReaddNode(node)
5743 return result
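# "result" collects the (parameter, new value) pairs that were actually
# changed, e.g. [("master_candidate", "True"), ("secondary_ip", "192.0.2.10")]
# (illustrative values); this list is what the opcode reports back.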
5746 class LUNodePowercycle(NoHooksLU):
5747 """Powercycles a node.
5752 def CheckArguments(self):
5753 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5754 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5755 raise errors.OpPrereqError("The node is the master and the force"
5756 " parameter was not set",
5757 errors.ECODE_INVAL)
5759 def ExpandNames(self):
5760 """Locking for PowercycleNode.
5762 This is a last-resort option and shouldn't block on other
5763 jobs. Therefore, we grab no locks.
5766 self.needed_locks = {}
5768 def Exec(self, feedback_fn):
5772 result = self.rpc.call_node_powercycle(self.op.node_name,
5773 self.cfg.GetHypervisorType())
5774 result.Raise("Failed to schedule the reboot")
5775 return result.payload
5778 class LUClusterQuery(NoHooksLU):
5779 """Query cluster configuration.
5784 def ExpandNames(self):
5785 self.needed_locks = {}
5787 def Exec(self, feedback_fn):
5788 """Return cluster config.
5791 cluster = self.cfg.GetClusterInfo()
5792 os_hvp = {}
5794 # Filter just for enabled hypervisors
5795 for os_name, hv_dict in cluster.os_hvp.items():
5796 os_hvp[os_name] = {}
5797 for hv_name, hv_params in hv_dict.items():
5798 if hv_name in cluster.enabled_hypervisors:
5799 os_hvp[os_name][hv_name] = hv_params
5801 # Convert ip_family to ip_version
5802 primary_ip_version = constants.IP4_VERSION
5803 if cluster.primary_ip_family == netutils.IP6Address.family:
5804 primary_ip_version = constants.IP6_VERSION
5806 result = {
5807 "software_version": constants.RELEASE_VERSION,
5808 "protocol_version": constants.PROTOCOL_VERSION,
5809 "config_version": constants.CONFIG_VERSION,
5810 "os_api_version": max(constants.OS_API_VERSIONS),
5811 "export_version": constants.EXPORT_VERSION,
5812 "architecture": (platform.architecture()[0], platform.machine()),
5813 "name": cluster.cluster_name,
5814 "master": cluster.master_node,
5815 "default_hypervisor": cluster.primary_hypervisor,
5816 "enabled_hypervisors": cluster.enabled_hypervisors,
5817 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
5818 for hypervisor_name in cluster.enabled_hypervisors]),
5819 "os_hvp": os_hvp,
5820 "beparams": cluster.beparams,
5821 "osparams": cluster.osparams,
5822 "nicparams": cluster.nicparams,
5823 "ndparams": cluster.ndparams,
5824 "candidate_pool_size": cluster.candidate_pool_size,
5825 "master_netdev": cluster.master_netdev,
5826 "master_netmask": cluster.master_netmask,
5827 "use_external_mip_script": cluster.use_external_mip_script,
5828 "volume_group_name": cluster.volume_group_name,
5829 "drbd_usermode_helper": cluster.drbd_usermode_helper,
5830 "file_storage_dir": cluster.file_storage_dir,
5831 "shared_file_storage_dir": cluster.shared_file_storage_dir,
5832 "maintain_node_health": cluster.maintain_node_health,
5833 "ctime": cluster.ctime,
5834 "mtime": cluster.mtime,
5835 "uuid": cluster.uuid,
5836 "tags": list(cluster.GetTags()),
5837 "uid_pool": cluster.uid_pool,
5838 "default_iallocator": cluster.default_iallocator,
5839 "reserved_lvs": cluster.reserved_lvs,
5840 "primary_ip_version": primary_ip_version,
5841 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
5842 "hidden_os": cluster.hidden_os,
5843 "blacklisted_os": cluster.blacklisted_os,
5844 }
5846 return result
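# This dictionary is the OpClusterQuery result; typical consumers include
# "gnt-cluster info" and the RAPI cluster-info resource (illustrative of
# common callers, not an exhaustive list).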
5849 class LUClusterConfigQuery(NoHooksLU):
5850 """Return configuration values.
5854 _FIELDS_DYNAMIC = utils.FieldSet()
5855 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
5856 "watcher_pause", "volume_group_name")
5858 def CheckArguments(self):
5859 _CheckOutputFields(static=self._FIELDS_STATIC,
5860 dynamic=self._FIELDS_DYNAMIC,
5861 selected=self.op.output_fields)
5863 def ExpandNames(self):
5864 self.needed_locks = {}
5866 def Exec(self, feedback_fn):
5867 """Dump a representation of the cluster config to the standard output.
5869 """
5870 values = []
5871 for field in self.op.output_fields:
5872 if field == "cluster_name":
5873 entry = self.cfg.GetClusterName()
5874 elif field == "master_node":
5875 entry = self.cfg.GetMasterNode()
5876 elif field == "drain_flag":
5877 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
5878 elif field == "watcher_pause":
5879 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
5880 elif field == "volume_group_name":
5881 entry = self.cfg.GetVGName()
5882 else:
5883 raise errors.ParameterError(field)
5884 values.append(entry)
5886 return values
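# Illustrative example: output_fields=["cluster_name", "volume_group_name"]
# could yield ["cluster.example.com", "xenvg"]; values are appended in the
# same order as the requested fields.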
5888 class LUInstanceActivateDisks(NoHooksLU):
5889 """Bring up an instance's disks.
5894 def ExpandNames(self):
5895 self._ExpandAndLockInstance()
5896 self.needed_locks[locking.LEVEL_NODE] = []
5897 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5899 def DeclareLocks(self, level):
5900 if level == locking.LEVEL_NODE:
5901 self._LockInstancesNodes()
5903 def CheckPrereq(self):
5904 """Check prerequisites.
5906 This checks that the instance is in the cluster.
5909 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5910 assert self.instance is not None, \
5911 "Cannot retrieve locked instance %s" % self.op.instance_name
5912 _CheckNodeOnline(self, self.instance.primary_node)
5914 def Exec(self, feedback_fn):
5915 """Activate the disks.
5918 disks_ok, disks_info = \
5919 _AssembleInstanceDisks(self, self.instance,
5920 ignore_size=self.op.ignore_size)
5921 if not disks_ok:
5922 raise errors.OpExecError("Cannot activate block devices")
5924 return disks_info
5927 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5928 ignore_size=False):
5929 """Prepare the block devices for an instance.
5931 This sets up the block devices on all nodes.
5933 @type lu: L{LogicalUnit}
5934 @param lu: the logical unit on whose behalf we execute
5935 @type instance: L{objects.Instance}
5936 @param instance: the instance for whose disks we assemble
5937 @type disks: list of L{objects.Disk} or None
5938 @param disks: which disks to assemble (or all, if None)
5939 @type ignore_secondaries: boolean
5940 @param ignore_secondaries: if true, errors on secondary nodes
5941 won't result in an error return from the function
5942 @type ignore_size: boolean
5943 @param ignore_size: if true, the current known size of the disk
5944 will not be used during the disk activation, useful for cases
5945 when the size is wrong
5946 @return: False if the operation failed, otherwise a list of
5947 (host, instance_visible_name, node_visible_name)
5948 with the mapping from node devices to instance devices
5950 """
5951 device_info = []
5952 disks_ok = True
5953 iname = instance.name
5954 disks = _ExpandCheckDisks(instance, disks)
5956 # With the two passes mechanism we try to reduce the window of
5957 # opportunity for the race condition of switching DRBD to primary
5958 # before handshaking occured, but we do not eliminate it
5960 # The proper fix would be to wait (with some limits) until the
5961 # connection has been made and drbd transitions from WFConnection
5962 # into any other network-connected state (Connected, SyncTarget,
5963 # SyncSource, etc.)
5965 # 1st pass, assemble on all nodes in secondary mode
5966 for idx, inst_disk in enumerate(disks):
5967 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5968 if ignore_size:
5969 node_disk = node_disk.Copy()
5970 node_disk.UnsetSize()
5971 lu.cfg.SetDiskID(node_disk, node)
5972 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5973 msg = result.fail_msg
5974 if msg:
5975 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5976 " (is_primary=False, pass=1): %s",
5977 inst_disk.iv_name, node, msg)
5978 if not ignore_secondaries:
5979 disks_ok = False
5981 # FIXME: race condition on drbd migration to primary
5983 # 2nd pass, do only the primary node
5984 for idx, inst_disk in enumerate(disks):
5985 dev_path = None
5987 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5988 if node != instance.primary_node:
5989 continue
5990 if ignore_size:
5991 node_disk = node_disk.Copy()
5992 node_disk.UnsetSize()
5993 lu.cfg.SetDiskID(node_disk, node)
5994 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5995 msg = result.fail_msg
5996 if msg:
5997 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5998 " (is_primary=True, pass=2): %s",
5999 inst_disk.iv_name, node, msg)
6000 disks_ok = False
6001 else:
6002 dev_path = result.payload
6004 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6006 # leave the disks configured for the primary node
6007 # this is a workaround that would be fixed better by
6008 # improving the logical/physical id handling
6009 for disk in disks:
6010 lu.cfg.SetDiskID(disk, instance.primary_node)
6012 return disks_ok, device_info
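# Each device_info entry is a (node, iv_name, dev_path) tuple, e.g.
# ("node1.example.com", "disk/0", "/dev/drbd0") (illustrative); dev_path is
# the payload returned by the pass-2 blockdev_assemble call on the primary.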
6015 def _StartInstanceDisks(lu, instance, force):
6016 """Start the disks of an instance.
6019 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6020 ignore_secondaries=force)
6021 if not disks_ok:
6022 _ShutdownInstanceDisks(lu, instance)
6023 if force is not None and not force:
6024 lu.proc.LogWarning("", hint="If the message above refers to a"
6025 " secondary node,"
6026 " you can retry the operation using '--force'.")
6027 raise errors.OpExecError("Disk consistency error")
6030 class LUInstanceDeactivateDisks(NoHooksLU):
6031 """Shutdown an instance's disks.
6036 def ExpandNames(self):
6037 self._ExpandAndLockInstance()
6038 self.needed_locks[locking.LEVEL_NODE] = []
6039 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6041 def DeclareLocks(self, level):
6042 if level == locking.LEVEL_NODE:
6043 self._LockInstancesNodes()
6045 def CheckPrereq(self):
6046 """Check prerequisites.
6048 This checks that the instance is in the cluster.
6051 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6052 assert self.instance is not None, \
6053 "Cannot retrieve locked instance %s" % self.op.instance_name
6055 def Exec(self, feedback_fn):
6056 """Deactivate the disks
6059 instance = self.instance
6060 if self.op.force:
6061 _ShutdownInstanceDisks(self, instance)
6062 else:
6063 _SafeShutdownInstanceDisks(self, instance)
6066 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6067 """Shutdown block devices of an instance.
6069 This function checks if an instance is running, before calling
6070 _ShutdownInstanceDisks.
6073 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6074 _ShutdownInstanceDisks(lu, instance, disks=disks)
6077 def _ExpandCheckDisks(instance, disks):
6078 """Return the instance disks selected by the disks list
6080 @type disks: list of L{objects.Disk} or None
6081 @param disks: selected disks
6082 @rtype: list of L{objects.Disk}
6083 @return: selected instance disks to act on
6085 """
6086 if disks is None:
6087 return instance.disks
6088 else:
6089 if not set(disks).issubset(instance.disks):
6090 raise errors.ProgrammerError("Can only act on disks belonging to the"
6091 " target instance")
6092 return disks
6095 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6096 """Shutdown block devices of an instance.
6098 This does the shutdown on all nodes of the instance.
6100 If the ignore_primary is false, errors on the primary node are
6101 ignored.
6103 """
6104 all_result = True
6105 disks = _ExpandCheckDisks(instance, disks)
6107 for disk in disks:
6108 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6109 lu.cfg.SetDiskID(top_disk, node)
6110 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
6111 msg = result.fail_msg
6112 if msg:
6113 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6114 disk.iv_name, node, msg)
6115 if ((node == instance.primary_node and not ignore_primary) or
6116 (node != instance.primary_node and not result.offline)):
6117 all_result = False
6119 return all_result
6121 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6122 """Checks if a node has enough free memory.
6124 This function checks if a given node has the needed amount of free
6125 memory. In case the node has less memory or we cannot get the
6126 information from the node, this function raises an OpPrereqError
6127 exception.
6129 @type lu: C{LogicalUnit}
6130 @param lu: a logical unit from which we get configuration data
6131 @type node: C{str}
6132 @param node: the node to check
6133 @type reason: C{str}
6134 @param reason: string to use in the error message
6135 @type requested: C{int}
6136 @param requested: the amount of memory in MiB to check for
6137 @type hypervisor_name: C{str}
6138 @param hypervisor_name: the hypervisor to ask for memory stats
6139 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6140 we cannot check the node
6143 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6144 nodeinfo[node].Raise("Can't get data from node %s" % node,
6145 prereq=True, ecode=errors.ECODE_ENVIRON)
6146 (_, _, (hv_info, )) = nodeinfo[node].payload
6148 free_mem = hv_info.get("memory_free", None)
6149 if not isinstance(free_mem, int):
6150 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6151 " was '%s'" % (node, free_mem),
6152 errors.ECODE_ENVIRON)
6153 if requested > free_mem:
6154 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6155 " needed %s MiB, available %s MiB" %
6156 (node, reason, requested, free_mem),
6157 errors.ECODE_NORES)
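# Typical call, as used by the instance start/failover paths below
# (illustrative):
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MAXMEM], instance.hypervisor)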
6160 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6161 """Checks if nodes have enough free disk space in the all VGs.
6163 This function checks if all given nodes have the needed amount of
6164 free disk. In case any node has less disk or we cannot get the
6165 information from the node, this function raises an OpPrereqError
6166 exception.
6168 @type lu: C{LogicalUnit}
6169 @param lu: a logical unit from which we get configuration data
6170 @type nodenames: C{list}
6171 @param nodenames: the list of node names to check
6172 @type req_sizes: C{dict}
6173 @param req_sizes: the hash of vg and corresponding amount of disk in
6174 MiB to check for
6175 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6176 or we cannot check the node
6179 for vg, req_size in req_sizes.items():
6180 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
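# req_sizes maps a volume group name to the required space in MiB, e.g.
# {"xenvg": 10240} to require 10 GiB in the "xenvg" VG (illustrative values).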
6183 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6184 """Checks if nodes have enough free disk space in the specified VG.
6186 This function checks if all given nodes have the needed amount of
6187 free disk. In case any node has less disk or we cannot get the
6188 information from the node, this function raises an OpPrereqError
6189 exception.
6191 @type lu: C{LogicalUnit}
6192 @param lu: a logical unit from which we get configuration data
6193 @type nodenames: C{list}
6194 @param nodenames: the list of node names to check
6195 @type vg: C{str}
6196 @param vg: the volume group to check
6197 @type requested: C{int}
6198 @param requested: the amount of disk in MiB to check for
6199 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6200 or we cannot check the node
6203 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6204 for node in nodenames:
6205 info = nodeinfo[node]
6206 info.Raise("Cannot get current information from node %s" % node,
6207 prereq=True, ecode=errors.ECODE_ENVIRON)
6208 (_, (vg_info, ), _) = info.payload
6209 vg_free = vg_info.get("vg_free", None)
6210 if not isinstance(vg_free, int):
6211 raise errors.OpPrereqError("Can't compute free disk space on node"
6212 " %s for vg %s, result was '%s'" %
6213 (node, vg, vg_free), errors.ECODE_ENVIRON)
6214 if requested > vg_free:
6215 raise errors.OpPrereqError("Not enough disk space on target node %s"
6216 " vg %s: required %d MiB, available %d MiB" %
6217 (node, vg, requested, vg_free),
6218 errors.ECODE_NORES)
6221 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6222 """Checks if nodes have enough physical CPUs
6224 This function checks if all given nodes have the needed number of
6225 physical CPUs. In case any node has less CPUs or we cannot get the
6226 information from the node, this function raises an OpPrereqError
6229 @type lu: C{LogicalUnit}
6230 @param lu: a logical unit from which we get configuration data
6231 @type nodenames: C{list}
6232 @param nodenames: the list of node names to check
6233 @type requested: C{int}
6234 @param requested: the minimum acceptable number of physical CPUs
6235 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6236 or we cannot check the node
6239 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6240 for node in nodenames:
6241 info = nodeinfo[node]
6242 info.Raise("Cannot get current information from node %s" % node,
6243 prereq=True, ecode=errors.ECODE_ENVIRON)
6244 (_, _, (hv_info, )) = info.payload
6245 num_cpus = hv_info.get("cpu_total", None)
6246 if not isinstance(num_cpus, int):
6247 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6248 " on node %s, result was '%s'" %
6249 (node, num_cpus), errors.ECODE_ENVIRON)
6250 if requested > num_cpus:
6251 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6252 "required" % (node, num_cpus, requested),
6253 errors.ECODE_NORES)
6256 class LUInstanceStartup(LogicalUnit):
6257 """Starts an instance.
6260 HPATH = "instance-start"
6261 HTYPE = constants.HTYPE_INSTANCE
6264 def CheckArguments(self):
6266 if self.op.beparams:
6267 # fill the beparams dict
6268 objects.UpgradeBeParams(self.op.beparams)
6269 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6271 def ExpandNames(self):
6272 self._ExpandAndLockInstance()
6274 def BuildHooksEnv(self):
6277 This runs on master, primary and secondary nodes of the instance.
6280 env = {
6281 "FORCE": self.op.force,
6282 }
6284 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6286 return env
6288 def BuildHooksNodes(self):
6289 """Build hooks nodes.
6292 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6293 return (nl, nl)
6295 def CheckPrereq(self):
6296 """Check prerequisites.
6298 This checks that the instance is in the cluster.
6301 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6302 assert self.instance is not None, \
6303 "Cannot retrieve locked instance %s" % self.op.instance_name
6306 if self.op.hvparams:
6307 # check hypervisor parameter syntax (locally)
6308 cluster = self.cfg.GetClusterInfo()
6309 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6310 filled_hvp = cluster.FillHV(instance)
6311 filled_hvp.update(self.op.hvparams)
6312 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6313 hv_type.CheckParameterSyntax(filled_hvp)
6314 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6316 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6318 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6320 if self.primary_offline and self.op.ignore_offline_nodes:
6321 self.proc.LogWarning("Ignoring offline primary node")
6323 if self.op.hvparams or self.op.beparams:
6324 self.proc.LogWarning("Overridden parameters are ignored")
6325 else:
6326 _CheckNodeOnline(self, instance.primary_node)
6328 bep = self.cfg.GetClusterInfo().FillBE(instance)
6330 # check bridges existence
6331 _CheckInstanceBridgesExist(self, instance)
6333 remote_info = self.rpc.call_instance_info(instance.primary_node,
6334 instance.name,
6335 instance.hypervisor)
6336 remote_info.Raise("Error checking node %s" % instance.primary_node,
6337 prereq=True, ecode=errors.ECODE_ENVIRON)
6338 if not remote_info.payload: # not running already
6339 _CheckNodeFreeMemory(self, instance.primary_node,
6340 "starting instance %s" % instance.name,
6341 bep[constants.BE_MAXMEM], instance.hypervisor)
6343 def Exec(self, feedback_fn):
6344 """Start the instance.
6347 instance = self.instance
6348 force = self.op.force
6350 if not self.op.no_remember:
6351 self.cfg.MarkInstanceUp(instance.name)
6353 if self.primary_offline:
6354 assert self.op.ignore_offline_nodes
6355 self.proc.LogInfo("Primary node offline, marked instance as started")
6356 else:
6357 node_current = instance.primary_node
6359 _StartInstanceDisks(self, instance, force)
6361 result = \
6362 self.rpc.call_instance_start(node_current,
6363 (instance, self.op.hvparams,
6364 self.op.beparams),
6365 self.op.startup_paused)
6366 msg = result.fail_msg
6367 if msg:
6368 _ShutdownInstanceDisks(self, instance)
6369 raise errors.OpExecError("Could not start instance: %s" % msg)
6372 class LUInstanceReboot(LogicalUnit):
6373 """Reboot an instance.
6376 HPATH = "instance-reboot"
6377 HTYPE = constants.HTYPE_INSTANCE
6380 def ExpandNames(self):
6381 self._ExpandAndLockInstance()
6383 def BuildHooksEnv(self):
6386 This runs on master, primary and secondary nodes of the instance.
6389 env = {
6390 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6391 "REBOOT_TYPE": self.op.reboot_type,
6392 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6393 }
6395 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6397 return env
6399 def BuildHooksNodes(self):
6400 """Build hooks nodes.
6403 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6404 return (nl, nl)
6406 def CheckPrereq(self):
6407 """Check prerequisites.
6409 This checks that the instance is in the cluster.
6412 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6413 assert self.instance is not None, \
6414 "Cannot retrieve locked instance %s" % self.op.instance_name
6415 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6416 _CheckNodeOnline(self, instance.primary_node)
6418 # check bridges existence
6419 _CheckInstanceBridgesExist(self, instance)
6421 def Exec(self, feedback_fn):
6422 """Reboot the instance.
6425 instance = self.instance
6426 ignore_secondaries = self.op.ignore_secondaries
6427 reboot_type = self.op.reboot_type
6429 remote_info = self.rpc.call_instance_info(instance.primary_node,
6430 instance.name,
6431 instance.hypervisor)
6432 remote_info.Raise("Error checking node %s" % instance.primary_node)
6433 instance_running = bool(remote_info.payload)
6435 node_current = instance.primary_node
6437 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6438 constants.INSTANCE_REBOOT_HARD]:
6439 for disk in instance.disks:
6440 self.cfg.SetDiskID(disk, node_current)
6441 result = self.rpc.call_instance_reboot(node_current, instance,
6442 reboot_type,
6443 self.op.shutdown_timeout)
6444 result.Raise("Could not reboot instance")
6445 else:
6446 if instance_running:
6447 result = self.rpc.call_instance_shutdown(node_current, instance,
6448 self.op.shutdown_timeout)
6449 result.Raise("Could not shutdown instance for full reboot")
6450 _ShutdownInstanceDisks(self, instance)
6451 else:
6452 self.LogInfo("Instance %s was already stopped, starting now",
6453 instance.name)
6454 _StartInstanceDisks(self, instance, ignore_secondaries)
6455 result = self.rpc.call_instance_start(node_current,
6456 (instance, None, None), False)
6457 msg = result.fail_msg
6458 if msg:
6459 _ShutdownInstanceDisks(self, instance)
6460 raise errors.OpExecError("Could not start instance for"
6461 " full reboot: %s" % msg)
6463 self.cfg.MarkInstanceUp(instance.name)
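# Soft and hard reboots of a running instance are delegated to the hypervisor
# via call_instance_reboot; every other case (full reboot, or instance not
# running) is handled above as an explicit shutdown (if needed) plus a start.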
6466 class LUInstanceShutdown(LogicalUnit):
6467 """Shutdown an instance.
6470 HPATH = "instance-stop"
6471 HTYPE = constants.HTYPE_INSTANCE
6474 def ExpandNames(self):
6475 self._ExpandAndLockInstance()
6477 def BuildHooksEnv(self):
6480 This runs on master, primary and secondary nodes of the instance.
6483 env = _BuildInstanceHookEnvByObject(self, self.instance)
6484 env["TIMEOUT"] = self.op.timeout
6485 return env
6487 def BuildHooksNodes(self):
6488 """Build hooks nodes.
6491 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6492 return (nl, nl)
6494 def CheckPrereq(self):
6495 """Check prerequisites.
6497 This checks that the instance is in the cluster.
6500 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6501 assert self.instance is not None, \
6502 "Cannot retrieve locked instance %s" % self.op.instance_name
6504 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6506 self.primary_offline = \
6507 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6509 if self.primary_offline and self.op.ignore_offline_nodes:
6510 self.proc.LogWarning("Ignoring offline primary node")
6511 else:
6512 _CheckNodeOnline(self, self.instance.primary_node)
6514 def Exec(self, feedback_fn):
6515 """Shutdown the instance.
6518 instance = self.instance
6519 node_current = instance.primary_node
6520 timeout = self.op.timeout
6522 if not self.op.no_remember:
6523 self.cfg.MarkInstanceDown(instance.name)
6525 if self.primary_offline:
6526 assert self.op.ignore_offline_nodes
6527 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6528 else:
6529 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6530 msg = result.fail_msg
6531 if msg:
6532 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6534 _ShutdownInstanceDisks(self, instance)
6537 class LUInstanceReinstall(LogicalUnit):
6538 """Reinstall an instance.
6541 HPATH = "instance-reinstall"
6542 HTYPE = constants.HTYPE_INSTANCE
6545 def ExpandNames(self):
6546 self._ExpandAndLockInstance()
6548 def BuildHooksEnv(self):
6551 This runs on master, primary and secondary nodes of the instance.
6554 return _BuildInstanceHookEnvByObject(self, self.instance)
6556 def BuildHooksNodes(self):
6557 """Build hooks nodes.
6560 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6561 return (nl, nl)
6563 def CheckPrereq(self):
6564 """Check prerequisites.
6566 This checks that the instance is in the cluster and is not running.
6569 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6570 assert instance is not None, \
6571 "Cannot retrieve locked instance %s" % self.op.instance_name
6572 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6573 " offline, cannot reinstall")
6574 for node in instance.secondary_nodes:
6575 _CheckNodeOnline(self, node, "Instance secondary node offline,"
6576 " cannot reinstall")
6578 if instance.disk_template == constants.DT_DISKLESS:
6579 raise errors.OpPrereqError("Instance '%s' has no disks" %
6580 self.op.instance_name,
6581 errors.ECODE_INVAL)
6582 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
6584 if self.op.os_type is not None:
6586 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6587 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6588 instance_os = self.op.os_type
6589 else:
6590 instance_os = instance.os
6592 nodelist = list(instance.all_nodes)
6594 if self.op.osparams:
6595 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6596 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6597 self.os_inst = i_osdict # the new dict (without defaults)
6598 else:
6599 self.os_inst = {}
6601 self.instance = instance
6603 def Exec(self, feedback_fn):
6604 """Reinstall the instance.
6607 inst = self.instance
6609 if self.op.os_type is not None:
6610 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6611 inst.os = self.op.os_type
6612 # Write to configuration
6613 self.cfg.Update(inst, feedback_fn)
6615 _StartInstanceDisks(self, inst, None)
6616 try:
6617 feedback_fn("Running the instance OS create scripts...")
6618 # FIXME: pass debug option from opcode to backend
6619 result = self.rpc.call_instance_os_add(inst.primary_node,
6620 (inst, self.os_inst), True,
6621 self.op.debug_level)
6622 result.Raise("Could not install OS for instance %s on node %s" %
6623 (inst.name, inst.primary_node))
6624 finally:
6625 _ShutdownInstanceDisks(self, inst)
6628 class LUInstanceRecreateDisks(LogicalUnit):
6629 """Recreate an instance's missing disks.
6632 HPATH = "instance-recreate-disks"
6633 HTYPE = constants.HTYPE_INSTANCE
6636 def CheckArguments(self):
6637 # normalise the disk list
6638 self.op.disks = sorted(frozenset(self.op.disks))
6640 def ExpandNames(self):
6641 self._ExpandAndLockInstance()
6642 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6643 if self.op.nodes:
6644 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6645 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6646 else:
6647 self.needed_locks[locking.LEVEL_NODE] = []
6649 def DeclareLocks(self, level):
6650 if level == locking.LEVEL_NODE:
6651 # if we replace the nodes, we only need to lock the old primary,
6652 # otherwise we need to lock all nodes for disk re-creation
6653 primary_only = bool(self.op.nodes)
6654 self._LockInstancesNodes(primary_only=primary_only)
6655 elif level == locking.LEVEL_NODE_RES:
6657 self.needed_locks[locking.LEVEL_NODE_RES] = \
6658 self.needed_locks[locking.LEVEL_NODE][:]
6660 def BuildHooksEnv(self):
6663 This runs on master, primary and secondary nodes of the instance.
6666 return _BuildInstanceHookEnvByObject(self, self.instance)
6668 def BuildHooksNodes(self):
6669 """Build hooks nodes.
6672 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6673 return (nl, nl)
6675 def CheckPrereq(self):
6676 """Check prerequisites.
6678 This checks that the instance is in the cluster and is not running.
6681 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6682 assert instance is not None, \
6683 "Cannot retrieve locked instance %s" % self.op.instance_name
6684 if self.op.nodes:
6685 if len(self.op.nodes) != len(instance.all_nodes):
6686 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6687 " %d replacement nodes were specified" %
6688 (instance.name, len(instance.all_nodes),
6689 len(self.op.nodes)),
6690 errors.ECODE_INVAL)
6691 assert instance.disk_template != constants.DT_DRBD8 or \
6692 len(self.op.nodes) == 2
6693 assert instance.disk_template != constants.DT_PLAIN or \
6694 len(self.op.nodes) == 1
6695 primary_node = self.op.nodes[0]
6696 else:
6697 primary_node = instance.primary_node
6698 _CheckNodeOnline(self, primary_node)
6700 if instance.disk_template == constants.DT_DISKLESS:
6701 raise errors.OpPrereqError("Instance '%s' has no disks" %
6702 self.op.instance_name, errors.ECODE_INVAL)
6703 # if we replace nodes *and* the old primary is offline, we don't
6705 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
6706 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
6707 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6708 if not (self.op.nodes and old_pnode.offline):
6709 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
6710 msg="cannot recreate disks")
6712 if not self.op.disks:
6713 self.op.disks = range(len(instance.disks))
6715 for idx in self.op.disks:
6716 if idx >= len(instance.disks):
6717 raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6718 errors.ECODE_INVAL)
6719 if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6720 raise errors.OpPrereqError("Can't recreate disks partially and"
6721 " change the nodes at the same time",
6722 errors.ECODE_INVAL)
6723 self.instance = instance
6725 def Exec(self, feedback_fn):
6726 """Recreate the disks.
6729 instance = self.instance
6731 assert (self.owned_locks(locking.LEVEL_NODE) ==
6732 self.owned_locks(locking.LEVEL_NODE_RES))
6734 to_skip = []
6735 mods = [] # keeps track of needed logical_id changes
6737 for idx, disk in enumerate(instance.disks):
6738 if idx not in self.op.disks: # disk idx has not been passed in
6739 to_skip.append(idx)
6740 continue
6741 # update secondaries for disks, if needed
6742 if self.op.nodes:
6743 if disk.dev_type == constants.LD_DRBD8:
6744 # need to update the nodes and minors
6745 assert len(self.op.nodes) == 2
6746 assert len(disk.logical_id) == 6 # otherwise disk internals
6748 (_, _, old_port, _, _, old_secret) = disk.logical_id
6749 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6750 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6751 new_minors[0], new_minors[1], old_secret)
6752 assert len(disk.logical_id) == len(new_id)
6753 mods.append((idx, new_id))
6755 # now that we have passed all asserts above, we can apply the mods
6756 # in a single run (to avoid partial changes)
6757 for idx, new_id in mods:
6758 instance.disks[idx].logical_id = new_id
6760 # change primary node, if needed
6761 if self.op.nodes:
6762 instance.primary_node = self.op.nodes[0]
6763 self.LogWarning("Changing the instance's nodes, you will have to"
6764 " remove any disks left on the older nodes manually")
6767 self.cfg.Update(instance, feedback_fn)
6769 _CreateDisks(self, instance, to_skip=to_skip)
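# to_skip holds the indices of disks that were not selected for recreation,
# while mods collects (index, new logical_id) pairs for DRBD disks whose
# nodes and minors had to be reallocated before the disks are recreated.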
6772 class LUInstanceRename(LogicalUnit):
6773 """Rename an instance.
6776 HPATH = "instance-rename"
6777 HTYPE = constants.HTYPE_INSTANCE
6779 def CheckArguments(self):
6783 if self.op.ip_check and not self.op.name_check:
6784 # TODO: make the ip check more flexible and not depend on the name check
6785 raise errors.OpPrereqError("IP address check requires a name check",
6786 errors.ECODE_INVAL)
6788 def BuildHooksEnv(self):
6791 This runs on master, primary and secondary nodes of the instance.
6794 env = _BuildInstanceHookEnvByObject(self, self.instance)
6795 env["INSTANCE_NEW_NAME"] = self.op.new_name
6796 return env
6798 def BuildHooksNodes(self):
6799 """Build hooks nodes.
6802 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6803 return (nl, nl)
6805 def CheckPrereq(self):
6806 """Check prerequisites.
6808 This checks that the instance is in the cluster and is not running.
6811 self.op.instance_name = _ExpandInstanceName(self.cfg,
6812 self.op.instance_name)
6813 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6814 assert instance is not None
6815 _CheckNodeOnline(self, instance.primary_node)
6816 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
6817 msg="cannot rename")
6818 self.instance = instance
6820 new_name = self.op.new_name
6821 if self.op.name_check:
6822 hostname = netutils.GetHostname(name=new_name)
6823 if hostname.name != new_name:
6824 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6825 hostname.name)
6826 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6827 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6828 " same as given hostname '%s'") %
6829 (hostname.name, self.op.new_name),
6830 errors.ECODE_INVAL)
6831 new_name = self.op.new_name = hostname.name
6832 if (self.op.ip_check and
6833 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6834 raise errors.OpPrereqError("IP %s of instance %s already in use" %
6835 (hostname.ip, new_name),
6836 errors.ECODE_NOTUNIQUE)
6838 instance_list = self.cfg.GetInstanceList()
6839 if new_name in instance_list and new_name != instance.name:
6840 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6841 new_name, errors.ECODE_EXISTS)
6843 def Exec(self, feedback_fn):
6844 """Rename the instance.
6847 inst = self.instance
6848 old_name = inst.name
6850 rename_file_storage = False
6851 if (inst.disk_template in constants.DTS_FILEBASED and
6852 self.op.new_name != inst.name):
6853 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6854 rename_file_storage = True
6856 self.cfg.RenameInstance(inst.name, self.op.new_name)
6857 # Change the instance lock. This is definitely safe while we hold the BGL.
6858 # Otherwise the new lock would have to be added in acquired mode.
6860 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6861 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6863 # re-read the instance from the configuration after rename
6864 inst = self.cfg.GetInstanceInfo(self.op.new_name)
6866 if rename_file_storage:
6867 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6868 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6869 old_file_storage_dir,
6870 new_file_storage_dir)
6871 result.Raise("Could not rename on node %s directory '%s' to '%s'"
6872 " (but the instance has been renamed in Ganeti)" %
6873 (inst.primary_node, old_file_storage_dir,
6874 new_file_storage_dir))
6876 _StartInstanceDisks(self, inst, None)
6877 try:
6878 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6879 old_name, self.op.debug_level)
6880 msg = result.fail_msg
6881 if msg:
6882 msg = ("Could not run OS rename script for instance %s on node %s"
6883 " (but the instance has been renamed in Ganeti): %s" %
6884 (inst.name, inst.primary_node, msg))
6885 self.proc.LogWarning(msg)
6886 finally:
6887 _ShutdownInstanceDisks(self, inst)
6889 return inst.name
6892 class LUInstanceRemove(LogicalUnit):
6893 """Remove an instance.
6896 HPATH = "instance-remove"
6897 HTYPE = constants.HTYPE_INSTANCE
6900 def ExpandNames(self):
6901 self._ExpandAndLockInstance()
6902 self.needed_locks[locking.LEVEL_NODE] = []
6903 self.needed_locks[locking.LEVEL_NODE_RES] = []
6904 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6906 def DeclareLocks(self, level):
6907 if level == locking.LEVEL_NODE:
6908 self._LockInstancesNodes()
6909 elif level == locking.LEVEL_NODE_RES:
6911 self.needed_locks[locking.LEVEL_NODE_RES] = \
6912 self.needed_locks[locking.LEVEL_NODE][:]
6914 def BuildHooksEnv(self):
6917 This runs on master, primary and secondary nodes of the instance.
6920 env = _BuildInstanceHookEnvByObject(self, self.instance)
6921 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6922 return env
6924 def BuildHooksNodes(self):
6925 """Build hooks nodes.
6928 nl = [self.cfg.GetMasterNode()]
6929 nl_post = list(self.instance.all_nodes) + nl
6930 return (nl, nl_post)
6932 def CheckPrereq(self):
6933 """Check prerequisites.
6935 This checks that the instance is in the cluster.
6938 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6939 assert self.instance is not None, \
6940 "Cannot retrieve locked instance %s" % self.op.instance_name
6942 def Exec(self, feedback_fn):
6943 """Remove the instance.
6946 instance = self.instance
6947 logging.info("Shutting down instance %s on node %s",
6948 instance.name, instance.primary_node)
6950 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6951 self.op.shutdown_timeout)
6952 msg = result.fail_msg
6953 if msg:
6954 if self.op.ignore_failures:
6955 feedback_fn("Warning: can't shutdown instance: %s" % msg)
6956 else:
6957 raise errors.OpExecError("Could not shutdown instance %s on"
6958 " node %s: %s" %
6959 (instance.name, instance.primary_node, msg))
6961 assert (self.owned_locks(locking.LEVEL_NODE) ==
6962 self.owned_locks(locking.LEVEL_NODE_RES))
6963 assert not (set(instance.all_nodes) -
6964 self.owned_locks(locking.LEVEL_NODE)), \
6965 "Not owning correct locks"
6967 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6970 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6971 """Utility function to remove an instance.
6974 logging.info("Removing block devices for instance %s", instance.name)
6976 if not _RemoveDisks(lu, instance):
6977 if not ignore_failures:
6978 raise errors.OpExecError("Can't remove instance's disks")
6979 feedback_fn("Warning: can't remove instance's disks")
6981 logging.info("Removing instance %s out of cluster config", instance.name)
6983 lu.cfg.RemoveInstance(instance.name)
6985 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6986 "Instance lock removal conflict"
6988 # Remove lock for the instance
6989 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6992 class LUInstanceQuery(NoHooksLU):
6993 """Logical unit for querying instances.
6996 # pylint: disable=W0142
6999 def CheckArguments(self):
7000 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7001 self.op.output_fields, self.op.use_locking)
7003 def ExpandNames(self):
7004 self.iq.ExpandNames(self)
7006 def DeclareLocks(self, level):
7007 self.iq.DeclareLocks(self, level)
7009 def Exec(self, feedback_fn):
7010 return self.iq.OldStyleQuery(self)
7013 class LUInstanceFailover(LogicalUnit):
7014 """Failover an instance.
7017 HPATH = "instance-failover"
7018 HTYPE = constants.HTYPE_INSTANCE
7021 def CheckArguments(self):
7022 """Check the arguments.
7025 self.iallocator = getattr(self.op, "iallocator", None)
7026 self.target_node = getattr(self.op, "target_node", None)
7028 def ExpandNames(self):
7029 self._ExpandAndLockInstance()
7031 if self.op.target_node is not None:
7032 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7034 self.needed_locks[locking.LEVEL_NODE] = []
7035 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7037 ignore_consistency = self.op.ignore_consistency
7038 shutdown_timeout = self.op.shutdown_timeout
7039 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7040 cleanup=False,
7041 failover=True,
7042 ignore_consistency=ignore_consistency,
7043 shutdown_timeout=shutdown_timeout)
7044 self.tasklets = [self._migrater]
7046 def DeclareLocks(self, level):
7047 if level == locking.LEVEL_NODE:
7048 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7049 if instance.disk_template in constants.DTS_EXT_MIRROR:
7050 if self.op.target_node is None:
7051 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7052 else:
7053 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7054 self.op.target_node]
7055 del self.recalculate_locks[locking.LEVEL_NODE]
7056 else:
7057 self._LockInstancesNodes()
7059 def BuildHooksEnv(self):
7062 This runs on master, primary and secondary nodes of the instance.
7065 instance = self._migrater.instance
7066 source_node = instance.primary_node
7067 target_node = self.op.target_node
7068 env = {
7069 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7070 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7071 "OLD_PRIMARY": source_node,
7072 "NEW_PRIMARY": target_node,
7073 }
7075 if instance.disk_template in constants.DTS_INT_MIRROR:
7076 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7077 env["NEW_SECONDARY"] = source_node
7078 else:
7079 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7081 env.update(_BuildInstanceHookEnvByObject(self, instance))
7083 return env
7085 def BuildHooksNodes(self):
7086 """Build hooks nodes.
7089 instance = self._migrater.instance
7090 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7091 return (nl, nl + [instance.primary_node])
7094 class LUInstanceMigrate(LogicalUnit):
7095 """Migrate an instance.
7097 This is migration without shutting down, compared to the failover,
7098 which is done with shutdown.
7101 HPATH = "instance-migrate"
7102 HTYPE = constants.HTYPE_INSTANCE
7105 def ExpandNames(self):
7106 self._ExpandAndLockInstance()
7108 if self.op.target_node is not None:
7109 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7111 self.needed_locks[locking.LEVEL_NODE] = []
7112 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7114 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7115 cleanup=self.op.cleanup,
7116 failover=False,
7117 fallback=self.op.allow_failover)
7118 self.tasklets = [self._migrater]
7120 def DeclareLocks(self, level):
7121 if level == locking.LEVEL_NODE:
7122 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7123 if instance.disk_template in constants.DTS_EXT_MIRROR:
7124 if self.op.target_node is None:
7125 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7126 else:
7127 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7128 self.op.target_node]
7129 del self.recalculate_locks[locking.LEVEL_NODE]
7130 else:
7131 self._LockInstancesNodes()
7133 def BuildHooksEnv(self):
7136 This runs on master, primary and secondary nodes of the instance.
7139 instance = self._migrater.instance
7140 source_node = instance.primary_node
7141 target_node = self.op.target_node
7142 env = _BuildInstanceHookEnvByObject(self, instance)
7143 env.update({
7144 "MIGRATE_LIVE": self._migrater.live,
7145 "MIGRATE_CLEANUP": self.op.cleanup,
7146 "OLD_PRIMARY": source_node,
7147 "NEW_PRIMARY": target_node,
7148 })
7150 if instance.disk_template in constants.DTS_INT_MIRROR:
7151 env["OLD_SECONDARY"] = target_node
7152 env["NEW_SECONDARY"] = source_node
7153 else:
7154 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7156 return env
7158 def BuildHooksNodes(self):
7159 """Build hooks nodes.
7162 instance = self._migrater.instance
7163 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7164 return (nl, nl + [instance.primary_node])
7167 class LUInstanceMove(LogicalUnit):
7168 """Move an instance by data-copying.
7171 HPATH = "instance-move"
7172 HTYPE = constants.HTYPE_INSTANCE
7175 def ExpandNames(self):
7176 self._ExpandAndLockInstance()
7177 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7178 self.op.target_node = target_node
7179 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7180 self.needed_locks[locking.LEVEL_NODE_RES] = []
7181 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7183 def DeclareLocks(self, level):
7184 if level == locking.LEVEL_NODE:
7185 self._LockInstancesNodes(primary_only=True)
7186 elif level == locking.LEVEL_NODE_RES:
7188 self.needed_locks[locking.LEVEL_NODE_RES] = \
7189 self.needed_locks[locking.LEVEL_NODE][:]
7191 def BuildHooksEnv(self):
7194 This runs on master, primary and secondary nodes of the instance.
7197 env = {
7198 "TARGET_NODE": self.op.target_node,
7199 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7200 }
7201 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7203 return env
7204 def BuildHooksNodes(self):
7205 """Build hooks nodes.
7208 nl = [
7209 self.cfg.GetMasterNode(),
7210 self.instance.primary_node,
7211 self.op.target_node,
7212 ]
7214 return (nl, nl)
7215 def CheckPrereq(self):
7216 """Check prerequisites.
7218 This checks that the instance is in the cluster.
7221 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7222 assert self.instance is not None, \
7223 "Cannot retrieve locked instance %s" % self.op.instance_name
7225 node = self.cfg.GetNodeInfo(self.op.target_node)
7226 assert node is not None, \
7227 "Cannot retrieve locked node %s" % self.op.target_node
7229 self.target_node = target_node = node.name
7231 if target_node == instance.primary_node:
7232 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7233 (instance.name, target_node),
7234 errors.ECODE_STATE)
7236 bep = self.cfg.GetClusterInfo().FillBE(instance)
7238 for idx, dsk in enumerate(instance.disks):
7239 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7240 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7241 " cannot copy" % idx, errors.ECODE_STATE)
7243 _CheckNodeOnline(self, target_node)
7244 _CheckNodeNotDrained(self, target_node)
7245 _CheckNodeVmCapable(self, target_node)
7247 if instance.admin_state == constants.ADMINST_UP:
7248 # check memory requirements on the secondary node
7249 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7250 instance.name, bep[constants.BE_MAXMEM],
7251 instance.hypervisor)
7252 else:
7253 self.LogInfo("Not checking memory on the secondary node as"
7254 " instance will not be started")
7256 # check bridge existance
7257 _CheckInstanceBridgesExist(self, instance, node=target_node)
7259 def Exec(self, feedback_fn):
7260 """Move an instance.
7262 The move is done by shutting it down on its present node, copying
7263 the data over (slow) and starting it on the new node.
7266 instance = self.instance
7268 source_node = instance.primary_node
7269 target_node = self.target_node
7271 self.LogInfo("Shutting down instance %s on source node %s",
7272 instance.name, source_node)
7274 assert (self.owned_locks(locking.LEVEL_NODE) ==
7275 self.owned_locks(locking.LEVEL_NODE_RES))
7277 result = self.rpc.call_instance_shutdown(source_node, instance,
7278 self.op.shutdown_timeout)
7279 msg = result.fail_msg
7280 if msg:
7281 if self.op.ignore_consistency:
7282 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7283 " Proceeding anyway. Please make sure node"
7284 " %s is down. Error details: %s",
7285 instance.name, source_node, source_node, msg)
7286 else:
7287 raise errors.OpExecError("Could not shutdown instance %s on"
7288 " node %s: %s" %
7289 (instance.name, source_node, msg))
7291 # create the target disks
7292 try:
7293 _CreateDisks(self, instance, target_node=target_node)
7294 except errors.OpExecError:
7295 self.LogWarning("Device creation failed, reverting...")
7296 try:
7297 _RemoveDisks(self, instance, target_node=target_node)
7298 finally:
7299 self.cfg.ReleaseDRBDMinors(instance.name)
7300 raise
7302 cluster_name = self.cfg.GetClusterInfo().cluster_name
7304 errs = []
7305 # activate, get path, copy the data over
7306 for idx, disk in enumerate(instance.disks):
7307 self.LogInfo("Copying data for disk %d", idx)
7308 result = self.rpc.call_blockdev_assemble(target_node, disk,
7309 instance.name, True, idx)
7310 if result.fail_msg:
7311 self.LogWarning("Can't assemble newly created disk %d: %s",
7312 idx, result.fail_msg)
7313 errs.append(result.fail_msg)
7314 break
7315 dev_path = result.payload
7316 result = self.rpc.call_blockdev_export(source_node, disk,
7317 target_node, dev_path,
7318 cluster_name)
7319 if result.fail_msg:
7320 self.LogWarning("Can't copy data over for disk %d: %s",
7321 idx, result.fail_msg)
7322 errs.append(result.fail_msg)
7323 break
7325 if errs:
7326 self.LogWarning("Some disks failed to copy, aborting")
7327 try:
7328 _RemoveDisks(self, instance, target_node=target_node)
7329 finally:
7330 self.cfg.ReleaseDRBDMinors(instance.name)
7331 raise errors.OpExecError("Errors during disk copy: %s" %
7332 (",".join(errs),))
7334 instance.primary_node = target_node
7335 self.cfg.Update(instance, feedback_fn)
7337 self.LogInfo("Removing the disks on the original node")
7338 _RemoveDisks(self, instance, target_node=source_node)
7340 # Only start the instance if it's marked as up
7341 if instance.admin_state == constants.ADMINST_UP:
7342 self.LogInfo("Starting instance %s on node %s",
7343 instance.name, target_node)
7345 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7346 ignore_secondaries=True)
7347 if not disks_ok:
7348 _ShutdownInstanceDisks(self, instance)
7349 raise errors.OpExecError("Can't activate the instance's disks")
7351 result = self.rpc.call_instance_start(target_node,
7352 (instance, None, None), False)
7353 msg = result.fail_msg
7354 if msg:
7355 _ShutdownInstanceDisks(self, instance)
7356 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7357 (instance.name, target_node, msg))
7360 class LUNodeMigrate(LogicalUnit):
7361 """Migrate all instances from a node.
7364 HPATH = "node-migrate"
7365 HTYPE = constants.HTYPE_NODE
7368 def CheckArguments(self):
7369 pass
7371 def ExpandNames(self):
7372 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7374 self.share_locks = _ShareAll()
7375 self.needed_locks = {
7376 locking.LEVEL_NODE: [self.op.node_name],
7377 }
7379 def BuildHooksEnv(self):
7382 This runs on the master, the primary and all the secondaries.
7385 return {
7386 "NODE_NAME": self.op.node_name,
7387 }
7389 def BuildHooksNodes(self):
7390 """Build hooks nodes.
7393 nl = [self.cfg.GetMasterNode()]
7394 return (nl, nl)
7396 def CheckPrereq(self):
7397 pass
7399 def Exec(self, feedback_fn):
7400 # Prepare jobs for migration instances
7401 jobs = [
7402 [opcodes.OpInstanceMigrate(instance_name=inst.name,
7403 mode=self.op.mode,
7404 live=self.op.live,
7405 iallocator=self.op.iallocator,
7406 target_node=self.op.target_node)]
7407 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7408 ]
7410 # TODO: Run iallocator in this opcode and pass correct placement options to
7411 # OpInstanceMigrate. Since other jobs can modify the cluster between
7412 # running the iallocator and the actual migration, a good consistency model
7413 # will have to be found.
7415 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7416 frozenset([self.op.node_name]))
7418 return ResultWithJobs(jobs)
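# "jobs" is a list of single-opcode job definitions, one OpInstanceMigrate per
# primary instance on the node, so every instance is migrated in its own job.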
7421 class TLMigrateInstance(Tasklet):
7422 """Tasklet class for instance migration.
7425 @ivar live: whether the migration will be done live or non-live;
7426 this variable is initalized only after CheckPrereq has run
7427 @type cleanup: boolean
7428 @ivar cleanup: Whether we cleanup from a failed migration
7429 @type iallocator: string
7430 @ivar iallocator: The iallocator used to determine target_node
7431 @type target_node: string
7432 @ivar target_node: If given, the target_node to reallocate the instance to
7433 @type failover: boolean
7434 @ivar failover: Whether operation results in failover or migration
7435 @type fallback: boolean
7436 @ivar fallback: Whether fallback to failover is allowed if migration not
7437 possible
7438 @type ignore_consistency: boolean
7439 @ivar ignore_consistency: Whether we should ignore consistency between source
7440 and target node
7441 @type shutdown_timeout: int
7442 @ivar shutdown_timeout: In case of failover timeout of the shutdown
7447 _MIGRATION_POLL_INTERVAL = 1 # seconds
7448 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7450 def __init__(self, lu, instance_name, cleanup=False,
7451 failover=False, fallback=False,
7452 ignore_consistency=False,
7453 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
7454 """Initializes this class.
7457 Tasklet.__init__(self, lu)
7460 self.instance_name = instance_name
7461 self.cleanup = cleanup
7462 self.live = False # will be overridden later
7463 self.failover = failover
7464 self.fallback = fallback
7465 self.ignore_consistency = ignore_consistency
7466 self.shutdown_timeout = shutdown_timeout
7468 def CheckPrereq(self):
7469 """Check prerequisites.
7471 This checks that the instance is in the cluster.
7474 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7475 instance = self.cfg.GetInstanceInfo(instance_name)
7476 assert instance is not None
7477 self.instance = instance
7479 if (not self.cleanup and
7480 not instance.admin_state == constants.ADMINST_UP and
7481 not self.failover and self.fallback):
7482 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7483 " switching to failover")
7484 self.failover = True
7486 if instance.disk_template not in constants.DTS_MIRRORED:
7487 if self.failover:
7488 text = "failovers"
7489 else:
7490 text = "migrations"
7491 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7492 " %s" % (instance.disk_template, text),
7493 errors.ECODE_STATE)
7495 if instance.disk_template in constants.DTS_EXT_MIRROR:
7496 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7498 if self.lu.op.iallocator:
7499 self._RunAllocator()
7500 else:
7501 # We set self.target_node as it is required by
7502 # BuildHooksEnv
7503 self.target_node = self.lu.op.target_node
7505 # self.target_node is already populated, either directly or by the
7506 # iallocator run
7507 target_node = self.target_node
7508 if self.target_node == instance.primary_node:
7509 raise errors.OpPrereqError("Cannot migrate instance %s"
7510 " to its primary (%s)" %
7511 (instance.name, instance.primary_node))
7513 if len(self.lu.tasklets) == 1:
7514 # It is safe to release locks only when we're the only tasklet
7515 # in the LU
7516 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7517 keep=[instance.primary_node, self.target_node])
7519 else:
7520 secondary_nodes = instance.secondary_nodes
7521 if not secondary_nodes:
7522 raise errors.ConfigurationError("No secondary node but using"
7523 " %s disk template" %
7524 instance.disk_template)
7525 target_node = secondary_nodes[0]
7526 if self.lu.op.iallocator or (self.lu.op.target_node and
7527 self.lu.op.target_node != target_node):
7528 if self.failover:
7529 text = "failed over"
7530 else:
7531 text = "migrated"
7532 raise errors.OpPrereqError("Instances with disk template %s cannot"
7533 " be %s to arbitrary nodes"
7534 " (neither an iallocator nor a target"
7535 " node can be passed)" %
7536 (instance.disk_template, text),
7537 errors.ECODE_INVAL)
7539 i_be = self.cfg.GetClusterInfo().FillBE(instance)
7541 # check memory requirements on the secondary node
7542 if not self.failover or instance.admin_state == constants.ADMINST_UP:
7543 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7544 instance.name, i_be[constants.BE_MAXMEM],
7545 instance.hypervisor)
7546 else:
7547 self.lu.LogInfo("Not checking memory on the secondary node as"
7548 " instance will not be started")
7550 # check if failover must be forced instead of migration
7551 if (not self.cleanup and not self.failover and
7552 i_be[constants.BE_ALWAYS_FAILOVER]):
7553 if self.fallback:
7554 self.lu.LogInfo("Instance configured to always failover; fallback"
7555 " to failover")
7556 self.failover = True
7557 else:
7558 raise errors.OpPrereqError("This instance has been configured to"
7559 " always failover, please allow failover",
7560 errors.ECODE_STATE)
7562     # check bridge existence
7563 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7565 if not self.cleanup:
7566 _CheckNodeNotDrained(self.lu, target_node)
7567 if not self.failover:
7568 result = self.rpc.call_instance_migratable(instance.primary_node,
7570 if result.fail_msg and self.fallback:
7571 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7573 self.failover = True
7575 result.Raise("Can't migrate, please use failover",
7576 prereq=True, ecode=errors.ECODE_STATE)
7578 assert not (self.failover and self.cleanup)
7580 if not self.failover:
7581 if self.lu.op.live is not None and self.lu.op.mode is not None:
7582 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7583                                    " parameters is accepted",
7585 if self.lu.op.live is not None:
7587 self.lu.op.mode = constants.HT_MIGRATION_LIVE
7589 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7590 # reset the 'live' parameter to None so that repeated
7591 # invocations of CheckPrereq do not raise an exception
7592 self.lu.op.live = None
7593 elif self.lu.op.mode is None:
7594 # read the default value from the hypervisor
7595 i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7597 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7599 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7601 # Failover is never live
7604 def _RunAllocator(self):
7605 """Run the allocator based on input opcode.
7608 ial = IAllocator(self.cfg, self.rpc,
7609 mode=constants.IALLOCATOR_MODE_RELOC,
7610 name=self.instance_name,
7611 # TODO See why hail breaks with a single node below
7612 relocate_from=[self.instance.primary_node,
7613 self.instance.primary_node],
7616 ial.Run(self.lu.op.iallocator)
7619 raise errors.OpPrereqError("Can't compute nodes using"
7620 " iallocator '%s': %s" %
7621 (self.lu.op.iallocator, ial.info),
7623 if len(ial.result) != ial.required_nodes:
7624 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7625 " of nodes (%s), required %s" %
7626 (self.lu.op.iallocator, len(ial.result),
7627 ial.required_nodes), errors.ECODE_FAULT)
7628 self.target_node = ial.result[0]
7629 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7630 self.instance_name, self.lu.op.iallocator,
7631 utils.CommaJoin(ial.result))
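  # A rough sketch of the relocation request built above (instance and node
  # names are made up); on success the allocator returns a node list whose
  # first entry becomes self.target_node:
  #   ial = IAllocator(cfg, rpc, mode=constants.IALLOCATOR_MODE_RELOC,
  #                    name="inst1.example.com",
  #                    relocate_from=["node1.example.com", "node1.example.com"])
  #   ial.Run("hail")
  #   ial.result  # e.g. ["node2.example.com"]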
7633 def _WaitUntilSync(self):
7634 """Poll with custom rpc for disk sync.
7636 This uses our own step-based rpc call.
7639 self.feedback_fn("* wait until resync is done")
7643 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7645 self.instance.disks)
7647 for node, nres in result.items():
7648 nres.Raise("Cannot resync disks on node %s" % node)
7649 node_done, node_percent = nres.payload
7650 all_done = all_done and node_done
7651 if node_percent is not None:
7652 min_percent = min(min_percent, node_percent)
7654 if min_percent < 100:
7655 self.feedback_fn(" - progress: %.1f%%" % min_percent)
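  # Illustrative aggregation in the loop above: if one node reports
  # (node_done=False, node_percent=80.0) and the other (True, 100.0), then
  # all_done stays False, min_percent becomes 80.0 and the step reports
  # " - progress: 80.0%" before polling again.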
7658 def _EnsureSecondary(self, node):
7659 """Demote a node to secondary.
7662 self.feedback_fn("* switching node %s to secondary mode" % node)
7664 for dev in self.instance.disks:
7665 self.cfg.SetDiskID(dev, node)
7667 result = self.rpc.call_blockdev_close(node, self.instance.name,
7668 self.instance.disks)
7669 result.Raise("Cannot change disk to secondary on node %s" % node)
7671 def _GoStandalone(self):
7672 """Disconnect from the network.
7675 self.feedback_fn("* changing into standalone mode")
7676 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7677 self.instance.disks)
7678 for node, nres in result.items():
7679       nres.Raise("Cannot disconnect disks on node %s" % node)
7681 def _GoReconnect(self, multimaster):
7682 """Reconnect to the network.
7688 msg = "single-master"
7689 self.feedback_fn("* changing disks into %s mode" % msg)
7690 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7691 self.instance.disks,
7692 self.instance.name, multimaster)
7693 for node, nres in result.items():
7694 nres.Raise("Cannot change disks config on node %s" % node)
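  # _GoStandalone and _GoReconnect together implement the DRBD reconfiguration
  # cycle used by the cleanup and migration paths below, roughly:
  #   self._GoStandalone()     # drop the current network configuration
  #   self._GoReconnect(True)  # re-attach in multimaster (dual-primary) mode
  #   self._WaitUntilSync()    # wait for the disks to resync
  # Passing False to _GoReconnect re-attaches in single-master mode instead.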
7696 def _ExecCleanup(self):
7697     """Try to clean up after a failed migration.
7699 The cleanup is done by:
7700 - check that the instance is running only on one node
7701 (and update the config if needed)
7702 - change disks on its secondary node to secondary
7703 - wait until disks are fully synchronized
7704 - disconnect from the network
7705 - change disks into single-master mode
7706 - wait again until disks are fully synchronized
7709 instance = self.instance
7710 target_node = self.target_node
7711 source_node = self.source_node
7713 # check running on only one node
7714 self.feedback_fn("* checking where the instance actually runs"
7715 " (if this hangs, the hypervisor might be in"
7717 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7718 for node, result in ins_l.items():
7719 result.Raise("Can't contact node %s" % node)
7721 runningon_source = instance.name in ins_l[source_node].payload
7722 runningon_target = instance.name in ins_l[target_node].payload
7724 if runningon_source and runningon_target:
7725 raise errors.OpExecError("Instance seems to be running on two nodes,"
7726 " or the hypervisor is confused; you will have"
7727 " to ensure manually that it runs only on one"
7728 " and restart this operation")
7730 if not (runningon_source or runningon_target):
7731 raise errors.OpExecError("Instance does not seem to be running at all;"
7732 " in this case it's safer to repair by"
7733 " running 'gnt-instance stop' to ensure disk"
7734 " shutdown, and then restarting it")
7736 if runningon_target:
7737 # the migration has actually succeeded, we need to update the config
7738 self.feedback_fn("* instance running on secondary node (%s),"
7739 " updating config" % target_node)
7740 instance.primary_node = target_node
7741 self.cfg.Update(instance, self.feedback_fn)
7742 demoted_node = source_node
7744 self.feedback_fn("* instance confirmed to be running on its"
7745 " primary node (%s)" % source_node)
7746 demoted_node = target_node
7748 if instance.disk_template in constants.DTS_INT_MIRROR:
7749 self._EnsureSecondary(demoted_node)
7751 self._WaitUntilSync()
7752 except errors.OpExecError:
7753         # we ignore errors here, since if the device is standalone, it
7754 # won't be able to sync
7756 self._GoStandalone()
7757 self._GoReconnect(False)
7758 self._WaitUntilSync()
7760 self.feedback_fn("* done")
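  # Summary of the decision taken above, based on where the instance was
  # actually found running:
  #   source only  -> keep the primary node, demote the target node
  #   target only  -> make the target node the new primary, demote the source
  #   both         -> abort; manual intervention is required
  #   neither      -> abort; the instance has to be restarted manually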
7762 def _RevertDiskStatus(self):
7763 """Try to revert the disk status after a failed migration.
7766 target_node = self.target_node
7767 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7771 self._EnsureSecondary(target_node)
7772 self._GoStandalone()
7773 self._GoReconnect(False)
7774 self._WaitUntilSync()
7775 except errors.OpExecError, err:
7776 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7777 " please try to recover the instance manually;"
7778 " error '%s'" % str(err))
7780 def _AbortMigration(self):
7781 """Call the hypervisor code to abort a started migration.
7784 instance = self.instance
7785 target_node = self.target_node
7786 source_node = self.source_node
7787 migration_info = self.migration_info
7789 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
7793 abort_msg = abort_result.fail_msg
7795 logging.error("Aborting migration failed on target node %s: %s",
7796 target_node, abort_msg)
7797       # Don't raise an exception here, as we still have to try to revert the
7798 # disk status, even if this step failed.
7800 abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
7801 instance, False, self.live)
7802 abort_msg = abort_result.fail_msg
7804 logging.error("Aborting migration failed on source node %s: %s",
7805 source_node, abort_msg)
7807 def _ExecMigration(self):
7808 """Migrate an instance.
7810 The migrate is done by:
7811 - change the disks into dual-master mode
7812 - wait until disks are fully synchronized again
7813 - migrate the instance
7814 - change disks on the new secondary node (the old primary) to secondary
7815 - wait until disks are fully synchronized
7816 - change disks into single-master mode
7819 instance = self.instance
7820 target_node = self.target_node
7821 source_node = self.source_node
7823 # Check for hypervisor version mismatch and warn the user.
7824 nodeinfo = self.rpc.call_node_info([source_node, target_node],
7825 None, [self.instance.hypervisor])
7826 for ninfo in nodeinfo.values():
7827 ninfo.Raise("Unable to retrieve node information from node '%s'" %
7829 (_, _, (src_info, )) = nodeinfo[source_node].payload
7830 (_, _, (dst_info, )) = nodeinfo[target_node].payload
7832 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
7833 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
7834 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
7835 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
7836 if src_version != dst_version:
7837 self.feedback_fn("* warning: hypervisor version mismatch between"
7838 " source (%s) and target (%s) node" %
7839 (src_version, dst_version))
7841 self.feedback_fn("* checking disk consistency between source and target")
7842 for dev in instance.disks:
7843 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7844 raise errors.OpExecError("Disk %s is degraded or not fully"
7845 " synchronized on target node,"
7846 " aborting migration" % dev.iv_name)
7848 # First get the migration information from the remote node
7849 result = self.rpc.call_migration_info(source_node, instance)
7850 msg = result.fail_msg
7852 log_err = ("Failed fetching source migration information from %s: %s" %
7854 logging.error(log_err)
7855 raise errors.OpExecError(log_err)
7857 self.migration_info = migration_info = result.payload
7859 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7860 # Then switch the disks to master/master mode
7861 self._EnsureSecondary(target_node)
7862 self._GoStandalone()
7863 self._GoReconnect(True)
7864 self._WaitUntilSync()
7866 self.feedback_fn("* preparing %s to accept the instance" % target_node)
7867 result = self.rpc.call_accept_instance(target_node,
7870 self.nodes_ip[target_node])
7872 msg = result.fail_msg
7874 logging.error("Instance pre-migration failed, trying to revert"
7875 " disk status: %s", msg)
7876 self.feedback_fn("Pre-migration failed, aborting")
7877 self._AbortMigration()
7878 self._RevertDiskStatus()
7879 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7880 (instance.name, msg))
7882 self.feedback_fn("* migrating instance to %s" % target_node)
7883 result = self.rpc.call_instance_migrate(source_node, instance,
7884 self.nodes_ip[target_node],
7886 msg = result.fail_msg
7888 logging.error("Instance migration failed, trying to revert"
7889 " disk status: %s", msg)
7890 self.feedback_fn("Migration failed, aborting")
7891 self._AbortMigration()
7892 self._RevertDiskStatus()
7893 raise errors.OpExecError("Could not migrate instance %s: %s" %
7894 (instance.name, msg))
7896 self.feedback_fn("* starting memory transfer")
7897 last_feedback = time.time()
7899 result = self.rpc.call_instance_get_migration_status(source_node,
7901 msg = result.fail_msg
7902 ms = result.payload # MigrationStatus instance
7903 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
7904 logging.error("Instance migration failed, trying to revert"
7905 " disk status: %s", msg)
7906 self.feedback_fn("Migration failed, aborting")
7907 self._AbortMigration()
7908 self._RevertDiskStatus()
7909 raise errors.OpExecError("Could not migrate instance %s: %s" %
7910 (instance.name, msg))
7912 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
7913 self.feedback_fn("* memory transfer complete")
7916 if (utils.TimeoutExpired(last_feedback,
7917 self._MIGRATION_FEEDBACK_INTERVAL) and
7918 ms.transferred_ram is not None):
7919 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
7920 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
7921 last_feedback = time.time()
7923 time.sleep(self._MIGRATION_POLL_INTERVAL)
7925 result = self.rpc.call_instance_finalize_migration_src(source_node,
7929 msg = result.fail_msg
7931 logging.error("Instance migration succeeded, but finalization failed"
7932 " on the source node: %s", msg)
7933 raise errors.OpExecError("Could not finalize instance migration: %s" %
7936 instance.primary_node = target_node
7938 # distribute new instance config to the other nodes
7939 self.cfg.Update(instance, self.feedback_fn)
7941 result = self.rpc.call_instance_finalize_migration_dst(target_node,
7945 msg = result.fail_msg
7947 logging.error("Instance migration succeeded, but finalization failed"
7948 " on the target node: %s", msg)
7949 raise errors.OpExecError("Could not finalize instance migration: %s" %
7952 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7953 self._EnsureSecondary(source_node)
7954 self._WaitUntilSync()
7955 self._GoStandalone()
7956 self._GoReconnect(False)
7957 self._WaitUntilSync()
7959 self.feedback_fn("* done")
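  # Example of the progress figure computed in the polling loop above (numbers
  # are illustrative): with ms.transferred_ram == 1536 and ms.total_ram == 2048
  # the feedback line reads "* memory transfer progress: 75.00 %".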
7961 def _ExecFailover(self):
7962 """Failover an instance.
7964 The failover is done by shutting it down on its present node and
7965 starting it on the secondary.
7968 instance = self.instance
7969 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7971 source_node = instance.primary_node
7972 target_node = self.target_node
7974 if instance.admin_state == constants.ADMINST_UP:
7975 self.feedback_fn("* checking disk consistency between source and target")
7976 for dev in instance.disks:
7977 # for drbd, these are drbd over lvm
7978 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7979 if primary_node.offline:
7980 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7982 (primary_node.name, dev.iv_name, target_node))
7983 elif not self.ignore_consistency:
7984 raise errors.OpExecError("Disk %s is degraded on target node,"
7985 " aborting failover" % dev.iv_name)
7987 self.feedback_fn("* not checking disk consistency as instance is not"
7990 self.feedback_fn("* shutting down instance on source node")
7991 logging.info("Shutting down instance %s on node %s",
7992 instance.name, source_node)
7994 result = self.rpc.call_instance_shutdown(source_node, instance,
7995 self.shutdown_timeout)
7996 msg = result.fail_msg
7998 if self.ignore_consistency or primary_node.offline:
7999 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8000 " proceeding anyway; please make sure node"
8001 " %s is down; error details: %s",
8002 instance.name, source_node, source_node, msg)
8004 raise errors.OpExecError("Could not shutdown instance %s on"
8006 (instance.name, source_node, msg))
8008 self.feedback_fn("* deactivating the instance's disks on source node")
8009 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8010 raise errors.OpExecError("Can't shut down the instance's disks")
8012 instance.primary_node = target_node
8013 # distribute new instance config to the other nodes
8014 self.cfg.Update(instance, self.feedback_fn)
8016 # Only start the instance if it's marked as up
8017 if instance.admin_state == constants.ADMINST_UP:
8018 self.feedback_fn("* activating the instance's disks on target node %s" %
8020 logging.info("Starting instance %s on node %s",
8021 instance.name, target_node)
8023 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8024 ignore_secondaries=True)
8026 _ShutdownInstanceDisks(self.lu, instance)
8027 raise errors.OpExecError("Can't activate the instance's disks")
8029 self.feedback_fn("* starting the instance on the target node %s" %
8031 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8033 msg = result.fail_msg
8035 _ShutdownInstanceDisks(self.lu, instance)
8036 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8037 (instance.name, target_node, msg))
8039 def Exec(self, feedback_fn):
8040 """Perform the migration.
8043 self.feedback_fn = feedback_fn
8044 self.source_node = self.instance.primary_node
8046 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8047 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8048 self.target_node = self.instance.secondary_nodes[0]
8049 # Otherwise self.target_node has been populated either
8050 # directly, or through an iallocator.
8052 self.all_nodes = [self.source_node, self.target_node]
8053 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8054 in self.cfg.GetMultiNodeInfo(self.all_nodes))
8057 feedback_fn("Failover instance %s" % self.instance.name)
8058 self._ExecFailover()
8060 feedback_fn("Migrating instance %s" % self.instance.name)
8063 return self._ExecCleanup()
8065 return self._ExecMigration()
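  # Sketch of the data prepared in Exec above (names and addresses are made up):
  #   self.all_nodes = ["node1.example.com", "node2.example.com"]
  #   self.nodes_ip  = {"node1.example.com": "192.0.2.1",
  #                     "node2.example.com": "192.0.2.2"}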
8068 def _CreateBlockDev(lu, node, instance, device, force_create,
8070 """Create a tree of block devices on a given node.
8072 If this device type has to be created on secondaries, create it and
8075 If not, just recurse to children keeping the same 'force' value.
8077 @param lu: the lu on whose behalf we execute
8078 @param node: the node on which to create the device
8079 @type instance: L{objects.Instance}
8080 @param instance: the instance which owns the device
8081 @type device: L{objects.Disk}
8082 @param device: the device to create
8083 @type force_create: boolean
8084 @param force_create: whether to force creation of this device; this
8085       will be changed to True whenever we find a device which has
8086 CreateOnSecondary() attribute
8087 @param info: the extra 'metadata' we should attach to the device
8088 (this will be represented as a LVM tag)
8089 @type force_open: boolean
8090   @param force_open: this parameter will be passed to the
8091 L{backend.BlockdevCreate} function where it specifies
8092 whether we run on primary or not, and it affects both
8093       the child assembly and the device's own Open() execution
8096 if device.CreateOnSecondary():
8100 for child in device.children:
8101 _CreateBlockDev(lu, node, instance, child, force_create,
8104 if not force_create:
8107 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8110 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8111 """Create a single block device on a given node.
8113 This will not recurse over children of the device, so they must be
8116 @param lu: the lu on whose behalf we execute
8117 @param node: the node on which to create the device
8118 @type instance: L{objects.Instance}
8119 @param instance: the instance which owns the device
8120 @type device: L{objects.Disk}
8121 @param device: the device to create
8122 @param info: the extra 'metadata' we should attach to the device
8123 (this will be represented as a LVM tag)
8124 @type force_open: boolean
8125   @param force_open: this parameter will be passed to the
8126 L{backend.BlockdevCreate} function where it specifies
8127 whether we run on primary or not, and it affects both
8128       the child assembly and the device's own Open() execution
8131 lu.cfg.SetDiskID(device, node)
8132 result = lu.rpc.call_blockdev_create(node, device, device.size,
8133 instance.name, force_open, info)
8134 result.Raise("Can't create block device %s on"
8135 " node %s for instance %s" % (device, node, instance.name))
8136 if device.physical_id is None:
8137 device.physical_id = result.payload
8140 def _GenerateUniqueNames(lu, exts):
8141 """Generate a suitable LV name.
8143 This will generate a logical volume name for the given instance.
8148 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8149 results.append("%s%s" % (new_id, val))
8153 def _ComputeLDParams(disk_template, disk_params):
8154 """Computes Logical Disk parameters from Disk Template parameters.
8156 @type disk_template: string
8157 @param disk_template: disk template, one of L{constants.DISK_TEMPLATES}
8158 @type disk_params: dict
8159   @param disk_params: disk template parameters; dict(template_name -> parameters)
8161 @return: a list of dicts, one for each node of the disk hierarchy. Each dict
8162 contains the LD parameters of the node. The tree is flattened in-order.
8165 if disk_template not in constants.DISK_TEMPLATES:
8166 raise errors.ProgrammerError("Unknown disk template %s" % disk_template)
8169 dt_params = disk_params[disk_template]
8170 if disk_template == constants.DT_DRBD8:
8172 constants.LDP_RESYNC_RATE: dt_params[constants.DRBD_RESYNC_RATE],
8173 constants.LDP_BARRIERS: dt_params[constants.DRBD_DISK_BARRIERS],
8174 constants.LDP_NO_META_FLUSH: dt_params[constants.DRBD_META_BARRIERS],
8175 constants.LDP_DEFAULT_METAVG: dt_params[constants.DRBD_DEFAULT_METAVG],
8176 constants.LDP_DISK_CUSTOM: dt_params[constants.DRBD_DISK_CUSTOM],
8177 constants.LDP_NET_CUSTOM: dt_params[constants.DRBD_NET_CUSTOM],
8181 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_DRBD8],
8184 result.append(drbd_params)
8188 constants.LDP_STRIPES: dt_params[constants.DRBD_DATA_STRIPES],
8191 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8193 result.append(data_params)
8197 constants.LDP_STRIPES: dt_params[constants.DRBD_META_STRIPES],
8200 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8202 result.append(meta_params)
8204 elif (disk_template == constants.DT_FILE or
8205 disk_template == constants.DT_SHARED_FILE):
8206 result.append(constants.DISK_LD_DEFAULTS[constants.LD_FILE])
8208 elif disk_template == constants.DT_PLAIN:
8210 constants.LDP_STRIPES: dt_params[constants.LV_STRIPES],
8213 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8215 result.append(params)
8217 elif disk_template == constants.DT_BLOCK:
8218 result.append(constants.DISK_LD_DEFAULTS[constants.LD_BLOCKDEV])
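# For example, for disk_template == constants.DT_DRBD8 the result is a
# three-element list [drbd_params, data_params, meta_params], matching the
# in-order flattening of the disk tree (a DRBD8 device over a data LV and a
# meta LV), while constants.DT_PLAIN yields a single-element list with the
# LV parameters.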
8223 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8224 iv_name, p_minor, s_minor, drbd_params, data_params,
8226 """Generate a drbd8 device complete with its children.
8229 assert len(vgnames) == len(names) == 2
8230 port = lu.cfg.AllocatePort()
8231 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8233 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8234 logical_id=(vgnames[0], names[0]),
8236 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8237 logical_id=(vgnames[1], names[1]),
8239 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8240 logical_id=(primary, secondary, port,
8243 children=[dev_data, dev_meta],
8244 iv_name=iv_name, params=drbd_params)
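# The object tree built above looks roughly like this (names, port and minors
# are made up, sizes in MiB):
#   Disk(LD_DRBD8, size=10240,
#        logical_id=("node1", "node2", 11000, 0, 1, "<secret>"),
#        children=[Disk(LD_LV, 10240, logical_id=("xenvg", "<uuid>.disk0_data")),
#                  Disk(LD_LV, DRBD_META_SIZE,
#                       logical_id=("xenvg", "<uuid>.disk0_meta"))])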
8248 def _GenerateDiskTemplate(lu, template_name,
8249 instance_name, primary_node,
8250 secondary_nodes, disk_info,
8251 file_storage_dir, file_driver,
8252 base_index, feedback_fn, disk_params):
8253 """Generate the entire disk layout for a given template type.
8256   # TODO: compute space requirements
8258 vgname = lu.cfg.GetVGName()
8259 disk_count = len(disk_info)
8261 ld_params = _ComputeLDParams(template_name, disk_params)
8262 if template_name == constants.DT_DISKLESS:
8264 elif template_name == constants.DT_PLAIN:
8265 if len(secondary_nodes) != 0:
8266 raise errors.ProgrammerError("Wrong template configuration")
8268 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8269 for i in range(disk_count)])
8270 for idx, disk in enumerate(disk_info):
8271 disk_index = idx + base_index
8272 vg = disk.get(constants.IDISK_VG, vgname)
8273 feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
8274 disk_dev = objects.Disk(dev_type=constants.LD_LV,
8275 size=disk[constants.IDISK_SIZE],
8276 logical_id=(vg, names[idx]),
8277 iv_name="disk/%d" % disk_index,
8278 mode=disk[constants.IDISK_MODE],
8279 params=ld_params[0])
8280 disks.append(disk_dev)
8281 elif template_name == constants.DT_DRBD8:
8282 drbd_params, data_params, meta_params = ld_params
8283 if len(secondary_nodes) != 1:
8284 raise errors.ProgrammerError("Wrong template configuration")
8285 remote_node = secondary_nodes[0]
8286 minors = lu.cfg.AllocateDRBDMinor(
8287 [primary_node, remote_node] * len(disk_info), instance_name)
8290 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8291 for i in range(disk_count)]):
8292 names.append(lv_prefix + "_data")
8293 names.append(lv_prefix + "_meta")
8294 for idx, disk in enumerate(disk_info):
8295 disk_index = idx + base_index
8296 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8297 data_vg = disk.get(constants.IDISK_VG, vgname)
8298 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8299 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8300 disk[constants.IDISK_SIZE],
8302 names[idx * 2:idx * 2 + 2],
8303 "disk/%d" % disk_index,
8304 minors[idx * 2], minors[idx * 2 + 1],
8305 drbd_params, data_params, meta_params)
8306 disk_dev.mode = disk[constants.IDISK_MODE]
8307 disks.append(disk_dev)
8308 elif template_name == constants.DT_FILE:
8309 if len(secondary_nodes) != 0:
8310 raise errors.ProgrammerError("Wrong template configuration")
8312 opcodes.RequireFileStorage()
8314 for idx, disk in enumerate(disk_info):
8315 disk_index = idx + base_index
8316 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8317 size=disk[constants.IDISK_SIZE],
8318 iv_name="disk/%d" % disk_index,
8319 logical_id=(file_driver,
8320 "%s/disk%d" % (file_storage_dir,
8322 mode=disk[constants.IDISK_MODE],
8323 params=ld_params[0])
8324 disks.append(disk_dev)
8325 elif template_name == constants.DT_SHARED_FILE:
8326 if len(secondary_nodes) != 0:
8327 raise errors.ProgrammerError("Wrong template configuration")
8329 opcodes.RequireSharedFileStorage()
8331 for idx, disk in enumerate(disk_info):
8332 disk_index = idx + base_index
8333 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8334 size=disk[constants.IDISK_SIZE],
8335 iv_name="disk/%d" % disk_index,
8336 logical_id=(file_driver,
8337 "%s/disk%d" % (file_storage_dir,
8339 mode=disk[constants.IDISK_MODE],
8340 params=ld_params[0])
8341 disks.append(disk_dev)
8342 elif template_name == constants.DT_BLOCK:
8343 if len(secondary_nodes) != 0:
8344 raise errors.ProgrammerError("Wrong template configuration")
8346 for idx, disk in enumerate(disk_info):
8347 disk_index = idx + base_index
8348 disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
8349 size=disk[constants.IDISK_SIZE],
8350 logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
8351 disk[constants.IDISK_ADOPT]),
8352 iv_name="disk/%d" % disk_index,
8353 mode=disk[constants.IDISK_MODE],
8354 params=ld_params[0])
8355 disks.append(disk_dev)
8358 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
8362 def _GetInstanceInfoText(instance):
8363   """Compute the text that should be added to the disk's metadata.
8366 return "originstname+%s" % instance.name
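# For an instance named "inst1.example.com" this returns
# "originstname+inst1.example.com", which is attached to the created devices
# (represented as an LVM tag for LVM-based disks).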
8369 def _CalcEta(time_taken, written, total_size):
8370 """Calculates the ETA based on size written and total size.
8372 @param time_taken: The time taken so far
8373 @param written: amount written so far
8374 @param total_size: The total size of data to be written
8375 @return: The remaining time in seconds
8378 avg_time = time_taken / float(written)
8379 return (total_size - written) * avg_time
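# Worked example: if 512 MiB out of 2048 MiB were written in 60 seconds,
# avg_time is 60 / 512 seconds per MiB and the ETA is
# (2048 - 512) * 60 / 512 = 180 seconds.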
8382 def _WipeDisks(lu, instance):
8383 """Wipes instance disks.
8385 @type lu: L{LogicalUnit}
8386 @param lu: the logical unit on whose behalf we execute
8387 @type instance: L{objects.Instance}
8388 @param instance: the instance whose disks we should create
8389 @return: the success of the wipe
8392 node = instance.primary_node
8394 for device in instance.disks:
8395 lu.cfg.SetDiskID(device, node)
8397 logging.info("Pause sync of instance %s disks", instance.name)
8398 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
8400 for idx, success in enumerate(result.payload):
8402       logging.warn("pause-sync of instance %s for disk %d failed",
8406 for idx, device in enumerate(instance.disks):
8407     # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk, but
8408     # at most MAX_WIPE_CHUNK
8409 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8410 constants.MIN_WIPE_CHUNK_PERCENT)
8411 # we _must_ make this an int, otherwise rounding errors will
8413 wipe_chunk_size = int(wipe_chunk_size)
8415 lu.LogInfo("* Wiping disk %d", idx)
8416 logging.info("Wiping disk %d for instance %s, node %s using"
8417 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8422 start_time = time.time()
8424 while offset < size:
8425 wipe_size = min(wipe_chunk_size, size - offset)
8426 logging.debug("Wiping disk %d, offset %s, chunk %s",
8427 idx, offset, wipe_size)
8428 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
8429 result.Raise("Could not wipe disk %d at offset %d for size %d" %
8430 (idx, offset, wipe_size))
8433 if now - last_output >= 60:
8434 eta = _CalcEta(now - start_time, offset, size)
8435 lu.LogInfo(" - done: %.1f%% ETA: %s" %
8436 (offset / float(size) * 100, utils.FormatSeconds(eta)))
8439 logging.info("Resume sync of instance %s disks", instance.name)
8441 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
8443 for idx, success in enumerate(result.payload):
8445 lu.LogWarning("Resume sync of disk %d failed, please have a"
8446 " look at the status and troubleshoot the issue", idx)
8447       logging.warn("resume-sync of instance %s for disk %d failed",
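# Sketch of the chunk size computation above, assuming (illustratively)
# MIN_WIPE_CHUNK_PERCENT == 10 and MAX_WIPE_CHUNK == 1024 MiB:
#   a 2000 MiB disk is wiped in 200 MiB chunks (10% of its size), while a
#   102400 MiB disk is wiped in 1024 MiB chunks (capped at MAX_WIPE_CHUNK).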
8451 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8452 """Create all disks for an instance.
8454 This abstracts away some work from AddInstance.
8456 @type lu: L{LogicalUnit}
8457 @param lu: the logical unit on whose behalf we execute
8458 @type instance: L{objects.Instance}
8459 @param instance: the instance whose disks we should create
8461 @param to_skip: list of indices to skip
8462 @type target_node: string
8463 @param target_node: if passed, overrides the target node for creation
8465 @return: the success of the creation
8468 info = _GetInstanceInfoText(instance)
8469 if target_node is None:
8470 pnode = instance.primary_node
8471 all_nodes = instance.all_nodes
8476 if instance.disk_template in constants.DTS_FILEBASED:
8477 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8478 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8480 result.Raise("Failed to create directory '%s' on"
8481 " node %s" % (file_storage_dir, pnode))
8483 # Note: this needs to be kept in sync with adding of disks in
8484 # LUInstanceSetParams
8485 for idx, device in enumerate(instance.disks):
8486 if to_skip and idx in to_skip:
8488 logging.info("Creating volume %s for instance %s",
8489 device.iv_name, instance.name)
8491 for node in all_nodes:
8492 f_create = node == pnode
8493 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
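# For a DRBD instance with primary "node1" and secondary "node2" (names are
# illustrative), the loop above therefore issues, per disk:
#   _CreateBlockDev(lu, "node1", instance, device, True, info, True)
#   _CreateBlockDev(lu, "node2", instance, device, False, info, False)
# i.e. only the primary node forces creation and opens the device.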
8496 def _RemoveDisks(lu, instance, target_node=None):
8497 """Remove all disks for an instance.
8499 This abstracts away some work from `AddInstance()` and
8500 `RemoveInstance()`. Note that in case some of the devices couldn't
8501 be removed, the removal will continue with the other ones (compare
8502 with `_CreateDisks()`).
8504 @type lu: L{LogicalUnit}
8505 @param lu: the logical unit on whose behalf we execute
8506 @type instance: L{objects.Instance}
8507 @param instance: the instance whose disks we should remove
8508 @type target_node: string
8509 @param target_node: used to override the node on which to remove the disks
8511 @return: the success of the removal
8514 logging.info("Removing block devices for instance %s", instance.name)
8517 for device in instance.disks:
8519 edata = [(target_node, device)]
8521 edata = device.ComputeNodeTree(instance.primary_node)
8522 for node, disk in edata:
8523 lu.cfg.SetDiskID(disk, node)
8524 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
8526 lu.LogWarning("Could not remove block device %s on node %s,"
8527 " continuing anyway: %s", device.iv_name, node, msg)
8530 # if this is a DRBD disk, return its port to the pool
8531 if device.dev_type in constants.LDS_DRBD:
8532 tcp_port = device.logical_id[2]
8533 lu.cfg.AddTcpUdpPort(tcp_port)
8535 if instance.disk_template == constants.DT_FILE:
8536 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8540 tgt = instance.primary_node
8541 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
8543 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
8544 file_storage_dir, instance.primary_node, result.fail_msg)
8550 def _ComputeDiskSizePerVG(disk_template, disks):
8551 """Compute disk size requirements in the volume group
8554 def _compute(disks, payload):
8555 """Universal algorithm.
8560 vgs[disk[constants.IDISK_VG]] = \
8561               vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
8565 # Required free disk space as a function of disk and swap space
8567 constants.DT_DISKLESS: {},
8568 constants.DT_PLAIN: _compute(disks, 0),
8569 # 128 MB are added for drbd metadata for each disk
8570 constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
8571 constants.DT_FILE: {},
8572 constants.DT_SHARED_FILE: {},
8575 if disk_template not in req_size_dict:
8576 raise errors.ProgrammerError("Disk template '%s' size requirement"
8577 " is unknown" % disk_template)
8579 return req_size_dict[disk_template]
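# Worked example: for a DRBD instance with a 1024 MiB disk in VG "xenvg" and a
# 512 MiB disk in VG "othervg" (names are illustrative), and 128 MiB of DRBD
# metadata per disk, this returns
#   {"xenvg": 1024 + 128, "othervg": 512 + 128}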
8582 def _ComputeDiskSize(disk_template, disks):
8583   """Compute the total disk size requirement for the given disk template
8586 # Required free disk space as a function of disk and swap space
8588 constants.DT_DISKLESS: None,
8589 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
8590 # 128 MB are added for drbd metadata for each disk
8592 sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
8593 constants.DT_FILE: None,
8594 constants.DT_SHARED_FILE: 0,
8595 constants.DT_BLOCK: 0,
8598 if disk_template not in req_size_dict:
8599 raise errors.ProgrammerError("Disk template '%s' size requirement"
8600 " is unknown" % disk_template)
8602 return req_size_dict[disk_template]
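# Worked example: with two disks of 1024 MiB and 2048 MiB, DT_PLAIN requires
# 3072 MiB while DT_DRBD8 requires 3072 + 2 * 128 = 3328 MiB (128 MiB of DRBD
# metadata per disk).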
8605 def _FilterVmNodes(lu, nodenames):
8606 """Filters out non-vm_capable nodes from a list.
8608 @type lu: L{LogicalUnit}
8609 @param lu: the logical unit for which we check
8610 @type nodenames: list
8611 @param nodenames: the list of nodes on which we should check
8613 @return: the list of vm-capable nodes
8616 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
8617 return [name for name in nodenames if name not in vm_nodes]
8620 def _CheckHVParams(lu, nodenames, hvname, hvparams):
8621 """Hypervisor parameter validation.
8623   This function abstracts the hypervisor parameter validation to be
8624 used in both instance create and instance modify.
8626 @type lu: L{LogicalUnit}
8627 @param lu: the logical unit for which we check
8628 @type nodenames: list
8629 @param nodenames: the list of nodes on which we should check
8630 @type hvname: string
8631 @param hvname: the name of the hypervisor we should use
8632 @type hvparams: dict
8633 @param hvparams: the parameters which we need to check
8634 @raise errors.OpPrereqError: if the parameters are not valid
8637 nodenames = _FilterVmNodes(lu, nodenames)
8639 cluster = lu.cfg.GetClusterInfo()
8640 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
8642 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
8643 for node in nodenames:
8647 info.Raise("Hypervisor parameter validation failed on node %s" % node)
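# FillDict overlays the given parameters on the cluster-level defaults for the
# hypervisor, e.g. (parameter names and values are illustrative):
#   objects.FillDict({"kernel_path": "/boot/vmlinuz", "acpi": True},
#                    {"acpi": False})
#   -> {"kernel_path": "/boot/vmlinuz", "acpi": False}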
8650 def _CheckOSParams(lu, required, nodenames, osname, osparams):
8651 """OS parameters validation.
8653 @type lu: L{LogicalUnit}
8654 @param lu: the logical unit for which we check
8655 @type required: boolean
8656 @param required: whether the validation should fail if the OS is not
8658 @type nodenames: list
8659 @param nodenames: the list of nodes on which we should check
8660 @type osname: string
8661   @param osname: the name of the OS we should use
8662 @type osparams: dict
8663 @param osparams: the parameters which we need to check
8664 @raise errors.OpPrereqError: if the parameters are not valid
8667 nodenames = _FilterVmNodes(lu, nodenames)
8668 result = lu.rpc.call_os_validate(nodenames, required, osname,
8669 [constants.OS_VALIDATE_PARAMETERS],
8671 for node, nres in result.items():
8672 # we don't check for offline cases since this should be run only
8673 # against the master node and/or an instance's nodes
8674 nres.Raise("OS Parameters validation failed on node %s" % node)
8675 if not nres.payload:
8676 lu.LogInfo("OS %s not found on node %s, validation skipped",
8680 class LUInstanceCreate(LogicalUnit):
8681 """Create an instance.
8684 HPATH = "instance-add"
8685 HTYPE = constants.HTYPE_INSTANCE
8688 def CheckArguments(self):
8692 # do not require name_check to ease forward/backward compatibility
8694 if self.op.no_install and self.op.start:
8695 self.LogInfo("No-installation mode selected, disabling startup")
8696 self.op.start = False
8697 # validate/normalize the instance name
8698 self.op.instance_name = \
8699 netutils.Hostname.GetNormalizedName(self.op.instance_name)
8701 if self.op.ip_check and not self.op.name_check:
8702 # TODO: make the ip check more flexible and not depend on the name check
8703 raise errors.OpPrereqError("Cannot do IP address check without a name"
8704 " check", errors.ECODE_INVAL)
8706 # check nics' parameter names
8707 for nic in self.op.nics:
8708 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8710 # check disks. parameter names and consistent adopt/no-adopt strategy
8711 has_adopt = has_no_adopt = False
8712 for disk in self.op.disks:
8713 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8714 if constants.IDISK_ADOPT in disk:
8718 if has_adopt and has_no_adopt:
8719 raise errors.OpPrereqError("Either all disks are adopted or none is",
8722 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8723 raise errors.OpPrereqError("Disk adoption is not supported for the"
8724 " '%s' disk template" %
8725 self.op.disk_template,
8727 if self.op.iallocator is not None:
8728 raise errors.OpPrereqError("Disk adoption not allowed with an"
8729 " iallocator script", errors.ECODE_INVAL)
8730 if self.op.mode == constants.INSTANCE_IMPORT:
8731 raise errors.OpPrereqError("Disk adoption not allowed for"
8732 " instance import", errors.ECODE_INVAL)
8734 if self.op.disk_template in constants.DTS_MUST_ADOPT:
8735 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8736 " but no 'adopt' parameter given" %
8737 self.op.disk_template,
8740 self.adopt_disks = has_adopt
8742 # instance name verification
8743 if self.op.name_check:
8744 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8745 self.op.instance_name = self.hostname1.name
8746 # used in CheckPrereq for ip ping check
8747 self.check_ip = self.hostname1.ip
8749 self.check_ip = None
8751 # file storage checks
8752 if (self.op.file_driver and
8753 not self.op.file_driver in constants.FILE_DRIVER):
8754 raise errors.OpPrereqError("Invalid file driver name '%s'" %
8755 self.op.file_driver, errors.ECODE_INVAL)
8757 if self.op.disk_template == constants.DT_FILE:
8758 opcodes.RequireFileStorage()
8759 elif self.op.disk_template == constants.DT_SHARED_FILE:
8760 opcodes.RequireSharedFileStorage()
8762 ### Node/iallocator related checks
8763 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8765 if self.op.pnode is not None:
8766 if self.op.disk_template in constants.DTS_INT_MIRROR:
8767 if self.op.snode is None:
8768 raise errors.OpPrereqError("The networked disk templates need"
8769 " a mirror node", errors.ECODE_INVAL)
8771 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8773 self.op.snode = None
8775 self._cds = _GetClusterDomainSecret()
8777 if self.op.mode == constants.INSTANCE_IMPORT:
8778 # On import force_variant must be True, because if we forced it at
8779 # initial install, our only chance when importing it back is that it
8781 self.op.force_variant = True
8783 if self.op.no_install:
8784 self.LogInfo("No-installation mode has no effect during import")
8786 elif self.op.mode == constants.INSTANCE_CREATE:
8787 if self.op.os_type is None:
8788 raise errors.OpPrereqError("No guest OS specified",
8790 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
8791 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
8792 " installation" % self.op.os_type,
8794 if self.op.disk_template is None:
8795 raise errors.OpPrereqError("No disk template specified",
8798 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8799 # Check handshake to ensure both clusters have the same domain secret
8800 src_handshake = self.op.source_handshake
8801 if not src_handshake:
8802 raise errors.OpPrereqError("Missing source handshake",
8805 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
8808 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
8811 # Load and check source CA
8812 self.source_x509_ca_pem = self.op.source_x509_ca
8813 if not self.source_x509_ca_pem:
8814 raise errors.OpPrereqError("Missing source X509 CA",
8818 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
8820 except OpenSSL.crypto.Error, err:
8821 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
8822 (err, ), errors.ECODE_INVAL)
8824 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
8825 if errcode is not None:
8826 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
8829 self.source_x509_ca = cert
8831 src_instance_name = self.op.source_instance_name
8832 if not src_instance_name:
8833 raise errors.OpPrereqError("Missing source instance name",
8836 self.source_instance_name = \
8837 netutils.GetHostname(name=src_instance_name).name
8840 raise errors.OpPrereqError("Invalid instance creation mode %r" %
8841 self.op.mode, errors.ECODE_INVAL)
8843 def ExpandNames(self):
8844 """ExpandNames for CreateInstance.
8846 Figure out the right locks for instance creation.
8849 self.needed_locks = {}
8851 instance_name = self.op.instance_name
8852 # this is just a preventive check, but someone might still add this
8853 # instance in the meantime, and creation will fail at lock-add time
8854 if instance_name in self.cfg.GetInstanceList():
8855 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8856 instance_name, errors.ECODE_EXISTS)
8858 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
8860 if self.op.iallocator:
8861 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
8862 # specifying a group on instance creation and then selecting nodes from
8864 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8865 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
8867 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8868 nodelist = [self.op.pnode]
8869 if self.op.snode is not None:
8870 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8871 nodelist.append(self.op.snode)
8872 self.needed_locks[locking.LEVEL_NODE] = nodelist
8873 # Lock resources of instance's primary and secondary nodes (copy to
8874     # prevent accidental modification)
8875 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
8877 # in case of import lock the source node too
8878 if self.op.mode == constants.INSTANCE_IMPORT:
8879 src_node = self.op.src_node
8880 src_path = self.op.src_path
8882 if src_path is None:
8883 self.op.src_path = src_path = self.op.instance_name
8885 if src_node is None:
8886 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8887 self.op.src_node = None
8888 if os.path.isabs(src_path):
8889 raise errors.OpPrereqError("Importing an instance from a path"
8890 " requires a source node option",
8893 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8894 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8895 self.needed_locks[locking.LEVEL_NODE].append(src_node)
8896 if not os.path.isabs(src_path):
8897 self.op.src_path = src_path = \
8898 utils.PathJoin(constants.EXPORT_DIR, src_path)
8900 def _RunAllocator(self):
8901 """Run the allocator based on input opcode.
8904 nics = [n.ToDict() for n in self.nics]
8905 ial = IAllocator(self.cfg, self.rpc,
8906 mode=constants.IALLOCATOR_MODE_ALLOC,
8907 name=self.op.instance_name,
8908 disk_template=self.op.disk_template,
8911 vcpus=self.be_full[constants.BE_VCPUS],
8912 memory=self.be_full[constants.BE_MAXMEM],
8915 hypervisor=self.op.hypervisor,
8918 ial.Run(self.op.iallocator)
8921 raise errors.OpPrereqError("Can't compute nodes using"
8922 " iallocator '%s': %s" %
8923 (self.op.iallocator, ial.info),
8925 if len(ial.result) != ial.required_nodes:
8926 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8927 " of nodes (%s), required %s" %
8928 (self.op.iallocator, len(ial.result),
8929 ial.required_nodes), errors.ECODE_FAULT)
8930 self.op.pnode = ial.result[0]
8931 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8932 self.op.instance_name, self.op.iallocator,
8933 utils.CommaJoin(ial.result))
8934 if ial.required_nodes == 2:
8935 self.op.snode = ial.result[1]
8937 def BuildHooksEnv(self):
8940 This runs on master, primary and secondary nodes of the instance.
8944 "ADD_MODE": self.op.mode,
8946 if self.op.mode == constants.INSTANCE_IMPORT:
8947 env["SRC_NODE"] = self.op.src_node
8948 env["SRC_PATH"] = self.op.src_path
8949 env["SRC_IMAGES"] = self.src_images
8951 env.update(_BuildInstanceHookEnv(
8952 name=self.op.instance_name,
8953 primary_node=self.op.pnode,
8954 secondary_nodes=self.secondaries,
8955 status=self.op.start,
8956 os_type=self.op.os_type,
8957 minmem=self.be_full[constants.BE_MINMEM],
8958 maxmem=self.be_full[constants.BE_MAXMEM],
8959 vcpus=self.be_full[constants.BE_VCPUS],
8960 nics=_NICListToTuple(self, self.nics),
8961 disk_template=self.op.disk_template,
8962 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8963 for d in self.disks],
8966 hypervisor_name=self.op.hypervisor,
8972 def BuildHooksNodes(self):
8973 """Build hooks nodes.
8976 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8979 def _ReadExportInfo(self):
8980 """Reads the export information from disk.
8982 It will override the opcode source node and path with the actual
8983 information, if these two were not specified before.
8985 @return: the export information
8988 assert self.op.mode == constants.INSTANCE_IMPORT
8990 src_node = self.op.src_node
8991 src_path = self.op.src_path
8993 if src_node is None:
8994 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
8995 exp_list = self.rpc.call_export_list(locked_nodes)
8997 for node in exp_list:
8998 if exp_list[node].fail_msg:
9000 if src_path in exp_list[node].payload:
9002 self.op.src_node = src_node = node
9003 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
9007 raise errors.OpPrereqError("No export found for relative path %s" %
9008 src_path, errors.ECODE_INVAL)
9010 _CheckNodeOnline(self, src_node)
9011 result = self.rpc.call_export_info(src_node, src_path)
9012 result.Raise("No export or invalid export found in dir %s" % src_path)
9014 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9015 if not export_info.has_section(constants.INISECT_EXP):
9016 raise errors.ProgrammerError("Corrupted export config",
9017 errors.ECODE_ENVIRON)
9019 ei_version = export_info.get(constants.INISECT_EXP, "version")
9020 if (int(ei_version) != constants.EXPORT_VERSION):
9021 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9022 (ei_version, constants.EXPORT_VERSION),
9023 errors.ECODE_ENVIRON)
9026 def _ReadExportParams(self, einfo):
9027 """Use export parameters as defaults.
9029 In case the opcode doesn't specify (as in override) some instance
9030 parameters, then try to use them from the export information, if
9034 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9036 if self.op.disk_template is None:
9037 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9038 self.op.disk_template = einfo.get(constants.INISECT_INS,
9040 if self.op.disk_template not in constants.DISK_TEMPLATES:
9041 raise errors.OpPrereqError("Disk template specified in configuration"
9042 " file is not one of the allowed values:"
9043 " %s" % " ".join(constants.DISK_TEMPLATES))
9045 raise errors.OpPrereqError("No disk template specified and the export"
9046 " is missing the disk_template information",
9049 if not self.op.disks:
9051 # TODO: import the disk iv_name too
9052 for idx in range(constants.MAX_DISKS):
9053 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9054 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9055 disks.append({constants.IDISK_SIZE: disk_sz})
9056 self.op.disks = disks
9057 if not disks and self.op.disk_template != constants.DT_DISKLESS:
9058 raise errors.OpPrereqError("No disk info specified and the export"
9059 " is missing the disk information",
9062 if not self.op.nics:
9064 for idx in range(constants.MAX_NICS):
9065 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9067 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9068 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9075 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9076 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9078 if (self.op.hypervisor is None and
9079 einfo.has_option(constants.INISECT_INS, "hypervisor")):
9080 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9082 if einfo.has_section(constants.INISECT_HYP):
9083 # use the export parameters but do not override the ones
9084 # specified by the user
9085 for name, value in einfo.items(constants.INISECT_HYP):
9086 if name not in self.op.hvparams:
9087 self.op.hvparams[name] = value
9089 if einfo.has_section(constants.INISECT_BEP):
9090 # use the parameters, without overriding
9091 for name, value in einfo.items(constants.INISECT_BEP):
9092 if name not in self.op.beparams:
9093 self.op.beparams[name] = value
9094 # Compatibility for the old "memory" be param
9095 if name == constants.BE_MEMORY:
9096 if constants.BE_MAXMEM not in self.op.beparams:
9097 self.op.beparams[constants.BE_MAXMEM] = value
9098 if constants.BE_MINMEM not in self.op.beparams:
9099 self.op.beparams[constants.BE_MINMEM] = value
9101 # try to read the parameters old style, from the main section
9102 for name in constants.BES_PARAMETERS:
9103 if (name not in self.op.beparams and
9104 einfo.has_option(constants.INISECT_INS, name)):
9105 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9107 if einfo.has_section(constants.INISECT_OSP):
9108 # use the parameters, without overriding
9109 for name, value in einfo.items(constants.INISECT_OSP):
9110 if name not in self.op.osparams:
9111 self.op.osparams[name] = value
9113 def _RevertToDefaults(self, cluster):
9114 """Revert the instance parameters to the default values.
9118 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9119 for name in self.op.hvparams.keys():
9120 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9121 del self.op.hvparams[name]
9123 be_defs = cluster.SimpleFillBE({})
9124 for name in self.op.beparams.keys():
9125 if name in be_defs and be_defs[name] == self.op.beparams[name]:
9126 del self.op.beparams[name]
9128 nic_defs = cluster.SimpleFillNIC({})
9129 for nic in self.op.nics:
9130 for name in constants.NICS_PARAMETERS:
9131 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9134 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9135 for name in self.op.osparams.keys():
9136 if name in os_defs and os_defs[name] == self.op.osparams[name]:
9137 del self.op.osparams[name]
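  # For example (values are illustrative): if the cluster default for
  # BE_MAXMEM is 128 and the opcode specifies beparams == {"maxmem": 128,
  # "vcpus": 4}, the "maxmem" entry is dropped so the instance keeps tracking
  # the cluster default, while "vcpus" remains an instance-level override.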
9139 def _CalculateFileStorageDir(self):
9140 """Calculate final instance file storage dir.
9143 # file storage dir calculation/check
9144 self.instance_file_storage_dir = None
9145 if self.op.disk_template in constants.DTS_FILEBASED:
9146 # build the full file storage dir path
9149 if self.op.disk_template == constants.DT_SHARED_FILE:
9150 get_fsd_fn = self.cfg.GetSharedFileStorageDir
9152 get_fsd_fn = self.cfg.GetFileStorageDir
9154 cfg_storagedir = get_fsd_fn()
9155 if not cfg_storagedir:
9156 raise errors.OpPrereqError("Cluster file storage dir not defined")
9157 joinargs.append(cfg_storagedir)
9159 if self.op.file_storage_dir is not None:
9160 joinargs.append(self.op.file_storage_dir)
9162 joinargs.append(self.op.instance_name)
9164 # pylint: disable=W0142
9165 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
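  # Illustrative result: with a cluster file storage dir of
  # "/srv/ganeti/file-storage", op.file_storage_dir == "mydir" and an instance
  # named "inst1.example.com", the computed directory is
  #   /srv/ganeti/file-storage/mydir/inst1.example.com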
9167 def CheckPrereq(self):
9168 """Check prerequisites.
9171 self._CalculateFileStorageDir()
9173 if self.op.mode == constants.INSTANCE_IMPORT:
9174 export_info = self._ReadExportInfo()
9175 self._ReadExportParams(export_info)
9177 if (not self.cfg.GetVGName() and
9178 self.op.disk_template not in constants.DTS_NOT_LVM):
9179 raise errors.OpPrereqError("Cluster does not support lvm-based"
9180 " instances", errors.ECODE_STATE)
9182 if (self.op.hypervisor is None or
9183 self.op.hypervisor == constants.VALUE_AUTO):
9184 self.op.hypervisor = self.cfg.GetHypervisorType()
9186 cluster = self.cfg.GetClusterInfo()
9187 enabled_hvs = cluster.enabled_hypervisors
9188 if self.op.hypervisor not in enabled_hvs:
9189 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9190 " cluster (%s)" % (self.op.hypervisor,
9191 ",".join(enabled_hvs)),
9194 # Check tag validity
9195 for tag in self.op.tags:
9196 objects.TaggableObject.ValidateTag(tag)
9198 # check hypervisor parameter syntax (locally)
9199 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9200 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9202 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9203 hv_type.CheckParameterSyntax(filled_hvp)
9204 self.hv_full = filled_hvp
9205 # check that we don't specify global parameters on an instance
9206 _CheckGlobalHvParams(self.op.hvparams)
9208 # fill and remember the beparams dict
9209 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9210 for param, value in self.op.beparams.iteritems():
9211 if value == constants.VALUE_AUTO:
9212 self.op.beparams[param] = default_beparams[param]
9213 objects.UpgradeBeParams(self.op.beparams)
9214 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9215 self.be_full = cluster.SimpleFillBE(self.op.beparams)
9217 # build os parameters
9218 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9220 # now that hvp/bep are in final format, let's reset to defaults,
9222 if self.op.identify_defaults:
9223 self._RevertToDefaults(cluster)
9227 for idx, nic in enumerate(self.op.nics):
9228 nic_mode_req = nic.get(constants.INIC_MODE, None)
9229 nic_mode = nic_mode_req
9230 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9231 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9233 # in routed mode, for the first nic, the default ip is 'auto'
9234 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9235 default_ip_mode = constants.VALUE_AUTO
9237 default_ip_mode = constants.VALUE_NONE
9239 # ip validity checks
9240 ip = nic.get(constants.INIC_IP, default_ip_mode)
9241 if ip is None or ip.lower() == constants.VALUE_NONE:
9243 elif ip.lower() == constants.VALUE_AUTO:
9244 if not self.op.name_check:
9245 raise errors.OpPrereqError("IP address set to auto but name checks"
9246 " have been skipped",
9248 nic_ip = self.hostname1.ip
9250 if not netutils.IPAddress.IsValid(ip):
9251 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9255 # TODO: check the ip address for uniqueness
9256 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9257 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9260 # MAC address verification
9261 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9262 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9263 mac = utils.NormalizeAndValidateMac(mac)
9266 self.cfg.ReserveMAC(mac, self.proc.GetECId())
9267 except errors.ReservationError:
9268 raise errors.OpPrereqError("MAC address %s already in use"
9269 " in cluster" % mac,
9270 errors.ECODE_NOTUNIQUE)
9272 # Build nic parameters
9273 link = nic.get(constants.INIC_LINK, None)
9274 if link == constants.VALUE_AUTO:
9275 link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9278 nicparams[constants.NIC_MODE] = nic_mode
9280 nicparams[constants.NIC_LINK] = link
9282 check_params = cluster.SimpleFillNIC(nicparams)
9283 objects.NIC.CheckParameterSyntax(check_params)
9284 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9286 # disk checks/pre-build
9287 default_vg = self.cfg.GetVGName()
9289 for disk in self.op.disks:
9290 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9291 if mode not in constants.DISK_ACCESS_SET:
9292 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9293 mode, errors.ECODE_INVAL)
9294 size = disk.get(constants.IDISK_SIZE, None)
9296 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9299 except (TypeError, ValueError):
9300 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9303 data_vg = disk.get(constants.IDISK_VG, default_vg)
9304 new_disk = {
9305 constants.IDISK_SIZE: size,
9306 constants.IDISK_MODE: mode,
9307 constants.IDISK_VG: data_vg,
9308 }
9309 if constants.IDISK_METAVG in disk:
9310 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9311 if constants.IDISK_ADOPT in disk:
9312 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9313 self.disks.append(new_disk)
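# Illustrative shape of one entry in self.disks (example values only):
# {constants.IDISK_SIZE: 10240, constants.IDISK_MODE: "rw",
#  constants.IDISK_VG: "xenvg"}; the METAVG/ADOPT keys are only present when
# the caller supplied them.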
9315 if self.op.mode == constants.INSTANCE_IMPORT:
9316 disk_images = []
9317 for idx in range(len(self.disks)):
9318 option = "disk%d_dump" % idx
9319 if export_info.has_option(constants.INISECT_INS, option):
9320 # FIXME: are the old os-es, disk sizes, etc. useful?
9321 export_name = export_info.get(constants.INISECT_INS, option)
9322 image = utils.PathJoin(self.op.src_path, export_name)
9323 disk_images.append(image)
9324 else:
9325 disk_images.append(False)
9327 self.src_images = disk_images
9329 old_name = export_info.get(constants.INISECT_INS, "name")
9330 if self.op.instance_name == old_name:
9331 for idx, nic in enumerate(self.nics):
9332 if nic.mac == constants.VALUE_AUTO:
9333 nic_mac_ini = "nic%d_mac" % idx
9334 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9336 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9338 # ip ping checks (we use the same ip that was resolved in ExpandNames)
9339 if self.op.ip_check:
9340 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9341 raise errors.OpPrereqError("IP %s of instance %s already in use" %
9342 (self.check_ip, self.op.instance_name),
9343 errors.ECODE_NOTUNIQUE)
9345 #### mac address generation
9346 # By generating here the mac address both the allocator and the hooks get
9347 # the real final mac address rather than the 'auto' or 'generate' value.
9348 # There is a race condition between the generation and the instance object
9349 # creation, which means that we know the mac is valid now, but we're not
9350 # sure it will be when we actually add the instance. If things go bad
9351 # adding the instance will abort because of a duplicate mac, and the
9352 # creation job will fail.
9353 for nic in self.nics:
9354 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9355 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
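# The generated address uses the cluster-wide MAC prefix; illustratively, a
# prefix of "aa:00:00" would yield something like "aa:00:00:7f:01:02". As the
# comment above explains, uniqueness is only re-checked when the instance is
# actually added to the configuration.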
9359 if self.op.iallocator is not None:
9360 self._RunAllocator()
9362 # Release all unneeded node locks
9363 _ReleaseLocks(self, locking.LEVEL_NODE,
9364 keep=filter(None, [self.op.pnode, self.op.snode,
9365 self.op.src_node]))
9367 #### node related checks
9369 # check primary node
9370 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9371 assert self.pnode is not None, \
9372 "Cannot retrieve locked node %s" % self.op.pnode
9373 if pnode.offline:
9374 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9375 pnode.name, errors.ECODE_STATE)
9376 if pnode.drained:
9377 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9378 pnode.name, errors.ECODE_STATE)
9379 if not pnode.vm_capable:
9380 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9381 " '%s'" % pnode.name, errors.ECODE_STATE)
9383 self.secondaries = []
9385 # mirror node verification
9386 if self.op.disk_template in constants.DTS_INT_MIRROR:
9387 if self.op.snode == pnode.name:
9388 raise errors.OpPrereqError("The secondary node cannot be the"
9389 " primary node", errors.ECODE_INVAL)
9390 _CheckNodeOnline(self, self.op.snode)
9391 _CheckNodeNotDrained(self, self.op.snode)
9392 _CheckNodeVmCapable(self, self.op.snode)
9393 self.secondaries.append(self.op.snode)
9395 snode = self.cfg.GetNodeInfo(self.op.snode)
9396 if pnode.group != snode.group:
9397 self.LogWarning("The primary and secondary nodes are in two"
9398 " different node groups; the disk parameters"
9399 " from the first disk's node group will be"
9402 nodenames = [pnode.name] + self.secondaries
9404 # disk parameters (not customizable at instance or node level)
9405 # just use the primary node parameters, ignoring the secondary.
9406 self.diskparams = self.cfg.GetNodeGroup(pnode.group).diskparams
9408 if not self.adopt_disks:
9409 # Check lv size requirements, if not adopting
9410 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9411 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9413 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9414 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9415 disk[constants.IDISK_ADOPT])
9416 for disk in self.disks])
9417 if len(all_lvs) != len(self.disks):
9418 raise errors.OpPrereqError("Duplicate volume names given for adoption",
9420 for lv_name in all_lvs:
9421 try:
9422 # FIXME: lv_name here is "vg/lv" need to ensure that other calls
9423 # to ReserveLV uses the same syntax
9424 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9425 except errors.ReservationError:
9426 raise errors.OpPrereqError("LV named %s used by another instance" %
9427 lv_name, errors.ECODE_NOTUNIQUE)
9429 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9430 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9432 node_lvs = self.rpc.call_lv_list([pnode.name],
9433 vg_names.payload.keys())[pnode.name]
9434 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9435 node_lvs = node_lvs.payload
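# Assumption based on the usage below: the lv_list payload maps "vg/lv" names
# to tuples in which index 0 holds the LV size (in MiB) and index 2 a boolean
# "online/in use" flag, e.g. {"xenvg/disk0": (10240.0, ..., False)}.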
9437 delta = all_lvs.difference(node_lvs.keys())
9438 if delta:
9439 raise errors.OpPrereqError("Missing logical volume(s): %s" %
9440 utils.CommaJoin(delta),
9442 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9443 if online_lvs:
9444 raise errors.OpPrereqError("Online logical volumes found, cannot"
9445 " adopt: %s" % utils.CommaJoin(online_lvs),
9447 # update the size of disk based on what is found
9448 for dsk in self.disks:
9449 dsk[constants.IDISK_SIZE] = \
9450 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9451 dsk[constants.IDISK_ADOPT])][0]))
9453 elif self.op.disk_template == constants.DT_BLOCK:
9454 # Normalize and de-duplicate device paths
9455 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9456 for disk in self.disks])
9457 if len(all_disks) != len(self.disks):
9458 raise errors.OpPrereqError("Duplicate disk names given for adoption",
9460 baddisks = [d for d in all_disks
9461 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9462 if baddisks:
9463 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9464 " cannot be adopted" %
9465 (", ".join(baddisks),
9466 constants.ADOPTABLE_BLOCKDEV_ROOT),
9469 node_disks = self.rpc.call_bdev_sizes([pnode.name],
9470 list(all_disks))[pnode.name]
9471 node_disks.Raise("Cannot get block device information from node %s" %
9473 node_disks = node_disks.payload
9474 delta = all_disks.difference(node_disks.keys())
9475 if delta:
9476 raise errors.OpPrereqError("Missing block device(s): %s" %
9477 utils.CommaJoin(delta),
9479 for dsk in self.disks:
9480 dsk[constants.IDISK_SIZE] = \
9481 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
9483 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
9485 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
9486 # check OS parameters (remotely)
9487 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
9489 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
9491 # memory check on primary node
9492 #TODO(dynmem): use MINMEM for checking
9494 _CheckNodeFreeMemory(self, self.pnode.name,
9495 "creating instance %s" % self.op.instance_name,
9496 self.be_full[constants.BE_MAXMEM],
9499 self.dry_run_result = list(nodenames)
9501 def Exec(self, feedback_fn):
9502 """Create and add the instance to the cluster.
9505 instance = self.op.instance_name
9506 pnode_name = self.pnode.name
9508 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
9509 self.owned_locks(locking.LEVEL_NODE)), \
9510 "Node locks differ from node resource locks"
9512 ht_kind = self.op.hypervisor
9513 if ht_kind in constants.HTS_REQ_PORT:
9514 network_port = self.cfg.AllocatePort()
9515 else:
9516 network_port = None
9518 disks = _GenerateDiskTemplate(self,
9519 self.op.disk_template,
9520 instance, pnode_name,
9523 self.instance_file_storage_dir,
9524 self.op.file_driver,
9529 iobj = objects.Instance(name=instance, os=self.op.os_type,
9530 primary_node=pnode_name,
9531 nics=self.nics, disks=disks,
9532 disk_template=self.op.disk_template,
9533 admin_state=constants.ADMINST_DOWN,
9534 network_port=network_port,
9535 beparams=self.op.beparams,
9536 hvparams=self.op.hvparams,
9537 hypervisor=self.op.hypervisor,
9538 osparams=self.op.osparams,
9542 for tag in self.op.tags:
9543 iobj.AddTag(tag)
9545 if self.adopt_disks:
9546 if self.op.disk_template == constants.DT_PLAIN:
9547 # rename LVs to the newly-generated names; we need to construct
9548 # 'fake' LV disks with the old data, plus the new unique_id
9549 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
9550 rename_to = []
9551 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
9552 rename_to.append(t_dsk.logical_id)
9553 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
9554 self.cfg.SetDiskID(t_dsk, pnode_name)
9555 result = self.rpc.call_blockdev_rename(pnode_name,
9556 zip(tmp_disks, rename_to))
9557 result.Raise("Failed to rename adoped LVs")
9559 feedback_fn("* creating instance disks...")
9560 try:
9561 _CreateDisks(self, iobj)
9562 except errors.OpExecError:
9563 self.LogWarning("Device creation failed, reverting...")
9564 try:
9565 _RemoveDisks(self, iobj)
9566 finally:
9567 self.cfg.ReleaseDRBDMinors(instance)
9568 raise
9570 feedback_fn("adding instance %s to cluster config" % instance)
9572 self.cfg.AddInstance(iobj, self.proc.GetECId())
9574 # Declare that we don't want to remove the instance lock anymore, as we've
9575 # added the instance to the config
9576 del self.remove_locks[locking.LEVEL_INSTANCE]
9578 if self.op.mode == constants.INSTANCE_IMPORT:
9579 # Release unused nodes
9580 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
9581 else:
9582 # Release all nodes
9583 _ReleaseLocks(self, locking.LEVEL_NODE)
9585 disk_abort = False
9586 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
9587 feedback_fn("* wiping instance disks...")
9588 try:
9589 _WipeDisks(self, iobj)
9590 except errors.OpExecError, err:
9591 logging.exception("Wiping disks failed")
9592 self.LogWarning("Wiping instance disks failed (%s)", err)
9593 disk_abort = True
9595 if disk_abort:
9596 # Something is already wrong with the disks, don't do anything else
9597 pass
9598 elif self.op.wait_for_sync:
9599 disk_abort = not _WaitForSync(self, iobj)
9600 elif iobj.disk_template in constants.DTS_INT_MIRROR:
9601 # make sure the disks are not degraded (still sync-ing is ok)
9602 feedback_fn("* checking mirrors status")
9603 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
9604 else:
9605 disk_abort = False
9607 if disk_abort:
9608 _RemoveDisks(self, iobj)
9609 self.cfg.RemoveInstance(iobj.name)
9610 # Make sure the instance lock gets removed
9611 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
9612 raise errors.OpExecError("There are some degraded disks for"
9615 # Release all node resource locks
9616 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
9618 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
9619 if self.op.mode == constants.INSTANCE_CREATE:
9620 if not self.op.no_install:
9621 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
9622 not self.op.wait_for_sync)
9624 feedback_fn("* pausing disk sync to install instance OS")
9625 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9627 for idx, success in enumerate(result.payload):
9629 logging.warn("pause-sync of instance %s for disk %d failed",
9632 feedback_fn("* running the instance OS create scripts...")
9633 # FIXME: pass debug option from opcode to backend
9634 os_add_result = \
9635 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
9636 self.op.debug_level)
9638 feedback_fn("* resuming disk sync")
9639 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9641 for idx, success in enumerate(result.payload):
9643 logging.warn("resume-sync of instance %s for disk %d failed",
9646 os_add_result.Raise("Could not add os for instance %s"
9647 " on node %s" % (instance, pnode_name))
9649 elif self.op.mode == constants.INSTANCE_IMPORT:
9650 feedback_fn("* running the instance OS import scripts...")
9652 transfers = []
9654 for idx, image in enumerate(self.src_images):
9655 if not image:
9656 continue
9658 # FIXME: pass debug option from opcode to backend
9659 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
9660 constants.IEIO_FILE, (image, ),
9661 constants.IEIO_SCRIPT,
9662 (iobj.disks[idx], idx),
9664 transfers.append(dt)
9666 import_result = \
9667 masterd.instance.TransferInstanceData(self, feedback_fn,
9668 self.op.src_node, pnode_name,
9669 self.pnode.secondary_ip,
9671 if not compat.all(import_result):
9672 self.LogWarning("Some disks for instance %s on node %s were not"
9673 " imported successfully" % (instance, pnode_name))
9675 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9676 feedback_fn("* preparing remote import...")
9677 # The source cluster will stop the instance before attempting to make a
9678 # connection. In some cases stopping an instance can take a long time,
9679 # hence the shutdown timeout is added to the connection timeout.
9680 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
9681 self.op.source_shutdown_timeout)
9682 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
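# Worked example (illustrative numbers only): with a 60s RIE_CONNECT_TIMEOUT
# and a 120s source_shutdown_timeout, the remote import tolerates up to 180s
# before giving up on the incoming connection.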
9684 assert iobj.primary_node == self.pnode.name
9685 disk_results = \
9686 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
9687 self.source_x509_ca,
9688 self._cds, timeouts)
9689 if not compat.all(disk_results):
9690 # TODO: Should the instance still be started, even if some disks
9691 # failed to import (valid for local imports, too)?
9692 self.LogWarning("Some disks for instance %s on node %s were not"
9693 " imported successfully" % (instance, pnode_name))
9695 # Run rename script on newly imported instance
9696 assert iobj.name == instance
9697 feedback_fn("Running rename script for %s" % instance)
9698 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
9699 self.source_instance_name,
9700 self.op.debug_level)
9702 self.LogWarning("Failed to run rename script for %s on node"
9703 " %s: %s" % (instance, pnode_name, result.fail_msg))
9705 else:
9706 # also checked in the prereq part
9707 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
9710 assert not self.owned_locks(locking.LEVEL_NODE_RES)
9712 if self.op.start:
9713 iobj.admin_state = constants.ADMINST_UP
9714 self.cfg.Update(iobj, feedback_fn)
9715 logging.info("Starting instance %s on node %s", instance, pnode_name)
9716 feedback_fn("* starting instance...")
9717 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
9719 result.Raise("Could not start instance")
9721 return list(iobj.all_nodes)
9724 class LUInstanceConsole(NoHooksLU):
9725 """Connect to an instance's console.
9727 This is somewhat special in that it returns the command line that
9728 you need to run on the master node in order to connect to the
9729 console.
9734 def ExpandNames(self):
9735 self.share_locks = _ShareAll()
9736 self._ExpandAndLockInstance()
9738 def CheckPrereq(self):
9739 """Check prerequisites.
9741 This checks that the instance is in the cluster.
9744 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9745 assert self.instance is not None, \
9746 "Cannot retrieve locked instance %s" % self.op.instance_name
9747 _CheckNodeOnline(self, self.instance.primary_node)
9749 def Exec(self, feedback_fn):
9750 """Connect to the console of an instance
9753 instance = self.instance
9754 node = instance.primary_node
9756 node_insts = self.rpc.call_instance_list([node],
9757 [instance.hypervisor])[node]
9758 node_insts.Raise("Can't get node information from %s" % node)
9760 if instance.name not in node_insts.payload:
9761 if instance.admin_state == constants.ADMINST_UP:
9762 state = constants.INSTST_ERRORDOWN
9763 elif instance.admin_state == constants.ADMINST_DOWN:
9764 state = constants.INSTST_ADMINDOWN
9765 else:
9766 state = constants.INSTST_ADMINOFFLINE
9767 raise errors.OpExecError("Instance %s is not running (state %s)" %
9768 (instance.name, state))
9770 logging.debug("Connecting to console of %s on %s", instance.name, node)
9772 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
9775 def _GetInstanceConsole(cluster, instance):
9776 """Returns console information for an instance.
9778 @type cluster: L{objects.Cluster}
9779 @type instance: L{objects.Instance}
9783 hyper = hypervisor.GetHypervisor(instance.hypervisor)
9784 # beparams and hvparams are passed separately, to avoid editing the
9785 # instance and then saving the defaults in the instance itself.
9786 hvparams = cluster.FillHV(instance)
9787 beparams = cluster.FillBE(instance)
9788 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
9790 assert console.instance == instance.name
9791 assert console.Validate()
9793 return console.ToDict()
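# Note (illustrative, not exhaustive): the resulting dictionary describes how
# to reach the console, typically the console kind plus connection details
# such as the host to contact and the command to execute there, depending on
# the hypervisor.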
9796 class LUInstanceReplaceDisks(LogicalUnit):
9797 """Replace the disks of an instance.
9800 HPATH = "mirrors-replace"
9801 HTYPE = constants.HTYPE_INSTANCE
9804 def CheckArguments(self):
9805 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
9808 def ExpandNames(self):
9809 self._ExpandAndLockInstance()
9811 assert locking.LEVEL_NODE not in self.needed_locks
9812 assert locking.LEVEL_NODE_RES not in self.needed_locks
9813 assert locking.LEVEL_NODEGROUP not in self.needed_locks
9815 assert self.op.iallocator is None or self.op.remote_node is None, \
9816 "Conflicting options"
9818 if self.op.remote_node is not None:
9819 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9821 # Warning: do not remove the locking of the new secondary here
9822 # unless DRBD8.AddChildren is changed to work in parallel;
9823 # currently it doesn't since parallel invocations of
9824 # FindUnusedMinor will conflict
9825 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
9826 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
9828 self.needed_locks[locking.LEVEL_NODE] = []
9829 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9831 if self.op.iallocator is not None:
9832 # iallocator will select a new node in the same group
9833 self.needed_locks[locking.LEVEL_NODEGROUP] = []
9835 self.needed_locks[locking.LEVEL_NODE_RES] = []
9837 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
9838 self.op.iallocator, self.op.remote_node,
9839 self.op.disks, False, self.op.early_release)
9841 self.tasklets = [self.replacer]
9843 def DeclareLocks(self, level):
9844 if level == locking.LEVEL_NODEGROUP:
9845 assert self.op.remote_node is None
9846 assert self.op.iallocator is not None
9847 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
9849 self.share_locks[locking.LEVEL_NODEGROUP] = 1
9850 # Lock all groups used by instance optimistically; this requires going
9851 # via the node before it's locked, requiring verification later on
9852 self.needed_locks[locking.LEVEL_NODEGROUP] = \
9853 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9855 elif level == locking.LEVEL_NODE:
9856 if self.op.iallocator is not None:
9857 assert self.op.remote_node is None
9858 assert not self.needed_locks[locking.LEVEL_NODE]
9860 # Lock member nodes of all locked groups
9861 self.needed_locks[locking.LEVEL_NODE] = [node_name
9862 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
9863 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
9865 self._LockInstancesNodes()
9866 elif level == locking.LEVEL_NODE_RES:
9868 self.needed_locks[locking.LEVEL_NODE_RES] = \
9869 self.needed_locks[locking.LEVEL_NODE]
9871 def BuildHooksEnv(self):
9874 This runs on the master, the primary and all the secondaries.
9877 instance = self.replacer.instance
9879 "MODE": self.op.mode,
9880 "NEW_SECONDARY": self.op.remote_node,
9881 "OLD_SECONDARY": instance.secondary_nodes[0],
9883 env.update(_BuildInstanceHookEnvByObject(self, instance))
9886 def BuildHooksNodes(self):
9887 """Build hooks nodes.
9890 instance = self.replacer.instance
9892 self.cfg.GetMasterNode(),
9893 instance.primary_node,
9895 if self.op.remote_node is not None:
9896 nl.append(self.op.remote_node)
9899 def CheckPrereq(self):
9900 """Check prerequisites.
9903 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
9904 self.op.iallocator is None)
9906 # Verify if node group locks are still correct
9907 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
9909 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
9911 return LogicalUnit.CheckPrereq(self)
9914 class TLReplaceDisks(Tasklet):
9915 """Replaces disks for an instance.
9917 Note: Locking is not within the scope of this class.
9920 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9921 disks, delay_iallocator, early_release):
9922 """Initializes this class.
9925 Tasklet.__init__(self, lu)
9928 self.instance_name = instance_name
9930 self.iallocator_name = iallocator_name
9931 self.remote_node = remote_node
9933 self.delay_iallocator = delay_iallocator
9934 self.early_release = early_release
9937 self.instance = None
9938 self.new_node = None
9939 self.target_node = None
9940 self.other_node = None
9941 self.remote_node_info = None
9942 self.node_secondary_ip = None
9945 def CheckArguments(mode, remote_node, iallocator):
9946 """Helper function for users of this class.
9949 # check for valid parameter combination
9950 if mode == constants.REPLACE_DISK_CHG:
9951 if remote_node is None and iallocator is None:
9952 raise errors.OpPrereqError("When changing the secondary either an"
9953 " iallocator script must be used or the"
9954 " new node given", errors.ECODE_INVAL)
9956 if remote_node is not None and iallocator is not None:
9957 raise errors.OpPrereqError("Give either the iallocator or the new"
9958 " secondary, not both", errors.ECODE_INVAL)
9960 elif remote_node is not None or iallocator is not None:
9961 # Not replacing the secondary
9962 raise errors.OpPrereqError("The iallocator and new node options can"
9963 " only be used when changing the"
9964 " secondary node", errors.ECODE_INVAL)
9967 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9968 """Compute a new secondary node using an IAllocator.
9971 ial = IAllocator(lu.cfg, lu.rpc,
9972 mode=constants.IALLOCATOR_MODE_RELOC,
9973 name=instance_name,
9974 relocate_from=list(relocate_from))
9976 ial.Run(iallocator_name)
9979 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9980 " %s" % (iallocator_name, ial.info),
9983 if len(ial.result) != ial.required_nodes:
9984 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9985 " of nodes (%s), required %s" %
9986 (iallocator_name,
9987 len(ial.result), ial.required_nodes),
9990 remote_node_name = ial.result[0]
9992 lu.LogInfo("Selected new secondary for instance '%s': %s",
9993 instance_name, remote_node_name)
9995 return remote_node_name
9997 def _FindFaultyDisks(self, node_name):
9998 """Wrapper for L{_FindFaultyInstanceDisks}.
10001 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10004 def _CheckDisksActivated(self, instance):
10005 """Checks if the instance disks are activated.
10007 @param instance: The instance to check disks
10008 @return: True if they are activated, False otherwise
10011 nodes = instance.all_nodes
10013 for idx, dev in enumerate(instance.disks):
10015 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10016 self.cfg.SetDiskID(dev, node)
10018 result = self.rpc.call_blockdev_find(node, dev)
10020 if result.offline:
10021 continue
10022 elif result.fail_msg or not result.payload:
10023 return False
10025 return True
10027 def CheckPrereq(self):
10028 """Check prerequisites.
10030 This checks that the instance is in the cluster.
10033 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10034 assert instance is not None, \
10035 "Cannot retrieve locked instance %s" % self.instance_name
10037 if instance.disk_template != constants.DT_DRBD8:
10038 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10039 " instances", errors.ECODE_INVAL)
10041 if len(instance.secondary_nodes) != 1:
10042 raise errors.OpPrereqError("The instance has a strange layout,"
10043 " expected one secondary but found %d" %
10044 len(instance.secondary_nodes),
10045 errors.ECODE_FAULT)
10047 if not self.delay_iallocator:
10048 self._CheckPrereq2()
10050 def _CheckPrereq2(self):
10051 """Check prerequisites, second part.
10053 This function should always be part of CheckPrereq. It was separated and is
10054 now called from Exec because during node evacuation iallocator was only
10055 called with an unmodified cluster model, not taking planned changes into
10056 account.
10059 instance = self.instance
10060 secondary_node = instance.secondary_nodes[0]
10062 if self.iallocator_name is None:
10063 remote_node = self.remote_node
10065 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10066 instance.name, instance.secondary_nodes)
10068 if remote_node is None:
10069 self.remote_node_info = None
10071 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10072 "Remote node '%s' is not locked" % remote_node
10074 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10075 assert self.remote_node_info is not None, \
10076 "Cannot retrieve locked node %s" % remote_node
10078 if remote_node == self.instance.primary_node:
10079 raise errors.OpPrereqError("The specified node is the primary node of"
10080 " the instance", errors.ECODE_INVAL)
10082 if remote_node == secondary_node:
10083 raise errors.OpPrereqError("The specified node is already the"
10084 " secondary node of the instance",
10085 errors.ECODE_INVAL)
10087 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10088 constants.REPLACE_DISK_CHG):
10089 raise errors.OpPrereqError("Cannot specify disks to be replaced",
10090 errors.ECODE_INVAL)
10092 if self.mode == constants.REPLACE_DISK_AUTO:
10093 if not self._CheckDisksActivated(instance):
10094 raise errors.OpPrereqError("Please run activate-disks on instance %s"
10095 " first" % self.instance_name,
10096 errors.ECODE_STATE)
10097 faulty_primary = self._FindFaultyDisks(instance.primary_node)
10098 faulty_secondary = self._FindFaultyDisks(secondary_node)
10100 if faulty_primary and faulty_secondary:
10101 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10102 " one node and can not be repaired"
10103 " automatically" % self.instance_name,
10104 errors.ECODE_STATE)
10106 if faulty_primary:
10107 self.disks = faulty_primary
10108 self.target_node = instance.primary_node
10109 self.other_node = secondary_node
10110 check_nodes = [self.target_node, self.other_node]
10111 elif faulty_secondary:
10112 self.disks = faulty_secondary
10113 self.target_node = secondary_node
10114 self.other_node = instance.primary_node
10115 check_nodes = [self.target_node, self.other_node]
10121 # Non-automatic modes
10122 if self.mode == constants.REPLACE_DISK_PRI:
10123 self.target_node = instance.primary_node
10124 self.other_node = secondary_node
10125 check_nodes = [self.target_node, self.other_node]
10127 elif self.mode == constants.REPLACE_DISK_SEC:
10128 self.target_node = secondary_node
10129 self.other_node = instance.primary_node
10130 check_nodes = [self.target_node, self.other_node]
10132 elif self.mode == constants.REPLACE_DISK_CHG:
10133 self.new_node = remote_node
10134 self.other_node = instance.primary_node
10135 self.target_node = secondary_node
10136 check_nodes = [self.new_node, self.other_node]
10138 _CheckNodeNotDrained(self.lu, remote_node)
10139 _CheckNodeVmCapable(self.lu, remote_node)
10141 old_node_info = self.cfg.GetNodeInfo(secondary_node)
10142 assert old_node_info is not None
10143 if old_node_info.offline and not self.early_release:
10144 # doesn't make sense to delay the release
10145 self.early_release = True
10146 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10147 " early-release mode", secondary_node)
10150 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10153 # If not specified all disks should be replaced
10154 if not self.disks:
10155 self.disks = range(len(self.instance.disks))
10157 # TODO: compute disk parameters
10158 primary_node_info = self.cfg.GetNodeInfo(instance.primary_node)
10159 secondary_node_info = self.cfg.GetNodeInfo(secondary_node)
10160 if primary_node_info.group != secondary_node_info.group:
10161 self.lu.LogInfo("The instance primary and secondary nodes are in two"
10162 " different node groups; the disk parameters of the"
10163 " primary node's group will be applied.")
10165 self.diskparams = self.cfg.GetNodeGroup(primary_node_info.group).diskparams
10167 for node in check_nodes:
10168 _CheckNodeOnline(self.lu, node)
10170 touched_nodes = frozenset(node_name for node_name in [self.new_node,
10173 if node_name is not None)
10175 # Release unneeded node and node resource locks
10176 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10177 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10179 # Release any owned node group
10180 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10181 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10183 # Check whether disks are valid
10184 for disk_idx in self.disks:
10185 instance.FindDisk(disk_idx)
10187 # Get secondary node IP addresses
10188 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10189 in self.cfg.GetMultiNodeInfo(touched_nodes))
10191 def Exec(self, feedback_fn):
10192 """Execute disk replacement.
10194 This dispatches the disk replacement to the appropriate handler.
10197 if self.delay_iallocator:
10198 self._CheckPrereq2()
10201 # Verify owned locks before starting operation
10202 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10203 assert set(owned_nodes) == set(self.node_secondary_ip), \
10204 ("Incorrect node locks, owning %s, expected %s" %
10205 (owned_nodes, self.node_secondary_ip.keys()))
10206 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10207 self.lu.owned_locks(locking.LEVEL_NODE_RES))
10209 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10210 assert list(owned_instances) == [self.instance_name], \
10211 "Instance '%s' not locked" % self.instance_name
10213 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10214 "Should not own any node group lock at this point"
10217 feedback_fn("No disks need replacement")
10220 feedback_fn("Replacing disk(s) %s for %s" %
10221 (utils.CommaJoin(self.disks), self.instance.name))
10223 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10225 # Activate the instance disks if we're replacing them on a down instance
10226 if activate_disks:
10227 _StartInstanceDisks(self.lu, self.instance, True)
10229 try:
10230 # Should we replace the secondary node?
10231 if self.new_node is not None:
10232 fn = self._ExecDrbd8Secondary
10234 fn = self._ExecDrbd8DiskOnly
10236 result = fn(feedback_fn)
10237 finally:
10238 # Deactivate the instance disks if we're replacing them on a
10239 # down instance
10240 if activate_disks:
10241 _SafeShutdownInstanceDisks(self.lu, self.instance)
10243 assert not self.lu.owned_locks(locking.LEVEL_NODE)
10246 # Verify owned locks
10247 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10248 nodes = frozenset(self.node_secondary_ip)
10249 assert ((self.early_release and not owned_nodes) or
10250 (not self.early_release and not (set(owned_nodes) - nodes))), \
10251 ("Not owning the correct locks, early_release=%s, owned=%r,"
10252 " nodes=%r" % (self.early_release, owned_nodes, nodes))
10256 def _CheckVolumeGroup(self, nodes):
10257 self.lu.LogInfo("Checking volume groups")
10259 vgname = self.cfg.GetVGName()
10261 # Make sure volume group exists on all involved nodes
10262 results = self.rpc.call_vg_list(nodes)
10264 raise errors.OpExecError("Can't list volume groups on the nodes")
10267 res = results[node]
10268 res.Raise("Error checking node %s" % node)
10269 if vgname not in res.payload:
10270 raise errors.OpExecError("Volume group '%s' not found on node %s" %
10273 def _CheckDisksExistence(self, nodes):
10274 # Check disk existence
10275 for idx, dev in enumerate(self.instance.disks):
10276 if idx not in self.disks:
10280 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10281 self.cfg.SetDiskID(dev, node)
10283 result = self.rpc.call_blockdev_find(node, dev)
10285 msg = result.fail_msg
10286 if msg or not result.payload:
10288 msg = "disk not found"
10289 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10292 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10293 for idx, dev in enumerate(self.instance.disks):
10294 if idx not in self.disks:
10297 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10300 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
10302 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10303 " replace disks for instance %s" %
10304 (node_name, self.instance.name))
10306 def _CreateNewStorage(self, node_name):
10307 """Create new storage on the primary or secondary node.
10309 This is only used for same-node replaces, not for changing the
10310 secondary node, hence we don't want to modify the existing disk.
10315 for idx, dev in enumerate(self.instance.disks):
10316 if idx not in self.disks:
10319 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10321 self.cfg.SetDiskID(dev, node_name)
10323 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10324 names = _GenerateUniqueNames(self.lu, lv_names)
10326 _, data_p, meta_p = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
10328 vg_data = dev.children[0].logical_id[0]
10329 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10330 logical_id=(vg_data, names[0]), params=data_p)
10331 vg_meta = dev.children[1].logical_id[0]
10332 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10333 logical_id=(vg_meta, names[1]), params=meta_p)
10335 new_lvs = [lv_data, lv_meta]
10336 old_lvs = [child.Copy() for child in dev.children]
10337 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
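# iv_names thus maps each DRBD device name to its old and new LV children,
# illustratively "disk/0" -> (drbd_dev, [old_data_lv, old_meta_lv],
# [new_data_lv, new_meta_lv]); the detach/rename/attach steps below rely on
# this layout.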
10339 # we pass force_create=True to force the LVM creation
10340 for new_lv in new_lvs:
10341 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
10342 _GetInstanceInfoText(self.instance), False)
10346 def _CheckDevices(self, node_name, iv_names):
10347 for name, (dev, _, _) in iv_names.iteritems():
10348 self.cfg.SetDiskID(dev, node_name)
10350 result = self.rpc.call_blockdev_find(node_name, dev)
10352 msg = result.fail_msg
10353 if msg or not result.payload:
10355 msg = "disk not found"
10356 raise errors.OpExecError("Can't find DRBD device %s: %s" %
10359 if result.payload.is_degraded:
10360 raise errors.OpExecError("DRBD device %s is degraded!" % name)
10362 def _RemoveOldStorage(self, node_name, iv_names):
10363 for name, (_, old_lvs, _) in iv_names.iteritems():
10364 self.lu.LogInfo("Remove logical volumes for %s" % name)
10366 for lv in old_lvs:
10367 self.cfg.SetDiskID(lv, node_name)
10369 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10371 self.lu.LogWarning("Can't remove old LV: %s" % msg,
10372 hint="remove unused LVs manually")
10374 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10375 """Replace a disk on the primary or secondary for DRBD 8.
10377 The algorithm for replace is quite complicated:
10379 1. for each disk to be replaced:
10381 1. create new LVs on the target node with unique names
10382 1. detach old LVs from the drbd device
10383 1. rename old LVs to name_replaced.<time_t>
10384 1. rename new LVs to old LVs
10385 1. attach the new LVs (with the old names now) to the drbd device
10387 1. wait for sync across all devices
10389 1. for each modified disk:
10391 1. remove old LVs (which have the name name_replaced.<time_t>)
10393 Failures are not very well handled.
10395 """
10396 steps_total = 6
10398 # Step: check device activation
10399 self.lu.LogStep(1, steps_total, "Check device existence")
10400 self._CheckDisksExistence([self.other_node, self.target_node])
10401 self._CheckVolumeGroup([self.target_node, self.other_node])
10403 # Step: check other node consistency
10404 self.lu.LogStep(2, steps_total, "Check peer consistency")
10405 self._CheckDisksConsistency(self.other_node,
10406 self.other_node == self.instance.primary_node,
10409 # Step: create new storage
10410 self.lu.LogStep(3, steps_total, "Allocate new storage")
10411 iv_names = self._CreateNewStorage(self.target_node)
10413 # Step: for each lv, detach+rename*2+attach
10414 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10415 for dev, old_lvs, new_lvs in iv_names.itervalues():
10416 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10418 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10420 result.Raise("Can't detach drbd from local storage on node"
10421 " %s for device %s" % (self.target_node, dev.iv_name))
10423 #cfg.Update(instance)
10425 # ok, we created the new LVs, so now we know we have the needed
10426 # storage; as such, we proceed on the target node to rename
10427 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
10428 # using the assumption that logical_id == physical_id (which in
10429 # turn is the unique_id on that node)
10431 # FIXME(iustin): use a better name for the replaced LVs
10432 temp_suffix = int(time.time())
10433 ren_fn = lambda d, suff: (d.physical_id[0],
10434 d.physical_id[1] + "_replaced-%s" % suff)
10436 # Build the rename list based on what LVs exist on the node
10437 rename_old_to_new = []
10438 for to_ren in old_lvs:
10439 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
10440 if not result.fail_msg and result.payload:
10442 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
10444 self.lu.LogInfo("Renaming the old LVs on the target node")
10445 result = self.rpc.call_blockdev_rename(self.target_node,
10447 result.Raise("Can't rename old LVs on node %s" % self.target_node)
10449 # Now we rename the new LVs to the old LVs
10450 self.lu.LogInfo("Renaming the new LVs on the target node")
10451 rename_new_to_old = [(new, old.physical_id)
10452 for old, new in zip(old_lvs, new_lvs)]
10453 result = self.rpc.call_blockdev_rename(self.target_node,
10455 result.Raise("Can't rename new LVs on node %s" % self.target_node)
10457 # Intermediate steps of in memory modifications
10458 for old, new in zip(old_lvs, new_lvs):
10459 new.logical_id = old.logical_id
10460 self.cfg.SetDiskID(new, self.target_node)
10462 # We need to modify old_lvs so that removal later removes the
10463 # right LVs, not the newly added ones; note that old_lvs is a
10465 for disk in old_lvs:
10466 disk.logical_id = ren_fn(disk, temp_suffix)
10467 self.cfg.SetDiskID(disk, self.target_node)
10469 # Now that the new lvs have the old name, we can add them to the device
10470 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
10471 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
10473 msg = result.fail_msg
10475 for new_lv in new_lvs:
10476 msg2 = self.rpc.call_blockdev_remove(self.target_node,
10479 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
10480 hint=("cleanup manually the unused logical"
10482 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
10484 cstep = itertools.count(5)
10486 if self.early_release:
10487 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10488 self._RemoveOldStorage(self.target_node, iv_names)
10489 # TODO: Check if releasing locks early still makes sense
10490 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10492 # Release all resource locks except those used by the instance
10493 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10494 keep=self.node_secondary_ip.keys())
10496 # Release all node locks while waiting for sync
10497 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10499 # TODO: Can the instance lock be downgraded here? Take the optional disk
10500 # shutdown in the caller into consideration.
10503 # This can fail as the old devices are degraded and _WaitForSync
10504 # does a combined result over all disks, so we don't check its return value
10505 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10506 _WaitForSync(self.lu, self.instance)
10508 # Check all devices manually
10509 self._CheckDevices(self.instance.primary_node, iv_names)
10511 # Step: remove old storage
10512 if not self.early_release:
10513 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10514 self._RemoveOldStorage(self.target_node, iv_names)
10516 def _ExecDrbd8Secondary(self, feedback_fn):
10517 """Replace the secondary node for DRBD 8.
10519 The algorithm for replace is quite complicated:
10520 - for all disks of the instance:
10521 - create new LVs on the new node with same names
10522 - shutdown the drbd device on the old secondary
10523 - disconnect the drbd network on the primary
10524 - create the drbd device on the new secondary
10525 - network attach the drbd on the primary, using an artifice:
10526 the drbd code for Attach() will connect to the network if it
10527 finds a device which is connected to the good local disks but
10528 not network enabled
10529 - wait for sync across all devices
10530 - remove all disks from the old secondary
10532 Failures are not very well handled.
10534 """
10535 steps_total = 6
10537 pnode = self.instance.primary_node
10539 # Step: check device activation
10540 self.lu.LogStep(1, steps_total, "Check device existence")
10541 self._CheckDisksExistence([self.instance.primary_node])
10542 self._CheckVolumeGroup([self.instance.primary_node])
10544 # Step: check other node consistency
10545 self.lu.LogStep(2, steps_total, "Check peer consistency")
10546 self._CheckDisksConsistency(self.instance.primary_node, True, True)
10548 # Step: create new storage
10549 self.lu.LogStep(3, steps_total, "Allocate new storage")
10550 for idx, dev in enumerate(self.instance.disks):
10551 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
10552 (self.new_node, idx))
10553 # we pass force_create=True to force LVM creation
10554 for new_lv in dev.children:
10555 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
10556 _GetInstanceInfoText(self.instance), False)
10558 # Step 4: drbd minors and drbd setup changes
10559 # after this, we must manually remove the drbd minors on both the
10560 # error and the success paths
10561 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10562 minors = self.cfg.AllocateDRBDMinor([self.new_node
10563 for dev in self.instance.disks],
10564 self.instance.name)
10565 logging.debug("Allocated minors %r", minors)
10568 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
10569 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
10570 (self.new_node, idx))
10571 # create new devices on new_node; note that we create two IDs:
10572 # one without port, so the drbd will be activated without
10573 # networking information on the new node at this stage, and one
10574 # with network, for the latter activation in step 4
10575 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
10576 if self.instance.primary_node == o_node1:
10577 p_minor = o_minor1
10578 else:
10579 assert self.instance.primary_node == o_node2, "Three-node instance?"
10580 p_minor = o_minor2
10582 new_alone_id = (self.instance.primary_node, self.new_node, None,
10583 p_minor, new_minor, o_secret)
10584 new_net_id = (self.instance.primary_node, self.new_node, o_port,
10585 p_minor, new_minor, o_secret)
10587 iv_names[idx] = (dev, dev.children, new_net_id)
10588 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
10590 drbd_params, _, _ = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
10591 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
10592 logical_id=new_alone_id,
10593 children=dev.children,
10595 params=drbd_params)
10597 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
10598 _GetInstanceInfoText(self.instance), False)
10599 except errors.GenericError:
10600 self.cfg.ReleaseDRBDMinors(self.instance.name)
10603 # We have new devices, shutdown the drbd on the old secondary
10604 for idx, dev in enumerate(self.instance.disks):
10605 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
10606 self.cfg.SetDiskID(dev, self.target_node)
10607 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
10609 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
10610 "node: %s" % (idx, msg),
10611 hint=("Please cleanup this device manually as"
10612 " soon as possible"))
10614 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
10615 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
10616 self.instance.disks)[pnode]
10618 msg = result.fail_msg
10620 # detaches didn't succeed (unlikely)
10621 self.cfg.ReleaseDRBDMinors(self.instance.name)
10622 raise errors.OpExecError("Can't detach the disks from the network on"
10623 " old node: %s" % (msg,))
10625 # if we managed to detach at least one, we update all the disks of
10626 # the instance to point to the new secondary
10627 self.lu.LogInfo("Updating instance configuration")
10628 for dev, _, new_logical_id in iv_names.itervalues():
10629 dev.logical_id = new_logical_id
10630 self.cfg.SetDiskID(dev, self.instance.primary_node)
10632 self.cfg.Update(self.instance, feedback_fn)
10634 # Release all node locks (the configuration has been updated)
10635 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10637 # and now perform the drbd attach
10638 self.lu.LogInfo("Attaching primary drbds to new secondary"
10639 " (standalone => connected)")
10640 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
10642 self.node_secondary_ip,
10643 self.instance.disks,
10644 self.instance.name,
10646 for to_node, to_result in result.items():
10647 msg = to_result.fail_msg
10649 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
10651 hint=("please do a gnt-instance info to see the"
10652 " status of disks"))
10654 cstep = itertools.count(5)
10656 if self.early_release:
10657 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10658 self._RemoveOldStorage(self.target_node, iv_names)
10659 # TODO: Check if releasing locks early still makes sense
10660 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10662 # Release all resource locks except those used by the instance
10663 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10664 keep=self.node_secondary_ip.keys())
10666 # TODO: Can the instance lock be downgraded here? Take the optional disk
10667 # shutdown in the caller into consideration.
10670 # This can fail as the old devices are degraded and _WaitForSync
10671 # does a combined result over all disks, so we don't check its return value
10672 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10673 _WaitForSync(self.lu, self.instance)
10675 # Check all devices manually
10676 self._CheckDevices(self.instance.primary_node, iv_names)
10678 # Step: remove old storage
10679 if not self.early_release:
10680 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10681 self._RemoveOldStorage(self.target_node, iv_names)
10684 class LURepairNodeStorage(NoHooksLU):
10685 """Repairs the volume group on a node.
10690 def CheckArguments(self):
10691 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10693 storage_type = self.op.storage_type
10695 if (constants.SO_FIX_CONSISTENCY not in
10696 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
10697 raise errors.OpPrereqError("Storage units of type '%s' can not be"
10698 " repaired" % storage_type,
10699 errors.ECODE_INVAL)
10701 def ExpandNames(self):
10702 self.needed_locks = {
10703 locking.LEVEL_NODE: [self.op.node_name],
10706 def _CheckFaultyDisks(self, instance, node_name):
10707 """Ensure faulty disks abort the opcode or at least warn."""
10709 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
10711 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
10712 " node '%s'" % (instance.name, node_name),
10713 errors.ECODE_STATE)
10714 except errors.OpPrereqError, err:
10715 if self.op.ignore_consistency:
10716 self.proc.LogWarning(str(err.args[0]))
10717 else:
10718 raise
10720 def CheckPrereq(self):
10721 """Check prerequisites.
10724 # Check whether any instance on this node has faulty disks
10725 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
10726 if inst.admin_state != constants.ADMINST_UP:
10727 continue
10728 check_nodes = set(inst.all_nodes)
10729 check_nodes.discard(self.op.node_name)
10730 for inst_node_name in check_nodes:
10731 self._CheckFaultyDisks(inst, inst_node_name)
10733 def Exec(self, feedback_fn):
10734 feedback_fn("Repairing storage unit '%s' on %s ..." %
10735 (self.op.name, self.op.node_name))
10737 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
10738 result = self.rpc.call_storage_execute(self.op.node_name,
10739 self.op.storage_type, st_args,
10741 constants.SO_FIX_CONSISTENCY)
10742 result.Raise("Failed to repair storage unit '%s' on %s" %
10743 (self.op.name, self.op.node_name))
10746 class LUNodeEvacuate(NoHooksLU):
10747 """Evacuates instances off a list of nodes.
10752 _MODE2IALLOCATOR = {
10753 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
10754 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
10755 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
10757 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
10758 assert (frozenset(_MODE2IALLOCATOR.values()) ==
10759 constants.IALLOCATOR_NEVAC_MODES)
10761 def CheckArguments(self):
10762 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10764 def ExpandNames(self):
10765 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10767 if self.op.remote_node is not None:
10768 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10769 assert self.op.remote_node
10771 if self.op.remote_node == self.op.node_name:
10772 raise errors.OpPrereqError("Can not use evacuated node as a new"
10773 " secondary node", errors.ECODE_INVAL)
10775 if self.op.mode != constants.NODE_EVAC_SEC:
10776 raise errors.OpPrereqError("Without the use of an iallocator only"
10777 " secondary instances can be evacuated",
10778 errors.ECODE_INVAL)
10781 self.share_locks = _ShareAll()
10782 self.needed_locks = {
10783 locking.LEVEL_INSTANCE: [],
10784 locking.LEVEL_NODEGROUP: [],
10785 locking.LEVEL_NODE: [],
10788 # Determine nodes (via group) optimistically, needs verification once locks
10789 # have been acquired
10790 self.lock_nodes = self._DetermineNodes()
10792 def _DetermineNodes(self):
10793 """Gets the list of nodes to operate on.
10796 if self.op.remote_node is None:
10797 # Iallocator will choose any node(s) in the same group
10798 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
10800 group_nodes = frozenset([self.op.remote_node])
10802 # Determine nodes to be locked
10803 return set([self.op.node_name]) | group_nodes
10805 def _DetermineInstances(self):
10806 """Builds list of instances to operate on.
10809 assert self.op.mode in constants.NODE_EVAC_MODES
10811 if self.op.mode == constants.NODE_EVAC_PRI:
10812 # Primary instances only
10813 inst_fn = _GetNodePrimaryInstances
10814 assert self.op.remote_node is None, \
10815 "Evacuating primary instances requires iallocator"
10816 elif self.op.mode == constants.NODE_EVAC_SEC:
10817 # Secondary instances only
10818 inst_fn = _GetNodeSecondaryInstances
10821 assert self.op.mode == constants.NODE_EVAC_ALL
10822 inst_fn = _GetNodeInstances
10823 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
10825 raise errors.OpPrereqError("Due to an issue with the iallocator"
10826 " interface it is not possible to evacuate"
10827 " all instances at once; specify explicitly"
10828 " whether to evacuate primary or secondary"
10830 errors.ECODE_INVAL)
10832 return inst_fn(self.cfg, self.op.node_name)
10834 def DeclareLocks(self, level):
10835 if level == locking.LEVEL_INSTANCE:
10836 # Lock instances optimistically, needs verification once node and group
10837 # locks have been acquired
10838 self.needed_locks[locking.LEVEL_INSTANCE] = \
10839 set(i.name for i in self._DetermineInstances())
10841 elif level == locking.LEVEL_NODEGROUP:
10842 # Lock node groups for all potential target nodes optimistically, needs
10843 # verification once nodes have been acquired
10844 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10845 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
10847 elif level == locking.LEVEL_NODE:
10848 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
10850 def CheckPrereq(self):
10852 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
10853 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
10854 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10856 need_nodes = self._DetermineNodes()
10858 if not owned_nodes.issuperset(need_nodes):
10859 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
10860 " locks were acquired, current nodes are"
10861 " are '%s', used to be '%s'; retry the"
10863 (self.op.node_name,
10864 utils.CommaJoin(need_nodes),
10865 utils.CommaJoin(owned_nodes)),
10866 errors.ECODE_STATE)
10868 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
10869 if owned_groups != wanted_groups:
10870 raise errors.OpExecError("Node groups changed since locks were acquired,"
10871 " current groups are '%s', used to be '%s';"
10872 " retry the operation" %
10873 (utils.CommaJoin(wanted_groups),
10874 utils.CommaJoin(owned_groups)))
10876 # Determine affected instances
10877 self.instances = self._DetermineInstances()
10878 self.instance_names = [i.name for i in self.instances]
10880 if set(self.instance_names) != owned_instances:
10881 raise errors.OpExecError("Instances on node '%s' changed since locks"
10882 " were acquired, current instances are '%s',"
10883 " used to be '%s'; retry the operation" %
10884 (self.op.node_name,
10885 utils.CommaJoin(self.instance_names),
10886 utils.CommaJoin(owned_instances)))
10888 if self.instance_names:
10889 self.LogInfo("Evacuating instances from node '%s': %s",
10890 self.op.node_name,
10891 utils.CommaJoin(utils.NiceSort(self.instance_names)))
10892 else:
10893 self.LogInfo("No instances to evacuate from node '%s'",
10894 self.op.node_name)
10896 if self.op.remote_node is not None:
10897 for i in self.instances:
10898 if i.primary_node == self.op.remote_node:
10899 raise errors.OpPrereqError("Node %s is the primary node of"
10900 " instance %s, cannot use it as"
10902 (self.op.remote_node, i.name),
10903 errors.ECODE_INVAL)
10905 def Exec(self, feedback_fn):
10906 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10908 if not self.instance_names:
10909 # No instances to evacuate
10910 jobs = []
10912 elif self.op.iallocator is not None:
10913 # TODO: Implement relocation to other group
10914 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10915 evac_mode=self._MODE2IALLOCATOR[self.op.mode],
10916 instances=list(self.instance_names))
10918 ial.Run(self.op.iallocator)
10920 if not ial.success:
10921 raise errors.OpPrereqError("Can't compute node evacuation using"
10922 " iallocator '%s': %s" %
10923 (self.op.iallocator, ial.info),
10924 errors.ECODE_NORES)
10926 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10928 elif self.op.remote_node is not None:
10929 assert self.op.mode == constants.NODE_EVAC_SEC
10930 jobs = [
10931 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
10932 remote_node=self.op.remote_node,
10934 mode=constants.REPLACE_DISK_CHG,
10935 early_release=self.op.early_release)]
10936 for instance_name in self.instance_names]
10940 raise errors.ProgrammerError("No iallocator or remote node")
10942 return ResultWithJobs(jobs)
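# The value handed to ResultWithJobs is a list of job definitions, each being
# a list of opcodes; illustratively, evacuating two secondary instances onto
# an explicit remote node produces two single-opcode jobs, one
# OpInstanceReplaceDisks per instance.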
10945 def _SetOpEarlyRelease(early_release, op):
10946 """Sets C{early_release} flag on opcodes if available.
10948 """
10949 try:
10950 op.early_release = early_release
10951 except AttributeError:
10952 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
10954 return op
10957 def _NodeEvacDest(use_nodes, group, nodes):
10958 """Returns group or nodes depending on caller's choice.
10960 """
10961 if use_nodes:
10962 return utils.CommaJoin(nodes)
10963 else:
10964 return group
10967 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
10968 """Unpacks the result of change-group and node-evacuate iallocator requests.
10970 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
10971 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
10973 @type lu: L{LogicalUnit}
10974 @param lu: Logical unit instance
10975 @type alloc_result: tuple/list
10976 @param alloc_result: Result from iallocator
10977 @type early_release: bool
10978 @param early_release: Whether to release locks early if possible
10979 @type use_nodes: bool
10980 @param use_nodes: Whether to display node names instead of groups
10983 (moved, failed, jobs) = alloc_result
10985 if failed:
10986 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
10987 for (name, reason) in failed)
10988 lu.LogWarning("Unable to evacuate instances %s", failreason)
10989 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
10992 lu.LogInfo("Instances to be moved: %s",
10993 utils.CommaJoin("%s (to %s)" %
10994 (name, _NodeEvacDest(use_nodes, group, nodes))
10995 for (name, group, nodes) in moved))
10997 return [map(compat.partial(_SetOpEarlyRelease, early_release),
10998 map(opcodes.OpCode.LoadOpCode, ops))
10999 for ops in jobs]
11002 class LUInstanceGrowDisk(LogicalUnit):
11003 """Grow a disk of an instance.
11006 HPATH = "disk-grow"
11007 HTYPE = constants.HTYPE_INSTANCE
11010 def ExpandNames(self):
11011 self._ExpandAndLockInstance()
11012 self.needed_locks[locking.LEVEL_NODE] = []
11013 self.needed_locks[locking.LEVEL_NODE_RES] = []
11014 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11016 def DeclareLocks(self, level):
11017 if level == locking.LEVEL_NODE:
11018 self._LockInstancesNodes()
11019 elif level == locking.LEVEL_NODE_RES:
11021 self.needed_locks[locking.LEVEL_NODE_RES] = \
11022 self.needed_locks[locking.LEVEL_NODE][:]
11024 def BuildHooksEnv(self):
11025 """Build hooks env.
11027 This runs on the master, the primary and all the secondaries.
11031 "DISK": self.op.disk,
11032 "AMOUNT": self.op.amount,
11034 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11037 def BuildHooksNodes(self):
11038 """Build hooks nodes.
11041 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11044 def CheckPrereq(self):
11045 """Check prerequisites.
11047 This checks that the instance is in the cluster.
11050 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11051 assert instance is not None, \
11052 "Cannot retrieve locked instance %s" % self.op.instance_name
11053 nodenames = list(instance.all_nodes)
11054 for node in nodenames:
11055 _CheckNodeOnline(self, node)
11057 self.instance = instance
11059 if instance.disk_template not in constants.DTS_GROWABLE:
11060 raise errors.OpPrereqError("Instance's disk layout does not support"
11061 " growing", errors.ECODE_INVAL)
11063 self.disk = instance.FindDisk(self.op.disk)
11065 if instance.disk_template not in (constants.DT_FILE,
11066 constants.DT_SHARED_FILE):
11067 # TODO: check the free disk space for file, when that feature will be
11069 _CheckNodesFreeDiskPerVG(self, nodenames,
11070 self.disk.ComputeGrowth(self.op.amount))
11072 def Exec(self, feedback_fn):
11073 """Execute disk grow.
11076 instance = self.instance
11079 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11080 assert (self.owned_locks(locking.LEVEL_NODE) ==
11081 self.owned_locks(locking.LEVEL_NODE_RES))
11083 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
11085 raise errors.OpExecError("Cannot activate block device to grow")
11087 feedback_fn("Growing disk %s of instance '%s' by %s" %
11088 (self.op.disk, instance.name,
11089 utils.FormatUnit(self.op.amount, "h")))
11091 # First run all grow ops in dry-run mode
11092 for node in instance.all_nodes:
11093 self.cfg.SetDiskID(disk, node)
11094 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
11095 result.Raise("Grow request failed to node %s" % node)
11097 # We know that (as far as we can test) operations across different
11098 # nodes will succeed, time to run it for real
11099 for node in instance.all_nodes:
11100 self.cfg.SetDiskID(disk, node)
11101 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
11102 result.Raise("Grow request failed to node %s" % node)
11104 # TODO: Rewrite code to work properly
11105 # DRBD goes into sync mode for a short amount of time after executing the
11106 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
11107 # calling "resize" in sync mode fails. Sleeping for a short amount of
11108 # time is a work-around.
11111 disk.RecordGrow(self.op.amount)
11112 self.cfg.Update(instance, feedback_fn)
11114 # Changes have been recorded, release node lock
11115 _ReleaseLocks(self, locking.LEVEL_NODE)
11117 # Downgrade lock while waiting for sync
11118 self.glm.downgrade(locking.LEVEL_INSTANCE)
11120 if self.op.wait_for_sync:
11121 disk_abort = not _WaitForSync(self, instance, disks=[disk])
11123 self.proc.LogWarning("Disk sync-ing has not returned a good"
11124 " status; please check the instance")
11125 if instance.admin_state != constants.ADMINST_UP:
11126 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
11127 elif instance.admin_state != constants.ADMINST_UP:
11128 self.proc.LogWarning("Not shutting down the disk even if the instance is"
11129 " not supposed to be running because no wait for"
11130 " sync mode was requested")
11132 assert self.owned_locks(locking.LEVEL_NODE_RES)
11133 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
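# Illustrative usage sketch (assumed client-side call, not part of this
# module): growing disk 0 of an instance by 2 GiB (amount is in MiB) could be
# submitted as:
#
#   op = opcodes.OpInstanceGrowDisk(instance_name="inst1.example.com",
#                                   disk=0, amount=2048, wait_for_sync=True)
#
# The LU above first issues every blockdev_grow RPC in dry-run mode, so that
# a node which cannot grow the disk aborts the operation before any real
# resize has happened.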
11136 class LUInstanceQueryData(NoHooksLU):
11137 """Query runtime instance data.
11142 def ExpandNames(self):
11143 self.needed_locks = {}
11145 # Use locking if requested or when non-static information is wanted
11146 if not (self.op.static or self.op.use_locking):
11147 self.LogWarning("Non-static data requested, locks need to be acquired")
11148 self.op.use_locking = True
11150 if self.op.instances or not self.op.use_locking:
11151 # Expand instance names right here
11152 self.wanted_names = _GetWantedInstances(self, self.op.instances)
11154 # Will use acquired locks
11155 self.wanted_names = None
11157 if self.op.use_locking:
11158 self.share_locks = _ShareAll()
11160 if self.wanted_names is None:
11161 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
11163 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
11165 self.needed_locks[locking.LEVEL_NODE] = []
11166 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11168 def DeclareLocks(self, level):
11169 if self.op.use_locking and level == locking.LEVEL_NODE:
11170 self._LockInstancesNodes()
11172 def CheckPrereq(self):
11173 """Check prerequisites.
11175 This only checks the optional instance list against the existing names.
11178 if self.wanted_names is None:
11179 assert self.op.use_locking, "Locking was not used"
11180 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
11182 self.wanted_instances = \
11183 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
11185 def _ComputeBlockdevStatus(self, node, instance_name, dev):
11186 """Returns the status of a block device
11189 if self.op.static or not node:
11192 self.cfg.SetDiskID(dev, node)
11194 result = self.rpc.call_blockdev_find(node, dev)
11198 result.Raise("Can't compute disk status for %s" % instance_name)
11200 status = result.payload
11204 return (status.dev_path, status.major, status.minor,
11205 status.sync_percent, status.estimated_time,
11206 status.is_degraded, status.ldisk_status)
11208 def _ComputeDiskStatus(self, instance, snode, dev):
11209 """Compute block device status.
11212 if dev.dev_type in constants.LDS_DRBD:
11213 # we change the snode then (otherwise we use the one passed in)
11214 if dev.logical_id[0] == instance.primary_node:
11215 snode = dev.logical_id[1]
11217 snode = dev.logical_id[0]
11219 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
11220 instance.name, dev)
11221 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
11224 dev_children = map(compat.partial(self._ComputeDiskStatus,
11231 "iv_name": dev.iv_name,
11232 "dev_type": dev.dev_type,
11233 "logical_id": dev.logical_id,
11234 "physical_id": dev.physical_id,
11235 "pstatus": dev_pstatus,
11236 "sstatus": dev_sstatus,
11237 "children": dev_children,
11242 def Exec(self, feedback_fn):
11243 """Gather and return data"""
11246 cluster = self.cfg.GetClusterInfo()
11248 pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
11249 for i in self.wanted_instances)
11250 for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
11251 if self.op.static or pnode.offline:
11252 remote_state = None
11254 self.LogWarning("Primary node %s is marked offline, returning static"
11255 " information only for instance %s" %
11256 (pnode.name, instance.name))
11258 remote_info = self.rpc.call_instance_info(instance.primary_node,
11260 instance.hypervisor)
11261 remote_info.Raise("Error checking node %s" % instance.primary_node)
11262 remote_info = remote_info.payload
11263 if remote_info and "state" in remote_info:
11264 remote_state = "up"
11266 if instance.admin_state == constants.ADMINST_UP:
11267 remote_state = "down"
11269 remote_state = instance.admin_state
11271 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
11274 result[instance.name] = {
11275 "name": instance.name,
11276 "config_state": instance.admin_state,
11277 "run_state": remote_state,
11278 "pnode": instance.primary_node,
11279 "snodes": instance.secondary_nodes,
11281 # this happens to be the same format used for hooks
11282 "nics": _NICListToTuple(self, instance.nics),
11283 "disk_template": instance.disk_template,
11285 "hypervisor": instance.hypervisor,
11286 "network_port": instance.network_port,
11287 "hv_instance": instance.hvparams,
11288 "hv_actual": cluster.FillHV(instance, skip_globals=True),
11289 "be_instance": instance.beparams,
11290 "be_actual": cluster.FillBE(instance),
11291 "os_instance": instance.osparams,
11292 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
11293 "serial_no": instance.serial_no,
11294 "mtime": instance.mtime,
11295 "ctime": instance.ctime,
11296 "uuid": instance.uuid,
11302 class LUInstanceSetParams(LogicalUnit):
11303 """Modifies an instances's parameters.
11306 HPATH = "instance-modify"
11307 HTYPE = constants.HTYPE_INSTANCE
11310 def CheckArguments(self):
11311 if not (self.op.nics or self.op.disks or self.op.disk_template or
11312 self.op.hvparams or self.op.beparams or self.op.os_name or
11313 self.op.online_inst or self.op.offline_inst):
11314 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
11316 if self.op.hvparams:
11317 _CheckGlobalHvParams(self.op.hvparams)
11321 for disk_op, disk_dict in self.op.disks:
11322 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
11323 if disk_op == constants.DDM_REMOVE:
11324 disk_addremove += 1
11326 elif disk_op == constants.DDM_ADD:
11327 disk_addremove += 1
11329 if not isinstance(disk_op, int):
11330 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
11331 if not isinstance(disk_dict, dict):
11332 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
11333 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11335 if disk_op == constants.DDM_ADD:
11336 mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
11337 if mode not in constants.DISK_ACCESS_SET:
11338 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
11339 errors.ECODE_INVAL)
11340 size = disk_dict.get(constants.IDISK_SIZE, None)
11342 raise errors.OpPrereqError("Required disk parameter size missing",
11343 errors.ECODE_INVAL)
11346 except (TypeError, ValueError), err:
11347 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
11348 str(err), errors.ECODE_INVAL)
11349 disk_dict[constants.IDISK_SIZE] = size
11351 # modification of disk
11352 if constants.IDISK_SIZE in disk_dict:
11353 raise errors.OpPrereqError("Disk size change not possible, use"
11354 " grow-disk", errors.ECODE_INVAL)
11356 if disk_addremove > 1:
11357 raise errors.OpPrereqError("Only one disk add or remove operation"
11358 " supported at a time", errors.ECODE_INVAL)
11360 if self.op.disks and self.op.disk_template is not None:
11361 raise errors.OpPrereqError("Disk template conversion and other disk"
11362 " changes not supported at the same time",
11363 errors.ECODE_INVAL)
11365 if (self.op.disk_template and
11366 self.op.disk_template in constants.DTS_INT_MIRROR and
11367 self.op.remote_node is None):
11368 raise errors.OpPrereqError("Changing the disk template to a mirrored"
11369 " one requires specifying a secondary node",
11370 errors.ECODE_INVAL)
11374 for nic_op, nic_dict in self.op.nics:
11375 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
11376 if nic_op == constants.DDM_REMOVE:
11379 elif nic_op == constants.DDM_ADD:
11382 if not isinstance(nic_op, int):
11383 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
11384 if not isinstance(nic_dict, dict):
11385 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
11386 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11388 # nic_dict should be a dict
11389 nic_ip = nic_dict.get(constants.INIC_IP, None)
11390 if nic_ip is not None:
11391 if nic_ip.lower() == constants.VALUE_NONE:
11392 nic_dict[constants.INIC_IP] = None
11394 if not netutils.IPAddress.IsValid(nic_ip):
11395 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
11396 errors.ECODE_INVAL)
11398 nic_bridge = nic_dict.get("bridge", None)
11399 nic_link = nic_dict.get(constants.INIC_LINK, None)
11400 if nic_bridge and nic_link:
11401 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
11402 " at the same time", errors.ECODE_INVAL)
11403 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
11404 nic_dict["bridge"] = None
11405 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
11406 nic_dict[constants.INIC_LINK] = None
11408 if nic_op == constants.DDM_ADD:
11409 nic_mac = nic_dict.get(constants.INIC_MAC, None)
11410 if nic_mac is None:
11411 nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
11413 if constants.INIC_MAC in nic_dict:
11414 nic_mac = nic_dict[constants.INIC_MAC]
11415 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11416 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
11418 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
11419 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
11420 " modifying an existing nic",
11421 errors.ECODE_INVAL)
11423 if nic_addremove > 1:
11424 raise errors.OpPrereqError("Only one NIC add or remove operation"
11425 " supported at a time", errors.ECODE_INVAL)
11427 def ExpandNames(self):
11428 self._ExpandAndLockInstance()
11429 # Can't even acquire node locks in shared mode as upcoming changes in
11430 # Ganeti 2.6 will start to modify the node object on disk conversion
11431 self.needed_locks[locking.LEVEL_NODE] = []
11432 self.needed_locks[locking.LEVEL_NODE_RES] = []
11433 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11435 def DeclareLocks(self, level):
11436 if level == locking.LEVEL_NODE:
11437 self._LockInstancesNodes()
11438 if self.op.disk_template and self.op.remote_node:
11439 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11440 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
11441 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
11443 self.needed_locks[locking.LEVEL_NODE_RES] = \
11444 self.needed_locks[locking.LEVEL_NODE][:]
11446 def BuildHooksEnv(self):
11447 """Build hooks env.
11449 This runs on the master, primary and secondaries.
11453 if constants.BE_MINMEM in self.be_new:
11454 args["minmem"] = self.be_new[constants.BE_MINMEM]
11455 if constants.BE_MAXMEM in self.be_new:
11456 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
11457 if constants.BE_VCPUS in self.be_new:
11458 args["vcpus"] = self.be_new[constants.BE_VCPUS]
11459 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
11460 # information at all.
11463 nic_override = dict(self.op.nics)
11464 for idx, nic in enumerate(self.instance.nics):
11465 if idx in nic_override:
11466 this_nic_override = nic_override[idx]
11468 this_nic_override = {}
11469 if constants.INIC_IP in this_nic_override:
11470 ip = this_nic_override[constants.INIC_IP]
11473 if constants.INIC_MAC in this_nic_override:
11474 mac = this_nic_override[constants.INIC_MAC]
11477 if idx in self.nic_pnew:
11478 nicparams = self.nic_pnew[idx]
11480 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
11481 mode = nicparams[constants.NIC_MODE]
11482 link = nicparams[constants.NIC_LINK]
11483 args["nics"].append((ip, mac, mode, link))
11484 if constants.DDM_ADD in nic_override:
11485 ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
11486 mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
11487 nicparams = self.nic_pnew[constants.DDM_ADD]
11488 mode = nicparams[constants.NIC_MODE]
11489 link = nicparams[constants.NIC_LINK]
11490 args["nics"].append((ip, mac, mode, link))
11491 elif constants.DDM_REMOVE in nic_override:
11492 del args["nics"][-1]
11494 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
11495 if self.op.disk_template:
11496 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
11500 def BuildHooksNodes(self):
11501 """Build hooks nodes.
11504 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11507 def CheckPrereq(self):
11508 """Check prerequisites.
11510 This only checks the instance list against the existing names.
11513 # checking the new params on the primary/secondary nodes
11515 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11516 cluster = self.cluster = self.cfg.GetClusterInfo()
11517 assert self.instance is not None, \
11518 "Cannot retrieve locked instance %s" % self.op.instance_name
11519 pnode = instance.primary_node
11520 nodelist = list(instance.all_nodes)
11521 pnode_info = self.cfg.GetNodeInfo(pnode)
11522 self.diskparams = self.cfg.GetNodeGroup(pnode_info.group).diskparams
11525 if self.op.os_name and not self.op.force:
11526 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
11527 self.op.force_variant)
11528 instance_os = self.op.os_name
11530 instance_os = instance.os
11532 if self.op.disk_template:
11533 if instance.disk_template == self.op.disk_template:
11534 raise errors.OpPrereqError("Instance already has disk template %s" %
11535 instance.disk_template, errors.ECODE_INVAL)
11537 if (instance.disk_template,
11538 self.op.disk_template) not in self._DISK_CONVERSIONS:
11539 raise errors.OpPrereqError("Unsupported disk template conversion from"
11540 " %s to %s" % (instance.disk_template,
11541 self.op.disk_template),
11542 errors.ECODE_INVAL)
11543 _CheckInstanceState(self, instance, INSTANCE_DOWN,
11544 msg="cannot change disk template")
11545 if self.op.disk_template in constants.DTS_INT_MIRROR:
11546 if self.op.remote_node == pnode:
11547 raise errors.OpPrereqError("Given new secondary node %s is the same"
11548 " as the primary node of the instance" %
11549 self.op.remote_node, errors.ECODE_STATE)
11550 _CheckNodeOnline(self, self.op.remote_node)
11551 _CheckNodeNotDrained(self, self.op.remote_node)
11552 # FIXME: here we assume that the old instance type is DT_PLAIN
11553 assert instance.disk_template == constants.DT_PLAIN
11554 disks = [{constants.IDISK_SIZE: d.size,
11555 constants.IDISK_VG: d.logical_id[0]}
11556 for d in instance.disks]
11557 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
11558 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
11560 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
11561 if pnode_info.group != snode_info.group:
11562 self.LogWarning("The primary and secondary nodes are in two"
11563 " different node groups; the disk parameters"
11564 " from the first disk's node group will be"
11567 # hvparams processing
11568 if self.op.hvparams:
11569 hv_type = instance.hypervisor
11570 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
11571 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
11572 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
11575 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
11576 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
11577 self.hv_proposed = self.hv_new = hv_new # the new actual values
11578 self.hv_inst = i_hvdict # the new dict (without defaults)
11580 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
11582 self.hv_new = self.hv_inst = {}
11584 # beparams processing
11585 if self.op.beparams:
11586 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
11588 objects.UpgradeBeParams(i_bedict)
11589 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
11590 be_new = cluster.SimpleFillBE(i_bedict)
11591 self.be_proposed = self.be_new = be_new # the new actual values
11592 self.be_inst = i_bedict # the new dict (without defaults)
11594 self.be_new = self.be_inst = {}
11595 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
11596 be_old = cluster.FillBE(instance)
11598 # CPU param validation -- checking every time a parameter is
11599 # changed to cover all cases where either CPU mask or vcpus have
11600 # been changed
11601 if (constants.BE_VCPUS in self.be_proposed and
11602 constants.HV_CPU_MASK in self.hv_proposed):
11604 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
11605 # Verify mask is consistent with number of vCPUs. Can skip this
11606 # test if only 1 entry in the CPU mask, which means same mask
11607 # is applied to all vCPUs.
11608 if (len(cpu_list) > 1 and
11609 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
11610 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
11612 (self.be_proposed[constants.BE_VCPUS],
11613 self.hv_proposed[constants.HV_CPU_MASK]),
11614 errors.ECODE_INVAL)
11616 # Only perform this test if a new CPU mask is given
11617 if constants.HV_CPU_MASK in self.hv_new:
11618 # Calculate the largest CPU number requested
11619 max_requested_cpu = max(map(max, cpu_list))
11620 # Check that all of the instance's nodes have enough physical CPUs to
11621 # satisfy the requested CPU mask
11622 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
11623 max_requested_cpu + 1, instance.hypervisor)
11625 # osparams processing
11626 if self.op.osparams:
11627 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
11628 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
11629 self.os_inst = i_osdict # the new dict (without defaults)
11635 #TODO(dynmem): do the appropriate check involving MINMEM
11636 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
11637 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
11638 mem_check_list = [pnode]
11639 if be_new[constants.BE_AUTO_BALANCE]:
11640 # either we changed auto_balance to yes or it was from before
11641 mem_check_list.extend(instance.secondary_nodes)
11642 instance_info = self.rpc.call_instance_info(pnode, instance.name,
11643 instance.hypervisor)
11644 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
11645 [instance.hypervisor])
11646 pninfo = nodeinfo[pnode]
11647 msg = pninfo.fail_msg
11649 # Assume the primary node is unreachable and go ahead
11650 self.warn.append("Can't get info from primary node %s: %s" %
11653 (_, _, (pnhvinfo, )) = pninfo.payload
11654 if not isinstance(pnhvinfo.get("memory_free", None), int):
11655 self.warn.append("Node data from primary node %s doesn't contain"
11656 " free memory information" % pnode)
11657 elif instance_info.fail_msg:
11658 self.warn.append("Can't get instance runtime information: %s" %
11659 instance_info.fail_msg)
11661 if instance_info.payload:
11662 current_mem = int(instance_info.payload["memory"])
11664 # Assume instance not running
11665 # (there is a slight race condition here, but it's not very
11666 # probable, and we have no other way to check)
11667 # TODO: Describe race condition
11669 #TODO(dynmem): do the appropriate check involving MINMEM
11670 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
11671 pnhvinfo["memory_free"])
11673 raise errors.OpPrereqError("This change will prevent the instance"
11674 " from starting, due to %d MB of memory"
11675 " missing on its primary node" %
11677 errors.ECODE_NORES)
11679 if be_new[constants.BE_AUTO_BALANCE]:
11680 for node, nres in nodeinfo.items():
11681 if node not in instance.secondary_nodes:
11683 nres.Raise("Can't get info from secondary node %s" % node,
11684 prereq=True, ecode=errors.ECODE_STATE)
11685 (_, _, (nhvinfo, )) = nres.payload
11686 if not isinstance(nhvinfo.get("memory_free", None), int):
11687 raise errors.OpPrereqError("Secondary node %s didn't return free"
11688 " memory information" % node,
11689 errors.ECODE_STATE)
11690 #TODO(dynmem): do the appropriate check involving MINMEM
11691 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
11692 raise errors.OpPrereqError("This change will prevent the instance"
11693 " from failover to its secondary node"
11694 " %s, due to not enough memory" % node,
11695 errors.ECODE_STATE)
11699 self.nic_pinst = {}
11700 for nic_op, nic_dict in self.op.nics:
11701 if nic_op == constants.DDM_REMOVE:
11702 if not instance.nics:
11703 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
11704 errors.ECODE_INVAL)
11706 if nic_op != constants.DDM_ADD:
11708 if not instance.nics:
11709 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
11710 " no NICs" % nic_op,
11711 errors.ECODE_INVAL)
11712 if nic_op < 0 or nic_op >= len(instance.nics):
11713 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
11715 (nic_op, len(instance.nics) - 1),
11716 errors.ECODE_INVAL)
11717 old_nic_params = instance.nics[nic_op].nicparams
11718 old_nic_ip = instance.nics[nic_op].ip
11720 old_nic_params = {}
11723 update_params_dict = dict([(key, nic_dict[key])
11724 for key in constants.NICS_PARAMETERS
11725 if key in nic_dict])
11727 if "bridge" in nic_dict:
11728 update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
11730 new_nic_params = _GetUpdatedParams(old_nic_params,
11731 update_params_dict)
11732 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
11733 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
11734 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
11735 self.nic_pinst[nic_op] = new_nic_params
11736 self.nic_pnew[nic_op] = new_filled_nic_params
11737 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
11739 if new_nic_mode == constants.NIC_MODE_BRIDGED:
11740 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
11741 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
11743 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
11745 self.warn.append(msg)
11747 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
11748 if new_nic_mode == constants.NIC_MODE_ROUTED:
11749 if constants.INIC_IP in nic_dict:
11750 nic_ip = nic_dict[constants.INIC_IP]
11752 nic_ip = old_nic_ip
11754 raise errors.OpPrereqError("Cannot set the nic ip to None"
11755 " on a routed nic", errors.ECODE_INVAL)
11756 if constants.INIC_MAC in nic_dict:
11757 nic_mac = nic_dict[constants.INIC_MAC]
11758 if nic_mac is None:
11759 raise errors.OpPrereqError("Cannot set the nic mac to None",
11760 errors.ECODE_INVAL)
11761 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11762 # otherwise generate the mac
11763 nic_dict[constants.INIC_MAC] = \
11764 self.cfg.GenerateMAC(self.proc.GetECId())
11766 # or validate/reserve the current one
11768 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
11769 except errors.ReservationError:
11770 raise errors.OpPrereqError("MAC address %s already in use"
11771 " in cluster" % nic_mac,
11772 errors.ECODE_NOTUNIQUE)
11775 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
11776 raise errors.OpPrereqError("Disk operations not supported for"
11777 " diskless instances",
11778 errors.ECODE_INVAL)
11779 for disk_op, _ in self.op.disks:
11780 if disk_op == constants.DDM_REMOVE:
11781 if len(instance.disks) == 1:
11782 raise errors.OpPrereqError("Cannot remove the last disk of"
11783 " an instance", errors.ECODE_INVAL)
11784 _CheckInstanceState(self, instance, INSTANCE_DOWN,
11785 msg="cannot remove disks")
11787 if (disk_op == constants.DDM_ADD and
11788 len(instance.disks) >= constants.MAX_DISKS):
11789 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
11790 " add more" % constants.MAX_DISKS,
11791 errors.ECODE_STATE)
11792 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
11794 if disk_op < 0 or disk_op >= len(instance.disks):
11795 raise errors.OpPrereqError("Invalid disk index %s, valid values"
11797 (disk_op, len(instance.disks)),
11798 errors.ECODE_INVAL)
11800 # disabling the instance
11801 if self.op.offline_inst:
11802 _CheckInstanceState(self, instance, INSTANCE_DOWN,
11803 msg="cannot change instance state to offline")
11805 # enabling the instance
11806 if self.op.online_inst:
11807 _CheckInstanceState(self, instance, INSTANCE_OFFLINE,
11808 msg="cannot make instance go online")
11810 def _ConvertPlainToDrbd(self, feedback_fn):
11811 """Converts an instance from plain to drbd.
11814 feedback_fn("Converting template to drbd")
11815 instance = self.instance
11816 pnode = instance.primary_node
11817 snode = self.op.remote_node
11819 assert instance.disk_template == constants.DT_PLAIN
11821 # create a fake disk info for _GenerateDiskTemplate
11822 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
11823 constants.IDISK_VG: d.logical_id[0]}
11824 for d in instance.disks]
11825 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
11826 instance.name, pnode, [snode],
11827 disk_info, None, None, 0, feedback_fn,
11829 info = _GetInstanceInfoText(instance)
11830 feedback_fn("Creating aditional volumes...")
11831 # first, create the missing data and meta devices
11832 for disk in new_disks:
11833 # unfortunately this is... not too nice
11834 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
11836 for child in disk.children:
11837 _CreateSingleBlockDev(self, snode, instance, child, info, True)
11838 # at this stage, all new LVs have been created, we can rename the
11839 # old ones
11840 feedback_fn("Renaming original volumes...")
11841 rename_list = [(o, n.children[0].logical_id)
11842 for (o, n) in zip(instance.disks, new_disks)]
11843 result = self.rpc.call_blockdev_rename(pnode, rename_list)
11844 result.Raise("Failed to rename original LVs")
11846 feedback_fn("Initializing DRBD devices...")
11847 # all child devices are in place, we can now create the DRBD devices
11848 for disk in new_disks:
11849 for node in [pnode, snode]:
11850 f_create = node == pnode
11851 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
11853 # at this point, the instance has been modified
11854 instance.disk_template = constants.DT_DRBD8
11855 instance.disks = new_disks
11856 self.cfg.Update(instance, feedback_fn)
11858 # Release node locks while waiting for sync
11859 _ReleaseLocks(self, locking.LEVEL_NODE)
11861 # disks are created, waiting for sync
11862 disk_abort = not _WaitForSync(self, instance,
11863 oneshot=not self.op.wait_for_sync)
11865 raise errors.OpExecError("There are some degraded disks for"
11866 " this instance, please cleanup manually")
11868 # Node resource locks will be released by caller
11870 def _ConvertDrbdToPlain(self, feedback_fn):
11871 """Converts an instance from drbd to plain.
11874 instance = self.instance
11876 assert len(instance.secondary_nodes) == 1
11877 assert instance.disk_template == constants.DT_DRBD8
11879 pnode = instance.primary_node
11880 snode = instance.secondary_nodes[0]
11881 feedback_fn("Converting template to plain")
11883 old_disks = instance.disks
11884 new_disks = [d.children[0] for d in old_disks]
11886 # copy over size and mode
11887 for parent, child in zip(old_disks, new_disks):
11888 child.size = parent.size
11889 child.mode = parent.mode
11891 # update instance structure
11892 instance.disks = new_disks
11893 instance.disk_template = constants.DT_PLAIN
11894 self.cfg.Update(instance, feedback_fn)
11896 # Release locks in case removing disks takes a while
11897 _ReleaseLocks(self, locking.LEVEL_NODE)
11899 feedback_fn("Removing volumes on the secondary node...")
11900 for disk in old_disks:
11901 self.cfg.SetDiskID(disk, snode)
11902 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
11904 self.LogWarning("Could not remove block device %s on node %s,"
11905 " continuing anyway: %s", disk.iv_name, snode, msg)
11907 feedback_fn("Removing unneeded volumes on the primary node...")
11908 for idx, disk in enumerate(old_disks):
11909 meta = disk.children[1]
11910 self.cfg.SetDiskID(meta, pnode)
11911 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
11913 self.LogWarning("Could not remove metadata for disk %d on node %s,"
11914 " continuing anyway: %s", idx, pnode, msg)
11916 # this is a DRBD disk, return its port to the pool
11917 for disk in old_disks:
11918 tcp_port = disk.logical_id[2]
11919 self.cfg.AddTcpUdpPort(tcp_port)
11921 # Node resource locks will be released by caller
11923 def Exec(self, feedback_fn):
11924 """Modifies an instance.
11926 All parameters take effect only at the next restart of the instance.
11929 # Process here the warnings from CheckPrereq, as we don't have a
11930 # feedback_fn there.
11931 for warn in self.warn:
11932 feedback_fn("WARNING: %s" % warn)
11934 assert ((self.op.disk_template is None) ^
11935 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
11936 "Not owning any node resource locks"
11939 instance = self.instance
11941 for disk_op, disk_dict in self.op.disks:
11942 if disk_op == constants.DDM_REMOVE:
11943 # remove the last disk
11944 device = instance.disks.pop()
11945 device_idx = len(instance.disks)
11946 for node, disk in device.ComputeNodeTree(instance.primary_node):
11947 self.cfg.SetDiskID(disk, node)
11948 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
11950 self.LogWarning("Could not remove disk/%d on node %s: %s,"
11951 " continuing anyway", device_idx, node, msg)
11952 result.append(("disk/%d" % device_idx, "remove"))
11954 # if this is a DRBD disk, return its port to the pool
11955 if device.dev_type in constants.LDS_DRBD:
11956 tcp_port = device.logical_id[2]
11957 self.cfg.AddTcpUdpPort(tcp_port)
11958 elif disk_op == constants.DDM_ADD:
11960 if instance.disk_template in (constants.DT_FILE,
11961 constants.DT_SHARED_FILE):
11962 file_driver, file_path = instance.disks[0].logical_id
11963 file_path = os.path.dirname(file_path)
11965 file_driver = file_path = None
11966 disk_idx_base = len(instance.disks)
11967 new_disk = _GenerateDiskTemplate(self,
11968 instance.disk_template,
11969 instance.name, instance.primary_node,
11970 instance.secondary_nodes,
11976 self.diskparams)[0]
11977 instance.disks.append(new_disk)
11978 info = _GetInstanceInfoText(instance)
11980 logging.info("Creating volume %s for instance %s",
11981 new_disk.iv_name, instance.name)
11982 # Note: this needs to be kept in sync with _CreateDisks
11984 for node in instance.all_nodes:
11985 f_create = node == instance.primary_node
11987 _CreateBlockDev(self, node, instance, new_disk,
11988 f_create, info, f_create)
11989 except errors.OpExecError, err:
11990 self.LogWarning("Failed to create volume %s (%s) on"
11992 new_disk.iv_name, new_disk, node, err)
11993 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
11994 (new_disk.size, new_disk.mode)))
11996 # change a given disk
11997 instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
11998 result.append(("disk.mode/%d" % disk_op,
11999 disk_dict[constants.IDISK_MODE]))
12001 if self.op.disk_template:
12003 check_nodes = set(instance.all_nodes)
12004 if self.op.remote_node:
12005 check_nodes.add(self.op.remote_node)
12006 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
12007 owned = self.owned_locks(level)
12008 assert not (check_nodes - owned), \
12009 ("Not owning the correct locks, owning %r, expected at least %r" %
12010 (owned, check_nodes))
12012 r_shut = _ShutdownInstanceDisks(self, instance)
12014 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
12015 " proceed with disk template conversion")
12016 mode = (instance.disk_template, self.op.disk_template)
12018 self._DISK_CONVERSIONS[mode](self, feedback_fn)
12020 self.cfg.ReleaseDRBDMinors(instance.name)
12022 result.append(("disk_template", self.op.disk_template))
12024 assert instance.disk_template == self.op.disk_template, \
12025 ("Expected disk template '%s', found '%s'" %
12026 (self.op.disk_template, instance.disk_template))
12028 # Release node and resource locks if there are any (they might already have
12029 # been released during disk conversion)
12030 _ReleaseLocks(self, locking.LEVEL_NODE)
12031 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
12034 for nic_op, nic_dict in self.op.nics:
12035 if nic_op == constants.DDM_REMOVE:
12036 # remove the last nic
12037 del instance.nics[-1]
12038 result.append(("nic.%d" % len(instance.nics), "remove"))
12039 elif nic_op == constants.DDM_ADD:
12040 # mac and bridge should be set, by now
12041 mac = nic_dict[constants.INIC_MAC]
12042 ip = nic_dict.get(constants.INIC_IP, None)
12043 nicparams = self.nic_pinst[constants.DDM_ADD]
12044 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
12045 instance.nics.append(new_nic)
12046 result.append(("nic.%d" % (len(instance.nics) - 1),
12047 "add:mac=%s,ip=%s,mode=%s,link=%s" %
12048 (new_nic.mac, new_nic.ip,
12049 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
12050 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
12053 for key in (constants.INIC_MAC, constants.INIC_IP):
12054 if key in nic_dict:
12055 setattr(instance.nics[nic_op], key, nic_dict[key])
12056 if nic_op in self.nic_pinst:
12057 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
12058 for key, val in nic_dict.iteritems():
12059 result.append(("nic.%s/%d" % (key, nic_op), val))
12062 if self.op.hvparams:
12063 instance.hvparams = self.hv_inst
12064 for key, val in self.op.hvparams.iteritems():
12065 result.append(("hv/%s" % key, val))
12068 if self.op.beparams:
12069 instance.beparams = self.be_inst
12070 for key, val in self.op.beparams.iteritems():
12071 result.append(("be/%s" % key, val))
12074 if self.op.os_name:
12075 instance.os = self.op.os_name
12078 if self.op.osparams:
12079 instance.osparams = self.os_inst
12080 for key, val in self.op.osparams.iteritems():
12081 result.append(("os/%s" % key, val))
12083 # online/offline instance
12084 if self.op.online_inst:
12085 self.cfg.MarkInstanceDown(instance.name)
12086 result.append(("admin_state", constants.ADMINST_DOWN))
12087 if self.op.offline_inst:
12088 self.cfg.MarkInstanceOffline(instance.name)
12089 result.append(("admin_state", constants.ADMINST_OFFLINE))
12091 self.cfg.Update(instance, feedback_fn)
12093 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
12094 self.owned_locks(locking.LEVEL_NODE)), \
12095 "All node locks should have been released by now"
12099 _DISK_CONVERSIONS = {
12100 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
12101 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
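# Illustrative usage sketch (assumed client-side call, not part of this
# module): converting a plain instance to DRBD via LUInstanceSetParams could
# be requested as follows; the names are placeholders.
#
#   op = opcodes.OpInstanceSetParams(instance_name="inst1.example.com",
#                                    disk_template=constants.DT_DRBD8,
#                                    remote_node="node2.example.com")
#
# The reverse conversion (DT_DRBD8 back to DT_PLAIN) needs no remote node;
# only the pairs listed in _DISK_CONVERSIONS above are supported.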
12105 class LUInstanceChangeGroup(LogicalUnit):
12106 HPATH = "instance-change-group"
12107 HTYPE = constants.HTYPE_INSTANCE
12110 def ExpandNames(self):
12111 self.share_locks = _ShareAll()
12112 self.needed_locks = {
12113 locking.LEVEL_NODEGROUP: [],
12114 locking.LEVEL_NODE: [],
12117 self._ExpandAndLockInstance()
12119 if self.op.target_groups:
12120 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12121 self.op.target_groups)
12123 self.req_target_uuids = None
12125 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12127 def DeclareLocks(self, level):
12128 if level == locking.LEVEL_NODEGROUP:
12129 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12131 if self.req_target_uuids:
12132 lock_groups = set(self.req_target_uuids)
12134 # Lock all groups used by instance optimistically; this requires going
12135 # via the node before it's locked, requiring verification later on
12136 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
12137 lock_groups.update(instance_groups)
12139 # No target groups, need to lock all of them
12140 lock_groups = locking.ALL_SET
12142 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12144 elif level == locking.LEVEL_NODE:
12145 if self.req_target_uuids:
12146 # Lock all nodes used by instances
12147 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12148 self._LockInstancesNodes()
12150 # Lock all nodes in all potential target groups
12151 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
12152 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
12153 member_nodes = [node_name
12154 for group in lock_groups
12155 for node_name in self.cfg.GetNodeGroup(group).members]
12156 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12158 # Lock all nodes as all groups are potential targets
12159 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12161 def CheckPrereq(self):
12162 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12163 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12164 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12166 assert (self.req_target_uuids is None or
12167 owned_groups.issuperset(self.req_target_uuids))
12168 assert owned_instances == set([self.op.instance_name])
12170 # Get instance information
12171 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12173 # Check if node groups for locked instance are still correct
12174 assert owned_nodes.issuperset(self.instance.all_nodes), \
12175 ("Instance %s's nodes changed while we kept the lock" %
12176 self.op.instance_name)
12178 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
12181 if self.req_target_uuids:
12182 # User requested specific target groups
12183 self.target_uuids = self.req_target_uuids
12185 # All groups except those used by the instance are potential targets
12186 self.target_uuids = owned_groups - inst_groups
12188 conflicting_groups = self.target_uuids & inst_groups
12189 if conflicting_groups:
12190 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
12191 " used by the instance '%s'" %
12192 (utils.CommaJoin(conflicting_groups),
12193 self.op.instance_name),
12194 errors.ECODE_INVAL)
12196 if not self.target_uuids:
12197 raise errors.OpPrereqError("There are no possible target groups",
12198 errors.ECODE_INVAL)
12200 def BuildHooksEnv(self):
12201 """Build hooks env.
12204 assert self.target_uuids
12207 "TARGET_GROUPS": " ".join(self.target_uuids),
12210 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12214 def BuildHooksNodes(self):
12215 """Build hooks nodes.
12218 mn = self.cfg.GetMasterNode()
12219 return ([mn], [mn])
12221 def Exec(self, feedback_fn):
12222 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12224 assert instances == [self.op.instance_name], "Instance not locked"
12226 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12227 instances=instances, target_groups=list(self.target_uuids))
12229 ial.Run(self.op.iallocator)
12231 if not ial.success:
12232 raise errors.OpPrereqError("Can't compute solution for changing group of"
12233 " instance '%s' using iallocator '%s': %s" %
12234 (self.op.instance_name, self.op.iallocator,
12236 errors.ECODE_NORES)
12238 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12240 self.LogInfo("Iallocator returned %s job(s) for changing group of"
12241 " instance '%s'", len(jobs), self.op.instance_name)
12243 return ResultWithJobs(jobs)
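# Illustrative usage sketch (assumed client-side call, not part of this
# module): moving an instance to another node group could be requested as:
#
#   op = opcodes.OpInstanceChangeGroup(instance_name="inst1.example.com",
#                                      target_groups=["group2"])
#
# Without target_groups every group except the instance's current one(s) is a
# candidate, and the chosen iallocator returns the jobs that perform the move.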
12246 class LUBackupQuery(NoHooksLU):
12247 """Query the exports list
12252 def ExpandNames(self):
12253 self.needed_locks = {}
12254 self.share_locks[locking.LEVEL_NODE] = 1
12255 if not self.op.nodes:
12256 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12258 self.needed_locks[locking.LEVEL_NODE] = \
12259 _GetWantedNodes(self, self.op.nodes)
12261 def Exec(self, feedback_fn):
12262 """Compute the list of all the exported system images.
12265 @return: a dictionary with the structure node->(export-list)
12266 where export-list is a list of the instances exported on
12270 self.nodes = self.owned_locks(locking.LEVEL_NODE)
12271 rpcresult = self.rpc.call_export_list(self.nodes)
12273 for node in rpcresult:
12274 if rpcresult[node].fail_msg:
12275 result[node] = False
12277 result[node] = rpcresult[node].payload
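# Illustrative sketch (not part of the original module): the structure built
# above maps node names to either False (node could not be queried) or the
# list of exports found on it, e.g.:
#
#   {"node1.example.com": ["inst1.example.com"], "node2.example.com": False}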
12282 class LUBackupPrepare(NoHooksLU):
12283 """Prepares an instance for an export and returns useful information.
12288 def ExpandNames(self):
12289 self._ExpandAndLockInstance()
12291 def CheckPrereq(self):
12292 """Check prerequisites.
12295 instance_name = self.op.instance_name
12297 self.instance = self.cfg.GetInstanceInfo(instance_name)
12298 assert self.instance is not None, \
12299 "Cannot retrieve locked instance %s" % self.op.instance_name
12300 _CheckNodeOnline(self, self.instance.primary_node)
12302 self._cds = _GetClusterDomainSecret()
12304 def Exec(self, feedback_fn):
12305 """Prepares an instance for an export.
12308 instance = self.instance
12310 if self.op.mode == constants.EXPORT_MODE_REMOTE:
12311 salt = utils.GenerateSecret(8)
12313 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
12314 result = self.rpc.call_x509_cert_create(instance.primary_node,
12315 constants.RIE_CERT_VALIDITY)
12316 result.Raise("Can't create X509 key and certificate on %s" % result.node)
12318 (name, cert_pem) = result.payload
12320 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
12324 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
12325 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
12327 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
12333 class LUBackupExport(LogicalUnit):
12334 """Export an instance to an image in the cluster.
12337 HPATH = "instance-export"
12338 HTYPE = constants.HTYPE_INSTANCE
12341 def CheckArguments(self):
12342 """Check the arguments.
12345 self.x509_key_name = self.op.x509_key_name
12346 self.dest_x509_ca_pem = self.op.destination_x509_ca
12348 if self.op.mode == constants.EXPORT_MODE_REMOTE:
12349 if not self.x509_key_name:
12350 raise errors.OpPrereqError("Missing X509 key name for encryption",
12351 errors.ECODE_INVAL)
12353 if not self.dest_x509_ca_pem:
12354 raise errors.OpPrereqError("Missing destination X509 CA",
12355 errors.ECODE_INVAL)
12357 def ExpandNames(self):
12358 self._ExpandAndLockInstance()
12360 # Lock all nodes for local exports
12361 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12362 # FIXME: lock only instance primary and destination node
12364 # Sad but true, for now we have to lock all nodes, as we don't know where
12365 # the previous export might be, and in this LU we search for it and
12366 # remove it from its current node. In the future we could fix this by:
12367 # - making a tasklet to search (share-lock all), then create the
12368 # new one, then one to remove, after
12369 # - removing the removal operation altogether
12370 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12372 def DeclareLocks(self, level):
12373 """Last minute lock declaration."""
12374 # All nodes are locked anyway, so nothing to do here.
12376 def BuildHooksEnv(self):
12377 """Build hooks env.
12379 This will run on the master, primary node and target node.
12383 "EXPORT_MODE": self.op.mode,
12384 "EXPORT_NODE": self.op.target_node,
12385 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
12386 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
12387 # TODO: Generic function for boolean env variables
12388 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
12391 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12395 def BuildHooksNodes(self):
12396 """Build hooks nodes.
12399 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
12401 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12402 nl.append(self.op.target_node)
12406 def CheckPrereq(self):
12407 """Check prerequisites.
12409 This checks that the instance and node names are valid.
12412 instance_name = self.op.instance_name
12414 self.instance = self.cfg.GetInstanceInfo(instance_name)
12415 assert self.instance is not None, \
12416 "Cannot retrieve locked instance %s" % self.op.instance_name
12417 _CheckNodeOnline(self, self.instance.primary_node)
12419 if (self.op.remove_instance and
12420 self.instance.admin_state == constants.ADMINST_UP and
12421 not self.op.shutdown):
12422 raise errors.OpPrereqError("Can not remove instance without shutting it"
12425 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12426 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
12427 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
12428 assert self.dst_node is not None
12430 _CheckNodeOnline(self, self.dst_node.name)
12431 _CheckNodeNotDrained(self, self.dst_node.name)
12434 self.dest_disk_info = None
12435 self.dest_x509_ca = None
12437 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
12438 self.dst_node = None
12440 if len(self.op.target_node) != len(self.instance.disks):
12441 raise errors.OpPrereqError(("Received destination information for %s"
12442 " disks, but instance %s has %s disks") %
12443 (len(self.op.target_node), instance_name,
12444 len(self.instance.disks)),
12445 errors.ECODE_INVAL)
12447 cds = _GetClusterDomainSecret()
12449 # Check X509 key name
12451 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
12452 except (TypeError, ValueError), err:
12453 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
12455 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
12456 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
12457 errors.ECODE_INVAL)
12459 # Load and verify CA
12461 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
12462 except OpenSSL.crypto.Error, err:
12463 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
12464 (err, ), errors.ECODE_INVAL)
12466 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
12467 if errcode is not None:
12468 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
12469 (msg, ), errors.ECODE_INVAL)
12471 self.dest_x509_ca = cert
12473 # Verify target information
12475 for idx, disk_data in enumerate(self.op.target_node):
12477 (host, port, magic) = \
12478 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
12479 except errors.GenericError, err:
12480 raise errors.OpPrereqError("Target info for disk %s: %s" %
12481 (idx, err), errors.ECODE_INVAL)
12483 disk_info.append((host, port, magic))
12485 assert len(disk_info) == len(self.op.target_node)
12486 self.dest_disk_info = disk_info
12489 raise errors.ProgrammerError("Unhandled export mode %r" %
12492 # instance disk type verification
12493 # TODO: Implement export support for file-based disks
12494 for disk in self.instance.disks:
12495 if disk.dev_type == constants.LD_FILE:
12496 raise errors.OpPrereqError("Export not supported for instances with"
12497 " file-based disks", errors.ECODE_INVAL)
12499 def _CleanupExports(self, feedback_fn):
12500 """Removes exports of current instance from all other nodes.
12502 If an instance in a cluster with nodes A..D was exported to node C, its
12503 exports will be removed from the nodes A, B and D.
12506 assert self.op.mode != constants.EXPORT_MODE_REMOTE
12508 nodelist = self.cfg.GetNodeList()
12509 nodelist.remove(self.dst_node.name)
12511 # on one-node clusters nodelist will be empty after the removal
12512 # if we proceed the backup would be removed because OpBackupQuery
12513 # substitutes an empty list with the full cluster node list.
12514 iname = self.instance.name
12516 feedback_fn("Removing old exports for instance %s" % iname)
12517 exportlist = self.rpc.call_export_list(nodelist)
12518 for node in exportlist:
12519 if exportlist[node].fail_msg:
12521 if iname in exportlist[node].payload:
12522 msg = self.rpc.call_export_remove(node, iname).fail_msg
12524 self.LogWarning("Could not remove older export for instance %s"
12525 " on node %s: %s", iname, node, msg)
12527 def Exec(self, feedback_fn):
12528 """Export an instance to an image in the cluster.
12531 assert self.op.mode in constants.EXPORT_MODES
12533 instance = self.instance
12534 src_node = instance.primary_node
12536 if self.op.shutdown:
12537 # shutdown the instance, but not the disks
12538 feedback_fn("Shutting down instance %s" % instance.name)
12539 result = self.rpc.call_instance_shutdown(src_node, instance,
12540 self.op.shutdown_timeout)
12541 # TODO: Maybe ignore failures if ignore_remove_failures is set
12542 result.Raise("Could not shutdown instance %s on"
12543 " node %s" % (instance.name, src_node))
12545 # set the disks ID correctly since call_instance_start needs the
12546 # correct drbd minor to create the symlinks
12547 for disk in instance.disks:
12548 self.cfg.SetDiskID(disk, src_node)
12550 activate_disks = (instance.admin_state != constants.ADMINST_UP)
12553 # Activate the instance disks if we're exporting a stopped instance
12554 feedback_fn("Activating disks for %s" % instance.name)
12555 _StartInstanceDisks(self, instance, None)
12558 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
12561 helper.CreateSnapshots()
12563 if (self.op.shutdown and
12564 instance.admin_state == constants.ADMINST_UP and
12565 not self.op.remove_instance):
12566 assert not activate_disks
12567 feedback_fn("Starting instance %s" % instance.name)
12568 result = self.rpc.call_instance_start(src_node,
12569 (instance, None, None), False)
12570 msg = result.fail_msg
12572 feedback_fn("Failed to start instance: %s" % msg)
12573 _ShutdownInstanceDisks(self, instance)
12574 raise errors.OpExecError("Could not start instance: %s" % msg)
12576 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12577 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
12578 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
12579 connect_timeout = constants.RIE_CONNECT_TIMEOUT
12580 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
12582 (key_name, _, _) = self.x509_key_name
12585 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
12588 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
12589 key_name, dest_ca_pem,
12594 # Check for backwards compatibility
12595 assert len(dresults) == len(instance.disks)
12596 assert compat.all(isinstance(i, bool) for i in dresults), \
12597 "Not all results are boolean: %r" % dresults
12601 feedback_fn("Deactivating disks for %s" % instance.name)
12602 _ShutdownInstanceDisks(self, instance)
12604 if not (compat.all(dresults) and fin_resu):
12607 failures.append("export finalization")
12608 if not compat.all(dresults):
12609 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
12611 failures.append("disk export: disk(s) %s" % fdsk)
12613 raise errors.OpExecError("Export failed, errors in %s" %
12614 utils.CommaJoin(failures))
12616 # At this point, the export was successful, we can cleanup/finish
12618 # Remove instance if requested
12619 if self.op.remove_instance:
12620 feedback_fn("Removing instance %s" % instance.name)
12621 _RemoveInstance(self, feedback_fn, instance,
12622 self.op.ignore_remove_failures)
12624 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12625 self._CleanupExports(feedback_fn)
12627 return fin_resu, dresults
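# Illustrative sketch (not part of the original module): the (fin_resu,
# dresults) pair returned above is a global success flag plus one boolean per
# exported disk, e.g. (True, [True, True]) for a fully successful two-disk
# export; any False entry is reported via the "Export failed" error above.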
12630 class LUBackupRemove(NoHooksLU):
12631 """Remove exports related to the named instance.
12636 def ExpandNames(self):
12637 self.needed_locks = {}
12638 # We need all nodes to be locked in order for RemoveExport to work, but we
12639 # don't need to lock the instance itself, as nothing will happen to it (and
12640 # we can remove exports also for a removed instance)
12641 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12643 def Exec(self, feedback_fn):
12644 """Remove any export.
12647 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
12648 # If the instance was not found we'll try with the name that was passed in.
12649 # This will only work if it was an FQDN, though.
12651 if not instance_name:
12653 instance_name = self.op.instance_name
12655 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
12656 exportlist = self.rpc.call_export_list(locked_nodes)
12658 for node in exportlist:
12659 msg = exportlist[node].fail_msg
12661 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
12663 if instance_name in exportlist[node].payload:
12665 result = self.rpc.call_export_remove(node, instance_name)
12666 msg = result.fail_msg
12668 logging.error("Could not remove export for instance %s"
12669 " on node %s: %s", instance_name, node, msg)
12671 if fqdn_warn and not found:
12672 feedback_fn("Export not found. If trying to remove an export belonging"
12673 " to a deleted instance please use its Fully Qualified"
12677 class LUGroupAdd(LogicalUnit):
12678 """Logical unit for creating node groups.
12681 HPATH = "group-add"
12682 HTYPE = constants.HTYPE_GROUP
12685 def ExpandNames(self):
12686 # We need the new group's UUID here so that we can create and acquire the
12687 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
12688 # that it should not check whether the UUID exists in the configuration.
12689 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
12690 self.needed_locks = {}
12691 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12693 def CheckPrereq(self):
12694 """Check prerequisites.
12696 This checks that the given group name is not an existing node group
12701 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12702 except errors.OpPrereqError:
12705 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
12706 " node group (UUID: %s)" %
12707 (self.op.group_name, existing_uuid),
12708 errors.ECODE_EXISTS)
12710 if self.op.ndparams:
12711 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
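# Ensure every known disk template has an entry (possibly empty) with correctly
# typed parameters; if none were supplied, fall back to the cluster-wide
# disk parameter defaults.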
12713 if self.op.diskparams:
12714 for templ in constants.DISK_TEMPLATES:
12715 if templ not in self.op.diskparams:
12716 self.op.diskparams[templ] = {}
12717 utils.ForceDictType(self.op.diskparams[templ], constants.DISK_DT_TYPES)
12719 self.op.diskparams = self.cfg.GetClusterInfo().diskparams
12721 def BuildHooksEnv(self):
12722 """Build hooks env.
12726 "GROUP_NAME": self.op.group_name,
12729 def BuildHooksNodes(self):
12730 """Build hooks nodes.
12733 mn = self.cfg.GetMasterNode()
12734 return ([mn], [mn])
12736 def Exec(self, feedback_fn):
12737 """Add the node group to the cluster.
12740 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
12741 uuid=self.group_uuid,
12742 alloc_policy=self.op.alloc_policy,
12743 ndparams=self.op.ndparams,
12744 diskparams=self.op.diskparams)
12746 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
12747 del self.remove_locks[locking.LEVEL_NODEGROUP]
12750 class LUGroupAssignNodes(NoHooksLU):
12751 """Logical unit for assigning nodes to groups.
12756 def ExpandNames(self):
12757 # These raise errors.OpPrereqError on their own:
12758 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12759 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
12761 # We want to lock all the affected nodes and groups. We have readily
12762 # available the list of nodes, and the *destination* group. To gather the
12763 # list of "source" groups, we need to fetch node information later on.
12764 self.needed_locks = {
12765 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
12766 locking.LEVEL_NODE: self.op.nodes,
12769 def DeclareLocks(self, level):
12770 if level == locking.LEVEL_NODEGROUP:
12771 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
12773 # Try to get all affected nodes' groups without having the group or node
12774 # lock yet. Needs verification later in the code flow.
12775 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
12777 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
12779 def CheckPrereq(self):
12780 """Check prerequisites.
12783 assert self.needed_locks[locking.LEVEL_NODEGROUP]
12784 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
12785 frozenset(self.op.nodes))
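# Nodes may have been moved to other groups while we were waiting for locks;
# verify that the groups we locked still cover every affected node.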
12787 expected_locks = (set([self.group_uuid]) |
12788 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
12789 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
12790 if actual_locks != expected_locks:
12791 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
12792 " current groups are '%s', used to be '%s'" %
12793 (utils.CommaJoin(expected_locks),
12794 utils.CommaJoin(actual_locks)))
12796 self.node_data = self.cfg.GetAllNodesInfo()
12797 self.group = self.cfg.GetNodeGroup(self.group_uuid)
12798 instance_data = self.cfg.GetAllInstancesInfo()
12800 if self.group is None:
12801 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12802 (self.op.group_name, self.group_uuid))
12804 (new_splits, previous_splits) = \
12805 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
12806 for node in self.op.nodes],
12807 self.node_data, instance_data)
12810 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
12812 if not self.op.force:
12813 raise errors.OpExecError("The following instances get split by this"
12814 " change and --force was not given: %s" %
12817 self.LogWarning("This operation will split the following instances: %s",
12820 if previous_splits:
12821 self.LogWarning("In addition, these already-split instances continue"
12822 " to be split across groups: %s",
12823 utils.CommaJoin(utils.NiceSort(previous_splits)))
12825 def Exec(self, feedback_fn):
12826 """Assign nodes to a new group.
12829 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
12831 self.cfg.AssignGroupNodes(mods)
12834 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
12835 """Check for split instances after a node assignment.
12837 This method considers a series of node assignments as an atomic operation,
12838 and returns information about split instances after applying the set of changes.
12841 In particular, it returns information about newly split instances, and about
12842 instances that were already split and remain so after the change.
12844 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are considered.
12847 @type changes: list of (node_name, new_group_uuid) pairs.
12848 @param changes: list of node assignments to consider.
12849 @param node_data: a dict with data for all nodes
12850 @param instance_data: a dict with all instances to consider
12851 @rtype: a two-tuple
12852 @return: a list of instances that were previously okay but become split as a
12853 consequence of this change, and a list of instances that were previously
12854 split and that this change does not fix.
12857 changed_nodes = dict((node, group) for node, group in changes
12858 if node_data[node].group != group)
12860 all_split_instances = set()
12861 previously_split_instances = set()
12863 def InstanceNodes(instance):
12864 return [instance.primary_node] + list(instance.secondary_nodes)
12866 for inst in instance_data.values():
12867 if inst.disk_template not in constants.DTS_INT_MIRROR:
12870 instance_nodes = InstanceNodes(inst)
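# An instance is currently split if its nodes already span more than one group.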
12872 if len(set(node_data[node].group for node in instance_nodes)) > 1:
12873 previously_split_instances.add(inst.name)
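# Re-check with the proposed assignment applied; nodes not listed in the
# changes keep their current group.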
12875 if len(set(changed_nodes.get(node, node_data[node].group)
12876 for node in instance_nodes)) > 1:
12877 all_split_instances.add(inst.name)
12879 return (list(all_split_instances - previously_split_instances),
12880 list(previously_split_instances & all_split_instances))
12883 class _GroupQuery(_QueryBase):
12884 FIELDS = query.GROUP_FIELDS
12886 def ExpandNames(self, lu):
12887 lu.needed_locks = {}
12889 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
12890 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
12893 self.wanted = [name_to_uuid[name]
12894 for name in utils.NiceSort(name_to_uuid.keys())]
12896 # Accepted names may be either group names or UUIDs.
12899 all_uuid = frozenset(self._all_groups.keys())
12901 for name in self.names:
12902 if name in all_uuid:
12903 self.wanted.append(name)
12904 elif name in name_to_uuid:
12905 self.wanted.append(name_to_uuid[name])
12907 missing.append(name)
12910 raise errors.OpPrereqError("Some groups do not exist: %s" %
12911 utils.CommaJoin(missing),
12912 errors.ECODE_NOENT)
12914 def DeclareLocks(self, lu, level):
12917 def _GetQueryData(self, lu):
12918 """Computes the list of node groups and their attributes.
12921 do_nodes = query.GQ_NODE in self.requested_data
12922 do_instances = query.GQ_INST in self.requested_data
12924 group_to_nodes = None
12925 group_to_instances = None
12927 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
12928 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
12929 # latter GetAllInstancesInfo() is not enough, for we have to go through
12930 # instance->node. Hence, we will need to process nodes even if we only need
12931 # instance information.
12932 if do_nodes or do_instances:
12933 all_nodes = lu.cfg.GetAllNodesInfo()
12934 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
12937 for node in all_nodes.values():
12938 if node.group in group_to_nodes:
12939 group_to_nodes[node.group].append(node.name)
12940 node_to_group[node.name] = node.group
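# When instance data was requested, attribute each instance to the group of
# its primary node.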
12943 all_instances = lu.cfg.GetAllInstancesInfo()
12944 group_to_instances = dict((uuid, []) for uuid in self.wanted)
12946 for instance in all_instances.values():
12947 node = instance.primary_node
12948 if node in node_to_group:
12949 group_to_instances[node_to_group[node]].append(instance.name)
12952 # Do not pass on node information if it was not requested.
12953 group_to_nodes = None
12955 return query.GroupQueryData([self._all_groups[uuid]
12956 for uuid in self.wanted],
12957 group_to_nodes, group_to_instances)
12960 class LUGroupQuery(NoHooksLU):
12961 """Logical unit for querying node groups.
12966 def CheckArguments(self):
12967 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
12968 self.op.output_fields, False)
12970 def ExpandNames(self):
12971 self.gq.ExpandNames(self)
12973 def DeclareLocks(self, level):
12974 self.gq.DeclareLocks(self, level)
12976 def Exec(self, feedback_fn):
12977 return self.gq.OldStyleQuery(self)
12980 class LUGroupSetParams(LogicalUnit):
12981 """Modifies the parameters of a node group.
12984 HPATH = "group-modify"
12985 HTYPE = constants.HTYPE_GROUP
12988 def CheckArguments(self):
12991 self.op.diskparams,
12992 self.op.alloc_policy,
12997 if all_changes.count(None) == len(all_changes):
12998 raise errors.OpPrereqError("Please pass at least one modification",
12999 errors.ECODE_INVAL)
13001 def ExpandNames(self):
13002 # This raises errors.OpPrereqError on its own:
13003 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13005 self.needed_locks = {
13006 locking.LEVEL_NODEGROUP: [self.group_uuid],
13009 def CheckPrereq(self):
13010 """Check prerequisites.
13013 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13015 if self.group is None:
13016 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13017 (self.op.group_name, self.group_uuid))
13019 if self.op.ndparams:
13020 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
13021 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13022 self.new_ndparams = new_ndparams
13024 if self.op.diskparams:
13025 self.new_diskparams = dict()
13026 for templ in constants.DISK_TEMPLATES:
13027 if templ not in self.op.diskparams:
13028 self.op.diskparams[templ] = {}
13029 new_templ_params = _GetUpdatedParams(self.group.diskparams[templ],
13030 self.op.diskparams[templ])
13031 utils.ForceDictType(new_templ_params, constants.DISK_DT_TYPES)
13032 self.new_diskparams[templ] = new_templ_params
13034 if self.op.hv_state:
13035 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
13036 self.group.hv_state_static)
13038 if self.op.disk_state:
13039 self.new_disk_state = \
13040 _MergeAndVerifyDiskState(self.op.disk_state,
13041 self.group.disk_state_static)
13043 def BuildHooksEnv(self):
13044 """Build hooks env.
13048 "GROUP_NAME": self.op.group_name,
13049 "NEW_ALLOC_POLICY": self.op.alloc_policy,
13052 def BuildHooksNodes(self):
13053 """Build hooks nodes.
13056 mn = self.cfg.GetMasterNode()
13057 return ([mn], [mn])
13059 def Exec(self, feedback_fn):
13060 """Modifies the node group.
13065 if self.op.ndparams:
13066 self.group.ndparams = self.new_ndparams
13067 result.append(("ndparams", str(self.group.ndparams)))
13069 if self.op.diskparams:
13070 self.group.diskparams = self.new_diskparams
13071 result.append(("diskparams", str(self.group.diskparams)))
13073 if self.op.alloc_policy:
13074 self.group.alloc_policy = self.op.alloc_policy
13076 if self.op.hv_state:
13077 self.group.hv_state_static = self.new_hv_state
13079 if self.op.disk_state:
13080 self.group.disk_state_static = self.new_disk_state
13082 self.cfg.Update(self.group, feedback_fn)
13086 class LUGroupRemove(LogicalUnit):
13087 HPATH = "group-remove"
13088 HTYPE = constants.HTYPE_GROUP
13091 def ExpandNames(self):
13092 # This raises errors.OpPrereqError on its own:
13093 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13094 self.needed_locks = {
13095 locking.LEVEL_NODEGROUP: [self.group_uuid],
13098 def CheckPrereq(self):
13099 """Check prerequisites.
13101 This checks that the given group name exists as a node group, that it is
13102 empty (i.e., contains no nodes), and that it is not the last group of the cluster.
13106 # Verify that the group is empty.
13107 group_nodes = [node.name
13108 for node in self.cfg.GetAllNodesInfo().values()
13109 if node.group == self.group_uuid]
13112 raise errors.OpPrereqError("Group '%s' not empty, has the following"
13114 (self.op.group_name,
13115 utils.CommaJoin(utils.NiceSort(group_nodes))),
13116 errors.ECODE_STATE)
13118 # Verify the cluster would not be left group-less.
13119 if len(self.cfg.GetNodeGroupList()) == 1:
13120 raise errors.OpPrereqError("Group '%s' is the only group,"
13121 " cannot be removed" %
13122 self.op.group_name,
13123 errors.ECODE_STATE)
13125 def BuildHooksEnv(self):
13126 """Build hooks env.
13130 "GROUP_NAME": self.op.group_name,
13133 def BuildHooksNodes(self):
13134 """Build hooks nodes.
13137 mn = self.cfg.GetMasterNode()
13138 return ([mn], [mn])
13140 def Exec(self, feedback_fn):
13141 """Remove the node group.
13145 self.cfg.RemoveNodeGroup(self.group_uuid)
13146 except errors.ConfigurationError:
13147 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
13148 (self.op.group_name, self.group_uuid))
13150 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13153 class LUGroupRename(LogicalUnit):
13154 HPATH = "group-rename"
13155 HTYPE = constants.HTYPE_GROUP
13158 def ExpandNames(self):
13159 # This raises errors.OpPrereqError on its own:
13160 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13162 self.needed_locks = {
13163 locking.LEVEL_NODEGROUP: [self.group_uuid],
13166 def CheckPrereq(self):
13167 """Check prerequisites.
13169 Ensures the requested new name is not yet used.
13173 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
13174 except errors.OpPrereqError:
13177 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
13178 " node group (UUID: %s)" %
13179 (self.op.new_name, new_name_uuid),
13180 errors.ECODE_EXISTS)
13182 def BuildHooksEnv(self):
13183 """Build hooks env.
13187 "OLD_NAME": self.op.group_name,
13188 "NEW_NAME": self.op.new_name,
13191 def BuildHooksNodes(self):
13192 """Build hooks nodes.
13195 mn = self.cfg.GetMasterNode()
13197 all_nodes = self.cfg.GetAllNodesInfo()
13198 all_nodes.pop(mn, None)
13201 run_nodes.extend(node.name for node in all_nodes.values()
13202 if node.group == self.group_uuid)
13204 return (run_nodes, run_nodes)
13206 def Exec(self, feedback_fn):
13207 """Rename the node group.
13210 group = self.cfg.GetNodeGroup(self.group_uuid)
13213 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13214 (self.op.group_name, self.group_uuid))
13216 group.name = self.op.new_name
13217 self.cfg.Update(group, feedback_fn)
13219 return self.op.new_name
13222 class LUGroupEvacuate(LogicalUnit):
13223 HPATH = "group-evacuate"
13224 HTYPE = constants.HTYPE_GROUP
13227 def ExpandNames(self):
13228 # This raises errors.OpPrereqError on its own:
13229 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13231 if self.op.target_groups:
13232 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
13233 self.op.target_groups)
13235 self.req_target_uuids = []
13237 if self.group_uuid in self.req_target_uuids:
13238 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
13239 " as a target group (targets are %s)" %
13241 utils.CommaJoin(self.req_target_uuids)),
13242 errors.ECODE_INVAL)
13244 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
13246 self.share_locks = _ShareAll()
13247 self.needed_locks = {
13248 locking.LEVEL_INSTANCE: [],
13249 locking.LEVEL_NODEGROUP: [],
13250 locking.LEVEL_NODE: [],
13253 def DeclareLocks(self, level):
13254 if level == locking.LEVEL_INSTANCE:
13255 assert not self.needed_locks[locking.LEVEL_INSTANCE]
13257 # Lock instances optimistically, needs verification once node and group
13258 # locks have been acquired
13259 self.needed_locks[locking.LEVEL_INSTANCE] = \
13260 self.cfg.GetNodeGroupInstances(self.group_uuid)
13262 elif level == locking.LEVEL_NODEGROUP:
13263 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13265 if self.req_target_uuids:
13266 lock_groups = set([self.group_uuid] + self.req_target_uuids)
13268 # Lock all groups used by instances optimistically; this requires going
13269 # via the node before it's locked, requiring verification later on
13270 lock_groups.update(group_uuid
13271 for instance_name in
13272 self.owned_locks(locking.LEVEL_INSTANCE)
13274 self.cfg.GetInstanceNodeGroups(instance_name))
13276 # No target groups, need to lock all of them
13277 lock_groups = locking.ALL_SET
13279 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13281 elif level == locking.LEVEL_NODE:
13282 # This will only lock the nodes in the group to be evacuated which
13283 # contain actual instances
13284 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13285 self._LockInstancesNodes()
13287 # Lock all nodes in group to be evacuated and target groups
13288 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13289 assert self.group_uuid in owned_groups
13290 member_nodes = [node_name
13291 for group in owned_groups
13292 for node_name in self.cfg.GetNodeGroup(group).members]
13293 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13295 def CheckPrereq(self):
13296 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13297 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13298 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13300 assert owned_groups.issuperset(self.req_target_uuids)
13301 assert self.group_uuid in owned_groups
13303 # Check if locked instances are still correct
13304 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
13306 # Get instance information
13307 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
13309 # Check if node groups for locked instances are still correct
13310 for instance_name in owned_instances:
13311 inst = self.instances[instance_name]
13312 assert owned_nodes.issuperset(inst.all_nodes), \
13313 "Instance %s's nodes changed while we kept the lock" % instance_name
13315 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
13318 assert self.group_uuid in inst_groups, \
13319 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
13321 if self.req_target_uuids:
13322 # User requested specific target groups
13323 self.target_uuids = self.req_target_uuids
13325 # All groups except the one to be evacuated are potential targets
13326 self.target_uuids = [group_uuid for group_uuid in owned_groups
13327 if group_uuid != self.group_uuid]
13329 if not self.target_uuids:
13330 raise errors.OpPrereqError("There are no possible target groups",
13331 errors.ECODE_INVAL)
13333 def BuildHooksEnv(self):
13334 """Build hooks env.
13338 "GROUP_NAME": self.op.group_name,
13339 "TARGET_GROUPS": " ".join(self.target_uuids),
13342 def BuildHooksNodes(self):
13343 """Build hooks nodes.
13346 mn = self.cfg.GetMasterNode()
13348 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
13350 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
13352 return (run_nodes, run_nodes)
13354 def Exec(self, feedback_fn):
13355 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13357 assert self.group_uuid not in self.target_uuids
13359 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
13360 instances=instances, target_groups=self.target_uuids)
13362 ial.Run(self.op.iallocator)
13364 if not ial.success:
13365 raise errors.OpPrereqError("Can't compute group evacuation using"
13366 " iallocator '%s': %s" %
13367 (self.op.iallocator, ial.info),
13368 errors.ECODE_NORES)
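# Translate the iallocator's evacuation plan into jobs that will be submitted
# through ResultWithJobs below.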
13370 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13372 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
13373 len(jobs), self.op.group_name)
13375 return ResultWithJobs(jobs)
13378 class TagsLU(NoHooksLU): # pylint: disable=W0223
13379 """Generic tags LU.
13381 This is an abstract class which is the parent of all the other tags LUs.
13384 def ExpandNames(self):
13385 self.group_uuid = None
13386 self.needed_locks = {}
13387 if self.op.kind == constants.TAG_NODE:
13388 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
13389 self.needed_locks[locking.LEVEL_NODE] = self.op.name
13390 elif self.op.kind == constants.TAG_INSTANCE:
13391 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
13392 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
13393 elif self.op.kind == constants.TAG_NODEGROUP:
13394 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
13396 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
13397 # not possible to acquire the BGL based on opcode parameters)
13399 def CheckPrereq(self):
13400 """Check prerequisites.
13403 if self.op.kind == constants.TAG_CLUSTER:
13404 self.target = self.cfg.GetClusterInfo()
13405 elif self.op.kind == constants.TAG_NODE:
13406 self.target = self.cfg.GetNodeInfo(self.op.name)
13407 elif self.op.kind == constants.TAG_INSTANCE:
13408 self.target = self.cfg.GetInstanceInfo(self.op.name)
13409 elif self.op.kind == constants.TAG_NODEGROUP:
13410 self.target = self.cfg.GetNodeGroup(self.group_uuid)
13412 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
13413 str(self.op.kind), errors.ECODE_INVAL)
13416 class LUTagsGet(TagsLU):
13417 """Returns the tags of a given object.
13422 def ExpandNames(self):
13423 TagsLU.ExpandNames(self)
13425 # Share locks as this is only a read operation
13426 self.share_locks = _ShareAll()
13428 def Exec(self, feedback_fn):
13429 """Returns the tag list.
13432 return list(self.target.GetTags())
13435 class LUTagsSearch(NoHooksLU):
13436 """Searches the tags for a given pattern.
13441 def ExpandNames(self):
13442 self.needed_locks = {}
13444 def CheckPrereq(self):
13445 """Check prerequisites.
13447 This checks the pattern passed for validity by compiling it.
13451 self.re = re.compile(self.op.pattern)
13452 except re.error, err:
13453 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
13454 (self.op.pattern, err), errors.ECODE_INVAL)
13456 def Exec(self, feedback_fn):
13457 """Returns the tag list.
13461 tgts = [("/cluster", cfg.GetClusterInfo())]
13462 ilist = cfg.GetAllInstancesInfo().values()
13463 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
13464 nlist = cfg.GetAllNodesInfo().values()
13465 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
13466 tgts.extend(("/nodegroup/%s" % n.name, n)
13467 for n in cfg.GetAllNodeGroupsInfo().values())
13469 for path, target in tgts:
13470 for tag in target.GetTags():
13471 if self.re.search(tag):
13472 results.append((path, tag))
13476 class LUTagsSet(TagsLU):
13477 """Sets a tag on a given object.
13482 def CheckPrereq(self):
13483 """Check prerequisites.
13485 This checks the type and length of the tag name and value.
13488 TagsLU.CheckPrereq(self)
13489 for tag in self.op.tags:
13490 objects.TaggableObject.ValidateTag(tag)
13492 def Exec(self, feedback_fn):
13497 for tag in self.op.tags:
13498 self.target.AddTag(tag)
13499 except errors.TagError, err:
13500 raise errors.OpExecError("Error while setting tag: %s" % str(err))
13501 self.cfg.Update(self.target, feedback_fn)
13504 class LUTagsDel(TagsLU):
13505 """Delete a list of tags from a given object.
13510 def CheckPrereq(self):
13511 """Check prerequisites.
13513 This checks that we have the given tag.
13516 TagsLU.CheckPrereq(self)
13517 for tag in self.op.tags:
13518 objects.TaggableObject.ValidateTag(tag)
13519 del_tags = frozenset(self.op.tags)
13520 cur_tags = self.target.GetTags()
13522 diff_tags = del_tags - cur_tags
13524 diff_names = ("'%s'" % i for i in sorted(diff_tags))
13525 raise errors.OpPrereqError("Tag(s) %s not found" %
13526 (utils.CommaJoin(diff_names), ),
13527 errors.ECODE_NOENT)
13529 def Exec(self, feedback_fn):
13530 """Remove the tag from the object.
13533 for tag in self.op.tags:
13534 self.target.RemoveTag(tag)
13535 self.cfg.Update(self.target, feedback_fn)
13538 class LUTestDelay(NoHooksLU):
13539 """Sleep for a specified amount of time.
13541 This LU sleeps on the master and/or nodes for a specified amount of time.
13547 def ExpandNames(self):
13548 """Expand names and set required locks.
13550 This expands the node list, if any.
13553 self.needed_locks = {}
13554 if self.op.on_nodes:
13555 # _GetWantedNodes can be used here, but is not always appropriate to use
13556 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
13557 # more information.
13558 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
13559 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
13561 def _TestDelay(self):
13562 """Do the actual sleep.
13565 if self.op.on_master:
13566 if not utils.TestDelay(self.op.duration):
13567 raise errors.OpExecError("Error during master delay test")
13568 if self.op.on_nodes:
13569 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
13570 for node, node_result in result.items():
13571 node_result.Raise("Failure during rpc call to node %s" % node)
13573 def Exec(self, feedback_fn):
13574 """Execute the test delay opcode, with the wanted repetitions.
13577 if self.op.repeat == 0:
13580 top_value = self.op.repeat - 1
13581 for i in range(self.op.repeat):
13582 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
13586 class LUTestJqueue(NoHooksLU):
13587 """Utility LU to test some aspects of the job queue.
13592 # Must be lower than default timeout for WaitForJobChange to see whether it
13593 # notices changed jobs
13594 _CLIENT_CONNECT_TIMEOUT = 20.0
13595 _CLIENT_CONFIRM_TIMEOUT = 60.0
13598 def _NotifyUsingSocket(cls, cb, errcls):
13599 """Opens a Unix socket and waits for another program to connect.
13602 @param cb: Callback to send socket name to client
13603 @type errcls: class
13604 @param errcls: Exception class to use for errors
13607 # Using a temporary directory as there's no easy way to create temporary
13608 # sockets without writing a custom loop around tempfile.mktemp and
13610 tmpdir = tempfile.mkdtemp()
13612 tmpsock = utils.PathJoin(tmpdir, "sock")
13614 logging.debug("Creating temporary socket at %s", tmpsock)
13615 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
13620 # Send details to client
13623 # Wait for client to connect before continuing
13624 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
13626 (conn, _) = sock.accept()
13627 except socket.error, err:
13628 raise errcls("Client didn't connect in time (%s)" % err)
13632 # Remove as soon as client is connected
13633 shutil.rmtree(tmpdir)
13635 # Wait for client to close
13638 # pylint: disable=E1101
13639 # Instance of '_socketobject' has no ... member
13640 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
13642 except socket.error, err:
13643 raise errcls("Client failed to confirm notification (%s)" % err)
13647 def _SendNotification(self, test, arg, sockname):
13648 """Sends a notification to the client.
13651 @param test: Test name
13652 @param arg: Test argument (depends on test)
13653 @type sockname: string
13654 @param sockname: Socket path
13657 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
13659 def _Notify(self, prereq, test, arg):
13660 """Notifies the client of a test.
13663 @param prereq: Whether this is a prereq-phase test
13665 @param test: Test name
13666 @param arg: Test argument (depends on test)
13670 errcls = errors.OpPrereqError
13672 errcls = errors.OpExecError
13674 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
13678 def CheckArguments(self):
13679 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
13680 self.expandnames_calls = 0
13682 def ExpandNames(self):
13683 checkargs_calls = getattr(self, "checkargs_calls", 0)
13684 if checkargs_calls < 1:
13685 raise errors.ProgrammerError("CheckArguments was not called")
13687 self.expandnames_calls += 1
13689 if self.op.notify_waitlock:
13690 self._Notify(True, constants.JQT_EXPANDNAMES, None)
13692 self.LogInfo("Expanding names")
13694 # Get lock on master node (just to get a lock, not for a particular reason)
13695 self.needed_locks = {
13696 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
13699 def Exec(self, feedback_fn):
13700 if self.expandnames_calls < 1:
13701 raise errors.ProgrammerError("ExpandNames was not called")
13703 if self.op.notify_exec:
13704 self._Notify(False, constants.JQT_EXEC, None)
13706 self.LogInfo("Executing")
13708 if self.op.log_messages:
13709 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
13710 for idx, msg in enumerate(self.op.log_messages):
13711 self.LogInfo("Sending log message %s", idx + 1)
13712 feedback_fn(constants.JQT_MSGPREFIX + msg)
13713 # Report how many test messages have been sent
13714 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
13717 raise errors.OpExecError("Opcode failure was requested")
13722 class IAllocator(object):
13723 """IAllocator framework.
13725 An IAllocator instance has four sets of attributes:
13726 - cfg that is needed to query the cluster
13727 - input data (all members of the _KEYS class attribute are required)
13728 - four buffer attributes (in|out_data|text), that represent the
13729 input (to the external script) in text and data structure format,
13730 and the output from it, again in two formats
13731 - the result variables from the script (success, info, nodes) for
13735 # pylint: disable=R0902
13736 # lots of instance attributes
13738 def __init__(self, cfg, rpc_runner, mode, **kwargs):
13740 self.rpc = rpc_runner
13741 # init buffer variables
13742 self.in_text = self.out_text = self.in_data = self.out_data = None
13743 # init all input fields so that pylint is happy
13745 self.memory = self.disks = self.disk_template = None
13746 self.os = self.tags = self.nics = self.vcpus = None
13747 self.hypervisor = None
13748 self.relocate_from = None
13750 self.instances = None
13751 self.evac_mode = None
13752 self.target_groups = []
13754 self.required_nodes = None
13755 # init result fields
13756 self.success = self.info = self.result = None
13759 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
13761 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
13762 " IAllocator" % self.mode)
13764 keyset = [n for (n, _) in keydata]
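# Validate the keyword arguments both ways: reject unknown parameters and
# require every key the selected mode expects.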
13767 if key not in keyset:
13768 raise errors.ProgrammerError("Invalid input parameter '%s' to"
13769 " IAllocator" % key)
13770 setattr(self, key, kwargs[key])
13773 if key not in kwargs:
13774 raise errors.ProgrammerError("Missing input parameter '%s' to"
13775 " IAllocator" % key)
13776 self._BuildInputData(compat.partial(fn, self), keydata)
13778 def _ComputeClusterData(self):
13779 """Compute the generic allocator input data.
13781 This is the data that is independent of the actual operation.
13785 cluster_info = cfg.GetClusterInfo()
13788 "version": constants.IALLOCATOR_VERSION,
13789 "cluster_name": cfg.GetClusterName(),
13790 "cluster_tags": list(cluster_info.GetTags()),
13791 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
13792 # we don't have job IDs
13794 ninfo = cfg.GetAllNodesInfo()
13795 iinfo = cfg.GetAllInstancesInfo().values()
13796 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
13799 node_list = [n.name for n in ninfo.values() if n.vm_capable]
13801 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
13802 hypervisor_name = self.hypervisor
13803 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
13804 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
13806 hypervisor_name = cluster_info.primary_hypervisor
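# Query the live node state (memory, storage, CPUs) and the running-instance
# information from all vm_capable nodes.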
13808 node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
13811 self.rpc.call_all_instances_info(node_list,
13812 cluster_info.enabled_hypervisors)
13814 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
13816 config_ndata = self._ComputeBasicNodeData(ninfo)
13817 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
13818 i_list, config_ndata)
13819 assert len(data["nodes"]) == len(ninfo), \
13820 "Incomplete node data computed"
13822 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
13824 self.in_data = data
13827 def _ComputeNodeGroupData(cfg):
13828 """Compute node groups data.
13831 ng = dict((guuid, {
13832 "name": gdata.name,
13833 "alloc_policy": gdata.alloc_policy,
13835 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
13840 def _ComputeBasicNodeData(node_cfg):
13841 """Compute global node data.
13844 @returns: a dict of name: (node dict, node config)
13847 # fill in static (config-based) values
13848 node_results = dict((ninfo.name, {
13849 "tags": list(ninfo.GetTags()),
13850 "primary_ip": ninfo.primary_ip,
13851 "secondary_ip": ninfo.secondary_ip,
13852 "offline": ninfo.offline,
13853 "drained": ninfo.drained,
13854 "master_candidate": ninfo.master_candidate,
13855 "group": ninfo.group,
13856 "master_capable": ninfo.master_capable,
13857 "vm_capable": ninfo.vm_capable,
13859 for ninfo in node_cfg.values())
13861 return node_results
13864 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
13866 """Compute global node data.
13868 @param node_results: the basic node structures as filled from the config
13871 #TODO(dynmem): compute the right data on MAX and MIN memory
13872 # make a copy of the current dict
13873 node_results = dict(node_results)
13874 for nname, nresult in node_data.items():
13875 assert nname in node_results, "Missing basic data for node %s" % nname
13876 ninfo = node_cfg[nname]
13878 if not (ninfo.offline or ninfo.drained):
13879 nresult.Raise("Can't get data for node %s" % nname)
13880 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
13882 remote_info = _MakeLegacyNodeInfo(nresult.payload)
13884 for attr in ["memory_total", "memory_free", "memory_dom0",
13885 "vg_size", "vg_free", "cpu_total"]:
13886 if attr not in remote_info:
13887 raise errors.OpExecError("Node '%s' didn't return attribute"
13888 " '%s'" % (nname, attr))
13889 if not isinstance(remote_info[attr], int):
13890 raise errors.OpExecError("Node '%s' returned invalid value"
13892 (nname, attr, remote_info[attr]))
13893 # compute memory used by primary instances
13894 i_p_mem = i_p_up_mem = 0
13895 for iinfo, beinfo in i_list:
13896 if iinfo.primary_node == nname:
13897 i_p_mem += beinfo[constants.BE_MAXMEM]
13898 if iinfo.name not in node_iinfo[nname].payload:
13901 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
13902 i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
13903 remote_info["memory_free"] -= max(0, i_mem_diff)
13905 if iinfo.admin_state == constants.ADMINST_UP:
13906 i_p_up_mem += beinfo[constants.BE_MAXMEM]
13908 # compute memory used by instances
13910 "total_memory": remote_info["memory_total"],
13911 "reserved_memory": remote_info["memory_dom0"],
13912 "free_memory": remote_info["memory_free"],
13913 "total_disk": remote_info["vg_size"],
13914 "free_disk": remote_info["vg_free"],
13915 "total_cpus": remote_info["cpu_total"],
13916 "i_pri_memory": i_p_mem,
13917 "i_pri_up_memory": i_p_up_mem,
13919 pnr_dyn.update(node_results[nname])
13920 node_results[nname] = pnr_dyn
13922 return node_results
13925 def _ComputeInstanceData(cluster_info, i_list):
13926 """Compute global instance data.
13930 for iinfo, beinfo in i_list:
13932 for nic in iinfo.nics:
13933 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
13937 "mode": filled_params[constants.NIC_MODE],
13938 "link": filled_params[constants.NIC_LINK],
13940 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
13941 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
13942 nic_data.append(nic_dict)
13944 "tags": list(iinfo.GetTags()),
13945 "admin_state": iinfo.admin_state,
13946 "vcpus": beinfo[constants.BE_VCPUS],
13947 "memory": beinfo[constants.BE_MAXMEM],
13949 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
13951 "disks": [{constants.IDISK_SIZE: dsk.size,
13952 constants.IDISK_MODE: dsk.mode}
13953 for dsk in iinfo.disks],
13954 "disk_template": iinfo.disk_template,
13955 "hypervisor": iinfo.hypervisor,
13957 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
13959 instance_data[iinfo.name] = pir
13961 return instance_data
13963 def _AddNewInstance(self):
13964 """Add new instance data to allocator structure.
13966 This in combination with _ComputeClusterData will create the
13967 correct structure needed as input for the allocator.
13969 The checks for the completeness of the opcode must have already been done.
13973 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
13975 if self.disk_template in constants.DTS_INT_MIRROR:
13976 self.required_nodes = 2
13978 self.required_nodes = 1
13982 "disk_template": self.disk_template,
13985 "vcpus": self.vcpus,
13986 "memory": self.memory,
13987 "disks": self.disks,
13988 "disk_space_total": disk_space,
13990 "required_nodes": self.required_nodes,
13991 "hypervisor": self.hypervisor,
13996 def _AddRelocateInstance(self):
13997 """Add relocate instance data to allocator structure.
13999 This in combination with _ComputeClusterData will create the
14000 correct structure needed as input for the allocator.
14002 The checks for the completeness of the opcode must have already been done.
14006 instance = self.cfg.GetInstanceInfo(self.name)
14007 if instance is None:
14008 raise errors.ProgrammerError("Unknown instance '%s' passed to"
14009 " IAllocator" % self.name)
14011 if instance.disk_template not in constants.DTS_MIRRORED:
14012 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
14013 errors.ECODE_INVAL)
14015 if instance.disk_template in constants.DTS_INT_MIRROR and \
14016 len(instance.secondary_nodes) != 1:
14017 raise errors.OpPrereqError("Instance has not exactly one secondary node",
14018 errors.ECODE_STATE)
14020 self.required_nodes = 1
14021 disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
14022 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
14026 "disk_space_total": disk_space,
14027 "required_nodes": self.required_nodes,
14028 "relocate_from": self.relocate_from,
14032 def _AddNodeEvacuate(self):
14033 """Get data for node-evacuate requests.
14037 "instances": self.instances,
14038 "evac_mode": self.evac_mode,
14041 def _AddChangeGroup(self):
14042 """Get data for node-evacuate requests.
14046 "instances": self.instances,
14047 "target_groups": self.target_groups,
14050 def _BuildInputData(self, fn, keydata):
14051 """Build input data structures.
14054 self._ComputeClusterData()
14057 request["type"] = self.mode
14058 for keyname, keytype in keydata:
14059 if keyname not in request:
14060 raise errors.ProgrammerError("Request parameter %s is missing" %
14062 val = request[keyname]
14063 if not keytype(val):
14064 raise errors.ProgrammerError("Request parameter %s doesn't pass"
14065 " validation, value %s, expected"
14066 " type %s" % (keyname, val, keytype))
14067 self.in_data["request"] = request
14069 self.in_text = serializer.Dump(self.in_data)
14071 _STRING_LIST = ht.TListOf(ht.TString)
14072 _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
14073 # pylint: disable=E1101
14074 # Class '...' has no 'OP_ID' member
14075 "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
14076 opcodes.OpInstanceMigrate.OP_ID,
14077 opcodes.OpInstanceReplaceDisks.OP_ID])
14081 ht.TListOf(ht.TAnd(ht.TIsLength(3),
14082 ht.TItems([ht.TNonEmptyString,
14083 ht.TNonEmptyString,
14084 ht.TListOf(ht.TNonEmptyString),
14087 ht.TListOf(ht.TAnd(ht.TIsLength(2),
14088 ht.TItems([ht.TNonEmptyString,
14091 _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
14092 ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
14095 constants.IALLOCATOR_MODE_ALLOC:
14098 ("name", ht.TString),
14099 ("memory", ht.TInt),
14100 ("disks", ht.TListOf(ht.TDict)),
14101 ("disk_template", ht.TString),
14102 ("os", ht.TString),
14103 ("tags", _STRING_LIST),
14104 ("nics", ht.TListOf(ht.TDict)),
14105 ("vcpus", ht.TInt),
14106 ("hypervisor", ht.TString),
14108 constants.IALLOCATOR_MODE_RELOC:
14109 (_AddRelocateInstance,
14110 [("name", ht.TString), ("relocate_from", _STRING_LIST)],
14112 constants.IALLOCATOR_MODE_NODE_EVAC:
14113 (_AddNodeEvacuate, [
14114 ("instances", _STRING_LIST),
14115 ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
14117 constants.IALLOCATOR_MODE_CHG_GROUP:
14118 (_AddChangeGroup, [
14119 ("instances", _STRING_LIST),
14120 ("target_groups", _STRING_LIST),
14124 def Run(self, name, validate=True, call_fn=None):
14125 """Run an instance allocator and return the results.
14128 if call_fn is None:
14129 call_fn = self.rpc.call_iallocator_runner
14131 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
14132 result.Raise("Failure while running the iallocator script")
14134 self.out_text = result.payload
14136 self._ValidateResult()
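# A minimal usage sketch (with hypothetical variable names), mirroring the
# callers above such as LUGroupEvacuate.Exec and LUTestAllocator.Exec:
#
#   ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
#                    instances=instance_names, target_groups=group_uuids)
#   ial.Run(self.op.iallocator)
#   if not ial.success:
#     raise errors.OpExecError("iallocator failed: %s" % ial.info)
#   # ial.result now holds the validated allocator output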
14138 def _ValidateResult(self):
14139 """Process the allocator results.
14141 This will process the allocator output and, if successful, save the result
14142 in self.out_data and the other result attributes.
14146 rdict = serializer.Load(self.out_text)
14147 except Exception, err:
14148 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
14150 if not isinstance(rdict, dict):
14151 raise errors.OpExecError("Can't parse iallocator results: not a dict")
14153 # TODO: remove backwards compatibility in later versions
14154 if "nodes" in rdict and "result" not in rdict:
14155 rdict["result"] = rdict["nodes"]
14158 for key in "success", "info", "result":
14159 if key not in rdict:
14160 raise errors.OpExecError("Can't parse iallocator results:"
14161 " missing key '%s'" % key)
14162 setattr(self, key, rdict[key])
14164 if not self._result_check(self.result):
14165 raise errors.OpExecError("Iallocator returned invalid result,"
14166 " expected %s, got %s" %
14167 (self._result_check, self.result),
14168 errors.ECODE_INVAL)
14170 if self.mode == constants.IALLOCATOR_MODE_RELOC:
14171 assert self.relocate_from is not None
14172 assert self.required_nodes == 1
14174 node2group = dict((name, ndata["group"])
14175 for (name, ndata) in self.in_data["nodes"].items())
14177 fn = compat.partial(self._NodesToGroups, node2group,
14178 self.in_data["nodegroups"])
14180 instance = self.cfg.GetInstanceInfo(self.name)
14181 request_groups = fn(self.relocate_from + [instance.primary_node])
14182 result_groups = fn(rdict["result"] + [instance.primary_node])
14184 if self.success and not set(result_groups).issubset(request_groups):
14185 raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
14186 " differ from original groups (%s)" %
14187 (utils.CommaJoin(result_groups),
14188 utils.CommaJoin(request_groups)))
14190 elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
14191 assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
14193 self.out_data = rdict
14196 def _NodesToGroups(node2group, groups, nodes):
14197 """Returns a list of unique group names for a list of nodes.
14199 @type node2group: dict
14200 @param node2group: Map from node name to group UUID
14202 @param groups: Group information
14204 @param nodes: Node names
14211 group_uuid = node2group[node]
14213 # Ignore unknown node
14217 group = groups[group_uuid]
14219 # Can't find group, let's use UUID
14220 group_name = group_uuid
14222 group_name = group["name"]
14224 result.add(group_name)
14226 return sorted(result)
14229 class LUTestAllocator(NoHooksLU):
14230 """Run allocator tests.
14232 This LU runs the allocator tests
14235 def CheckPrereq(self):
14236 """Check prerequisites.
14238 This checks the opcode parameters depending on the direction and mode of the test.
14241 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
14242 for attr in ["memory", "disks", "disk_template",
14243 "os", "tags", "nics", "vcpus"]:
14244 if not hasattr(self.op, attr):
14245 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
14246 attr, errors.ECODE_INVAL)
14247 iname = self.cfg.ExpandInstanceName(self.op.name)
14248 if iname is not None:
14249 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
14250 iname, errors.ECODE_EXISTS)
14251 if not isinstance(self.op.nics, list):
14252 raise errors.OpPrereqError("Invalid parameter 'nics'",
14253 errors.ECODE_INVAL)
14254 if not isinstance(self.op.disks, list):
14255 raise errors.OpPrereqError("Invalid parameter 'disks'",
14256 errors.ECODE_INVAL)
14257 for row in self.op.disks:
14258 if (not isinstance(row, dict) or
14259 constants.IDISK_SIZE not in row or
14260 not isinstance(row[constants.IDISK_SIZE], int) or
14261 constants.IDISK_MODE not in row or
14262 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
14263 raise errors.OpPrereqError("Invalid contents of the 'disks'"
14264 " parameter", errors.ECODE_INVAL)
14265 if self.op.hypervisor is None:
14266 self.op.hypervisor = self.cfg.GetHypervisorType()
14267 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
14268 fname = _ExpandInstanceName(self.cfg, self.op.name)
14269 self.op.name = fname
14270 self.relocate_from = \
14271 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
14272 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
14273 constants.IALLOCATOR_MODE_NODE_EVAC):
14274 if not self.op.instances:
14275 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
14276 self.op.instances = _GetWantedInstances(self, self.op.instances)
14278 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
14279 self.op.mode, errors.ECODE_INVAL)
14281 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
14282 if self.op.allocator is None:
14283 raise errors.OpPrereqError("Missing allocator name",
14284 errors.ECODE_INVAL)
14285 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
14286 raise errors.OpPrereqError("Wrong allocator test '%s'" %
14287 self.op.direction, errors.ECODE_INVAL)
14289 def Exec(self, feedback_fn):
14290 """Run the allocator test.
14293 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
14294 ial = IAllocator(self.cfg, self.rpc,
14297 memory=self.op.memory,
14298 disks=self.op.disks,
14299 disk_template=self.op.disk_template,
14303 vcpus=self.op.vcpus,
14304 hypervisor=self.op.hypervisor,
14306 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
14307 ial = IAllocator(self.cfg, self.rpc,
14310 relocate_from=list(self.relocate_from),
14312 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
14313 ial = IAllocator(self.cfg, self.rpc,
14315 instances=self.op.instances,
14316 target_groups=self.op.target_groups)
14317 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
14318 ial = IAllocator(self.cfg, self.rpc,
14320 instances=self.op.instances,
14321 evac_mode=self.op.evac_mode)
14323 raise errors.ProgrammerError("Uncatched mode %s in"
14324 " LUTestAllocator.Exec", self.op.mode)
14326 if self.op.direction == constants.IALLOCATOR_DIR_IN:
14327 result = ial.in_text
14329 ial.Run(self.op.allocator, validate=False)
14330 result = ial.out_text
14334 #: Query type implementations
14336 constants.QR_INSTANCE: _InstanceQuery,
14337 constants.QR_NODE: _NodeQuery,
14338 constants.QR_GROUP: _GroupQuery,
14339 constants.QR_OS: _OsQuery,
14342 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
14345 def _GetQueryImplementation(name):
14346 """Returns the implemtnation for a query type.
14348 @param name: Query type, must be one of L{constants.QR_VIA_OP}
14352 return _QUERY_IMPL[name]
14354 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
14355 errors.ECODE_INVAL)