4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
import copy
import itertools
import logging
import re
import OpenSSL

45 from ganeti import ssh
46 from ganeti import utils
47 from ganeti import errors
48 from ganeti import hypervisor
49 from ganeti import locking
50 from ganeti import constants
51 from ganeti import objects
52 from ganeti import serializer
53 from ganeti import ssconf
54 from ganeti import uidpool
55 from ganeti import compat
56 from ganeti import masterd
57 from ganeti import netutils
58 from ganeti import query
59 from ganeti import qlang
60 from ganeti import opcodes
62 from ganeti import rpc
64 import ganeti.masterd.instance # pylint: disable=W0611
67 #: Size of DRBD meta block device
71 INSTANCE_UP = [constants.ADMINST_UP]
72 INSTANCE_DOWN = [constants.ADMINST_DOWN]
73 INSTANCE_OFFLINE = [constants.ADMINST_OFFLINE]
74 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
75 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
79 """Data container for LU results with jobs.
81 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
82 by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
83 contained in the C{jobs} attribute and include the job IDs in the opcode result.
87 def __init__(self, jobs, **kwargs):
88 """Initializes this class.
90 Additional return values can be specified as keyword arguments.
92 @type jobs: list of lists of L{opcodes.OpCode}
93 @param jobs: A list of lists of opcode objects
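# Usage sketch (illustrative only, not part of this module): an LU's Exec
# could hand back follow-up jobs like this; OpTestDelay is used purely as a
# placeholder opcode and "submitted" as an extra result value:
#
#   def Exec(self, feedback_fn):
#     jobs = [[opcodes.OpTestDelay(duration=1.0)],
#             [opcodes.OpTestDelay(duration=2.0)]]
#     return ResultWithJobs(jobs, submitted=len(jobs))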
100 class LogicalUnit(object):
101 """Logical Unit base class.
103 Subclasses must follow these rules:
104 - implement ExpandNames
105 - implement CheckPrereq (except when tasklets are used)
106 - implement Exec (except when tasklets are used)
107 - implement BuildHooksEnv
108 - implement BuildHooksNodes
109 - redefine HPATH and HTYPE
110 - optionally redefine their run requirements:
111 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
113 Note that all commands require root permissions.
115 @ivar dry_run_result: the value (if any) that will be returned to the caller
116 in dry-run mode (signalled by opcode dry_run parameter)
123 def __init__(self, processor, op, context, rpc_runner):
124 """Constructor for LogicalUnit.
126 This needs to be overridden in derived classes in order to check op validity.
130 self.proc = processor
132 self.cfg = context.cfg
133 self.glm = context.glm
135 self.owned_locks = context.glm.list_owned
136 self.context = context
137 self.rpc = rpc_runner
138 # Dicts used to declare locking needs to mcpu
139 self.needed_locks = None
140 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
142 self.remove_locks = {}
143 # Used to force good behavior when calling helper functions
144 self.recalculate_locks = {}
146 self.Log = processor.Log # pylint: disable=C0103
147 self.LogWarning = processor.LogWarning # pylint: disable=C0103
148 self.LogInfo = processor.LogInfo # pylint: disable=C0103
149 self.LogStep = processor.LogStep # pylint: disable=C0103
150 # support for dry-run
151 self.dry_run_result = None
152 # support for generic debug attribute
153 if (not hasattr(self.op, "debug_level") or
154 not isinstance(self.op.debug_level, int)):
155 self.op.debug_level = 0
160 # Validate opcode parameters and set defaults
161 self.op.Validate(True)
163 self.CheckArguments()
165 def CheckArguments(self):
166 """Check syntactic validity for the opcode arguments.
168 This method is for doing a simple syntactic check and ensuring the
169 validity of opcode parameters, without any cluster-related
170 checks. While the same can be accomplished in ExpandNames and/or
171 CheckPrereq, doing these separately is better because:
173 - ExpandNames is left purely as a lock-related function
174 - CheckPrereq is run after we have acquired locks (and possibly waited for them)
177 The function is allowed to change the self.op attribute so that
178 later methods can no longer worry about missing parameters.
183 def ExpandNames(self):
184 """Expand names for this LU.
186 This method is called before starting to execute the opcode, and it should
187 update all the parameters of the opcode to their canonical form (e.g. a
188 short node name must be fully expanded after this method has successfully
189 completed). This way locking, hooks, logging, etc. can work correctly.
191 LUs which implement this method must also populate the self.needed_locks
192 member, as a dict with lock levels as keys, and a list of needed lock names as values. Rules:
195 - use an empty dict if you don't need any lock
196 - if you don't need any lock at a particular level omit that level
197 - don't put anything for the BGL level
198 - if you want all locks at a level use locking.ALL_SET as a value
200 If you need to share locks (rather than acquire them exclusively) at one
201 level you can modify self.share_locks, setting a true value (usually 1) for
202 that level. By default locks are not shared.
204 This function can also define a list of tasklets, which then will be
205 executed in order instead of the usual LU-level CheckPrereq and Exec
206 functions, if those are not defined by the LU.
210 # Acquire all nodes and one instance
211 self.needed_locks = {
212 locking.LEVEL_NODE: locking.ALL_SET,
213 locking.LEVEL_INSTANCE: ['instance1.example.com'],
215 # Acquire just two nodes
216 self.needed_locks = {
217 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
220 self.needed_locks = {} # No, you can't leave it to the default value None
223 # The implementation of this method is mandatory only if the new LU is
224 # concurrent, so that old LUs don't need to be changed all at the same time.
227 self.needed_locks = {} # Exclusive LUs don't need locks.
229 raise NotImplementedError
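# Usage sketch (illustrative only, not part of this module): an ExpandNames
# implementation that locks one instance exclusively and all nodes in shared
# mode; the instance name is a placeholder:
#
#   def ExpandNames(self):
#     self.needed_locks = {
#       locking.LEVEL_INSTANCE: ["instance1.example.com"],
#       locking.LEVEL_NODE: locking.ALL_SET,
#       }
#     self.share_locks[locking.LEVEL_NODE] = 1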
231 def DeclareLocks(self, level):
232 """Declare LU locking needs for a level
234 While most LUs can just declare their locking needs at ExpandNames time,
235 sometimes there's the need to calculate some locks after having acquired
236 the ones before. This function is called just before acquiring locks at a
237 particular level, but after acquiring the ones at lower levels, and permits
238 such calculations. It can be used to modify self.needed_locks, and by
239 default it does nothing.
241 This function is only called if you have something already set in
242 self.needed_locks for the level.
244 @param level: Locking level which is going to be locked
245 @type level: member of ganeti.locking.LEVELS
249 def CheckPrereq(self):
250 """Check prerequisites for this LU.
252 This method should check that the prerequisites for the execution
253 of this LU are fulfilled. It can do internode communication, but
254 it should be idempotent - no cluster or system changes are allowed.
257 The method should raise errors.OpPrereqError in case something is
258 not fulfilled. Its return value is ignored.
260 This method should also update all the parameters of the opcode to
261 their canonical form if it hasn't been done by ExpandNames before.
264 if self.tasklets is not None:
265 for (idx, tl) in enumerate(self.tasklets):
266 logging.debug("Checking prerequisites for tasklet %s/%s",
267 idx + 1, len(self.tasklets))
272 def Exec(self, feedback_fn):
275 This method should implement the actual work. It should raise
276 errors.OpExecError for failures that are somewhat dealt with in code, or expected.
280 if self.tasklets is not None:
281 for (idx, tl) in enumerate(self.tasklets):
282 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
285 raise NotImplementedError
287 def BuildHooksEnv(self):
288 """Build hooks environment for this LU.
291 @return: Dictionary containing the environment that will be used for
292 running the hooks for this LU. The keys of the dict must not be prefixed
293 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
294 will extend the environment with additional variables. If no environment
295 should be defined, an empty dictionary should be returned (not C{None}).
296 @note: If the C{HPATH} attribute of the LU class is C{None}, this function will not be called.
300 raise NotImplementedError
302 def BuildHooksNodes(self):
303 """Build list of nodes to run LU's hooks.
305 @rtype: tuple; (list, list)
306 @return: Tuple containing a list of node names on which the hook
307 should run before the execution and a list of node names on which the
308 hook should run after the execution. If there are no nodes, an empty
309 list should be returned (not None).
310 @note: If the C{HPATH} attribute of the LU class is C{None}, this function will not be called.
314 raise NotImplementedError
316 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
317 """Notify the LU about the results of its hooks.
319 This method is called every time a hooks phase is executed, and notifies
320 the Logical Unit about the hooks' result. The LU can then use it to alter
321 its result based on the hooks. By default the method does nothing and the
322 previous result is passed back unchanged, but any LU can override it if it
323 wants to use the local cluster hook-scripts somehow.
325 @param phase: one of L{constants.HOOKS_PHASE_POST} or
326 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
327 @param hook_results: the results of the multi-node hooks rpc call
328 @param feedback_fn: function used to send feedback back to the caller
329 @param lu_result: the previous Exec result this LU had, or None
331 @return: the new Exec result, based on the previous result
335 # API must be kept, thus we ignore the unused-argument and
336 # could-be-a-function warnings
337 # pylint: disable=W0613,R0201
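# Usage sketch (illustrative only, not part of this module): an LU that just
# wants to report post-hook failures, without changing its result, could
# override the method roughly as follows (assuming each entry in hook_results
# exposes fail_msg like other RPC results):
#
#   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
#     if phase == constants.HOOKS_PHASE_POST:
#       for (node_name, res) in hook_results.items():
#         if res.fail_msg:
#           feedback_fn("Post-hook failed on %s: %s" %
#                       (node_name, res.fail_msg))
#     return lu_result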
340 def _ExpandAndLockInstance(self):
341 """Helper function to expand and lock an instance.
343 Many LUs that work on an instance take its name in self.op.instance_name
344 and need to expand it and then declare the expanded name for locking. This
345 function does it, and then updates self.op.instance_name to the expanded
346 name. It also initializes needed_locks as a dict, if this hasn't been done before.
350 if self.needed_locks is None:
351 self.needed_locks = {}
353 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
354 "_ExpandAndLockInstance called with instance-level locks set"
355 self.op.instance_name = _ExpandInstanceName(self.cfg,
356 self.op.instance_name)
357 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
359 def _LockInstancesNodes(self, primary_only=False,
360 level=locking.LEVEL_NODE):
361 """Helper function to declare instances' nodes for locking.
363 This function should be called after locking one or more instances to lock
364 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
365 with all primary or secondary nodes for instances already locked and
366 present in self.needed_locks[locking.LEVEL_INSTANCE].
368 It should be called from DeclareLocks, and for safety only works if
369 self.recalculate_locks[locking.LEVEL_NODE] is set.
371 In the future it may grow parameters to just lock some instance's nodes, or
372 to just lock primaries or secondary nodes, if needed.
374 It should be called in DeclareLocks in a way similar to::
376 if level == locking.LEVEL_NODE:
377 self._LockInstancesNodes()
379 @type primary_only: boolean
380 @param primary_only: only lock primary nodes of locked instances
381 @param level: Which lock level to use for locking nodes
384 assert level in self.recalculate_locks, \
385 "_LockInstancesNodes helper function called with no nodes to recalculate"
387 # TODO: check if we've really been called with the instance locks held
389 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
390 # future we might want to have different behaviors depending on the value
391 # of self.recalculate_locks[locking.LEVEL_NODE]
393 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
394 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
395 wanted_nodes.append(instance.primary_node)
397 wanted_nodes.extend(instance.secondary_nodes)
399 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
400 self.needed_locks[level] = wanted_nodes
401 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
402 self.needed_locks[level].extend(wanted_nodes)
404 raise errors.ProgrammerError("Unknown recalculation mode")
406 del self.recalculate_locks[level]
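# Usage sketch (illustrative only): the usual way an instance-level LU
# combines the two helpers above is to expand and lock the instance in
# ExpandNames, defer the node locks, and compute them in DeclareLocks:
#
#   def ExpandNames(self):
#     self._ExpandAndLockInstance()
#     self.needed_locks[locking.LEVEL_NODE] = []
#     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
#
#   def DeclareLocks(self, level):
#     if level == locking.LEVEL_NODE:
#       self._LockInstancesNodes()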
409 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
410 """Simple LU which runs no hooks.
412 This LU is intended as a parent for other LogicalUnits which will
413 run no hooks, in order to reduce duplicate code.
419 def BuildHooksEnv(self):
420 """Empty BuildHooksEnv for NoHooksLu.
422 This just raises an error.
425 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
427 def BuildHooksNodes(self):
428 """Empty BuildHooksNodes for NoHooksLU.
431 raise AssertionError("BuildHooksNodes called for NoHooksLU")
435 """Tasklet base class.
437 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
438 they can mix legacy code with tasklets. Locking needs to be done in the LU,
439 tasklets know nothing about locks.
441 Subclasses must follow these rules:
442 - Implement CheckPrereq
446 def __init__(self, lu):
453 def CheckPrereq(self):
454 """Check prerequisites for this tasklets.
456 This method should check whether the prerequisites for the execution of
457 this tasklet are fulfilled. It can do internode communication, but it
458 should be idempotent - no cluster or system changes are allowed.
460 The method should raise errors.OpPrereqError in case something is not
461 fulfilled. Its return value is ignored.
463 This method should also update all parameters to their canonical form if it
464 hasn't been done before.
469 def Exec(self, feedback_fn):
470 """Execute the tasklet.
472 This method should implement the actual work. It should raise
473 errors.OpExecError for failures that are somewhat dealt with in code, or expected.
477 raise NotImplementedError
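# Usage sketch (illustrative only, not part of this module): an LU built
# entirely from tasklets only needs to populate self.tasklets in ExpandNames,
# after which the base class drives CheckPrereq and Exec; the tasklet class
# used here is a placeholder:
#
#   def ExpandNames(self):
#     self._ExpandAndLockInstance()
#     self.tasklets = [_SomeTasklet(self, self.op.instance_name)]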
481 """Base for query utility classes.
484 #: Attribute holding field definitions
487 def __init__(self, qfilter, fields, use_locking):
488 """Initializes this class.
491 self.use_locking = use_locking
493 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
495 self.requested_data = self.query.RequestedData()
496 self.names = self.query.RequestedNames()
498 # Sort only if no names were requested
499 self.sort_by_name = not self.names
501 self.do_locking = None
504 def _GetNames(self, lu, all_names, lock_level):
505 """Helper function to determine names asked for in the query.
509 names = lu.owned_locks(lock_level)
513 if self.wanted == locking.ALL_SET:
514 assert not self.names
515 # caller didn't specify names, so ordering is not important
516 return utils.NiceSort(names)
518 # caller specified names and we must keep the same order
520 assert not self.do_locking or lu.glm.is_owned(lock_level)
522 missing = set(self.wanted).difference(names)
524 raise errors.OpExecError("Some items were removed before retrieving"
525 " their data: %s" % missing)
527 # Return expanded names
530 def ExpandNames(self, lu):
531 """Expand names for this query.
533 See L{LogicalUnit.ExpandNames}.
536 raise NotImplementedError()
538 def DeclareLocks(self, lu, level):
539 """Declare locks for this query.
541 See L{LogicalUnit.DeclareLocks}.
544 raise NotImplementedError()
546 def _GetQueryData(self, lu):
547 """Collects all data for this query.
549 @return: Query data object
552 raise NotImplementedError()
554 def NewStyleQuery(self, lu):
555 """Collect data and execute query.
558 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
559 sort_by_name=self.sort_by_name)
561 def OldStyleQuery(self, lu):
562 """Collect data and execute query.
565 return self.query.OldStyleQuery(self._GetQueryData(lu),
566 sort_by_name=self.sort_by_name)
570 """Returns a dict declaring all lock levels shared.
573 return dict.fromkeys(locking.LEVELS, 1)
576 def _MakeLegacyNodeInfo(data):
577 """Formats the data returned by L{rpc.RpcRunner.call_node_info}.
579 Converts the data into a single dictionary. This is fine for most use cases,
580 but some require information from more than one volume group or hypervisor.
583 (bootid, (vg_info, ), (hv_info, )) = data
585 return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
590 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
591 """Checks if the owned node groups are still correct for an instance.
593 @type cfg: L{config.ConfigWriter}
594 @param cfg: The cluster configuration
595 @type instance_name: string
596 @param instance_name: Instance name
597 @type owned_groups: set or frozenset
598 @param owned_groups: List of currently owned node groups
601 inst_groups = cfg.GetInstanceNodeGroups(instance_name)
603 if not owned_groups.issuperset(inst_groups):
604 raise errors.OpPrereqError("Instance %s's node groups changed since"
605 " locks were acquired, current groups are"
606 " are '%s', owning groups '%s'; retry the"
609 utils.CommaJoin(inst_groups),
610 utils.CommaJoin(owned_groups)),
616 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
617 """Checks if the instances in a node group are still correct.
619 @type cfg: L{config.ConfigWriter}
620 @param cfg: The cluster configuration
621 @type group_uuid: string
622 @param group_uuid: Node group UUID
623 @type owned_instances: set or frozenset
624 @param owned_instances: List of currently owned instances
627 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
628 if owned_instances != wanted_instances:
629 raise errors.OpPrereqError("Instances in node group '%s' changed since"
630 " locks were acquired, wanted '%s', have '%s';"
631 " retry the operation" %
633 utils.CommaJoin(wanted_instances),
634 utils.CommaJoin(owned_instances)),
637 return wanted_instances
640 def _SupportsOob(cfg, node):
641 """Tells if node supports OOB.
643 @type cfg: L{config.ConfigWriter}
644 @param cfg: The cluster configuration
645 @type node: L{objects.Node}
646 @param node: The node
647 @return: The OOB script if supported or an empty string otherwise
650 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
653 def _GetWantedNodes(lu, nodes):
654 """Returns list of checked and expanded node names.
656 @type lu: L{LogicalUnit}
657 @param lu: the logical unit on whose behalf we execute
659 @param nodes: list of node names or None for all nodes
661 @return: the list of nodes, sorted
662 @raise errors.ProgrammerError: if the nodes parameter is wrong type
666 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
668 return utils.NiceSort(lu.cfg.GetNodeList())
671 def _GetWantedInstances(lu, instances):
672 """Returns list of checked and expanded instance names.
674 @type lu: L{LogicalUnit}
675 @param lu: the logical unit on whose behalf we execute
676 @type instances: list
677 @param instances: list of instance names or None for all instances
679 @return: the list of instances, sorted
680 @raise errors.OpPrereqError: if the instances parameter is wrong type
681 @raise errors.OpPrereqError: if any of the passed instances is not found
685 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
687 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
691 def _GetUpdatedParams(old_params, update_dict,
692 use_default=True, use_none=False):
693 """Return the new version of a parameter dictionary.
695 @type old_params: dict
696 @param old_params: old parameters
697 @type update_dict: dict
698 @param update_dict: dict containing new parameter values, or
699 constants.VALUE_DEFAULT to reset the parameter to its default
701 @type use_default: boolean
702 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
703 values as 'to be deleted' values
704 @type use_none: boolean
705 @param use_none: whether to recognise C{None} values as 'to be
708 @return: the new parameter dictionary
711 params_copy = copy.deepcopy(old_params)
712 for key, val in update_dict.iteritems():
713 if ((use_default and val == constants.VALUE_DEFAULT) or
714 (use_none and val is None)):
720 params_copy[key] = val
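# Illustrative sketch of the semantics (not part of this module): with
# use_default=True, constants.VALUE_DEFAULT removes a key so the cluster-wide
# default applies again, while other values simply override the old ones:
#
#   old = {"kernel_path": "/vmlinuz", "root_path": "/dev/sda1"}
#   new = _GetUpdatedParams(old, {"kernel_path": constants.VALUE_DEFAULT,
#                                 "root_path": "/dev/vda1"})
#   # new == {"root_path": "/dev/vda1"}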
724 def _UpdateAndVerifySubDict(base, updates, type_check):
725 """Updates and verifies a dict with sub dicts of the same type.
727 @param base: The dict with the old data
728 @param updates: The dict with the new data
729 @param type_check: Dict suitable to ForceDictType to verify correct types
730 @returns: A new dict with updated and verified values
734 new = _GetUpdatedParams(old, value)
735 utils.ForceDictType(new, type_check)
738 ret = copy.deepcopy(base)
739 ret.update(dict((key, fn(base.get(key, {}), value))
740 for key, value in updates.items()))
744 def _MergeAndVerifyHvState(op_input, obj_input):
745 """Combines the hv state from an opcode with the one of the object
747 @param op_input: The input dict from the opcode
748 @param obj_input: The input dict from the objects
749 @return: The verified and updated dict
753 invalid_hvs = set(op_input) - constants.HYPER_TYPES
755 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
756 " %s" % utils.CommaJoin(invalid_hvs),
758 if obj_input is None:
760 type_check = constants.HVSTS_PARAMETER_TYPES
761 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
766 def _MergeAndVerifyDiskState(op_input, obj_input):
767 """Combines the disk state from an opcode with the one of the object
769 @param op_input: The input dict from the opcode
770 @param obj_input: The input dict from the objects
771 @return: The verified and updated dict
774 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
776 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
777 utils.CommaJoin(invalid_dst),
779 type_check = constants.DSS_PARAMETER_TYPES
780 if obj_input is None:
782 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
784 for key, value in op_input.items())
789 def _ReleaseLocks(lu, level, names=None, keep=None):
790 """Releases locks owned by an LU.
792 @type lu: L{LogicalUnit}
793 @param level: Lock level
794 @type names: list or None
795 @param names: Names of locks to release
796 @type keep: list or None
797 @param keep: Names of locks to retain
800 assert not (keep is not None and names is not None), \
801 "Only one of the 'names' and the 'keep' parameters can be given"
803 if names is not None:
804 should_release = names.__contains__
806 should_release = lambda name: name not in keep
808 should_release = None
810 owned = lu.owned_locks(level)
812 # Not owning any lock at this level, do nothing
819 # Determine which locks to release
821 if should_release(name):
826 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
828 # Release just some locks
829 lu.glm.release(level, names=release)
831 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
834 lu.glm.release(level)
836 assert not lu.glm.is_owned(level), "No locks should be owned"
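# Usage sketch (illustrative only; the node names are placeholders): after
# narrowing down the set of nodes an operation really needs, an LU can drop
# the remaining node locks it acquired earlier:
#
#   _ReleaseLocks(self, locking.LEVEL_NODE,
#                 keep=["node1.example.com", "node2.example.com"])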
839 def _MapInstanceDisksToNodes(instances):
840 """Creates a map from (node, volume) to instance name.
842 @type instances: list of L{objects.Instance}
843 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
846 return dict(((node, vol), inst.name)
847 for inst in instances
848 for (node, vols) in inst.MapLVsByNode().items()
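# Illustrative sketch of the result shape (names are placeholders): an
# instance "inst1.example.com" with the logical volume "xenvg/disk0" on
# "node1.example.com" yields
#
#   {("node1.example.com", "xenvg/disk0"): "inst1.example.com"}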
852 def _RunPostHook(lu, node_name):
853 """Runs the post-hook for an opcode on a single node.
856 hm = lu.proc.BuildHooksManager(lu)
858 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
860 # pylint: disable=W0702
861 lu.LogWarning("Errors occurred running hooks on %s" % node_name)
864 def _CheckOutputFields(static, dynamic, selected):
865 """Checks whether all selected fields are valid.
867 @type static: L{utils.FieldSet}
868 @param static: static fields set
869 @type dynamic: L{utils.FieldSet}
870 @param dynamic: dynamic fields set
877 delta = f.NonMatching(selected)
879 raise errors.OpPrereqError("Unknown output fields selected: %s"
880 % ",".join(delta), errors.ECODE_INVAL)
883 def _CheckGlobalHvParams(params):
884 """Validates that given hypervisor params are not global ones.
886 This will ensure that instances don't get customised versions of global parameters.
890 used_globals = constants.HVC_GLOBALS.intersection(params)
892 msg = ("The following hypervisor parameters are global and cannot"
893 " be customized at instance level, please modify them at"
894 " cluster level: %s" % utils.CommaJoin(used_globals))
895 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
898 def _CheckNodeOnline(lu, node, msg=None):
899 """Ensure that a given node is online.
901 @param lu: the LU on behalf of which we make the check
902 @param node: the node to check
903 @param msg: if passed, should be a message to replace the default one
904 @raise errors.OpPrereqError: if the node is offline
908 msg = "Can't use offline node"
909 if lu.cfg.GetNodeInfo(node).offline:
910 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
913 def _CheckNodeNotDrained(lu, node):
914 """Ensure that a given node is not drained.
916 @param lu: the LU on behalf of which we make the check
917 @param node: the node to check
918 @raise errors.OpPrereqError: if the node is drained
921 if lu.cfg.GetNodeInfo(node).drained:
922 raise errors.OpPrereqError("Can't use drained node %s" % node,
926 def _CheckNodeVmCapable(lu, node):
927 """Ensure that a given node is vm capable.
929 @param lu: the LU on behalf of which we make the check
930 @param node: the node to check
931 @raise errors.OpPrereqError: if the node is not vm capable
934 if not lu.cfg.GetNodeInfo(node).vm_capable:
935 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
939 def _CheckNodeHasOS(lu, node, os_name, force_variant):
940 """Ensure that a node supports a given OS.
942 @param lu: the LU on behalf of which we make the check
943 @param node: the node to check
944 @param os_name: the OS to query about
945 @param force_variant: whether to ignore variant errors
946 @raise errors.OpPrereqError: if the node does not support the OS
949 result = lu.rpc.call_os_get(node, os_name)
950 result.Raise("OS '%s' not in supported OS list for node %s" %
952 prereq=True, ecode=errors.ECODE_INVAL)
953 if not force_variant:
954 _CheckOSVariant(result.payload, os_name)
957 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
958 """Ensure that a node has the given secondary ip.
960 @type lu: L{LogicalUnit}
961 @param lu: the LU on behalf of which we make the check
963 @param node: the node to check
964 @type secondary_ip: string
965 @param secondary_ip: the ip to check
966 @type prereq: boolean
967 @param prereq: whether to throw a prerequisite or an execute error
968 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
969 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
972 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
973 result.Raise("Failure checking secondary ip on node %s" % node,
974 prereq=prereq, ecode=errors.ECODE_ENVIRON)
975 if not result.payload:
976 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
977 " please fix and re-run this command" % secondary_ip)
979 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
981 raise errors.OpExecError(msg)
984 def _GetClusterDomainSecret():
985 """Reads the cluster domain secret.
988 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
992 def _CheckInstanceState(lu, instance, req_states, msg=None):
993 """Ensure that an instance is in one of the required states.
995 @param lu: the LU on behalf of which we make the check
996 @param instance: the instance to check
997 @param msg: if passed, should be a message to replace the default one
998 @raise errors.OpPrereqError: if the instance is not in the required state
1002 msg = "can't use instance from outside %s states" % ", ".join(req_states)
1003 if instance.admin_state not in req_states:
1004 raise errors.OpPrereqError("Instance %s is marked to be %s, %s" %
1005 (instance, instance.admin_state, msg),
1008 if constants.ADMINST_UP not in req_states:
1009 pnode = instance.primary_node
1010 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1011 ins_l.Raise("Can't contact node %s for instance information" % pnode,
1012 prereq=True, ecode=errors.ECODE_ENVIRON)
1014 if instance.name in ins_l.payload:
1015 raise errors.OpPrereqError("Instance %s is running, %s" %
1016 (instance.name, msg), errors.ECODE_STATE)
1019 def _ExpandItemName(fn, name, kind):
1020 """Expand an item name.
1022 @param fn: the function to use for expansion
1023 @param name: requested item name
1024 @param kind: text description ('Node' or 'Instance')
1025 @return: the resolved (full) name
1026 @raise errors.OpPrereqError: if the item is not found
1029 full_name = fn(name)
1030 if full_name is None:
1031 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1036 def _ExpandNodeName(cfg, name):
1037 """Wrapper over L{_ExpandItemName} for nodes."""
1038 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1041 def _ExpandInstanceName(cfg, name):
1042 """Wrapper over L{_ExpandItemName} for instance."""
1043 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
1046 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1047 minmem, maxmem, vcpus, nics, disk_template, disks,
1048 bep, hvp, hypervisor_name, tags):
1049 """Builds instance related env variables for hooks
1051 This builds the hook environment from individual variables.
1054 @param name: the name of the instance
1055 @type primary_node: string
1056 @param primary_node: the name of the instance's primary node
1057 @type secondary_nodes: list
1058 @param secondary_nodes: list of secondary nodes as strings
1059 @type os_type: string
1060 @param os_type: the name of the instance's OS
1061 @type status: string
1062 @param status: the desired status of the instance
1063 @type minmem: string
1064 @param minmem: the minimum memory size of the instance
1065 @type maxmem: string
1066 @param maxmem: the maximum memory size of the instance
1068 @param vcpus: the count of VCPUs the instance has
1070 @param nics: list of tuples (ip, mac, mode, link) representing
1071 the NICs the instance has
1072 @type disk_template: string
1073 @param disk_template: the disk template of the instance
1075 @param disks: the list of (size, mode) pairs
1077 @param bep: the backend parameters for the instance
1079 @param hvp: the hypervisor parameters for the instance
1080 @type hypervisor_name: string
1081 @param hypervisor_name: the hypervisor for the instance
1083 @param tags: list of instance tags as strings
1085 @return: the hook environment for this instance
1090 "INSTANCE_NAME": name,
1091 "INSTANCE_PRIMARY": primary_node,
1092 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1093 "INSTANCE_OS_TYPE": os_type,
1094 "INSTANCE_STATUS": status,
1095 "INSTANCE_MINMEM": minmem,
1096 "INSTANCE_MAXMEM": maxmem,
1097 # TODO(2.7) remove deprecated "memory" value
1098 "INSTANCE_MEMORY": maxmem,
1099 "INSTANCE_VCPUS": vcpus,
1100 "INSTANCE_DISK_TEMPLATE": disk_template,
1101 "INSTANCE_HYPERVISOR": hypervisor_name,
1104 nic_count = len(nics)
1105 for idx, (ip, mac, mode, link) in enumerate(nics):
1108 env["INSTANCE_NIC%d_IP" % idx] = ip
1109 env["INSTANCE_NIC%d_MAC" % idx] = mac
1110 env["INSTANCE_NIC%d_MODE" % idx] = mode
1111 env["INSTANCE_NIC%d_LINK" % idx] = link
1112 if mode == constants.NIC_MODE_BRIDGED:
1113 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1117 env["INSTANCE_NIC_COUNT"] = nic_count
1120 disk_count = len(disks)
1121 for idx, (size, mode) in enumerate(disks):
1122 env["INSTANCE_DISK%d_SIZE" % idx] = size
1123 env["INSTANCE_DISK%d_MODE" % idx] = mode
1127 env["INSTANCE_DISK_COUNT"] = disk_count
1132 env["INSTANCE_TAGS"] = " ".join(tags)
1134 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1135 for key, value in source.items():
1136 env["INSTANCE_%s_%s" % (kind, key)] = value
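# Illustrative sketch (not part of this module): for an instance with a
# single bridged NIC and one disk, the resulting environment contains entries
# such as INSTANCE_NAME, INSTANCE_PRIMARY, INSTANCE_NIC_COUNT=1,
# INSTANCE_NIC0_MAC, INSTANCE_NIC0_BRIDGE, INSTANCE_DISK_COUNT=1,
# INSTANCE_DISK0_SIZE and, for every backend/hypervisor parameter,
# INSTANCE_BE_<key> / INSTANCE_HV_<key>.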
1141 def _NICListToTuple(lu, nics):
1142 """Build a list of nic information tuples.
1144 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1145 value in LUInstanceQueryData.
1147 @type lu: L{LogicalUnit}
1148 @param lu: the logical unit on whose behalf we execute
1149 @type nics: list of L{objects.NIC}
1150 @param nics: list of nics to convert to hooks tuples
1154 cluster = lu.cfg.GetClusterInfo()
1158 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1159 mode = filled_params[constants.NIC_MODE]
1160 link = filled_params[constants.NIC_LINK]
1161 hooks_nics.append((ip, mac, mode, link))
1165 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1166 """Builds instance related env variables for hooks from an object.
1168 @type lu: L{LogicalUnit}
1169 @param lu: the logical unit on whose behalf we execute
1170 @type instance: L{objects.Instance}
1171 @param instance: the instance for which we should build the
1173 @type override: dict
1174 @param override: dictionary with key/values that will override
1177 @return: the hook environment dictionary
1180 cluster = lu.cfg.GetClusterInfo()
1181 bep = cluster.FillBE(instance)
1182 hvp = cluster.FillHV(instance)
1184 "name": instance.name,
1185 "primary_node": instance.primary_node,
1186 "secondary_nodes": instance.secondary_nodes,
1187 "os_type": instance.os,
1188 "status": instance.admin_state,
1189 "maxmem": bep[constants.BE_MAXMEM],
1190 "minmem": bep[constants.BE_MINMEM],
1191 "vcpus": bep[constants.BE_VCPUS],
1192 "nics": _NICListToTuple(lu, instance.nics),
1193 "disk_template": instance.disk_template,
1194 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1197 "hypervisor_name": instance.hypervisor,
1198 "tags": instance.tags,
1201 args.update(override)
1202 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1205 def _AdjustCandidatePool(lu, exceptions):
1206 """Adjust the candidate pool after node operations.
1209 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1211 lu.LogInfo("Promoted nodes to master candidate role: %s",
1212 utils.CommaJoin(node.name for node in mod_list))
1213 for name in mod_list:
1214 lu.context.ReaddNode(name)
1215 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1217 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1221 def _DecideSelfPromotion(lu, exceptions=None):
1222 """Decide whether I should promote myself as a master candidate.
1225 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1226 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1227 # the new node will increase mc_max with one, so:
1228 mc_should = min(mc_should + 1, cp_size)
1229 return mc_now < mc_should
1232 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1233 """Check that the brigdes needed by a list of nics exist.
1236 cluster = lu.cfg.GetClusterInfo()
1237 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1238 brlist = [params[constants.NIC_LINK] for params in paramslist
1239 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1241 result = lu.rpc.call_bridges_exist(target_node, brlist)
1242 result.Raise("Error checking bridges on destination node '%s'" %
1243 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1246 def _CheckInstanceBridgesExist(lu, instance, node=None):
1247 """Check that the brigdes needed by an instance exist.
1251 node = instance.primary_node
1252 _CheckNicsBridgesExist(lu, instance.nics, node)
1255 def _CheckOSVariant(os_obj, name):
1256 """Check whether an OS name conforms to the os variants specification.
1258 @type os_obj: L{objects.OS}
1259 @param os_obj: OS object to check
1261 @param name: OS name passed by the user, to check for validity
1264 variant = objects.OS.GetVariant(name)
1265 if not os_obj.supported_variants:
1267 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1268 " passed)" % (os_obj.name, variant),
1272 raise errors.OpPrereqError("OS name must include a variant",
1275 if variant not in os_obj.supported_variants:
1276 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1279 def _GetNodeInstancesInner(cfg, fn):
1280 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1283 def _GetNodeInstances(cfg, node_name):
1284 """Returns a list of all primary and secondary instances on a node.
1288 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1291 def _GetNodePrimaryInstances(cfg, node_name):
1292 """Returns primary instances on a node.
1295 return _GetNodeInstancesInner(cfg,
1296 lambda inst: node_name == inst.primary_node)
1299 def _GetNodeSecondaryInstances(cfg, node_name):
1300 """Returns secondary instances on a node.
1303 return _GetNodeInstancesInner(cfg,
1304 lambda inst: node_name in inst.secondary_nodes)
1307 def _GetStorageTypeArgs(cfg, storage_type):
1308 """Returns the arguments for a storage type.
1311 # Special case for file storage
1312 if storage_type == constants.ST_FILE:
1313 # storage.FileStorage wants a list of storage directories
1314 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1319 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1322 for dev in instance.disks:
1323 cfg.SetDiskID(dev, node_name)
1325 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
1326 result.Raise("Failed to get disk status from node %s" % node_name,
1327 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1329 for idx, bdev_status in enumerate(result.payload):
1330 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1336 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1337 """Check the sanity of iallocator and node arguments and use the
1338 cluster-wide iallocator if appropriate.
1340 Check that at most one of (iallocator, node) is specified. If none is
1341 specified, then the LU's opcode's iallocator slot is filled with the
1342 cluster-wide default iallocator.
1344 @type iallocator_slot: string
1345 @param iallocator_slot: the name of the opcode iallocator slot
1346 @type node_slot: string
1347 @param node_slot: the name of the opcode target node slot
1350 node = getattr(lu.op, node_slot, None)
1351 iallocator = getattr(lu.op, iallocator_slot, None)
1353 if node is not None and iallocator is not None:
1354 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1356 elif node is None and iallocator is None:
1357 default_iallocator = lu.cfg.GetDefaultIAllocator()
1358 if default_iallocator:
1359 setattr(lu.op, iallocator_slot, default_iallocator)
1361 raise errors.OpPrereqError("No iallocator or node given and no"
1362 " cluster-wide default iallocator found;"
1363 " please specify either an iallocator or a"
1364 " node, or set a cluster-wide default"
1368 def _GetDefaultIAllocator(cfg, iallocator):
1369 """Decides on which iallocator to use.
1371 @type cfg: L{config.ConfigWriter}
1372 @param cfg: Cluster configuration object
1373 @type iallocator: string or None
1374 @param iallocator: Iallocator specified in opcode
1376 @return: Iallocator name
1380 # Use default iallocator
1381 iallocator = cfg.GetDefaultIAllocator()
1384 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1385 " opcode nor as a cluster-wide default",
1391 class LUClusterPostInit(LogicalUnit):
1392 """Logical unit for running hooks after cluster initialization.
1395 HPATH = "cluster-init"
1396 HTYPE = constants.HTYPE_CLUSTER
1398 def BuildHooksEnv(self):
1403 "OP_TARGET": self.cfg.GetClusterName(),
1406 def BuildHooksNodes(self):
1407 """Build hooks nodes.
1410 return ([], [self.cfg.GetMasterNode()])
1412 def Exec(self, feedback_fn):
1419 class LUClusterDestroy(LogicalUnit):
1420 """Logical unit for destroying the cluster.
1423 HPATH = "cluster-destroy"
1424 HTYPE = constants.HTYPE_CLUSTER
1426 def BuildHooksEnv(self):
1431 "OP_TARGET": self.cfg.GetClusterName(),
1434 def BuildHooksNodes(self):
1435 """Build hooks nodes.
1440 def CheckPrereq(self):
1441 """Check prerequisites.
1443 This checks whether the cluster is empty.
1445 Any errors are signaled by raising errors.OpPrereqError.
1448 master = self.cfg.GetMasterNode()
1450 nodelist = self.cfg.GetNodeList()
1451 if len(nodelist) != 1 or nodelist[0] != master:
1452 raise errors.OpPrereqError("There are still %d node(s) in"
1453 " this cluster." % (len(nodelist) - 1),
1455 instancelist = self.cfg.GetInstanceList()
1457 raise errors.OpPrereqError("There are still %d instance(s) in"
1458 " this cluster." % len(instancelist),
1461 def Exec(self, feedback_fn):
1462 """Destroys the cluster.
1465 master_params = self.cfg.GetMasterNetworkParameters()
1467 # Run post hooks on master node before it's removed
1468 _RunPostHook(self, master_params.name)
1470 ems = self.cfg.GetUseExternalMipScript()
1471 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1473 result.Raise("Could not disable the master role")
1475 return master_params.name
1478 def _VerifyCertificate(filename):
1479 """Verifies a certificate for L{LUClusterVerifyConfig}.
1481 @type filename: string
1482 @param filename: Path to PEM file
1486 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1487 utils.ReadFile(filename))
1488 except Exception, err: # pylint: disable=W0703
1489 return (LUClusterVerifyConfig.ETYPE_ERROR,
1490 "Failed to load X509 certificate %s: %s" % (filename, err))
1493 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1494 constants.SSL_CERT_EXPIRATION_ERROR)
1497 fnamemsg = "While verifying %s: %s" % (filename, msg)
1502 return (None, fnamemsg)
1503 elif errcode == utils.CERT_WARNING:
1504 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1505 elif errcode == utils.CERT_ERROR:
1506 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1508 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1511 def _GetAllHypervisorParameters(cluster, instances):
1512 """Compute the set of all hypervisor parameters.
1514 @type cluster: L{objects.Cluster}
1515 @param cluster: the cluster object
1516 @type instances: list of L{objects.Instance}
1517 @param instances: additional instances from which to obtain parameters
1518 @rtype: list of (origin, hypervisor, parameters)
1519 @return: a list with all parameters found, indicating the hypervisor they
1520 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1525 for hv_name in cluster.enabled_hypervisors:
1526 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1528 for os_name, os_hvp in cluster.os_hvp.items():
1529 for hv_name, hv_params in os_hvp.items():
1531 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1532 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1534 # TODO: collapse identical parameter values in a single one
1535 for instance in instances:
1536 if instance.hvparams:
1537 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1538 cluster.FillHV(instance)))
1543 class _VerifyErrors(object):
1544 """Mix-in for cluster/group verify LUs.
1546 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1547 self.op and self._feedback_fn to be available.)
1551 ETYPE_FIELD = "code"
1552 ETYPE_ERROR = "ERROR"
1553 ETYPE_WARNING = "WARNING"
1555 def _Error(self, ecode, item, msg, *args, **kwargs):
1556 """Format an error message.
1558 Based on the opcode's error_codes parameter, either format a
1559 parseable error code, or a simpler error string.
1561 This must be called only from Exec and functions called from Exec.
1564 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1565 itype, etxt, _ = ecode
1566 # first complete the msg
1569 # then format the whole message
1570 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1571 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1577 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1578 # and finally report it via the feedback_fn
1579 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
1581 def _ErrorIf(self, cond, ecode, *args, **kwargs):
1582 """Log an error message if the passed condition is True.
1586 or self.op.debug_simulate_errors) # pylint: disable=E1101
1588 # If the error code is in the list of ignored errors, demote the error to a warning.
1590 (_, etxt, _) = ecode
1591 if etxt in self.op.ignore_errors: # pylint: disable=E1101
1592 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1595 self._Error(ecode, *args, **kwargs)
1597 # do not mark the operation as failed for WARN cases only
1598 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1599 self.bad = self.bad or cond
1602 class LUClusterVerify(NoHooksLU):
1603 """Submits all jobs necessary to verify the cluster.
1608 def ExpandNames(self):
1609 self.needed_locks = {}
1611 def Exec(self, feedback_fn):
1614 if self.op.group_name:
1615 groups = [self.op.group_name]
1616 depends_fn = lambda: None
1618 groups = self.cfg.GetNodeGroupList()
1620 # Verify global configuration
1622 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1625 # Always depend on global verification
1626 depends_fn = lambda: [(-len(jobs), [])]
1628 jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1629 ignore_errors=self.op.ignore_errors,
1630 depends=depends_fn())]
1631 for group in groups)
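# Note: depends_fn yields a dependency relative to this submission; because
# jobs.extend() consumes the generator one element at a time, -len(jobs),
# evaluated as each group job is created, always points back at the
# OpClusterVerifyConfig job added first.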
1633 # Fix up all parameters
1634 for op in itertools.chain(*jobs): # pylint: disable=W0142
1635 op.debug_simulate_errors = self.op.debug_simulate_errors
1636 op.verbose = self.op.verbose
1637 op.error_codes = self.op.error_codes
1639 op.skip_checks = self.op.skip_checks
1640 except AttributeError:
1641 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1643 return ResultWithJobs(jobs)
1646 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1647 """Verifies the cluster config.
1652 def _VerifyHVP(self, hvp_data):
1653 """Verifies locally the syntax of the hypervisor parameters.
1656 for item, hv_name, hv_params in hvp_data:
1657 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1660 hv_class = hypervisor.GetHypervisor(hv_name)
1661 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1662 hv_class.CheckParameterSyntax(hv_params)
1663 except errors.GenericError, err:
1664 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1666 def ExpandNames(self):
1667 # Information can be safely retrieved as the BGL is acquired in exclusive
1669 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
1670 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1671 self.all_node_info = self.cfg.GetAllNodesInfo()
1672 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1673 self.needed_locks = {}
1675 def Exec(self, feedback_fn):
1676 """Verify integrity of cluster, performing various test on nodes.
1680 self._feedback_fn = feedback_fn
1682 feedback_fn("* Verifying cluster config")
1684 for msg in self.cfg.VerifyConfig():
1685 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1687 feedback_fn("* Verifying cluster certificate files")
1689 for cert_filename in constants.ALL_CERT_FILES:
1690 (errcode, msg) = _VerifyCertificate(cert_filename)
1691 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1693 feedback_fn("* Verifying hypervisor parameters")
1695 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1696 self.all_inst_info.values()))
1698 feedback_fn("* Verifying all nodes belong to an existing group")
1700 # We do this verification here because, should this bogus circumstance
1701 # occur, it would never be caught by VerifyGroup, which only acts on
1702 # nodes/instances reachable from existing node groups.
1704 dangling_nodes = set(node.name for node in self.all_node_info.values()
1705 if node.group not in self.all_group_info)
1707 dangling_instances = {}
1708 no_node_instances = []
1710 for inst in self.all_inst_info.values():
1711 if inst.primary_node in dangling_nodes:
1712 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1713 elif inst.primary_node not in self.all_node_info:
1714 no_node_instances.append(inst.name)
1719 utils.CommaJoin(dangling_instances.get(node.name,
1721 for node in dangling_nodes]
1723 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
1725 "the following nodes (and their instances) belong to a non"
1726 " existing group: %s", utils.CommaJoin(pretty_dangling))
1728 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
1730 "the following instances have a non-existing primary-node:"
1731 " %s", utils.CommaJoin(no_node_instances))
1736 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1737 """Verifies the status of a node group.
1740 HPATH = "cluster-verify"
1741 HTYPE = constants.HTYPE_CLUSTER
1744 _HOOKS_INDENT_RE = re.compile("^", re.M)
1746 class NodeImage(object):
1747 """A class representing the logical and physical status of a node.
1750 @ivar name: the node name to which this object refers
1751 @ivar volumes: a structure as returned from
1752 L{ganeti.backend.GetVolumeList} (runtime)
1753 @ivar instances: a list of running instances (runtime)
1754 @ivar pinst: list of configured primary instances (config)
1755 @ivar sinst: list of configured secondary instances (config)
1756 @ivar sbp: dictionary of {primary-node: list of instances} for all
1757 instances for which this node is secondary (config)
1758 @ivar mfree: free memory, as reported by hypervisor (runtime)
1759 @ivar dfree: free disk, as reported by the node (runtime)
1760 @ivar offline: the offline status (config)
1761 @type rpc_fail: boolean
1762 @ivar rpc_fail: whether the RPC verify call was successful (overall,
1763 not whether the individual keys were correct) (runtime)
1764 @type lvm_fail: boolean
1765 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1766 @type hyp_fail: boolean
1767 @ivar hyp_fail: whether the RPC call didn't return the instance list
1768 @type ghost: boolean
1769 @ivar ghost: whether this is a known node or not (config)
1770 @type os_fail: boolean
1771 @ivar os_fail: whether the RPC call didn't return valid OS data
1773 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1774 @type vm_capable: boolean
1775 @ivar vm_capable: whether the node can host instances
1778 def __init__(self, offline=False, name=None, vm_capable=True):
1787 self.offline = offline
1788 self.vm_capable = vm_capable
1789 self.rpc_fail = False
1790 self.lvm_fail = False
1791 self.hyp_fail = False
1793 self.os_fail = False
1796 def ExpandNames(self):
1797 # This raises errors.OpPrereqError on its own:
1798 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1800 # Get instances in node group; this is unsafe and needs verification later
1801 inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
1803 self.needed_locks = {
1804 locking.LEVEL_INSTANCE: inst_names,
1805 locking.LEVEL_NODEGROUP: [self.group_uuid],
1806 locking.LEVEL_NODE: [],
1809 self.share_locks = _ShareAll()
1811 def DeclareLocks(self, level):
1812 if level == locking.LEVEL_NODE:
1813 # Get members of node group; this is unsafe and needs verification later
1814 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1816 all_inst_info = self.cfg.GetAllInstancesInfo()
1818 # In Exec(), we warn about mirrored instances that have primary and
1819 # secondary living in separate node groups. To fully verify that
1820 # volumes for these instances are healthy, we will need to do an
1821 # extra call to their secondaries. We ensure here those nodes will
1823 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
1824 # Important: access only the instances whose lock is owned
1825 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1826 nodes.update(all_inst_info[inst].secondary_nodes)
1828 self.needed_locks[locking.LEVEL_NODE] = nodes
1830 def CheckPrereq(self):
1831 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
1832 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
1834 group_nodes = set(self.group_info.members)
1835 group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
1838 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1840 unlocked_instances = \
1841 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
1844 raise errors.OpPrereqError("Missing lock for nodes: %s" %
1845 utils.CommaJoin(unlocked_nodes))
1847 if unlocked_instances:
1848 raise errors.OpPrereqError("Missing lock for instances: %s" %
1849 utils.CommaJoin(unlocked_instances))
1851 self.all_node_info = self.cfg.GetAllNodesInfo()
1852 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1854 self.my_node_names = utils.NiceSort(group_nodes)
1855 self.my_inst_names = utils.NiceSort(group_instances)
1857 self.my_node_info = dict((name, self.all_node_info[name])
1858 for name in self.my_node_names)
1860 self.my_inst_info = dict((name, self.all_inst_info[name])
1861 for name in self.my_inst_names)
1863 # We detect here the nodes that will need the extra RPC calls for verifying
1864 # split LV volumes; they should be locked.
1865 extra_lv_nodes = set()
1867 for inst in self.my_inst_info.values():
1868 if inst.disk_template in constants.DTS_INT_MIRROR:
1869 group = self.my_node_info[inst.primary_node].group
1870 for nname in inst.secondary_nodes:
1871 if self.all_node_info[nname].group != group:
1872 extra_lv_nodes.add(nname)
1874 unlocked_lv_nodes = \
1875 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1877 if unlocked_lv_nodes:
1878 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
1879 utils.CommaJoin(unlocked_lv_nodes))
1880 self.extra_lv_nodes = list(extra_lv_nodes)
1882 def _VerifyNode(self, ninfo, nresult):
1883 """Perform some basic validation on data returned from a node.
1885 - check the result data structure is well formed and has all the
1887 - check ganeti version
1889 @type ninfo: L{objects.Node}
1890 @param ninfo: the node to check
1891 @param nresult: the results from the node
1893 @return: whether overall this call was successful (and we can expect
1894 reasonable values in the response)
1898 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1900 # main result, nresult should be a non-empty dict
1901 test = not nresult or not isinstance(nresult, dict)
1902 _ErrorIf(test, constants.CV_ENODERPC, node,
1903 "unable to verify node: no data returned")
1907 # compares ganeti version
1908 local_version = constants.PROTOCOL_VERSION
1909 remote_version = nresult.get("version", None)
1910 test = not (remote_version and
1911 isinstance(remote_version, (list, tuple)) and
1912 len(remote_version) == 2)
1913 _ErrorIf(test, constants.CV_ENODERPC, node,
1914 "connection to node returned invalid data")
1918 test = local_version != remote_version[0]
1919 _ErrorIf(test, constants.CV_ENODEVERSION, node,
1920 "incompatible protocol versions: master %s,"
1921 " node %s", local_version, remote_version[0])
1925 # node seems compatible, we can actually try to look into its results
1927 # full package version
1928 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1929 constants.CV_ENODEVERSION, node,
1930 "software version mismatch: master %s, node %s",
1931 constants.RELEASE_VERSION, remote_version[1],
1932 code=self.ETYPE_WARNING)
1934 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1935 if ninfo.vm_capable and isinstance(hyp_result, dict):
1936 for hv_name, hv_result in hyp_result.iteritems():
1937 test = hv_result is not None
1938 _ErrorIf(test, constants.CV_ENODEHV, node,
1939 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1941 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1942 if ninfo.vm_capable and isinstance(hvp_result, list):
1943 for item, hv_name, hv_result in hvp_result:
1944 _ErrorIf(True, constants.CV_ENODEHV, node,
1945 "hypervisor %s parameter verify failure (source %s): %s",
1946 hv_name, item, hv_result)
1948 test = nresult.get(constants.NV_NODESETUP,
1949 ["Missing NODESETUP results"])
1950 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
1955 def _VerifyNodeTime(self, ninfo, nresult,
1956 nvinfo_starttime, nvinfo_endtime):
1957 """Check the node time.
1959 @type ninfo: L{objects.Node}
1960 @param ninfo: the node to check
1961 @param nresult: the remote results for the node
1962 @param nvinfo_starttime: the start time of the RPC call
1963 @param nvinfo_endtime: the end time of the RPC call
1967 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1969 ntime = nresult.get(constants.NV_TIME, None)
1971 ntime_merged = utils.MergeTime(ntime)
1972 except (ValueError, TypeError):
1973 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
1976 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1977 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1978 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1979 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1983 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
1984 "Node time diverges by at least %s from master node time",
1987 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1988 """Check the node LVM results.
1990 @type ninfo: L{objects.Node}
1991 @param ninfo: the node to check
1992 @param nresult: the remote results for the node
1993 @param vg_name: the configured VG name
2000 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2002 # checks vg existence and size > 20G
2003 vglist = nresult.get(constants.NV_VGLIST, None)
2005 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2007 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2008 constants.MIN_VG_SIZE)
2009 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2012 pvlist = nresult.get(constants.NV_PVLIST, None)
2013 test = pvlist is None
2014 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2016 # check that ':' is not present in PV names, since it's a
2017 # special character for lvcreate (denotes the range of PEs to
2018 # allocate on)
2019 for _, pvname, owner_vg in pvlist:
2020 test = ":" in pvname
2021 _ErrorIf(test, constants.CV_ENODELVM, node,
2022 "Invalid character ':' in PV '%s' of VG '%s'",
2025 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2026 """Check the node bridges.
2028 @type ninfo: L{objects.Node}
2029 @param ninfo: the node to check
2030 @param nresult: the remote results for the node
2031 @param bridges: the expected list of bridges
2038 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2040 missing = nresult.get(constants.NV_BRIDGES, None)
2041 test = not isinstance(missing, list)
2042 _ErrorIf(test, constants.CV_ENODENET, node,
2043 "did not return valid bridge information")
2045 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2046 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2048 def _VerifyNodeUserScripts(self, ninfo, nresult):
2049 """Check the results of user scripts presence and executability on the node
2051 @type ninfo: L{objects.Node}
2052 @param ninfo: the node to check
2053 @param nresult: the remote results for the node
2058 test = constants.NV_USERSCRIPTS not in nresult
2059 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2060 "did not return user scripts information")
2062 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2064 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2065 "user scripts not present or not executable: %s" %
2066 utils.CommaJoin(sorted(broken_scripts)))
2068 def _VerifyNodeNetwork(self, ninfo, nresult):
2069 """Check the node network connectivity results.
2071 @type ninfo: L{objects.Node}
2072 @param ninfo: the node to check
2073 @param nresult: the remote results for the node
2077 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2079 test = constants.NV_NODELIST not in nresult
2080 _ErrorIf(test, constants.CV_ENODESSH, node,
2081 "node hasn't returned node ssh connectivity data")
2083 if nresult[constants.NV_NODELIST]:
2084 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2085 _ErrorIf(True, constants.CV_ENODESSH, node,
2086 "ssh communication with node '%s': %s", a_node, a_msg)
2088 test = constants.NV_NODENETTEST not in nresult
2089 _ErrorIf(test, constants.CV_ENODENET, node,
2090 "node hasn't returned node tcp connectivity data")
2092 if nresult[constants.NV_NODENETTEST]:
2093 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2094 for anode in nlist:
2095 _ErrorIf(True, constants.CV_ENODENET, node,
2096 "tcp communication with node '%s': %s",
2097 anode, nresult[constants.NV_NODENETTEST][anode])
2099 test = constants.NV_MASTERIP not in nresult
2100 _ErrorIf(test, constants.CV_ENODENET, node,
2101 "node hasn't returned node master IP reachability data")
2103 if not nresult[constants.NV_MASTERIP]:
2104 if node == self.master_node:
2105 msg = "the master node cannot reach the master IP (not configured?)"
2107 msg = "cannot reach the master IP"
2108 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2110 def _VerifyInstance(self, instance, instanceconfig, node_image,
2112 """Verify an instance.
2114 This function checks to see if the required block devices are
2115 available on the instance's node.
2118 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2119 node_current = instanceconfig.primary_node
2121 node_vol_should = {}
2122 instanceconfig.MapLVsByNode(node_vol_should)
2124 for node in node_vol_should:
2125 n_img = node_image[node]
2126 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2127 # ignore missing volumes on offline or broken nodes
2129 for volume in node_vol_should[node]:
2130 test = volume not in n_img.volumes
2131 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2132 "volume %s missing on node %s", volume, node)
2134 if instanceconfig.admin_state == constants.ADMINST_UP:
2135 pri_img = node_image[node_current]
2136 test = instance not in pri_img.instances and not pri_img.offline
2137 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2138 "instance not running on its primary node %s",
2141 diskdata = [(nname, success, status, idx)
2142 for (nname, disks) in diskstatus.items()
2143 for idx, (success, status) in enumerate(disks)]
2145 for nname, success, bdev_status, idx in diskdata:
2146 # the 'ghost node' construction in Exec() ensures that we have a
2147 # node_image entry even for nodes outside this group
2148 snode = node_image[nname]
2149 bad_snode = snode.ghost or snode.offline
2150 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2151 not success and not bad_snode,
2152 constants.CV_EINSTANCEFAULTYDISK, instance,
2153 "couldn't retrieve status for disk/%s on %s: %s",
2154 idx, nname, bdev_status)
2155 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2156 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2157 constants.CV_EINSTANCEFAULTYDISK, instance,
2158 "disk/%s on %s is faulty", idx, nname)
2160 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2161 """Verify if there are any unknown volumes in the cluster.
2163 The .os, .swap and backup volumes are ignored. All other volumes are
2164 reported as unknown.
2166 @type reserved: L{ganeti.utils.FieldSet}
2167 @param reserved: a FieldSet of reserved volume names
2170 for node, n_img in node_image.items():
2171 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2172 # skip non-healthy nodes
2174 for volume in n_img.volumes:
2175 test = ((node not in node_vol_should or
2176 volume not in node_vol_should[node]) and
2177 not reserved.Matches(volume))
2178 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2179 "volume %s is unknown", volume)
2181 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2182 """Verify N+1 Memory Resilience.
2184 Check that if one single node dies we can still start all the
2185 instances it was primary for.
2188 cluster_info = self.cfg.GetClusterInfo()
2189 for node, n_img in node_image.items():
2190 # This code checks that every node which is now listed as
2191 # secondary has enough memory to host all instances it is
2192 # supposed to should a single other node in the cluster fail.
2193 # FIXME: not ready for failover to an arbitrary node
2194 # FIXME: does not support file-backed instances
2195 # WARNING: we currently take into account down instances as well
2196 # as up ones, considering that even if they're down someone
2197 # might want to start them even in the event of a node failure.
2199 # we're skipping offline nodes from the N+1 warning, since
2200 # most likely we don't have good memory information from them;
2201 # we already list instances living on such nodes, and that's
2204 #TODO(dynmem): use MINMEM for checking
2205 #TODO(dynmem): also consider ballooning out other instances
2206 for prinode, instances in n_img.sbp.items():
2207 needed_mem = 0
2208 for instance in instances:
2209 bep = cluster_info.FillBE(instance_cfg[instance])
2210 if bep[constants.BE_AUTO_BALANCE]:
2211 needed_mem += bep[constants.BE_MAXMEM]
2212 test = n_img.mfree < needed_mem
2213 self._ErrorIf(test, constants.CV_ENODEN1, node,
2214 "not enough memory to accomodate instance failovers"
2215 " should node %s fail (%dMiB needed, %dMiB available)",
2216 prinode, needed_mem, n_img.mfree)
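# Worked example of the N+1 check above (hypothetical numbers): if node B is
# secondary for two auto-balanced instances with BE_MAXMEM 2048 and 4096
# whose primary is node A, then n_img.sbp contains {"A": [inst1, inst2]} and
# node B needs at least 6144 MiB free to absorb a failover of node A;
# otherwise a CV_ENODEN1 error is reported.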
2218 @classmethod
2219 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2220 (files_all, files_opt, files_mc, files_vm)):
2221 """Verifies file checksums collected from all nodes.
2223 @param errorif: Callback for reporting errors
2224 @param nodeinfo: List of L{objects.Node} objects
2225 @param master_node: Name of master node
2226 @param all_nvinfo: RPC results
2229 # Define functions determining which nodes to consider for a file
2232 (files_mc, lambda node: (node.master_candidate or
2233 node.name == master_node)),
2234 (files_vm, lambda node: node.vm_capable),
2237 # Build mapping from filename to list of nodes which should have the file
2239 for (files, fn) in files2nodefn:
2241 filenodes = nodeinfo
2243 filenodes = filter(fn, nodeinfo)
2244 nodefiles.update((filename,
2245 frozenset(map(operator.attrgetter("name"), filenodes)))
2246 for filename in files)
2248 assert set(nodefiles) == (files_all | files_mc | files_vm)
2250 fileinfo = dict((filename, {}) for filename in nodefiles)
2251 ignore_nodes = set()
2253 for node in nodeinfo:
2255 ignore_nodes.add(node.name)
2258 nresult = all_nvinfo[node.name]
2260 if nresult.fail_msg or not nresult.payload:
2263 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2265 test = not (node_files and isinstance(node_files, dict))
2266 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2267 "Node did not return file checksum data")
2269 ignore_nodes.add(node.name)
2272 # Build per-checksum mapping from filename to nodes having it
2273 for (filename, checksum) in node_files.items():
2274 assert filename in nodefiles
2275 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2277 for (filename, checksums) in fileinfo.items():
2278 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2280 # Nodes having the file
2281 with_file = frozenset(node_name
2282 for nodes in fileinfo[filename].values()
2283 for node_name in nodes) - ignore_nodes
2285 expected_nodes = nodefiles[filename] - ignore_nodes
2287 # Nodes missing file
2288 missing_file = expected_nodes - with_file
2290 if filename in files_opt:
2292 errorif(missing_file and missing_file != expected_nodes,
2293 constants.CV_ECLUSTERFILECHECK, None,
2294 "File %s is optional, but it must exist on all or no"
2295 " nodes (not found on %s)",
2296 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2298 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2299 "File %s is missing from node(s) %s", filename,
2300 utils.CommaJoin(utils.NiceSort(missing_file)))
2302 # Warn if a node has a file it shouldn't
2303 unexpected = with_file - expected_nodes
2305 constants.CV_ECLUSTERFILECHECK, None,
2306 "File %s should not exist on node(s) %s",
2307 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2309 # See if there are multiple versions of the file
2310 test = len(checksums) > 1
2312 variants = ["variant %s on %s" %
2313 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2314 for (idx, (checksum, nodes)) in
2315 enumerate(sorted(checksums.items()))]
2319 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2320 "File %s found with %s different checksums (%s)",
2321 filename, len(checksums), "; ".join(variants))
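# Data shape assumed by the file checks above (hypothetical values):
#   fileinfo == {"/path/to/file": {"checksum-a": set(["node1", "node2"]),
#                                  "checksum-b": set(["node3"])}}
# i.e. filename -> checksum -> nodes reporting that checksum; more than one
# checksum per filename means the file differs between nodes.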
2323 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2325 """Verifies and the node DRBD status.
2327 @type ninfo: L{objects.Node}
2328 @param ninfo: the node to check
2329 @param nresult: the remote results for the node
2330 @param instanceinfo: the dict of instances
2331 @param drbd_helper: the configured DRBD usermode helper
2332 @param drbd_map: the DRBD map as returned by
2333 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2337 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2340 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2341 test = (helper_result is None)
2342 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2343 "no drbd usermode helper returned")
2345 status, payload = helper_result
2347 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2348 "drbd usermode helper check unsuccessful: %s", payload)
2349 test = status and (payload != drbd_helper)
2350 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2351 "wrong drbd usermode helper: %s", payload)
2353 # compute the DRBD minors
2355 for minor, instance in drbd_map[node].items():
2356 test = instance not in instanceinfo
2357 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2358 "ghost instance '%s' in temporary DRBD map", instance)
2359 # ghost instance should not be running, but otherwise we
2360 # don't give double warnings (both ghost instance and
2361 # unallocated minor in use)
2363 node_drbd[minor] = (instance, False)
2365 instance = instanceinfo[instance]
2366 node_drbd[minor] = (instance.name,
2367 instance.admin_state == constants.ADMINST_UP)
2369 # and now check them
2370 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2371 test = not isinstance(used_minors, (tuple, list))
2372 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2373 "cannot parse drbd status file: %s", str(used_minors))
2375 # we cannot check drbd status
2378 for minor, (iname, must_exist) in node_drbd.items():
2379 test = minor not in used_minors and must_exist
2380 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2381 "drbd minor %d of instance %s is not active", minor, iname)
2382 for minor in used_minors:
2383 test = minor not in node_drbd
2384 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2385 "unallocated drbd minor %d is in use", minor)
2387 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2388 """Builds the node OS structures.
2390 @type ninfo: L{objects.Node}
2391 @param ninfo: the node to check
2392 @param nresult: the remote results for the node
2393 @param nimg: the node image object
2397 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2399 remote_os = nresult.get(constants.NV_OSLIST, None)
2400 test = (not isinstance(remote_os, list) or
2401 not compat.all(isinstance(v, list) and len(v) == 7
2402 for v in remote_os))
2404 _ErrorIf(test, constants.CV_ENODEOS, node,
2405 "node hasn't returned valid OS data")
2414 for (name, os_path, status, diagnose,
2415 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2417 if name not in os_dict:
2420 # parameters is a list of lists instead of list of tuples due to
2421 # JSON lacking a real tuple type, fix it:
2422 parameters = [tuple(v) for v in parameters]
2423 os_dict[name].append((os_path, status, diagnose,
2424 set(variants), set(parameters), set(api_ver)))
2426 nimg.oslist = os_dict
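# Resulting structure, sketched with hypothetical values:
#   nimg.oslist == {"debootstrap": [("/srv/ganeti/os/debootstrap", True, "",
#                                    set(["default"]), set(), set([20]))]}
# i.e. OS name -> list of (path, status, diagnose, variants, parameters,
# api_versions) tuples; more than one entry per name indicates duplicate OS
# definitions on the node.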
2428 def _VerifyNodeOS(self, ninfo, nimg, base):
2429 """Verifies the node OS list.
2431 @type ninfo: L{objects.Node}
2432 @param ninfo: the node to check
2433 @param nimg: the node image object
2434 @param base: the 'template' node we match against (e.g. from the master)
2438 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2440 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2442 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2443 for os_name, os_data in nimg.oslist.items():
2444 assert os_data, "Empty OS status for OS %s?!" % os_name
2445 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2446 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2447 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2448 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2449 "OS '%s' has multiple entries (first one shadows the rest): %s",
2450 os_name, utils.CommaJoin([v[0] for v in os_data]))
2451 # comparisons with the 'base' image
2452 test = os_name not in base.oslist
2453 _ErrorIf(test, constants.CV_ENODEOS, node,
2454 "Extra OS %s not present on reference node (%s)",
2458 assert base.oslist[os_name], "Base node has empty OS status?"
2459 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2461 # base OS is invalid, skipping
2463 for kind, a, b in [("API version", f_api, b_api),
2464 ("variants list", f_var, b_var),
2465 ("parameters", beautify_params(f_param),
2466 beautify_params(b_param))]:
2467 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2468 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2469 kind, os_name, base.name,
2470 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2472 # check any missing OSes
2473 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2474 _ErrorIf(missing, constants.CV_ENODEOS, node,
2475 "OSes present on reference node %s but missing on this node: %s",
2476 base.name, utils.CommaJoin(missing))
2478 def _VerifyOob(self, ninfo, nresult):
2479 """Verifies out of band functionality of a node.
2481 @type ninfo: L{objects.Node}
2482 @param ninfo: the node to check
2483 @param nresult: the remote results for the node
2487 # We just have to verify the paths on master and/or master candidates
2488 # as the oob helper is invoked on the master
2489 if ((ninfo.master_candidate or ninfo.master_capable) and
2490 constants.NV_OOB_PATHS in nresult):
2491 for path_result in nresult[constants.NV_OOB_PATHS]:
2492 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2494 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2495 """Verifies and updates the node volume data.
2497 This function will update a L{NodeImage}'s internal structures
2498 with data from the remote call.
2500 @type ninfo: L{objects.Node}
2501 @param ninfo: the node to check
2502 @param nresult: the remote results for the node
2503 @param nimg: the node image object
2504 @param vg_name: the configured VG name
2508 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2510 nimg.lvm_fail = True
2511 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2514 elif isinstance(lvdata, basestring):
2515 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2516 utils.SafeEncode(lvdata))
2517 elif not isinstance(lvdata, dict):
2518 _ErrorIf(True, constants.CV_ENODELVM, node,
2519 "rpc call to node failed (lvlist)")
2521 nimg.volumes = lvdata
2522 nimg.lvm_fail = False
2524 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2525 """Verifies and updates the node instance list.
2527 If the listing was successful, then updates this node's instance
2528 list. Otherwise, it marks the RPC call as failed for the instance
2531 @type ninfo: L{objects.Node}
2532 @param ninfo: the node to check
2533 @param nresult: the remote results for the node
2534 @param nimg: the node image object
2537 idata = nresult.get(constants.NV_INSTANCELIST, None)
2538 test = not isinstance(idata, list)
2539 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2540 "rpc call to node failed (instancelist): %s",
2541 utils.SafeEncode(str(idata)))
2543 nimg.hyp_fail = True
2545 nimg.instances = idata
2547 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2548 """Verifies and computes a node information map
2550 @type ninfo: L{objects.Node}
2551 @param ninfo: the node to check
2552 @param nresult: the remote results for the node
2553 @param nimg: the node image object
2554 @param vg_name: the configured VG name
2558 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2560 # try to read free memory (from the hypervisor)
2561 hv_info = nresult.get(constants.NV_HVINFO, None)
2562 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2563 _ErrorIf(test, constants.CV_ENODEHV, node,
2564 "rpc call to node failed (hvinfo)")
2567 nimg.mfree = int(hv_info["memory_free"])
2568 except (ValueError, TypeError):
2569 _ErrorIf(True, constants.CV_ENODERPC, node,
2570 "node returned invalid nodeinfo, check hypervisor")
2572 # FIXME: devise a free space model for file based instances as well
2573 if vg_name is not None:
2574 test = (constants.NV_VGLIST not in nresult or
2575 vg_name not in nresult[constants.NV_VGLIST])
2576 _ErrorIf(test, constants.CV_ENODELVM, node,
2577 "node didn't return data for the volume group '%s'"
2578 " - it is either missing or broken", vg_name)
2581 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2582 except (ValueError, TypeError):
2583 _ErrorIf(True, constants.CV_ENODERPC, node,
2584 "node returned invalid LVM info, check LVM status")
2586 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2587 """Gets per-disk status information for all instances.
2589 @type nodelist: list of strings
2590 @param nodelist: Node names
2591 @type node_image: dict of (name, L{objects.Node})
2592 @param node_image: Node objects
2593 @type instanceinfo: dict of (name, L{objects.Instance})
2594 @param instanceinfo: Instance objects
2595 @rtype: {instance: {node: [(success, payload)]}}
2596 @return: a dictionary of per-instance dictionaries with nodes as
2597 keys and disk information as values; the disk information is a
2598 list of tuples (success, payload)
2601 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2604 node_disks_devonly = {}
2605 diskless_instances = set()
2606 diskless = constants.DT_DISKLESS
2608 for nname in nodelist:
2609 node_instances = list(itertools.chain(node_image[nname].pinst,
2610 node_image[nname].sinst))
2611 diskless_instances.update(inst for inst in node_instances
2612 if instanceinfo[inst].disk_template == diskless)
2613 disks = [(inst, disk)
2614 for inst in node_instances
2615 for disk in instanceinfo[inst].disks]
2618 # No need to collect data
2621 node_disks[nname] = disks
2623 # Creating copies as SetDiskID below will modify the objects and that can
2624 # lead to incorrect data returned from nodes
2625 devonly = [dev.Copy() for (_, dev) in disks]
2628 self.cfg.SetDiskID(dev, nname)
2630 node_disks_devonly[nname] = devonly
2632 assert len(node_disks) == len(node_disks_devonly)
2634 # Collect data from all nodes with disks
2635 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2638 assert len(result) == len(node_disks)
2642 for (nname, nres) in result.items():
2643 disks = node_disks[nname]
2646 # No data from this node
2647 data = len(disks) * [(False, "node offline")]
2650 _ErrorIf(msg, constants.CV_ENODERPC, nname,
2651 "while getting disk information: %s", msg)
2653 # No data from this node
2654 data = len(disks) * [(False, msg)]
2657 for idx, i in enumerate(nres.payload):
2658 if isinstance(i, (tuple, list)) and len(i) == 2:
2661 logging.warning("Invalid result from node %s, entry %d: %s",
2663 data.append((False, "Invalid result from the remote node"))
2665 for ((inst, _), status) in zip(disks, data):
2666 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2668 # Add empty entries for diskless instances.
2669 for inst in diskless_instances:
2670 assert inst not in instdisk
2673 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2674 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2675 compat.all(isinstance(s, (tuple, list)) and
2676 len(s) == 2 for s in statuses)
2677 for inst, nnames in instdisk.items()
2678 for nname, statuses in nnames.items())
2679 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2683 @staticmethod
2684 def _SshNodeSelector(group_uuid, all_nodes):
2685 """Create endless iterators for all potential SSH check hosts.
2688 nodes = [node for node in all_nodes
2689 if (node.group != group_uuid and
2691 keyfunc = operator.attrgetter("group")
2693 return map(itertools.cycle,
2694 [sorted(map(operator.attrgetter("name"), names))
2695 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2696 keyfunc)])
2698 @classmethod
2699 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2700 """Choose which nodes should talk to which other nodes.
2702 We will make nodes contact all nodes in their group, and one node from
2703 every other group.
2705 @warning: This algorithm has a known issue if one node group is much
2706 smaller than others (e.g. just one node). In such a case all other
2707 nodes will talk to the single node.
2710 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2711 sel = cls._SshNodeSelector(group_uuid, all_nodes)
2713 return (online_nodes,
2714 dict((name, sorted([i.next() for i in sel]))
2715 for name in online_nodes))
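# Hypothetical example of the selection above: verifying group G1 = {A, B}
# with one other group G2 = {C, D}, the result would be
#   (["A", "B"], {"A": ["C"], "B": ["D"]})
# i.e. each online node checks all nodes of its own group plus one node
# cycled from every other group.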
2717 def BuildHooksEnv(self):
2720 Cluster-Verify hooks are run in the post phase and their failure causes
2721 the output to be logged in the verify output and the verification to fail.
2725 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2728 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2729 for node in self.my_node_info.values())
2733 def BuildHooksNodes(self):
2734 """Build hooks nodes.
2737 return ([], self.my_node_names)
2739 def Exec(self, feedback_fn):
2740 """Verify integrity of the node group, performing various test on nodes.
2743 # This method has too many local variables. pylint: disable=R0914
2744 feedback_fn("* Verifying group '%s'" % self.group_info.name)
2746 if not self.my_node_names:
2748 feedback_fn("* Empty node group, skipping verification")
2752 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2753 verbose = self.op.verbose
2754 self._feedback_fn = feedback_fn
2756 vg_name = self.cfg.GetVGName()
2757 drbd_helper = self.cfg.GetDRBDHelper()
2758 cluster = self.cfg.GetClusterInfo()
2759 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2760 hypervisors = cluster.enabled_hypervisors
2761 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2763 i_non_redundant = [] # Non redundant instances
2764 i_non_a_balanced = [] # Non auto-balanced instances
2765 i_offline = 0 # Count of offline instances
2766 n_offline = 0 # Count of offline nodes
2767 n_drained = 0 # Count of nodes being drained
2768 node_vol_should = {}
2770 # FIXME: verify OS list
2773 filemap = _ComputeAncillaryFiles(cluster, False)
2775 # do local checksums
2776 master_node = self.master_node = self.cfg.GetMasterNode()
2777 master_ip = self.cfg.GetMasterIP()
2779 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2782 if self.cfg.GetUseExternalMipScript():
2783 user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
2785 node_verify_param = {
2786 constants.NV_FILELIST:
2787 utils.UniqueSequence(filename
2788 for files in filemap
2789 for filename in files),
2790 constants.NV_NODELIST:
2791 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
2792 self.all_node_info.values()),
2793 constants.NV_HYPERVISOR: hypervisors,
2794 constants.NV_HVPARAMS:
2795 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2796 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2797 for node in node_data_list
2798 if not node.offline],
2799 constants.NV_INSTANCELIST: hypervisors,
2800 constants.NV_VERSION: None,
2801 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2802 constants.NV_NODESETUP: None,
2803 constants.NV_TIME: None,
2804 constants.NV_MASTERIP: (master_node, master_ip),
2805 constants.NV_OSLIST: None,
2806 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2807 constants.NV_USERSCRIPTS: user_scripts,
2810 if vg_name is not None:
2811 node_verify_param[constants.NV_VGLIST] = None
2812 node_verify_param[constants.NV_LVLIST] = vg_name
2813 node_verify_param[constants.NV_PVLIST] = [vg_name]
2814 node_verify_param[constants.NV_DRBDLIST] = None
2817 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2820 # FIXME: this needs to be changed per node-group, not cluster-wide
2822 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2823 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2824 bridges.add(default_nicpp[constants.NIC_LINK])
2825 for instance in self.my_inst_info.values():
2826 for nic in instance.nics:
2827 full_nic = cluster.SimpleFillNIC(nic.nicparams)
2828 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2829 bridges.add(full_nic[constants.NIC_LINK])
2832 node_verify_param[constants.NV_BRIDGES] = list(bridges)
2834 # Build our expected cluster state
2835 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2837 vm_capable=node.vm_capable))
2838 for node in node_data_list)
2842 for node in self.all_node_info.values():
2843 path = _SupportsOob(self.cfg, node)
2844 if path and path not in oob_paths:
2845 oob_paths.append(path)
2848 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2850 for instance in self.my_inst_names:
2851 inst_config = self.my_inst_info[instance]
2853 for nname in inst_config.all_nodes:
2854 if nname not in node_image:
2855 gnode = self.NodeImage(name=nname)
2856 gnode.ghost = (nname not in self.all_node_info)
2857 node_image[nname] = gnode
2859 inst_config.MapLVsByNode(node_vol_should)
2861 pnode = inst_config.primary_node
2862 node_image[pnode].pinst.append(instance)
2864 for snode in inst_config.secondary_nodes:
2865 nimg = node_image[snode]
2866 nimg.sinst.append(instance)
2867 if pnode not in nimg.sbp:
2868 nimg.sbp[pnode] = []
2869 nimg.sbp[pnode].append(instance)
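# Sketch of the expected-state image built above (hypothetical names):
#   node_image["node2"].pinst == ["inst3"]           # primary instances
#   node_image["node2"].sinst == ["inst1", "inst2"]  # secondary instances
#   node_image["node2"].sbp   == {"node1": ["inst1", "inst2"]}
# sbp groups secondary instances by their primary node and is what the N+1
# memory check later sums over.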
2871 # At this point, we have the in-memory data structures complete,
2872 # except for the runtime information, which we'll gather next
2874 # Due to the way our RPC system works, exact response times cannot be
2875 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2876 # time before and after executing the request, we can at least have a time
2878 nvinfo_starttime = time.time()
2879 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2881 self.cfg.GetClusterName())
2882 nvinfo_endtime = time.time()
2884 if self.extra_lv_nodes and vg_name is not None:
2886 self.rpc.call_node_verify(self.extra_lv_nodes,
2887 {constants.NV_LVLIST: vg_name},
2888 self.cfg.GetClusterName())
2890 extra_lv_nvinfo = {}
2892 all_drbd_map = self.cfg.ComputeDRBDMap()
2894 feedback_fn("* Gathering disk information (%s nodes)" %
2895 len(self.my_node_names))
2896 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2899 feedback_fn("* Verifying configuration file consistency")
2901 # If not all nodes are being checked, we need to make sure the master node
2902 # and a non-checked vm_capable node are in the list.
2903 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2905 vf_nvinfo = all_nvinfo.copy()
2906 vf_node_info = list(self.my_node_info.values())
2907 additional_nodes = []
2908 if master_node not in self.my_node_info:
2909 additional_nodes.append(master_node)
2910 vf_node_info.append(self.all_node_info[master_node])
2911 # Add the first vm_capable node we find which is not included
2912 for node in absent_nodes:
2913 nodeinfo = self.all_node_info[node]
2914 if nodeinfo.vm_capable and not nodeinfo.offline:
2915 additional_nodes.append(node)
2916 vf_node_info.append(self.all_node_info[node])
2918 key = constants.NV_FILELIST
2919 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2920 {key: node_verify_param[key]},
2921 self.cfg.GetClusterName()))
2923 vf_nvinfo = all_nvinfo
2924 vf_node_info = self.my_node_info.values()
2926 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2928 feedback_fn("* Verifying node status")
2932 for node_i in node_data_list:
2934 nimg = node_image[node]
2938 feedback_fn("* Skipping offline node %s" % (node,))
2942 if node == master_node:
2944 elif node_i.master_candidate:
2945 ntype = "master candidate"
2946 elif node_i.drained:
2952 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2954 msg = all_nvinfo[node].fail_msg
2955 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
2958 nimg.rpc_fail = True
2961 nresult = all_nvinfo[node].payload
2963 nimg.call_ok = self._VerifyNode(node_i, nresult)
2964 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2965 self._VerifyNodeNetwork(node_i, nresult)
2966 self._VerifyNodeUserScripts(node_i, nresult)
2967 self._VerifyOob(node_i, nresult)
2970 self._VerifyNodeLVM(node_i, nresult, vg_name)
2971 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2974 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2975 self._UpdateNodeInstances(node_i, nresult, nimg)
2976 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2977 self._UpdateNodeOS(node_i, nresult, nimg)
2979 if not nimg.os_fail:
2980 if refos_img is None:
2982 self._VerifyNodeOS(node_i, nimg, refos_img)
2983 self._VerifyNodeBridges(node_i, nresult, bridges)
2985 # Check that all instances running on the node have it as their primary. (This
2986 # can no longer be done from _VerifyInstance below, since some of the
2987 # wrong instances could be from other node groups.)
2988 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2990 for inst in non_primary_inst:
2991 # FIXME: investigate best way to handle offline insts
2992 if inst.admin_state == constants.ADMINST_OFFLINE:
2994 feedback_fn("* Skipping offline instance %s" % inst.name)
2997 test = inst in self.all_inst_info
2998 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
2999 "instance should not run on node %s", node_i.name)
3000 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3001 "node is running unknown instance %s", inst)
3003 for node, result in extra_lv_nvinfo.items():
3004 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3005 node_image[node], vg_name)
3007 feedback_fn("* Verifying instance status")
3008 for instance in self.my_inst_names:
3010 feedback_fn("* Verifying instance %s" % instance)
3011 inst_config = self.my_inst_info[instance]
3012 self._VerifyInstance(instance, inst_config, node_image,
3014 inst_nodes_offline = []
3016 pnode = inst_config.primary_node
3017 pnode_img = node_image[pnode]
3018 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3019 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3020 " primary node failed", instance)
3022 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3024 constants.CV_EINSTANCEBADNODE, instance,
3025 "instance is marked as running and lives on offline node %s",
3026 inst_config.primary_node)
3028 # If the instance is non-redundant we cannot survive losing its primary
3029 # node, so we are not N+1 compliant. On the other hand we have no disk
3030 # templates with more than one secondary, so that situation is not well
3031 # supported either.
3032 # FIXME: does not support file-backed instances
3033 if not inst_config.secondary_nodes:
3034 i_non_redundant.append(instance)
3036 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3037 constants.CV_EINSTANCELAYOUT,
3038 instance, "instance has multiple secondary nodes: %s",
3039 utils.CommaJoin(inst_config.secondary_nodes),
3040 code=self.ETYPE_WARNING)
3042 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3043 pnode = inst_config.primary_node
3044 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3045 instance_groups = {}
3047 for node in instance_nodes:
3048 instance_groups.setdefault(self.all_node_info[node].group,
3052 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3053 # Sort so that we always list the primary node first.
3054 for group, nodes in sorted(instance_groups.items(),
3055 key=lambda (_, nodes): pnode in nodes,
3058 self._ErrorIf(len(instance_groups) > 1,
3059 constants.CV_EINSTANCESPLITGROUPS,
3060 instance, "instance has primary and secondary nodes in"
3061 " different groups: %s", utils.CommaJoin(pretty_list),
3062 code=self.ETYPE_WARNING)
3064 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3065 i_non_a_balanced.append(instance)
3067 for snode in inst_config.secondary_nodes:
3068 s_img = node_image[snode]
3069 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3070 snode, "instance %s, connection to secondary node failed",
3074 inst_nodes_offline.append(snode)
3076 # warn that the instance lives on offline nodes
3077 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3078 "instance has offline secondary node(s) %s",
3079 utils.CommaJoin(inst_nodes_offline))
3080 # ... or ghost/non-vm_capable nodes
3081 for node in inst_config.all_nodes:
3082 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3083 instance, "instance lives on ghost node %s", node)
3084 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3085 instance, "instance lives on non-vm_capable node %s", node)
3087 feedback_fn("* Verifying orphan volumes")
3088 reserved = utils.FieldSet(*cluster.reserved_lvs)
3090 # We will get spurious "unknown volume" warnings if any node of this group
3091 # is secondary for an instance whose primary is in another group. To avoid
3092 # them, we find these instances and add their volumes to node_vol_should.
3093 for inst in self.all_inst_info.values():
3094 for secondary in inst.secondary_nodes:
3095 if (secondary in self.my_node_info
3096 and inst.name not in self.my_inst_info):
3097 inst.MapLVsByNode(node_vol_should)
3100 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3102 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3103 feedback_fn("* Verifying N+1 Memory redundancy")
3104 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3106 feedback_fn("* Other Notes")
3108 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3109 % len(i_non_redundant))
3111 if i_non_a_balanced:
3112 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3113 % len(i_non_a_balanced))
3116 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3119 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3122 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3126 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3127 """Analyze the post-hooks' result
3129 This method analyses the hook result, handles it, and sends some
3130 nicely-formatted feedback back to the user.
3132 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3133 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3134 @param hooks_results: the results of the multi-node hooks rpc call
3135 @param feedback_fn: function used to send feedback back to the caller
3136 @param lu_result: previous Exec result
3137 @return: the new Exec result, based on the previous result
3141 # We only really run POST phase hooks, only for non-empty groups,
3142 # and are only interested in their results
3143 if not self.my_node_names:
3146 elif phase == constants.HOOKS_PHASE_POST:
3147 # Used to change hooks' output to proper indentation
3148 feedback_fn("* Hooks Results")
3149 assert hooks_results, "invalid result from hooks"
3151 for node_name in hooks_results:
3152 res = hooks_results[node_name]
3154 test = msg and not res.offline
3155 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3156 "Communication failure in hooks execution: %s", msg)
3157 if res.offline or msg:
3158 # No need to investigate payload if node is offline or gave
3161 for script, hkr, output in res.payload:
3162 test = hkr == constants.HKR_FAIL
3163 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3164 "Script %s failed, output:", script)
3166 output = self._HOOKS_INDENT_RE.sub(" ", output)
3167 feedback_fn("%s" % output)
3173 class LUClusterVerifyDisks(NoHooksLU):
3174 """Verifies the cluster disks status.
3179 def ExpandNames(self):
3180 self.share_locks = _ShareAll()
3181 self.needed_locks = {
3182 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3185 def Exec(self, feedback_fn):
3186 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3188 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3189 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3190 for group in group_names])
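# Hypothetical result for two owned node group locks g1 and g2: the returned
# ResultWithJobs wraps [[OpGroupVerifyDisks(group_name=g1)],
# [OpGroupVerifyDisks(group_name=g2)]], i.e. one single-opcode job per group.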
3193 class LUGroupVerifyDisks(NoHooksLU):
3194 """Verifies the status of all disks in a node group.
3199 def ExpandNames(self):
3200 # Raises errors.OpPrereqError on its own if group can't be found
3201 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3203 self.share_locks = _ShareAll()
3204 self.needed_locks = {
3205 locking.LEVEL_INSTANCE: [],
3206 locking.LEVEL_NODEGROUP: [],
3207 locking.LEVEL_NODE: [],
3210 def DeclareLocks(self, level):
3211 if level == locking.LEVEL_INSTANCE:
3212 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3214 # Lock instances optimistically, needs verification once node and group
3215 # locks have been acquired
3216 self.needed_locks[locking.LEVEL_INSTANCE] = \
3217 self.cfg.GetNodeGroupInstances(self.group_uuid)
3219 elif level == locking.LEVEL_NODEGROUP:
3220 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3222 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3223 set([self.group_uuid] +
3224 # Lock all groups used by instances optimistically; this requires
3225 # going via the node before it's locked, requiring verification
3228 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3229 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3231 elif level == locking.LEVEL_NODE:
3232 # This will only lock the nodes in the group to be verified which contain
3233 # actual instances
3234 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3235 self._LockInstancesNodes()
3237 # Lock all nodes in group to be verified
3238 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3239 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3240 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3242 def CheckPrereq(self):
3243 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3244 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3245 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3247 assert self.group_uuid in owned_groups
3249 # Check if locked instances are still correct
3250 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3252 # Get instance information
3253 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3255 # Check if node groups for locked instances are still correct
3256 for (instance_name, inst) in self.instances.items():
3257 assert owned_nodes.issuperset(inst.all_nodes), \
3258 "Instance %s's nodes changed while we kept the lock" % instance_name
3260 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3263 assert self.group_uuid in inst_groups, \
3264 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3266 def Exec(self, feedback_fn):
3267 """Verify integrity of cluster disks.
3269 @rtype: tuple of three items
3270 @return: a tuple of (dict of node-to-node_error, list of instances
3271 which need activate-disks, dict of instance: (node, volume) for
3272 missing volumes
3276 res_instances = set()
3279 nv_dict = _MapInstanceDisksToNodes([inst
3280 for inst in self.instances.values()
3281 if inst.admin_state == constants.ADMINST_UP])
3284 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3285 set(self.cfg.GetVmCapableNodeList()))
3287 node_lvs = self.rpc.call_lv_list(nodes, [])
3289 for (node, node_res) in node_lvs.items():
3290 if node_res.offline:
3293 msg = node_res.fail_msg
3295 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3296 res_nodes[node] = msg
3299 for lv_name, (_, _, lv_online) in node_res.payload.items():
3300 inst = nv_dict.pop((node, lv_name), None)
3301 if not (lv_online or inst is None):
3302 res_instances.add(inst)
3304 # any leftover items in nv_dict are missing LVs, let's arrange the data
3306 for key, inst in nv_dict.iteritems():
3307 res_missing.setdefault(inst, []).append(list(key))
3309 return (res_nodes, list(res_instances), res_missing)
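# Hypothetical shape of the return value above:
#   ({"node3": "Error enumerating LVs: ..."},   # per-node errors
#    ["inst2"],                                 # instances needing activate-disks
#    {"inst5": [["node1", "vg/lv_data"]]})      # missing (node, volume) pairs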
3312 class LUClusterRepairDiskSizes(NoHooksLU):
3313 """Verifies the cluster disks sizes.
3318 def ExpandNames(self):
3319 if self.op.instances:
3320 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3321 self.needed_locks = {
3322 locking.LEVEL_NODE_RES: [],
3323 locking.LEVEL_INSTANCE: self.wanted_names,
3325 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3327 self.wanted_names = None
3328 self.needed_locks = {
3329 locking.LEVEL_NODE_RES: locking.ALL_SET,
3330 locking.LEVEL_INSTANCE: locking.ALL_SET,
3332 self.share_locks = {
3333 locking.LEVEL_NODE_RES: 1,
3334 locking.LEVEL_INSTANCE: 0,
3337 def DeclareLocks(self, level):
3338 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3339 self._LockInstancesNodes(primary_only=True, level=level)
3341 def CheckPrereq(self):
3342 """Check prerequisites.
3344 This only checks the optional instance list against the existing names.
3347 if self.wanted_names is None:
3348 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3350 self.wanted_instances = \
3351 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3353 def _EnsureChildSizes(self, disk):
3354 """Ensure children of the disk have the needed disk size.
3356 This is valid mainly for DRBD8 and fixes an issue where the
3357 children have a smaller disk size than the parent.
3359 @param disk: an L{ganeti.objects.Disk} object
3362 if disk.dev_type == constants.LD_DRBD8:
3363 assert disk.children, "Empty children for DRBD8?"
3364 fchild = disk.children[0]
3365 mismatch = fchild.size < disk.size
3367 self.LogInfo("Child disk has size %d, parent %d, fixing",
3368 fchild.size, disk.size)
3369 fchild.size = disk.size
3371 # and we recurse on this child only, not on the metadev
3372 return self._EnsureChildSizes(fchild) or mismatch
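# Hypothetical example of the fix above: a DRBD8 disk recorded with size
# 10240 whose data child is recorded as 10112 gets the child bumped to 10240,
# and the method returns True so the caller knows the configuration must be
# written back.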
3376 def Exec(self, feedback_fn):
3377 """Verify the size of cluster disks.
3380 # TODO: check child disks too
3381 # TODO: check differences in size between primary/secondary nodes
3383 for instance in self.wanted_instances:
3384 pnode = instance.primary_node
3385 if pnode not in per_node_disks:
3386 per_node_disks[pnode] = []
3387 for idx, disk in enumerate(instance.disks):
3388 per_node_disks[pnode].append((instance, idx, disk))
3390 assert not (frozenset(per_node_disks.keys()) -
3391 self.owned_locks(locking.LEVEL_NODE_RES)), \
3392 "Not owning correct locks"
3393 assert not self.owned_locks(locking.LEVEL_NODE)
3396 for node, dskl in per_node_disks.items():
3397 newl = [v[2].Copy() for v in dskl]
3398 for dsk in newl:
3399 self.cfg.SetDiskID(dsk, node)
3400 result = self.rpc.call_blockdev_getsize(node, newl)
3402 self.LogWarning("Failure in blockdev_getsize call to node"
3403 " %s, ignoring", node)
3405 if len(result.payload) != len(dskl):
3406 logging.warning("Invalid result from node %s: len(dksl)=%d,"
3407 " result.payload=%s", node, len(dskl), result.payload)
3408 self.LogWarning("Invalid result from node %s, ignoring node results",
3411 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3413 self.LogWarning("Disk %d of instance %s did not return size"
3414 " information, ignoring", idx, instance.name)
3416 if not isinstance(size, (int, long)):
3417 self.LogWarning("Disk %d of instance %s did not return valid"
3418 " size information, ignoring", idx, instance.name)
3421 if size != disk.size:
3422 self.LogInfo("Disk %d of instance %s has mismatched size,"
3423 " correcting: recorded %d, actual %d", idx,
3424 instance.name, disk.size, size)
3426 self.cfg.Update(instance, feedback_fn)
3427 changed.append((instance.name, idx, size))
3428 if self._EnsureChildSizes(disk):
3429 self.cfg.Update(instance, feedback_fn)
3430 changed.append((instance.name, idx, disk.size))
3434 class LUClusterRename(LogicalUnit):
3435 """Rename the cluster.
3438 HPATH = "cluster-rename"
3439 HTYPE = constants.HTYPE_CLUSTER
3441 def BuildHooksEnv(self):
3446 "OP_TARGET": self.cfg.GetClusterName(),
3447 "NEW_NAME": self.op.name,
3450 def BuildHooksNodes(self):
3451 """Build hooks nodes.
3454 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3456 def CheckPrereq(self):
3457 """Verify that the passed name is a valid one.
3460 hostname = netutils.GetHostname(name=self.op.name,
3461 family=self.cfg.GetPrimaryIPFamily())
3463 new_name = hostname.name
3464 self.ip = new_ip = hostname.ip
3465 old_name = self.cfg.GetClusterName()
3466 old_ip = self.cfg.GetMasterIP()
3467 if new_name == old_name and new_ip == old_ip:
3468 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3469 " cluster has changed",
3471 if new_ip != old_ip:
3472 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3473 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3474 " reachable on the network" %
3475 new_ip, errors.ECODE_NOTUNIQUE)
3477 self.op.name = new_name
3479 def Exec(self, feedback_fn):
3480 """Rename the cluster.
3483 clustername = self.op.name
3486 # shutdown the master IP
3487 master_params = self.cfg.GetMasterNetworkParameters()
3488 ems = self.cfg.GetUseExternalMipScript()
3489 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3491 result.Raise("Could not disable the master role")
3494 cluster = self.cfg.GetClusterInfo()
3495 cluster.cluster_name = clustername
3496 cluster.master_ip = new_ip
3497 self.cfg.Update(cluster, feedback_fn)
3499 # update the known hosts file
3500 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3501 node_list = self.cfg.GetOnlineNodeList()
3503 node_list.remove(master_params.name)
3506 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3508 master_params.ip = new_ip
3509 result = self.rpc.call_node_activate_master_ip(master_params.name,
3511 msg = result.fail_msg
3513 self.LogWarning("Could not re-enable the master role on"
3514 " the master, please restart manually: %s", msg)
3519 def _ValidateNetmask(cfg, netmask):
3520 """Checks if a netmask is valid.
3522 @type cfg: L{config.ConfigWriter}
3523 @param cfg: The cluster configuration
3525 @param netmask: the netmask to be verified
3526 @raise errors.OpPrereqError: if the validation fails
3529 ip_family = cfg.GetPrimaryIPFamily()
3531 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3532 except errors.ProgrammerError:
3533 raise errors.OpPrereqError("Invalid primary ip family: %s." %
3535 if not ipcls.ValidateNetmask(netmask):
3536 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3540 class LUClusterSetParams(LogicalUnit):
3541 """Change the parameters of the cluster.
3544 HPATH = "cluster-modify"
3545 HTYPE = constants.HTYPE_CLUSTER
3548 def CheckArguments(self):
3552 if self.op.uid_pool:
3553 uidpool.CheckUidPool(self.op.uid_pool)
3555 if self.op.add_uids:
3556 uidpool.CheckUidPool(self.op.add_uids)
3558 if self.op.remove_uids:
3559 uidpool.CheckUidPool(self.op.remove_uids)
3561 if self.op.master_netmask is not None:
3562 _ValidateNetmask(self.cfg, self.op.master_netmask)
3564 if self.op.diskparams:
3565 for dt_params in self.op.diskparams.values():
3566 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3568 def ExpandNames(self):
3569 # FIXME: in the future maybe other cluster params won't require checking on
3570 # all nodes to be modified.
3571 self.needed_locks = {
3572 locking.LEVEL_NODE: locking.ALL_SET,
3574 self.share_locks[locking.LEVEL_NODE] = 1
3576 def BuildHooksEnv(self):
3581 "OP_TARGET": self.cfg.GetClusterName(),
3582 "NEW_VG_NAME": self.op.vg_name,
3585 def BuildHooksNodes(self):
3586 """Build hooks nodes.
3589 mn = self.cfg.GetMasterNode()
3592 def CheckPrereq(self):
3593 """Check prerequisites.
3595 This checks whether the given params don't conflict and
3596 if the given volume group is valid.
3599 if self.op.vg_name is not None and not self.op.vg_name:
3600 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3601 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3602 " instances exist", errors.ECODE_INVAL)
3604 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3605 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3606 raise errors.OpPrereqError("Cannot disable drbd helper while"
3607 " drbd-based instances exist",
3610 node_list = self.owned_locks(locking.LEVEL_NODE)
3612 # if vg_name not None, checks given volume group on all nodes
3614 vglist = self.rpc.call_vg_list(node_list)
3615 for node in node_list:
3616 msg = vglist[node].fail_msg
3618 # ignoring down node
3619 self.LogWarning("Error while gathering data on node %s"
3620 " (ignoring node): %s", node, msg)
3622 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3624 constants.MIN_VG_SIZE)
3626 raise errors.OpPrereqError("Error on node '%s': %s" %
3627 (node, vgstatus), errors.ECODE_ENVIRON)
3629 if self.op.drbd_helper:
3630 # checks given drbd helper on all nodes
3631 helpers = self.rpc.call_drbd_helper(node_list)
3632 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3634 self.LogInfo("Not checking drbd helper on offline node %s", node)
3636 msg = helpers[node].fail_msg
3638 raise errors.OpPrereqError("Error checking drbd helper on node"
3639 " '%s': %s" % (node, msg),
3640 errors.ECODE_ENVIRON)
3641 node_helper = helpers[node].payload
3642 if node_helper != self.op.drbd_helper:
3643 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3644 (node, node_helper), errors.ECODE_ENVIRON)
3646 self.cluster = cluster = self.cfg.GetClusterInfo()
3647 # validate params changes
3648 if self.op.beparams:
3649 objects.UpgradeBeParams(self.op.beparams)
3650 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3651 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3653 if self.op.ndparams:
3654 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3655 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3657 # TODO: we need a more general way to handle resetting
3658 # cluster-level parameters to default values
3659 if self.new_ndparams["oob_program"] == "":
3660 self.new_ndparams["oob_program"] = \
3661 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3663 if self.op.hv_state:
3664 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3665 self.cluster.hv_state_static)
3666 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3667 for hv, values in new_hv_state.items())
3669 if self.op.disk_state:
3670 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3671 self.cluster.disk_state_static)
3672 self.new_disk_state = \
3673 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3674 for name, values in svalues.items()))
3675 for storage, svalues in new_disk_state.items())
3679 for key, value in self.op.ipolicy.items():
3680 utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
3681 ipolicy[key] = _GetUpdatedParams(cluster.ipolicy.get(key, {}),
3683 objects.InstancePolicy.CheckParameterSyntax(ipolicy)
3684 self.new_ipolicy = ipolicy
3686 if self.op.nicparams:
3687 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3688 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3689 objects.NIC.CheckParameterSyntax(self.new_nicparams)
3692 # check all instances for consistency
3693 for instance in self.cfg.GetAllInstancesInfo().values():
3694 for nic_idx, nic in enumerate(instance.nics):
3695 params_copy = copy.deepcopy(nic.nicparams)
3696 params_filled = objects.FillDict(self.new_nicparams, params_copy)
3698 # check parameter syntax
3700 objects.NIC.CheckParameterSyntax(params_filled)
3701 except errors.ConfigurationError, err:
3702 nic_errors.append("Instance %s, nic/%d: %s" %
3703 (instance.name, nic_idx, err))
3705 # if we're moving instances to routed, check that they have an ip
3706 target_mode = params_filled[constants.NIC_MODE]
3707 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3708 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3709 " address" % (instance.name, nic_idx))
3711 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3712 "\n".join(nic_errors))
3714 # hypervisor list/parameters
3715 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3716 if self.op.hvparams:
3717 for hv_name, hv_dict in self.op.hvparams.items():
3718 if hv_name not in self.new_hvparams:
3719 self.new_hvparams[hv_name] = hv_dict
3721 self.new_hvparams[hv_name].update(hv_dict)
3723 # disk template parameters
3724 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
3725 if self.op.diskparams:
3726 for dt_name, dt_params in self.op.diskparams.items():
3727 if dt_name not in self.new_diskparams:
3728 self.new_diskparams[dt_name] = dt_params
3730 self.new_diskparams[dt_name].update(dt_params)
3732 # os hypervisor parameters
3733 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3735 for os_name, hvs in self.op.os_hvp.items():
3736 if os_name not in self.new_os_hvp:
3737 self.new_os_hvp[os_name] = hvs
3739 for hv_name, hv_dict in hvs.items():
3740 if hv_name not in self.new_os_hvp[os_name]:
3741 self.new_os_hvp[os_name][hv_name] = hv_dict
else:
3743 self.new_os_hvp[os_name][hv_name].update(hv_dict)
3746 self.new_osp = objects.FillDict(cluster.osparams, {})
3747 if self.op.osparams:
3748 for os_name, osp in self.op.osparams.items():
3749 if os_name not in self.new_osp:
3750 self.new_osp[os_name] = {}
3752 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
use_none=True)
3755 if not self.new_osp[os_name]:
3756 # we removed all parameters
3757 del self.new_osp[os_name]
else:
3759 # check the parameter validity (remote check)
3760 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3761 os_name, self.new_osp[os_name])
3763 # changes to the hypervisor list
3764 if self.op.enabled_hypervisors is not None:
3765 self.hv_list = self.op.enabled_hypervisors
3766 for hv in self.hv_list:
3767 # if the hypervisor doesn't already exist in the cluster
3768 # hvparams, we initialize it to empty, and then (in both
3769 # cases) we make sure to fill the defaults, as we might not
3770 # have a complete defaults list if the hypervisor wasn't
# enabled before
3772 if hv not in new_hvp:
new_hvp[hv] = {}
3774 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3775 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
else:
3777 self.hv_list = cluster.enabled_hypervisors
3779 if self.op.hvparams or self.op.enabled_hypervisors is not None:
3780 # either the enabled list has changed, or the parameters have, validate
3781 for hv_name, hv_params in self.new_hvparams.items():
3782 if ((self.op.hvparams and hv_name in self.op.hvparams) or
3783 (self.op.enabled_hypervisors and
3784 hv_name in self.op.enabled_hypervisors)):
3785 # either this is a new hypervisor, or its parameters have changed
3786 hv_class = hypervisor.GetHypervisor(hv_name)
3787 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3788 hv_class.CheckParameterSyntax(hv_params)
3789 _CheckHVParams(self, node_list, hv_name, hv_params)
if self.op.os_hvp:
3792 # no need to check any newly-enabled hypervisors, since the
3793 # defaults have already been checked in the above code-block
3794 for os_name, os_hvp in self.new_os_hvp.items():
3795 for hv_name, hv_params in os_hvp.items():
3796 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3797 # we need to fill in the new os_hvp on top of the actual hv_p
3798 cluster_defaults = self.new_hvparams.get(hv_name, {})
3799 new_osp = objects.FillDict(cluster_defaults, hv_params)
3800 hv_class = hypervisor.GetHypervisor(hv_name)
3801 hv_class.CheckParameterSyntax(new_osp)
3802 _CheckHVParams(self, node_list, hv_name, new_osp)
3804 if self.op.default_iallocator:
3805 alloc_script = utils.FindFile(self.op.default_iallocator,
3806 constants.IALLOCATOR_SEARCH_PATH,
os.path.isfile)
3808 if alloc_script is None:
3809 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3810 " specified" % self.op.default_iallocator,
errors.ECODE_INVAL)
3813 def Exec(self, feedback_fn):
3814 """Change the parameters of the cluster.
3817 if self.op.vg_name is not None:
3818 new_volume = self.op.vg_name
if not new_volume:
new_volume = None
3821 if new_volume != self.cfg.GetVGName():
3822 self.cfg.SetVGName(new_volume)
else:
3824 feedback_fn("Cluster LVM configuration already in desired"
3825 " state, not changing")
3826 if self.op.drbd_helper is not None:
3827 new_helper = self.op.drbd_helper
if not new_helper:
new_helper = None
3830 if new_helper != self.cfg.GetDRBDHelper():
3831 self.cfg.SetDRBDHelper(new_helper)
else:
3833 feedback_fn("Cluster DRBD helper already in desired state,"
" not changing")
3835 if self.op.hvparams:
3836 self.cluster.hvparams = self.new_hvparams
if self.op.os_hvp:
3838 self.cluster.os_hvp = self.new_os_hvp
3839 if self.op.enabled_hypervisors is not None:
3840 self.cluster.hvparams = self.new_hvparams
3841 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3842 if self.op.beparams:
3843 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3844 if self.op.nicparams:
3845 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
if self.op.ipolicy:
3847 self.cluster.ipolicy = self.new_ipolicy
3848 if self.op.osparams:
3849 self.cluster.osparams = self.new_osp
3850 if self.op.ndparams:
3851 self.cluster.ndparams = self.new_ndparams
3852 if self.op.diskparams:
3853 self.cluster.diskparams = self.new_diskparams
3854 if self.op.hv_state:
3855 self.cluster.hv_state_static = self.new_hv_state
3856 if self.op.disk_state:
3857 self.cluster.disk_state_static = self.new_disk_state
3859 if self.op.candidate_pool_size is not None:
3860 self.cluster.candidate_pool_size = self.op.candidate_pool_size
3861 # we need to update the pool size here, otherwise the save will fail
3862 _AdjustCandidatePool(self, [])
3864 if self.op.maintain_node_health is not None:
3865 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
3866 feedback_fn("Note: CONFD was disabled at build time, node health"
3867 " maintenance is not useful (still enabling it)")
3868 self.cluster.maintain_node_health = self.op.maintain_node_health
3870 if self.op.prealloc_wipe_disks is not None:
3871 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3873 if self.op.add_uids is not None:
3874 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3876 if self.op.remove_uids is not None:
3877 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3879 if self.op.uid_pool is not None:
3880 self.cluster.uid_pool = self.op.uid_pool
3882 if self.op.default_iallocator is not None:
3883 self.cluster.default_iallocator = self.op.default_iallocator
3885 if self.op.reserved_lvs is not None:
3886 self.cluster.reserved_lvs = self.op.reserved_lvs
3888 if self.op.use_external_mip_script is not None:
3889 self.cluster.use_external_mip_script = self.op.use_external_mip_script
3891 def helper_os(aname, mods, desc):
3893 lst = getattr(self.cluster, aname)
3894 for key, val in mods:
3895 if key == constants.DDM_ADD:
if val in lst:
3897 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
else:
lst.append(val)
3900 elif key == constants.DDM_REMOVE:
if val in lst:
lst.remove(val)
else:
3904 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
else:
3906 raise errors.ProgrammerError("Invalid modification '%s'" % key)
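# Hedged usage sketch for helper_os (illustrative only, not from the original
# source): "mods" is a list of (action, os_name) pairs carried by the opcode,
# so a call could look like
#   helper_os("hidden_os", [(constants.DDM_ADD, "debian-image")], "hidden")
# where "debian-image" is a made-up OS name used purely as an example.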
3908 if self.op.hidden_os:
3909 helper_os("hidden_os", self.op.hidden_os, "hidden")
3911 if self.op.blacklisted_os:
3912 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3914 if self.op.master_netdev:
3915 master_params = self.cfg.GetMasterNetworkParameters()
3916 ems = self.cfg.GetUseExternalMipScript()
3917 feedback_fn("Shutting down master ip on the current netdev (%s)" %
3918 self.cluster.master_netdev)
3919 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
master_params, ems)
3921 result.Raise("Could not disable the master ip")
3922 feedback_fn("Changing master_netdev from %s to %s" %
3923 (master_params.netdev, self.op.master_netdev))
3924 self.cluster.master_netdev = self.op.master_netdev
3926 if self.op.master_netmask:
3927 master_params = self.cfg.GetMasterNetworkParameters()
3928 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
3929 result = self.rpc.call_node_change_master_netmask(master_params.name,
3930 master_params.netmask,
3931 self.op.master_netmask,
master_params.ip,
3933 master_params.netdev)
if result.fail_msg:
3935 msg = "Could not change the master IP netmask: %s" % result.fail_msg
feedback_fn(msg)
3938 self.cluster.master_netmask = self.op.master_netmask
3940 self.cfg.Update(self.cluster, feedback_fn)
3942 if self.op.master_netdev:
3943 master_params = self.cfg.GetMasterNetworkParameters()
3944 feedback_fn("Starting the master ip on the new master netdev (%s)" %
3945 self.op.master_netdev)
3946 ems = self.cfg.GetUseExternalMipScript()
3947 result = self.rpc.call_node_activate_master_ip(master_params.name,
master_params, ems)
if result.fail_msg:
3950 self.LogWarning("Could not re-enable the master ip on"
3951 " the master, please restart manually: %s",
result.fail_msg)
3955 def _UploadHelper(lu, nodes, fname):
3956 """Helper for uploading a file and showing warnings.
3959 if os.path.exists(fname):
3960 result = lu.rpc.call_upload_file(nodes, fname)
3961 for to_node, to_result in result.items():
3962 msg = to_result.fail_msg
if msg:
3964 msg = ("Copy of file %s to node %s failed: %s" %
3965 (fname, to_node, msg))
3966 lu.proc.LogWarning(msg)
3969 def _ComputeAncillaryFiles(cluster, redist):
3970 """Compute files external to Ganeti which need to be consistent.
3972 @type redist: boolean
3973 @param redist: Whether to include files which need to be redistributed
3976 # Compute files for all nodes
files_all = set([
3978 constants.SSH_KNOWN_HOSTS_FILE,
3979 constants.CONFD_HMAC_KEY,
3980 constants.CLUSTER_DOMAIN_SECRET_FILE,
3981 constants.SPICE_CERT_FILE,
3982 constants.SPICE_CACERT_FILE,
3983 constants.RAPI_USERS_FILE,
])
if not redist:
3987 files_all.update(constants.ALL_CERT_FILES)
3988 files_all.update(ssconf.SimpleStore().GetFileList())
else:
3990 # we need to ship at least the RAPI certificate
3991 files_all.add(constants.RAPI_CERT_FILE)
3993 if cluster.modify_etc_hosts:
3994 files_all.add(constants.ETC_HOSTS)
3996 # Files which are optional, these must:
3997 # - be present in one other category as well
3998 # - either exist or not exist on all nodes of that category (mc, vm all)
files_opt = set([
4000 constants.RAPI_USERS_FILE,
])
4003 # Files which should only be on master candidates
files_mc = set()
if not redist:
4007 files_mc.add(constants.CLUSTER_CONF_FILE)
4009 # FIXME: this should also be replicated but Ganeti doesn't support files_mc
# replication
4011 files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)
4013 # Files which should only be on VM-capable nodes
4014 files_vm = set(filename
4015 for hv_name in cluster.enabled_hypervisors
4016 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4018 files_opt |= set(filename
4019 for hv_name in cluster.enabled_hypervisors
4020 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4022 # Filenames in each category must be unique
4023 all_files_set = files_all | files_mc | files_vm
4024 assert (len(all_files_set) ==
4025 sum(map(len, [files_all, files_mc, files_vm]))), \
4026 "Found file listed in more than one file list"
4028 # Optional files must be present in one other category
4029 assert all_files_set.issuperset(files_opt), \
4030 "Optional file not in a different required list"
4032 return (files_all, files_opt, files_mc, files_vm)
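# Illustrative note: callers are expected to unpack the four categories in
# order, as _RedistributeAncillaryFiles below does, e.g.
#   (files_all, _, files_mc, files_vm) = _ComputeAncillaryFiles(cluster, True)
# The variable names here simply mirror that caller and are not additional API.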
4035 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4036 """Distribute additional files which are part of the cluster configuration.
4038 ConfigWriter takes care of distributing the config and ssconf files, but
4039 there are more files which should be distributed to all nodes. This function
4040 makes sure those are copied.
4042 @param lu: calling logical unit
4043 @param additional_nodes: list of nodes not in the config to distribute to
4044 @type additional_vm: boolean
4045 @param additional_vm: whether the additional nodes are vm-capable or not
4048 # Gather target nodes
4049 cluster = lu.cfg.GetClusterInfo()
4050 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4052 online_nodes = lu.cfg.GetOnlineNodeList()
4053 vm_nodes = lu.cfg.GetVmCapableNodeList()
4055 if additional_nodes is not None:
4056 online_nodes.extend(additional_nodes)
if additional_vm:
4058 vm_nodes.extend(additional_nodes)
4060 # Never distribute to master node
4061 for nodelist in [online_nodes, vm_nodes]:
4062 if master_info.name in nodelist:
4063 nodelist.remove(master_info.name)
4066 (files_all, _, files_mc, files_vm) = \
4067 _ComputeAncillaryFiles(cluster, True)
4069 # Never re-distribute configuration file from here
4070 assert not (constants.CLUSTER_CONF_FILE in files_all or
4071 constants.CLUSTER_CONF_FILE in files_vm)
4072 assert not files_mc, "Master candidates not handled in this function"
filemap = [
4075 (online_nodes, files_all),
4076 (vm_nodes, files_vm),
]
4080 for (node_list, files) in filemap:
for fname in files:
4082 _UploadHelper(lu, node_list, fname)
4085 class LUClusterRedistConf(NoHooksLU):
4086 """Force the redistribution of cluster configuration.
4088 This is a very simple LU.
4093 def ExpandNames(self):
4094 self.needed_locks = {
4095 locking.LEVEL_NODE: locking.ALL_SET,
}
4097 self.share_locks[locking.LEVEL_NODE] = 1
4099 def Exec(self, feedback_fn):
4100 """Redistribute the configuration.
4103 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4104 _RedistributeAncillaryFiles(self)
4107 class LUClusterActivateMasterIp(NoHooksLU):
4108 """Activate the master IP on the master node.
4111 def Exec(self, feedback_fn):
4112 """Activate the master IP.
4115 master_params = self.cfg.GetMasterNetworkParameters()
4116 ems = self.cfg.GetUseExternalMipScript()
4117 result = self.rpc.call_node_activate_master_ip(master_params.name,
master_params, ems)
4119 result.Raise("Could not activate the master IP")
4122 class LUClusterDeactivateMasterIp(NoHooksLU):
4123 """Deactivate the master IP on the master node.
4126 def Exec(self, feedback_fn):
4127 """Deactivate the master IP.
4130 master_params = self.cfg.GetMasterNetworkParameters()
4131 ems = self.cfg.GetUseExternalMipScript()
4132 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
master_params, ems)
4134 result.Raise("Could not deactivate the master IP")
4137 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4138 """Sleep and poll for an instance's disk to sync.
4141 if not instance.disks or disks is not None and not disks:
return True
4144 disks = _ExpandCheckDisks(instance, disks)
4147 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4149 node = instance.primary_node
for dev in disks:
4152 lu.cfg.SetDiskID(dev, node)
4154 # TODO: Convert to utils.Retry
4157 degr_retries = 10 # in seconds, as we sleep 1 second each time
while True:
max_time = 0
done = True
4161 cumul_degraded = False
4162 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
4163 msg = rstats.fail_msg
if msg:
4165 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4168 raise errors.RemoteError("Can't contact node %s for mirror data,"
4169 " aborting." % node)
4172 rstats = rstats.payload
4174 for i, mstat in enumerate(rstats):
4176 lu.LogWarning("Can't compute data for node %s/%s",
4177 node, disks[i].iv_name)
4180 cumul_degraded = (cumul_degraded or
4181 (mstat.is_degraded and mstat.sync_percent is None))
4182 if mstat.sync_percent is not None:
done = False
4184 if mstat.estimated_time is not None:
4185 rem_time = ("%s remaining (estimated)" %
4186 utils.FormatSeconds(mstat.estimated_time))
4187 max_time = mstat.estimated_time
else:
4189 rem_time = "no time estimate"
4190 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4191 (disks[i].iv_name, mstat.sync_percent, rem_time))
4193 # if we're done but degraded, let's do a few small retries, to
4194 # make sure we see a stable and not transient situation; therefore
4195 # we force restart of the loop
4196 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4197 logging.info("Degraded disks found, %d retries left", degr_retries)
degr_retries -= 1
time.sleep(1)
continue
if done or oneshot:
break
4205 time.sleep(min(60, max_time))
if done:
4208 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4209 return not cumul_degraded
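# Hedged usage sketch (illustrative, not taken verbatim from this module):
# callers typically treat a False return value as a degraded/aborted sync,
# along the lines of
#   disk_abort = not _WaitForSync(lu, instance)
#   if disk_abort:
#     lu.LogWarning("Disk syncing has not returned a good status")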
4212 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
4213 """Check that mirrors are not degraded.
4215 The ldisk parameter, if True, will change the test from the
4216 is_degraded attribute (which represents overall non-ok status for
4217 the device(s)) to the ldisk (representing the local storage status).
4220 lu.cfg.SetDiskID(dev, node)
result = True
4224 if on_primary or dev.AssembleOnSecondary():
4225 rstats = lu.rpc.call_blockdev_find(node, dev)
4226 msg = rstats.fail_msg
if msg:
4228 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
result = False
4230 elif not rstats.payload:
4231 lu.LogWarning("Can't find disk on node %s", node)
result = False
else:
if ldisk:
4235 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
else:
4237 result = result and not rstats.payload.is_degraded
if dev.children:
4240 for child in dev.children:
4241 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
return result
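# Hedged usage sketch (illustrative only): disk-replacement style checks would
# pass ldisk=True to look only at the local storage status, e.g.
#   if not _CheckDiskConsistency(lu, dev, node_name, False, ldisk=True):
#     raise errors.OpExecError("Disk %s is degraded on %s" % (dev.iv_name,
#                                                             node_name))
# where node_name stands for whichever node is being inspected.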
4246 class LUOobCommand(NoHooksLU):
4247 """Logical unit for OOB handling.
4251 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4253 def ExpandNames(self):
4254 """Gather locks we need.
4257 if self.op.node_names:
4258 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4259 lock_names = self.op.node_names
else:
4261 lock_names = locking.ALL_SET
4263 self.needed_locks = {
4264 locking.LEVEL_NODE: lock_names,
4267 def CheckPrereq(self):
4268 """Check prerequisites.
4271 - the node exists in the configuration
4274 Any errors are signaled by raising errors.OpPrereqError.
4278 self.master_node = self.cfg.GetMasterNode()
4280 assert self.op.power_delay >= 0.0
4282 if self.op.node_names:
4283 if (self.op.command in self._SKIP_MASTER and
4284 self.master_node in self.op.node_names):
4285 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4286 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4288 if master_oob_handler:
4289 additional_text = ("run '%s %s %s' if you want to operate on the"
4290 " master regardless") % (master_oob_handler,
4294 additional_text = "it does not support out-of-band operations"
4296 raise errors.OpPrereqError(("Operating on the master node %s is not"
4297 " allowed for %s; %s") %
4298 (self.master_node, self.op.command,
4299 additional_text), errors.ECODE_INVAL)
4301 self.op.node_names = self.cfg.GetNodeList()
4302 if self.op.command in self._SKIP_MASTER:
4303 self.op.node_names.remove(self.master_node)
4305 if self.op.command in self._SKIP_MASTER:
4306 assert self.master_node not in self.op.node_names
4308 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
if node is None:
4310 raise errors.OpPrereqError("Node %s not found" % node_name,
errors.ECODE_NOENT)
4313 self.nodes.append(node)
4315 if (not self.op.ignore_status and
4316 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4317 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4318 " not marked offline") % node_name,
4321 def Exec(self, feedback_fn):
4322 """Execute OOB and return result if we expect any.
4325 master_node = self.master_node
ret = []
4328 for idx, node in enumerate(utils.NiceSort(self.nodes,
4329 key=lambda node: node.name)):
4330 node_entry = [(constants.RS_NORMAL, node.name)]
4331 ret.append(node_entry)
4333 oob_program = _SupportsOob(self.cfg, node)
if not oob_program:
4336 node_entry.append((constants.RS_UNAVAIL, None))
continue
4339 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4340 self.op.command, oob_program, node.name)
4341 result = self.rpc.call_run_oob(master_node, oob_program,
4342 self.op.command, node.name,
4346 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4347 node.name, result.fail_msg)
4348 node_entry.append((constants.RS_NODATA, None))
else:
try:
4351 self._CheckPayload(result)
4352 except errors.OpExecError, err:
4353 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4355 node_entry.append((constants.RS_NODATA, None))
4357 if self.op.command == constants.OOB_HEALTH:
4358 # For health we should log important events
4359 for item, status in result.payload:
4360 if status in [constants.OOB_STATUS_WARNING,
4361 constants.OOB_STATUS_CRITICAL]:
4362 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4363 item, node.name, status)
4365 if self.op.command == constants.OOB_POWER_ON:
node.powered = True
4367 elif self.op.command == constants.OOB_POWER_OFF:
4368 node.powered = False
4369 elif self.op.command == constants.OOB_POWER_STATUS:
4370 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4371 if powered != node.powered:
4372 logging.warning(("Recorded power state (%s) of node '%s' does not"
4373 " match actual power state (%s)"), node.powered,
4376 # For configuration changing commands we should update the node
4377 if self.op.command in (constants.OOB_POWER_ON,
4378 constants.OOB_POWER_OFF):
4379 self.cfg.Update(node, feedback_fn)
4381 node_entry.append((constants.RS_NORMAL, result.payload))
4383 if (self.op.command == constants.OOB_POWER_ON and
4384 idx < len(self.nodes) - 1):
4385 time.sleep(self.op.power_delay)
return ret
4389 def _CheckPayload(self, result):
4390 """Checks if the payload is valid.
4392 @param result: RPC result
4393 @raises errors.OpExecError: If payload is not valid
errs = []
4397 if self.op.command == constants.OOB_HEALTH:
4398 if not isinstance(result.payload, list):
4399 errs.append("command 'health' is expected to return a list but got %s" %
4400 type(result.payload))
else:
4402 for item, status in result.payload:
4403 if status not in constants.OOB_STATUSES:
4404 errs.append("health item '%s' has invalid status '%s'" %
4407 if self.op.command == constants.OOB_POWER_STATUS:
4408 if not isinstance(result.payload, dict):
4409 errs.append("power-status is expected to return a dict but got %s" %
4410 type(result.payload))
4412 if self.op.command in [
4413 constants.OOB_POWER_ON,
4414 constants.OOB_POWER_OFF,
4415 constants.OOB_POWER_CYCLE,
4417 if result.payload is not None:
4418 errs.append("%s is expected to not return payload but got '%s'" %
4419 (self.op.command, result.payload))
if errs:
4422 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4423 utils.CommaJoin(errs))
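# Illustrative summary of the payload shapes validated above (values are
# examples only):
#   health:       a list of (item, status) pairs, status in constants.OOB_STATUSES
#   power-status: a dict such as {constants.OOB_POWER_STATUS_POWERED: True}
#   power-on/off/cycle: no payload (None)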
4426 class _OsQuery(_QueryBase):
4427 FIELDS = query.OS_FIELDS
4429 def ExpandNames(self, lu):
4430 # Lock all nodes in shared mode
4431 # Temporary removal of locks, should be reverted later
4432 # TODO: reintroduce locks when they are lighter-weight
4433 lu.needed_locks = {}
4434 #self.share_locks[locking.LEVEL_NODE] = 1
4435 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4437 # The following variables interact with _QueryBase._GetNames
if self.names:
4439 self.wanted = self.names
else:
4441 self.wanted = locking.ALL_SET
4443 self.do_locking = self.use_locking
4445 def DeclareLocks(self, lu, level):
pass
@staticmethod
4449 def _DiagnoseByOS(rlist):
4450 """Remaps a per-node return list into a per-os per-node dictionary
4452 @param rlist: a map with node names as keys and OS objects as values
4455 @return: a dictionary with osnames as keys and as value another
4456 map, with nodes as keys and tuples of (path, status, diagnose,
4457 variants, parameters, api_versions) as values, eg::
4459 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4460 (/srv/..., False, "invalid api")],
4461 "node2": [(/srv/..., True, "", [], [])]}
4466 # we build here the list of nodes that didn't fail the RPC (at RPC
4467 # level), so that nodes with a non-responding node daemon don't
4468 # make all OSes invalid
4469 good_nodes = [node_name for node_name in rlist
4470 if not rlist[node_name].fail_msg]
4471 for node_name, nr in rlist.items():
4472 if nr.fail_msg or not nr.payload:
continue
4474 for (name, path, status, diagnose, variants,
4475 params, api_versions) in nr.payload:
4476 if name not in all_os:
4477 # build a list of nodes for this os containing empty lists
4478 # for each node in node_list
all_os[name] = {}
4480 for nname in good_nodes:
4481 all_os[name][nname] = []
4482 # convert params from [name, help] to (name, help)
4483 params = [tuple(v) for v in params]
4484 all_os[name][node_name].append((path, status, diagnose,
4485 variants, params, api_versions))
return all_os
4488 def _GetQueryData(self, lu):
4489 """Computes the list of nodes and their attributes.
4492 # Locking is not used
4493 assert not (compat.any(lu.glm.is_owned(level)
4494 for level in locking.LEVELS
4495 if level != locking.LEVEL_CLUSTER) or
4496 self.do_locking or self.use_locking)
4498 valid_nodes = [node.name
4499 for node in lu.cfg.GetAllNodesInfo().values()
4500 if not node.offline and node.vm_capable]
4501 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4502 cluster = lu.cfg.GetClusterInfo()
data = {}
4506 for (os_name, os_data) in pol.items():
4507 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4508 hidden=(os_name in cluster.hidden_os),
4509 blacklisted=(os_name in cluster.blacklisted_os))
variants = set()
parameters = set()
4513 api_versions = set()
4515 for idx, osl in enumerate(os_data.values()):
4516 info.valid = bool(info.valid and osl and osl[0][1])
4520 (node_variants, node_params, node_api) = osl[0][3:6]
4523 variants.update(node_variants)
4524 parameters.update(node_params)
4525 api_versions.update(node_api)
4527 # Filter out inconsistent values
4528 variants.intersection_update(node_variants)
4529 parameters.intersection_update(node_params)
4530 api_versions.intersection_update(node_api)
4532 info.variants = list(variants)
4533 info.parameters = list(parameters)
4534 info.api_versions = list(api_versions)
4536 data[os_name] = info
4538 # Prepare data in requested order
4539 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
if name in data]
4543 class LUOsDiagnose(NoHooksLU):
4544 """Logical unit for OS diagnose/query.
4550 def _BuildFilter(fields, names):
4551 """Builds a filter for querying OSes.
4554 name_filter = qlang.MakeSimpleFilter("name", names)
4556 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4557 # respective field is not requested
4558 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4559 for fname in ["hidden", "blacklisted"]
4560 if fname not in fields]
4561 if "valid" not in fields:
4562 status_filter.append([qlang.OP_TRUE, "valid"])
if status_filter:
4565 status_filter.insert(0, qlang.OP_AND)
else:
4567 status_filter = None
4569 if name_filter and status_filter:
4570 return [qlang.OP_AND, name_filter, status_filter]
elif name_filter:
return name_filter
else:
4574 return status_filter
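# Hedged example of the filter built above, assuming no names are given (so
# qlang.MakeSimpleFilter yields a false value) and only "name" is requested:
#   [qlang.OP_AND,
#    [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
#    [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
#    [qlang.OP_TRUE, "valid"]]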
4576 def CheckArguments(self):
4577 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4578 self.op.output_fields, False)
4580 def ExpandNames(self):
4581 self.oq.ExpandNames(self)
4583 def Exec(self, feedback_fn):
4584 return self.oq.OldStyleQuery(self)
4587 class LUNodeRemove(LogicalUnit):
4588 """Logical unit for removing a node.
4591 HPATH = "node-remove"
4592 HTYPE = constants.HTYPE_NODE
4594 def BuildHooksEnv(self):
4597 This doesn't run on the target node in the pre phase as a failed
4598 node would then be impossible to remove.
4602 "OP_TARGET": self.op.node_name,
4603 "NODE_NAME": self.op.node_name,
4606 def BuildHooksNodes(self):
4607 """Build hooks nodes.
4610 all_nodes = self.cfg.GetNodeList()
try:
4612 all_nodes.remove(self.op.node_name)
except ValueError:
4614 logging.warning("Node '%s', which is about to be removed, was not found"
4615 " in the list of all nodes", self.op.node_name)
4616 return (all_nodes, all_nodes)
4618 def CheckPrereq(self):
4619 """Check prerequisites.
4622 - the node exists in the configuration
4623 - it does not have primary or secondary instances
4624 - it's not the master
4626 Any errors are signaled by raising errors.OpPrereqError.
4629 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4630 node = self.cfg.GetNodeInfo(self.op.node_name)
4631 assert node is not None
4633 masternode = self.cfg.GetMasterNode()
4634 if node.name == masternode:
4635 raise errors.OpPrereqError("Node is the master node, failover to another"
4636 " node is required", errors.ECODE_INVAL)
4638 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4639 if node.name in instance.all_nodes:
4640 raise errors.OpPrereqError("Instance %s is still running on the node,"
4641 " please remove first" % instance_name,
4643 self.op.node_name = node.name
4646 def Exec(self, feedback_fn):
4647 """Removes the node from the cluster.
4651 logging.info("Stopping the node daemon and removing configs from node %s",
4654 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4656 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
4659 # Promote nodes to master candidate as needed
4660 _AdjustCandidatePool(self, exceptions=[node.name])
4661 self.context.RemoveNode(node.name)
4663 # Run post hooks on the node before it's removed
4664 _RunPostHook(self, node.name)
4666 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4667 msg = result.fail_msg
if msg:
4669 self.LogWarning("Errors encountered on the remote node while leaving"
4670 " the cluster: %s", msg)
4672 # Remove node from our /etc/hosts
4673 if self.cfg.GetClusterInfo().modify_etc_hosts:
4674 master_node = self.cfg.GetMasterNode()
4675 result = self.rpc.call_etc_hosts_modify(master_node,
4676 constants.ETC_HOSTS_REMOVE,
4678 result.Raise("Can't update hosts file with new host data")
4679 _RedistributeAncillaryFiles(self)
4682 class _NodeQuery(_QueryBase):
4683 FIELDS = query.NODE_FIELDS
4685 def ExpandNames(self, lu):
4686 lu.needed_locks = {}
4687 lu.share_locks = _ShareAll()
if self.names:
4690 self.wanted = _GetWantedNodes(lu, self.names)
else:
4692 self.wanted = locking.ALL_SET
4694 self.do_locking = (self.use_locking and
4695 query.NQ_LIVE in self.requested_data)
4698 # If any non-static field is requested we need to lock the nodes
4699 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4701 def DeclareLocks(self, lu, level):
4704 def _GetQueryData(self, lu):
4705 """Computes the list of nodes and their attributes.
4708 all_info = lu.cfg.GetAllNodesInfo()
4710 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4712 # Gather data as requested
4713 if query.NQ_LIVE in self.requested_data:
4714 # filter out non-vm_capable nodes
4715 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4717 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
4718 [lu.cfg.GetHypervisorType()])
4719 live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
4720 for (name, nresult) in node_data.items()
4721 if not nresult.fail_msg and nresult.payload)
else:
live_data = None
4725 if query.NQ_INST in self.requested_data:
4726 node_to_primary = dict([(name, set()) for name in nodenames])
4727 node_to_secondary = dict([(name, set()) for name in nodenames])
4729 inst_data = lu.cfg.GetAllInstancesInfo()
4731 for inst in inst_data.values():
4732 if inst.primary_node in node_to_primary:
4733 node_to_primary[inst.primary_node].add(inst.name)
4734 for secnode in inst.secondary_nodes:
4735 if secnode in node_to_secondary:
4736 node_to_secondary[secnode].add(inst.name)
4738 node_to_primary = None
4739 node_to_secondary = None
4741 if query.NQ_OOB in self.requested_data:
4742 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4743 for name, node in all_info.iteritems())
4747 if query.NQ_GROUP in self.requested_data:
4748 groups = lu.cfg.GetAllNodeGroupsInfo()
4752 return query.NodeQueryData([all_info[name] for name in nodenames],
4753 live_data, lu.cfg.GetMasterNode(),
4754 node_to_primary, node_to_secondary, groups,
4755 oob_support, lu.cfg.GetClusterInfo())
4758 class LUNodeQuery(NoHooksLU):
4759 """Logical unit for querying nodes.
4762 # pylint: disable=W0142
4765 def CheckArguments(self):
4766 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4767 self.op.output_fields, self.op.use_locking)
4769 def ExpandNames(self):
4770 self.nq.ExpandNames(self)
4772 def DeclareLocks(self, level):
4773 self.nq.DeclareLocks(self, level)
4775 def Exec(self, feedback_fn):
4776 return self.nq.OldStyleQuery(self)
4779 class LUNodeQueryvols(NoHooksLU):
4780 """Logical unit for getting volumes on node(s).
4784 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4785 _FIELDS_STATIC = utils.FieldSet("node")
4787 def CheckArguments(self):
4788 _CheckOutputFields(static=self._FIELDS_STATIC,
4789 dynamic=self._FIELDS_DYNAMIC,
4790 selected=self.op.output_fields)
4792 def ExpandNames(self):
4793 self.share_locks = _ShareAll()
4794 self.needed_locks = {}
4796 if not self.op.nodes:
4797 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4799 self.needed_locks[locking.LEVEL_NODE] = \
4800 _GetWantedNodes(self, self.op.nodes)
4802 def Exec(self, feedback_fn):
4803 """Computes the list of nodes and their attributes.
4806 nodenames = self.owned_locks(locking.LEVEL_NODE)
4807 volumes = self.rpc.call_node_volumes(nodenames)
4809 ilist = self.cfg.GetAllInstancesInfo()
4810 vol2inst = _MapInstanceDisksToNodes(ilist.values())
4813 for node in nodenames:
4814 nresult = volumes[node]
4817 msg = nresult.fail_msg
if msg:
4819 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
continue
4822 node_vols = sorted(nresult.payload,
4823 key=operator.itemgetter("dev"))
4825 for vol in node_vols:
4827 for field in self.op.output_fields:
4830 elif field == "phys":
4834 elif field == "name":
4836 elif field == "size":
4837 val = int(float(vol["size"]))
4838 elif field == "instance":
4839 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
else:
4841 raise errors.ParameterError(field)
4842 node_output.append(str(val))
4844 output.append(node_output)
return output
4849 class LUNodeQueryStorage(NoHooksLU):
4850 """Logical unit for getting information on storage units on node(s).
4853 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4856 def CheckArguments(self):
4857 _CheckOutputFields(static=self._FIELDS_STATIC,
4858 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4859 selected=self.op.output_fields)
4861 def ExpandNames(self):
4862 self.share_locks = _ShareAll()
4863 self.needed_locks = {}
if self.op.nodes:
4866 self.needed_locks[locking.LEVEL_NODE] = \
4867 _GetWantedNodes(self, self.op.nodes)
else:
4869 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4871 def Exec(self, feedback_fn):
4872 """Computes the list of nodes and their attributes.
4875 self.nodes = self.owned_locks(locking.LEVEL_NODE)
4877 # Always get name to sort by
4878 if constants.SF_NAME in self.op.output_fields:
4879 fields = self.op.output_fields[:]
4881 fields = [constants.SF_NAME] + self.op.output_fields
4883 # Never ask for node or type as it's only known to the LU
4884 for extra in [constants.SF_NODE, constants.SF_TYPE]:
4885 while extra in fields:
4886 fields.remove(extra)
4888 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4889 name_idx = field_idx[constants.SF_NAME]
4891 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4892 data = self.rpc.call_storage_list(self.nodes,
4893 self.op.storage_type, st_args,
4894 self.op.name, fields)
4898 for node in utils.NiceSort(self.nodes):
4899 nresult = data[node]
4903 msg = nresult.fail_msg
if msg:
4905 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
continue
4908 rows = dict([(row[name_idx], row) for row in nresult.payload])
4910 for name in utils.NiceSort(rows.keys()):
4915 for field in self.op.output_fields:
4916 if field == constants.SF_NODE:
4918 elif field == constants.SF_TYPE:
4919 val = self.op.storage_type
4920 elif field in field_idx:
4921 val = row[field_idx[field]]
4923 raise errors.ParameterError(field)
4932 class _InstanceQuery(_QueryBase):
4933 FIELDS = query.INSTANCE_FIELDS
4935 def ExpandNames(self, lu):
4936 lu.needed_locks = {}
4937 lu.share_locks = _ShareAll()
if self.names:
4940 self.wanted = _GetWantedInstances(lu, self.names)
else:
4942 self.wanted = locking.ALL_SET
4944 self.do_locking = (self.use_locking and
4945 query.IQ_LIVE in self.requested_data)
4947 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4948 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
4949 lu.needed_locks[locking.LEVEL_NODE] = []
4950 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4952 self.do_grouplocks = (self.do_locking and
4953 query.IQ_NODES in self.requested_data)
4955 def DeclareLocks(self, lu, level):
4957 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
4958 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
4960 # Lock all groups used by instances optimistically; this requires going
4961 # via the node before it's locked, requiring verification later on
4962 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
4964 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
4965 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
4966 elif level == locking.LEVEL_NODE:
4967 lu._LockInstancesNodes() # pylint: disable=W0212
4970 def _CheckGroupLocks(lu):
4971 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
4972 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
4974 # Check if node groups for locked instances are still correct
4975 for instance_name in owned_instances:
4976 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
4978 def _GetQueryData(self, lu):
4979 """Computes the list of instances and their attributes.
4982 if self.do_grouplocks:
4983 self._CheckGroupLocks(lu)
4985 cluster = lu.cfg.GetClusterInfo()
4986 all_info = lu.cfg.GetAllInstancesInfo()
4988 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4990 instance_list = [all_info[name] for name in instance_names]
4991 nodes = frozenset(itertools.chain(*(inst.all_nodes
4992 for inst in instance_list)))
4993 hv_list = list(set([inst.hypervisor for inst in instance_list]))
4996 wrongnode_inst = set()
4998 # Gather data as requested
4999 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5001 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5003 result = node_data[name]
5005 # offline nodes will be in both lists
5006 assert result.fail_msg
5007 offline_nodes.append(name)
if result.fail_msg:
5009 bad_nodes.append(name)
5010 elif result.payload:
5011 for inst in result.payload:
5012 if inst in all_info:
5013 if all_info[inst].primary_node == name:
5014 live_data.update(result.payload)
5016 wrongnode_inst.add(inst)
5018 # orphan instance; we don't list it here as we don't
5019 # handle this case yet in the output of instance listing
5020 logging.warning("Orphan instance '%s' found on node %s",
5022 # else no instance is alive
5026 if query.IQ_DISKUSAGE in self.requested_data:
5027 disk_usage = dict((inst.name,
5028 _ComputeDiskSize(inst.disk_template,
5029 [{constants.IDISK_SIZE: disk.size}
5030 for disk in inst.disks]))
5031 for inst in instance_list)
else:
disk_usage = None
5035 if query.IQ_CONSOLE in self.requested_data:
consinfo = {}
5037 for inst in instance_list:
5038 if inst.name in live_data:
5039 # Instance is running
5040 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5042 consinfo[inst.name] = None
5043 assert set(consinfo.keys()) == set(instance_names)
5047 if query.IQ_NODES in self.requested_data:
5048 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5050 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5051 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5052 for uuid in set(map(operator.attrgetter("group"),
5058 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5059 disk_usage, offline_nodes, bad_nodes,
5060 live_data, wrongnode_inst, consinfo,
nodes, groups)
5064 class LUQuery(NoHooksLU):
5065 """Query for resources/items of a certain kind.
5068 # pylint: disable=W0142
5071 def CheckArguments(self):
5072 qcls = _GetQueryImplementation(self.op.what)
5074 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5076 def ExpandNames(self):
5077 self.impl.ExpandNames(self)
5079 def DeclareLocks(self, level):
5080 self.impl.DeclareLocks(self, level)
5082 def Exec(self, feedback_fn):
5083 return self.impl.NewStyleQuery(self)
5086 class LUQueryFields(NoHooksLU):
5087 """Query for resources/items of a certain kind.
5090 # pylint: disable=W0142
5093 def CheckArguments(self):
5094 self.qcls = _GetQueryImplementation(self.op.what)
5096 def ExpandNames(self):
5097 self.needed_locks = {}
5099 def Exec(self, feedback_fn):
5100 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5103 class LUNodeModifyStorage(NoHooksLU):
5104 """Logical unit for modifying a storage volume on a node.
5109 def CheckArguments(self):
5110 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5112 storage_type = self.op.storage_type
try:
5115 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
except KeyError:
5117 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5118 " modified" % storage_type,
5121 diff = set(self.op.changes.keys()) - modifiable
5123 raise errors.OpPrereqError("The following fields can not be modified for"
5124 " storage units of type '%s': %r" %
5125 (storage_type, list(diff)),
5128 def ExpandNames(self):
5129 self.needed_locks = {
5130 locking.LEVEL_NODE: self.op.node_name,
5133 def Exec(self, feedback_fn):
5134 """Computes the list of nodes and their attributes.
5137 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5138 result = self.rpc.call_storage_modify(self.op.node_name,
5139 self.op.storage_type, st_args,
5140 self.op.name, self.op.changes)
5141 result.Raise("Failed to modify storage unit '%s' on %s" %
5142 (self.op.name, self.op.node_name))
5145 class LUNodeAdd(LogicalUnit):
5146 """Logical unit for adding node to the cluster.
5150 HTYPE = constants.HTYPE_NODE
5151 _NFLAGS = ["master_capable", "vm_capable"]
5153 def CheckArguments(self):
5154 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5155 # validate/normalize the node name
5156 self.hostname = netutils.GetHostname(name=self.op.node_name,
5157 family=self.primary_ip_family)
5158 self.op.node_name = self.hostname.name
5160 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5161 raise errors.OpPrereqError("Cannot readd the master node",
5164 if self.op.readd and self.op.group:
5165 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5166 " being readded", errors.ECODE_INVAL)
5168 def BuildHooksEnv(self):
5171 This will run on all nodes before, and on all nodes + the new node after.
5175 "OP_TARGET": self.op.node_name,
5176 "NODE_NAME": self.op.node_name,
5177 "NODE_PIP": self.op.primary_ip,
5178 "NODE_SIP": self.op.secondary_ip,
5179 "MASTER_CAPABLE": str(self.op.master_capable),
5180 "VM_CAPABLE": str(self.op.vm_capable),
5183 def BuildHooksNodes(self):
5184 """Build hooks nodes.
5187 # Exclude added node
5188 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5189 post_nodes = pre_nodes + [self.op.node_name, ]
5191 return (pre_nodes, post_nodes)
5193 def CheckPrereq(self):
5194 """Check prerequisites.
5197 - the new node is not already in the config
5199 - its parameters (single/dual homed) matches the cluster
5201 Any errors are signaled by raising errors.OpPrereqError.
5205 hostname = self.hostname
5206 node = hostname.name
5207 primary_ip = self.op.primary_ip = hostname.ip
5208 if self.op.secondary_ip is None:
5209 if self.primary_ip_family == netutils.IP6Address.family:
5210 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5211 " IPv4 address must be given as secondary",
5213 self.op.secondary_ip = primary_ip
5215 secondary_ip = self.op.secondary_ip
5216 if not netutils.IP4Address.IsValid(secondary_ip):
5217 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5218 " address" % secondary_ip, errors.ECODE_INVAL)
5220 node_list = cfg.GetNodeList()
5221 if not self.op.readd and node in node_list:
5222 raise errors.OpPrereqError("Node %s is already in the configuration" %
5223 node, errors.ECODE_EXISTS)
5224 elif self.op.readd and node not in node_list:
5225 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5228 self.changed_primary_ip = False
5230 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5231 if self.op.readd and node == existing_node_name:
5232 if existing_node.secondary_ip != secondary_ip:
5233 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5234 " address configuration as before",
5236 if existing_node.primary_ip != primary_ip:
5237 self.changed_primary_ip = True
5241 if (existing_node.primary_ip == primary_ip or
5242 existing_node.secondary_ip == primary_ip or
5243 existing_node.primary_ip == secondary_ip or
5244 existing_node.secondary_ip == secondary_ip):
5245 raise errors.OpPrereqError("New node ip address(es) conflict with"
5246 " existing node %s" % existing_node.name,
5247 errors.ECODE_NOTUNIQUE)
5249 # After this 'if' block, None is no longer a valid value for the
5250 # _capable op attributes
5252 old_node = self.cfg.GetNodeInfo(node)
5253 assert old_node is not None, "Can't retrieve locked node %s" % node
5254 for attr in self._NFLAGS:
5255 if getattr(self.op, attr) is None:
5256 setattr(self.op, attr, getattr(old_node, attr))
5258 for attr in self._NFLAGS:
5259 if getattr(self.op, attr) is None:
5260 setattr(self.op, attr, True)
5262 if self.op.readd and not self.op.vm_capable:
5263 pri, sec = cfg.GetNodeInstances(node)
5265 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5266 " flag set to false, but it already holds"
5267 " instances" % node,
5270 # check that the type of the node (single versus dual homed) is the
5271 # same as for the master
5272 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5273 master_singlehomed = myself.secondary_ip == myself.primary_ip
5274 newbie_singlehomed = secondary_ip == primary_ip
5275 if master_singlehomed != newbie_singlehomed:
5276 if master_singlehomed:
5277 raise errors.OpPrereqError("The master has no secondary ip but the"
5278 " new node has one",
5281 raise errors.OpPrereqError("The master has a secondary ip but the"
5282 " new node doesn't have one",
5285 # checks reachability
5286 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5287 raise errors.OpPrereqError("Node not reachable by ping",
5288 errors.ECODE_ENVIRON)
5290 if not newbie_singlehomed:
5291 # check reachability from my secondary ip to newbie's secondary ip
5292 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5293 source=myself.secondary_ip):
5294 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5295 " based ping to node daemon port",
5296 errors.ECODE_ENVIRON)
5303 if self.op.master_capable:
5304 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5306 self.master_candidate = False
5309 self.new_node = old_node
5311 node_group = cfg.LookupNodeGroup(self.op.group)
5312 self.new_node = objects.Node(name=node,
5313 primary_ip=primary_ip,
5314 secondary_ip=secondary_ip,
5315 master_candidate=self.master_candidate,
5316 offline=False, drained=False,
5319 if self.op.ndparams:
5320 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5322 def Exec(self, feedback_fn):
5323 """Adds the new node to the cluster.
5326 new_node = self.new_node
5327 node = new_node.name
5329 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5332 # We are adding a new node, so we assume it's powered
5333 new_node.powered = True
5335 # for re-adds, reset the offline/drained/master-candidate flags;
5336 # we need to reset here, otherwise offline would prevent RPC calls
5337 # later in the procedure; this also means that if the re-add
5338 # fails, we are left with a non-offlined, broken node
if self.op.readd:
5340 new_node.drained = new_node.offline = False # pylint: disable=W0201
5341 self.LogInfo("Readding a node, the offline/drained flags were reset")
5342 # if we demote the node, we do cleanup later in the procedure
5343 new_node.master_candidate = self.master_candidate
5344 if self.changed_primary_ip:
5345 new_node.primary_ip = self.op.primary_ip
5347 # copy the master/vm_capable flags
5348 for attr in self._NFLAGS:
5349 setattr(new_node, attr, getattr(self.op, attr))
5351 # notify the user about any possible mc promotion
5352 if new_node.master_candidate:
5353 self.LogInfo("Node will be a master candidate")
5355 if self.op.ndparams:
5356 new_node.ndparams = self.op.ndparams
5358 new_node.ndparams = {}
5360 # check connectivity
5361 result = self.rpc.call_version([node])[node]
5362 result.Raise("Can't get version information from node %s" % node)
5363 if constants.PROTOCOL_VERSION == result.payload:
5364 logging.info("Communication to node %s fine, sw version %s match",
5365 node, result.payload)
else:
5367 raise errors.OpExecError("Version mismatch master version %s,"
5368 " node version %s" %
5369 (constants.PROTOCOL_VERSION, result.payload))
5371 # Add node to our /etc/hosts, and add key to known_hosts
5372 if self.cfg.GetClusterInfo().modify_etc_hosts:
5373 master_node = self.cfg.GetMasterNode()
5374 result = self.rpc.call_etc_hosts_modify(master_node,
5375 constants.ETC_HOSTS_ADD,
5378 result.Raise("Can't update hosts file with new host data")
5380 if new_node.secondary_ip != new_node.primary_ip:
5381 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5384 node_verify_list = [self.cfg.GetMasterNode()]
5385 node_verify_param = {
5386 constants.NV_NODELIST: ([node], {}),
5387 # TODO: do a node-net-test as well?
5390 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5391 self.cfg.GetClusterName())
5392 for verifier in node_verify_list:
5393 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5394 nl_payload = result[verifier].payload[constants.NV_NODELIST]
if nl_payload:
5396 for failed in nl_payload:
5397 feedback_fn("ssh/hostname verification failed"
5398 " (checking from %s): %s" %
5399 (verifier, nl_payload[failed]))
5400 raise errors.OpExecError("ssh/hostname verification failed")
5403 _RedistributeAncillaryFiles(self)
5404 self.context.ReaddNode(new_node)
5405 # make sure we redistribute the config
5406 self.cfg.Update(new_node, feedback_fn)
5407 # and make sure the new node will not have old files around
5408 if not new_node.master_candidate:
5409 result = self.rpc.call_node_demote_from_mc(new_node.name)
5410 msg = result.fail_msg
5412 self.LogWarning("Node failed to demote itself from master"
5413 " candidate status: %s" % msg)
5415 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5416 additional_vm=self.op.vm_capable)
5417 self.context.AddNode(new_node, self.proc.GetECId())
5420 class LUNodeSetParams(LogicalUnit):
5421 """Modifies the parameters of a node.
5423 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5424 to the node role (as _ROLE_*)
5425 @cvar _R2F: a dictionary from node role to tuples of flags
5426 @cvar _FLAGS: a list of attribute names corresponding to the flags
5429 HPATH = "node-modify"
5430 HTYPE = constants.HTYPE_NODE
5432 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
_F2R = {
5434 (True, False, False): _ROLE_CANDIDATE,
5435 (False, True, False): _ROLE_DRAINED,
5436 (False, False, True): _ROLE_OFFLINE,
5437 (False, False, False): _ROLE_REGULAR,
}
5439 _R2F = dict((v, k) for k, v in _F2R.items())
5440 _FLAGS = ["master_candidate", "drained", "offline"]
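# Illustrative reading of the tables above: a node with master_candidate=False,
# drained=False, offline=True maps via _F2R[(False, False, True)] to
# _ROLE_OFFLINE, and _R2F[_ROLE_OFFLINE] gives back the (False, False, True)
# flag tuple in _FLAGS order.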
5442 def CheckArguments(self):
5443 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5444 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5445 self.op.master_capable, self.op.vm_capable,
5446 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5448 if all_mods.count(None) == len(all_mods):
5449 raise errors.OpPrereqError("Please pass at least one modification",
5451 if all_mods.count(True) > 1:
5452 raise errors.OpPrereqError("Can't set the node into more than one"
5453 " state at the same time",
5456 # Boolean value that tells us whether we might be demoting from MC
5457 self.might_demote = (self.op.master_candidate == False or
5458 self.op.offline == True or
5459 self.op.drained == True or
5460 self.op.master_capable == False)
5462 if self.op.secondary_ip:
5463 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5464 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5465 " address" % self.op.secondary_ip,
5468 self.lock_all = self.op.auto_promote and self.might_demote
5469 self.lock_instances = self.op.secondary_ip is not None
5471 def _InstanceFilter(self, instance):
5472 """Filter for getting affected instances.
5475 return (instance.disk_template in constants.DTS_INT_MIRROR and
5476 self.op.node_name in instance.all_nodes)
5478 def ExpandNames(self):
if self.lock_all:
5480 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
else:
5482 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5484 # Since modifying a node can have severe effects on currently running
5485 # operations the resource lock is at least acquired in shared mode
5486 self.needed_locks[locking.LEVEL_NODE_RES] = \
5487 self.needed_locks[locking.LEVEL_NODE]
5489 # Get node resource and instance locks in shared mode; they are not used
5490 # for anything but read-only access
5491 self.share_locks[locking.LEVEL_NODE_RES] = 1
5492 self.share_locks[locking.LEVEL_INSTANCE] = 1
5494 if self.lock_instances:
5495 self.needed_locks[locking.LEVEL_INSTANCE] = \
5496 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5498 def BuildHooksEnv(self):
5501 This runs on the master node.
5505 "OP_TARGET": self.op.node_name,
5506 "MASTER_CANDIDATE": str(self.op.master_candidate),
5507 "OFFLINE": str(self.op.offline),
5508 "DRAINED": str(self.op.drained),
5509 "MASTER_CAPABLE": str(self.op.master_capable),
5510 "VM_CAPABLE": str(self.op.vm_capable),
5513 def BuildHooksNodes(self):
5514 """Build hooks nodes.
5517 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5520 def CheckPrereq(self):
5521 """Check prerequisites.
5523 This only checks the instance list against the existing names.
5526 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5528 if self.lock_instances:
5529 affected_instances = \
5530 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5532 # Verify instance locks
5533 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5534 wanted_instances = frozenset(affected_instances.keys())
5535 if wanted_instances - owned_instances:
5536 raise errors.OpPrereqError("Instances affected by changing node %s's"
5537 " secondary IP address have changed since"
5538 " locks were acquired, wanted '%s', have"
5539 " '%s'; retry the operation" %
5541 utils.CommaJoin(wanted_instances),
5542 utils.CommaJoin(owned_instances)),
5545 affected_instances = None
5547 if (self.op.master_candidate is not None or
5548 self.op.drained is not None or
5549 self.op.offline is not None):
5550 # we can't change the master's node flags
5551 if self.op.node_name == self.cfg.GetMasterNode():
5552 raise errors.OpPrereqError("The master role can be changed"
5553 " only via master-failover",
5556 if self.op.master_candidate and not node.master_capable:
5557 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5558 " it a master candidate" % node.name,
5561 if self.op.vm_capable == False:
5562 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5564 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5565 " the vm_capable flag" % node.name,
5568 if node.master_candidate and self.might_demote and not self.lock_all:
5569 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5570 # check if after removing the current node, we're missing master
5572 (mc_remaining, mc_should, _) = \
5573 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5574 if mc_remaining < mc_should:
5575 raise errors.OpPrereqError("Not enough master candidates, please"
5576 " pass auto promote option to allow"
5577 " promotion", errors.ECODE_STATE)
5579 self.old_flags = old_flags = (node.master_candidate,
5580 node.drained, node.offline)
5581 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5582 self.old_role = old_role = self._F2R[old_flags]
5584 # Check for ineffective changes
5585 for attr in self._FLAGS:
5586 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5587 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5588 setattr(self.op, attr, None)
5590 # Past this point, any flag change to False means a transition
5591 # away from the respective state, as only real changes are kept
5593 # TODO: We might query the real power state if it supports OOB
5594 if _SupportsOob(self.cfg, node):
5595 if self.op.offline is False and not (node.powered or
5596 self.op.powered == True):
5597 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5598 " offline status can be reset") %
5600 elif self.op.powered is not None:
5601 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5602 " as it does not support out-of-band"
5603 " handling") % self.op.node_name)
5605 # If we're being deofflined/drained, we'll MC ourself if needed
5606 if (self.op.drained == False or self.op.offline == False or
5607 (self.op.master_capable and not node.master_capable)):
5608 if _DecideSelfPromotion(self):
5609 self.op.master_candidate = True
5610 self.LogInfo("Auto-promoting node to master candidate")
5612 # If we're no longer master capable, we'll demote ourselves from MC
5613 if self.op.master_capable == False and node.master_candidate:
5614 self.LogInfo("Demoting from master candidate")
5615 self.op.master_candidate = False
5618 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5619 if self.op.master_candidate:
5620 new_role = self._ROLE_CANDIDATE
5621 elif self.op.drained:
5622 new_role = self._ROLE_DRAINED
5623 elif self.op.offline:
5624 new_role = self._ROLE_OFFLINE
5625 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5626 # False is still in new flags, which means we're un-setting (the
5628 new_role = self._ROLE_REGULAR
5629 else: # no new flags, nothing, keep old role
5632 self.new_role = new_role
5634 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5635 # Trying to transition out of offline status
5636 # TODO: Use standard RPC runner, but make sure it works when the node is
5637 # still marked offline
5638 result = rpc.BootstrapRunner().call_version([node.name])[node.name]
5640 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5641 " to report its version: %s" %
5642 (node.name, result.fail_msg),
5645 self.LogWarning("Transitioning node from offline to online state"
5646 " without using re-add. Please make sure the node"
5649 if self.op.secondary_ip:
5650 # Ok even without locking, because this can't be changed by any LU
5651 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5652 master_singlehomed = master.secondary_ip == master.primary_ip
5653 if master_singlehomed and self.op.secondary_ip:
5654 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5655 " homed cluster", errors.ECODE_INVAL)
5657 assert not (frozenset(affected_instances) -
5658 self.owned_locks(locking.LEVEL_INSTANCE))
5661 if affected_instances:
5662 raise errors.OpPrereqError("Cannot change secondary IP address:"
5663 " offline node has instances (%s)"
5664 " configured to use it" %
5665 utils.CommaJoin(affected_instances.keys()))
5667 # On online nodes, check that no instances are running, and that
5668 # the node has the new ip and we can reach it.
5669 for instance in affected_instances.values():
5670 _CheckInstanceState(self, instance, INSTANCE_DOWN,
5671 msg="cannot change secondary ip")
5673 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5674 if master.name != node.name:
5675 # check reachability from master secondary ip to new secondary ip
5676 if not netutils.TcpPing(self.op.secondary_ip,
5677 constants.DEFAULT_NODED_PORT,
5678 source=master.secondary_ip):
5679 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5680 " based ping to node daemon port",
5681 errors.ECODE_ENVIRON)
5683 if self.op.ndparams:
5684 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5685 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5686 self.new_ndparams = new_ndparams
5688 if self.op.hv_state:
5689 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
5690 self.node.hv_state_static)
5692 if self.op.disk_state:
5693 self.new_disk_state = \
5694 _MergeAndVerifyDiskState(self.op.disk_state,
5695 self.node.disk_state_static)
5697 def Exec(self, feedback_fn):
5702 old_role = self.old_role
5703 new_role = self.new_role
5707 if self.op.ndparams:
5708 node.ndparams = self.new_ndparams
5710 if self.op.powered is not None:
5711 node.powered = self.op.powered
5713 if self.op.hv_state:
5714 node.hv_state_static = self.new_hv_state
5716 if self.op.disk_state:
5717 node.disk_state_static = self.new_disk_state
5719 for attr in ["master_capable", "vm_capable"]:
5720 val = getattr(self.op, attr)
5722 setattr(node, attr, val)
5723 result.append((attr, str(val)))
5725 if new_role != old_role:
5726 # Tell the node to demote itself, if no longer MC and not offline
5727 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5728 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5730 self.LogWarning("Node failed to demote itself: %s", msg)
5732 new_flags = self._R2F[new_role]
5733 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5735 result.append((desc, str(nf)))
5736 (node.master_candidate, node.drained, node.offline) = new_flags
5738 # we locked all nodes, we adjust the CP before updating this node
5740 _AdjustCandidatePool(self, [node.name])
5742 if self.op.secondary_ip:
5743 node.secondary_ip = self.op.secondary_ip
5744 result.append(("secondary_ip", self.op.secondary_ip))
5746 # this will trigger configuration file update, if needed
5747 self.cfg.Update(node, feedback_fn)
5749 # this will trigger job queue propagation or cleanup if the mc
5751 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5752 self.context.ReaddNode(node)
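
# Illustrative sketch (not part of this module): how a node's flag tuple
# (master_candidate, drained, offline) maps to a single role, and how the
# "exactly one of old/new role is master candidate" test used in Exec above
# decides whether the job queue needs to be re-propagated.  The _sketch_*
# names and the role strings are hypothetical stand-ins for the LU's
# _F2R/_ROLE_* constants.
def _sketch_flags_to_role(master_candidate, drained, offline):
  """Map the three node flags to one symbolic role (sketch only)."""
  flags_to_role = {
    (True, False, False): "candidate",
    (False, True, False): "drained",
    (False, False, True): "offline",
    (False, False, False): "regular",
    }
  return flags_to_role[(master_candidate, drained, offline)]


def _sketch_needs_readd(old_role, new_role):
  """True iff exactly one of the two roles is the master-candidate role."""
  return [old_role, new_role].count("candidate") == 1


assert _sketch_flags_to_role(True, False, False) == "candidate"
assert _sketch_needs_readd("candidate", "regular")
assert not _sketch_needs_readd("candidate", "candidate")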
5757 class LUNodePowercycle(NoHooksLU):
5758 """Powercycles a node.
5763 def CheckArguments(self):
5764 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5765 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5766 raise errors.OpPrereqError("The node is the master and the force"
5767 " parameter was not set",
5770 def ExpandNames(self):
5771 """Locking for PowercycleNode.
5773 This is a last-resort option and shouldn't block on other
5774 jobs. Therefore, we grab no locks.
5777 self.needed_locks = {}
5779 def Exec(self, feedback_fn):
5783 result = self.rpc.call_node_powercycle(self.op.node_name,
5784 self.cfg.GetHypervisorType())
5785 result.Raise("Failed to schedule the reboot")
5786 return result.payload
5789 class LUClusterQuery(NoHooksLU):
5790 """Query cluster configuration.
5795 def ExpandNames(self):
5796 self.needed_locks = {}
5798 def Exec(self, feedback_fn):
5799 """Return cluster config.
5802 cluster = self.cfg.GetClusterInfo()
5805 # Filter just for enabled hypervisors
5806 for os_name, hv_dict in cluster.os_hvp.items():
5807 os_hvp[os_name] = {}
5808 for hv_name, hv_params in hv_dict.items():
5809 if hv_name in cluster.enabled_hypervisors:
5810 os_hvp[os_name][hv_name] = hv_params
5812 # Convert ip_family to ip_version
5813 primary_ip_version = constants.IP4_VERSION
5814 if cluster.primary_ip_family == netutils.IP6Address.family:
5815 primary_ip_version = constants.IP6_VERSION
5818 "software_version": constants.RELEASE_VERSION,
5819 "protocol_version": constants.PROTOCOL_VERSION,
5820 "config_version": constants.CONFIG_VERSION,
5821 "os_api_version": max(constants.OS_API_VERSIONS),
5822 "export_version": constants.EXPORT_VERSION,
5823 "architecture": (platform.architecture()[0], platform.machine()),
5824 "name": cluster.cluster_name,
5825 "master": cluster.master_node,
5826 "default_hypervisor": cluster.primary_hypervisor,
5827 "enabled_hypervisors": cluster.enabled_hypervisors,
5828 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
5829 for hypervisor_name in cluster.enabled_hypervisors]),
5831 "beparams": cluster.beparams,
5832 "osparams": cluster.osparams,
5833 "nicparams": cluster.nicparams,
5834 "ndparams": cluster.ndparams,
5835 "candidate_pool_size": cluster.candidate_pool_size,
5836 "master_netdev": cluster.master_netdev,
5837 "master_netmask": cluster.master_netmask,
5838 "use_external_mip_script": cluster.use_external_mip_script,
5839 "volume_group_name": cluster.volume_group_name,
5840 "drbd_usermode_helper": cluster.drbd_usermode_helper,
5841 "file_storage_dir": cluster.file_storage_dir,
5842 "shared_file_storage_dir": cluster.shared_file_storage_dir,
5843 "maintain_node_health": cluster.maintain_node_health,
5844 "ctime": cluster.ctime,
5845 "mtime": cluster.mtime,
5846 "uuid": cluster.uuid,
5847 "tags": list(cluster.GetTags()),
5848 "uid_pool": cluster.uid_pool,
5849 "default_iallocator": cluster.default_iallocator,
5850 "reserved_lvs": cluster.reserved_lvs,
5851 "primary_ip_version": primary_ip_version,
5852 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
5853 "hidden_os": cluster.hidden_os,
5854 "blacklisted_os": cluster.blacklisted_os,
5860 class LUClusterConfigQuery(NoHooksLU):
5861 """Return configuration values.
5865 _FIELDS_DYNAMIC = utils.FieldSet()
5866 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
5867 "watcher_pause", "volume_group_name")
5869 def CheckArguments(self):
5870 _CheckOutputFields(static=self._FIELDS_STATIC,
5871 dynamic=self._FIELDS_DYNAMIC,
5872 selected=self.op.output_fields)
5874 def ExpandNames(self):
5875 self.needed_locks = {}
5877 def Exec(self, feedback_fn):
5878 """Dump a representation of the cluster config to the standard output.
5882 for field in self.op.output_fields:
5883 if field == "cluster_name":
5884 entry = self.cfg.GetClusterName()
5885 elif field == "master_node":
5886 entry = self.cfg.GetMasterNode()
5887 elif field == "drain_flag":
5888 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
5889 elif field == "watcher_pause":
5890 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
5891 elif field == "volume_group_name":
5892 entry = self.cfg.GetVGName()
5894 raise errors.ParameterError(field)
5895 values.append(entry)
5899 class LUInstanceActivateDisks(NoHooksLU):
5900 """Bring up an instance's disks.
5905 def ExpandNames(self):
5906 self._ExpandAndLockInstance()
5907 self.needed_locks[locking.LEVEL_NODE] = []
5908 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5910 def DeclareLocks(self, level):
5911 if level == locking.LEVEL_NODE:
5912 self._LockInstancesNodes()
5914 def CheckPrereq(self):
5915 """Check prerequisites.
5917 This checks that the instance is in the cluster.
5920 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5921 assert self.instance is not None, \
5922 "Cannot retrieve locked instance %s" % self.op.instance_name
5923 _CheckNodeOnline(self, self.instance.primary_node)
5925 def Exec(self, feedback_fn):
5926 """Activate the disks.
5929 disks_ok, disks_info = \
5930 _AssembleInstanceDisks(self, self.instance,
5931 ignore_size=self.op.ignore_size)
5933 raise errors.OpExecError("Cannot activate block devices")
5938 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5940 """Prepare the block devices for an instance.
5942 This sets up the block devices on all nodes.
5944 @type lu: L{LogicalUnit}
5945 @param lu: the logical unit on whose behalf we execute
5946 @type instance: L{objects.Instance}
5947 @param instance: the instance for whose disks we assemble
5948 @type disks: list of L{objects.Disk} or None
5949 @param disks: which disks to assemble (or all, if None)
5950 @type ignore_secondaries: boolean
5951 @param ignore_secondaries: if true, errors on secondary nodes
5952 won't result in an error return from the function
5953 @type ignore_size: boolean
5954 @param ignore_size: if true, the current known size of the disk
5955 will not be used during the disk activation, useful for cases
5956 when the size is wrong
5957 @return: False if the operation failed, otherwise a list of
5958 (host, instance_visible_name, node_visible_name)
5959 with the mapping from node devices to instance devices
5964 iname = instance.name
5965 disks = _ExpandCheckDisks(instance, disks)
5967   # With the two-pass mechanism we try to reduce the window of
5968   # opportunity for the race condition of switching DRBD to primary
5969   # before the handshake has occurred, but we do not eliminate it
5969   # (a standalone sketch of this two-pass ordering follows the function)
5971 # The proper fix would be to wait (with some limits) until the
5972 # connection has been made and drbd transitions from WFConnection
5973 # into any other network-connected state (Connected, SyncTarget,
5976 # 1st pass, assemble on all nodes in secondary mode
5977 for idx, inst_disk in enumerate(disks):
5978 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5980 node_disk = node_disk.Copy()
5981 node_disk.UnsetSize()
5982 lu.cfg.SetDiskID(node_disk, node)
5983 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5984 msg = result.fail_msg
5986 lu.proc.LogWarning("Could not prepare block device %s on node %s"
5987 " (is_primary=False, pass=1): %s",
5988 inst_disk.iv_name, node, msg)
5989 if not ignore_secondaries:
5992 # FIXME: race condition on drbd migration to primary
5994 # 2nd pass, do only the primary node
5995 for idx, inst_disk in enumerate(disks):
5998 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5999 if node != instance.primary_node:
6002 node_disk = node_disk.Copy()
6003 node_disk.UnsetSize()
6004 lu.cfg.SetDiskID(node_disk, node)
6005 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
6006 msg = result.fail_msg
6008 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6009 " (is_primary=True, pass=2): %s",
6010 inst_disk.iv_name, node, msg)
6013 dev_path = result.payload
6015 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6017 # leave the disks configured for the primary node
6018 # this is a workaround that would be fixed better by
6019 # improving the logical/physical id handling
6021 lu.cfg.SetDiskID(disk, instance.primary_node)
6023 return disks_ok, device_info
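
# Illustrative sketch of the two-pass assembly order implemented above, with
# the blockdev RPC replaced by a stub callable; the _sketch_* names are
# hypothetical.  Pass 1 brings every node up in secondary mode so the DRBD
# peers can connect; pass 2 promotes only the primary node, which narrows
# (but does not close) the race window described in the comments above.
def _sketch_two_pass_assemble(disk_nodes, primary, assemble_fn):
  """assemble_fn(node, as_primary) returns an error message or None."""
  errors_seen = []
  # 1st pass: all nodes, secondary mode
  for node in disk_nodes:
    err = assemble_fn(node, False)
    if err:
      errors_seen.append((node, err))
  # 2nd pass: only the primary node, primary mode
  err = assemble_fn(primary, True)
  if err:
    errors_seen.append((primary, err))
  return errors_seen


# with an always-successful stub no errors are collected
assert _sketch_two_pass_assemble(["node1", "node2"], "node1",
                                 lambda node, as_primary: None) == []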
6026 def _StartInstanceDisks(lu, instance, force):
6027 """Start the disks of an instance.
6030 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6031 ignore_secondaries=force)
6033 _ShutdownInstanceDisks(lu, instance)
6034 if force is not None and not force:
6035 lu.proc.LogWarning("", hint="If the message above refers to a"
6037 " you can retry the operation using '--force'.")
6038 raise errors.OpExecError("Disk consistency error")
6041 class LUInstanceDeactivateDisks(NoHooksLU):
6042 """Shutdown an instance's disks.
6047 def ExpandNames(self):
6048 self._ExpandAndLockInstance()
6049 self.needed_locks[locking.LEVEL_NODE] = []
6050 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6052 def DeclareLocks(self, level):
6053 if level == locking.LEVEL_NODE:
6054 self._LockInstancesNodes()
6056 def CheckPrereq(self):
6057 """Check prerequisites.
6059 This checks that the instance is in the cluster.
6062 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6063 assert self.instance is not None, \
6064 "Cannot retrieve locked instance %s" % self.op.instance_name
6066 def Exec(self, feedback_fn):
6067 """Deactivate the disks
6070 instance = self.instance
6072 _ShutdownInstanceDisks(self, instance)
6074 _SafeShutdownInstanceDisks(self, instance)
6077 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6078 """Shutdown block devices of an instance.
6080 This function checks if an instance is running, before calling
6081 _ShutdownInstanceDisks.
6084 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6085 _ShutdownInstanceDisks(lu, instance, disks=disks)
6088 def _ExpandCheckDisks(instance, disks):
6089 """Return the instance disks selected by the disks list
6091 @type disks: list of L{objects.Disk} or None
6092 @param disks: selected disks
6093 @rtype: list of L{objects.Disk}
6094 @return: selected instance disks to act on
6098 return instance.disks
6100 if not set(disks).issubset(instance.disks):
6101 raise errors.ProgrammerError("Can only act on disks belonging to the"
6106 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6107 """Shutdown block devices of an instance.
6109 This does the shutdown on all nodes of the instance.
6111 If the ignore_primary is false, errors on the primary node are
6116 disks = _ExpandCheckDisks(instance, disks)
6119 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6120 lu.cfg.SetDiskID(top_disk, node)
6121 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
6122 msg = result.fail_msg
6124 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6125 disk.iv_name, node, msg)
6126 if ((node == instance.primary_node and not ignore_primary) or
6127 (node != instance.primary_node and not result.offline)):
6132 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6133 """Checks if a node has enough free memory.
6135 This function checks if a given node has the needed amount of free
6136 memory. In case the node has less memory or we cannot get the
6137 information from the node, this function raises an OpPrereqError
6140 @type lu: C{LogicalUnit}
6141 @param lu: a logical unit from which we get configuration data
6143 @param node: the node to check
6144 @type reason: C{str}
6145 @param reason: string to use in the error message
6146 @type requested: C{int}
6147 @param requested: the amount of memory in MiB to check for
6148 @type hypervisor_name: C{str}
6149 @param hypervisor_name: the hypervisor to ask for memory stats
6150 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6151 we cannot check the node
6154 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6155 nodeinfo[node].Raise("Can't get data from node %s" % node,
6156 prereq=True, ecode=errors.ECODE_ENVIRON)
6157 (_, _, (hv_info, )) = nodeinfo[node].payload
6159 free_mem = hv_info.get("memory_free", None)
6160 if not isinstance(free_mem, int):
6161 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6162 " was '%s'" % (node, free_mem),
6163 errors.ECODE_ENVIRON)
6164 if requested > free_mem:
6165 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6166 " needed %s MiB, available %s MiB" %
6167 (node, reason, requested, free_mem),
6171 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6172 """Checks if nodes have enough free disk space in the all VGs.
6174 This function checks if all given nodes have the needed amount of
6175 free disk. In case any node has less disk space or we cannot get the
6176 information from the node, this function raises an OpPrereqError
6179 @type lu: C{LogicalUnit}
6180 @param lu: a logical unit from which we get configuration data
6181 @type nodenames: C{list}
6182 @param nodenames: the list of node names to check
6183 @type req_sizes: C{dict}
6184 @param req_sizes: the hash of vg and corresponding amount of disk in
6186 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6187 or we cannot check the node
6190 for vg, req_size in req_sizes.items():
6191 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
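
# Illustrative sketch (hypothetical helper, plain data instead of the node
# RPC): the free-memory, per-VG free-disk and physical-CPU pre-checks in this
# file all share the same shape -- query the nodes, treat a missing or
# non-integer value as an environment problem, and treat an insufficient
# value as a state problem.
def _sketch_check_resource(available_by_node, requested, what):
  """available_by_node: dict mapping node name to the reported value."""
  problems = []
  for node, value in sorted(available_by_node.items()):
    if not isinstance(value, int):
      problems.append("%s: cannot determine %s (got %r)" % (node, what, value))
    elif value < requested:
      problems.append("%s: not enough %s: need %d, have %d" %
                      (node, what, requested, value))
  return problems


_problems = _sketch_check_resource({"node1": 4096, "node2": None}, 2048,
                                   "free memory (MiB)")
assert len(_problems) == 1 and _problems[0].startswith("node2")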
6194 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6195 """Checks if nodes have enough free disk space in the specified VG.
6197 This function checks if all given nodes have the needed amount of
6198 free disk. In case any node has less disk space or we cannot get the
6199 information from the node, this function raises an OpPrereqError
6202 @type lu: C{LogicalUnit}
6203 @param lu: a logical unit from which we get configuration data
6204 @type nodenames: C{list}
6205 @param nodenames: the list of node names to check
6207 @param vg: the volume group to check
6208 @type requested: C{int}
6209 @param requested: the amount of disk in MiB to check for
6210 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6211 or we cannot check the node
6214 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6215 for node in nodenames:
6216 info = nodeinfo[node]
6217 info.Raise("Cannot get current information from node %s" % node,
6218 prereq=True, ecode=errors.ECODE_ENVIRON)
6219 (_, (vg_info, ), _) = info.payload
6220 vg_free = vg_info.get("vg_free", None)
6221 if not isinstance(vg_free, int):
6222 raise errors.OpPrereqError("Can't compute free disk space on node"
6223 " %s for vg %s, result was '%s'" %
6224 (node, vg, vg_free), errors.ECODE_ENVIRON)
6225 if requested > vg_free:
6226 raise errors.OpPrereqError("Not enough disk space on target node %s"
6227 " vg %s: required %d MiB, available %d MiB" %
6228 (node, vg, requested, vg_free),
6232 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6233 """Checks if nodes have enough physical CPUs
6235 This function checks if all given nodes have the needed number of
6236 physical CPUs. In case any node has fewer CPUs or we cannot get the
6237 information from the node, this function raises an OpPrereqError
6240 @type lu: C{LogicalUnit}
6241 @param lu: a logical unit from which we get configuration data
6242 @type nodenames: C{list}
6243 @param nodenames: the list of node names to check
6244 @type requested: C{int}
6245 @param requested: the minimum acceptable number of physical CPUs
6246 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6247 or we cannot check the node
6250 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6251 for node in nodenames:
6252 info = nodeinfo[node]
6253 info.Raise("Cannot get current information from node %s" % node,
6254 prereq=True, ecode=errors.ECODE_ENVIRON)
6255 (_, _, (hv_info, )) = info.payload
6256 num_cpus = hv_info.get("cpu_total", None)
6257 if not isinstance(num_cpus, int):
6258 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6259 " on node %s, result was '%s'" %
6260 (node, num_cpus), errors.ECODE_ENVIRON)
6261 if requested > num_cpus:
6262 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6263 "required" % (node, num_cpus, requested),
6267 class LUInstanceStartup(LogicalUnit):
6268 """Starts an instance.
6271 HPATH = "instance-start"
6272 HTYPE = constants.HTYPE_INSTANCE
6275 def CheckArguments(self):
6277 if self.op.beparams:
6278 # fill the beparams dict
6279 objects.UpgradeBeParams(self.op.beparams)
6280 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6282 def ExpandNames(self):
6283 self._ExpandAndLockInstance()
6285 def BuildHooksEnv(self):
6288 This runs on master, primary and secondary nodes of the instance.
6292 "FORCE": self.op.force,
6295 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6299 def BuildHooksNodes(self):
6300 """Build hooks nodes.
6303 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6306 def CheckPrereq(self):
6307 """Check prerequisites.
6309 This checks that the instance is in the cluster.
6312 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6313 assert self.instance is not None, \
6314 "Cannot retrieve locked instance %s" % self.op.instance_name
6317 if self.op.hvparams:
6318 # check hypervisor parameter syntax (locally)
6319 cluster = self.cfg.GetClusterInfo()
6320 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6321 filled_hvp = cluster.FillHV(instance)
6322 filled_hvp.update(self.op.hvparams)
6323 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6324 hv_type.CheckParameterSyntax(filled_hvp)
6325 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6327 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6329 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6331 if self.primary_offline and self.op.ignore_offline_nodes:
6332 self.proc.LogWarning("Ignoring offline primary node")
6334 if self.op.hvparams or self.op.beparams:
6335 self.proc.LogWarning("Overridden parameters are ignored")
6337 _CheckNodeOnline(self, instance.primary_node)
6339 bep = self.cfg.GetClusterInfo().FillBE(instance)
6341 # check bridges existence
6342 _CheckInstanceBridgesExist(self, instance)
6344 remote_info = self.rpc.call_instance_info(instance.primary_node,
6346 instance.hypervisor)
6347 remote_info.Raise("Error checking node %s" % instance.primary_node,
6348 prereq=True, ecode=errors.ECODE_ENVIRON)
6349 if not remote_info.payload: # not running already
6350 _CheckNodeFreeMemory(self, instance.primary_node,
6351 "starting instance %s" % instance.name,
6352 bep[constants.BE_MAXMEM], instance.hypervisor)
6354 def Exec(self, feedback_fn):
6355 """Start the instance.
6358 instance = self.instance
6359 force = self.op.force
6361 if not self.op.no_remember:
6362 self.cfg.MarkInstanceUp(instance.name)
6364 if self.primary_offline:
6365 assert self.op.ignore_offline_nodes
6366 self.proc.LogInfo("Primary node offline, marked instance as started")
6368 node_current = instance.primary_node
6370 _StartInstanceDisks(self, instance, force)
6373 self.rpc.call_instance_start(node_current,
6374 (instance, self.op.hvparams,
6376 self.op.startup_paused)
6377 msg = result.fail_msg
6379 _ShutdownInstanceDisks(self, instance)
6380 raise errors.OpExecError("Could not start instance: %s" % msg)
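
# Illustrative sketch (plain dicts, hypothetical parameter names): how the
# one-off hvparams/beparams overrides accepted by LUInstanceStartup are
# merged for validation in CheckPrereq -- the cluster-filled defaults come
# first and the per-start overrides are laid on top, so only the merged
# result has to pass the syntax check.
def _sketch_merge_overrides(filled_defaults, overrides):
  merged = dict(filled_defaults)  # never mutate the configuration's copy
  merged.update(overrides or {})
  return merged


_filled = {"kernel_path": "/boot/vmlinuz", "serial_console": True}
assert _sketch_merge_overrides(_filled, {"serial_console": False}) == \
    {"kernel_path": "/boot/vmlinuz", "serial_console": False}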
6383 class LUInstanceReboot(LogicalUnit):
6384 """Reboot an instance.
6387 HPATH = "instance-reboot"
6388 HTYPE = constants.HTYPE_INSTANCE
6391 def ExpandNames(self):
6392 self._ExpandAndLockInstance()
6394 def BuildHooksEnv(self):
6397 This runs on master, primary and secondary nodes of the instance.
6401 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6402 "REBOOT_TYPE": self.op.reboot_type,
6403 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6406 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6410 def BuildHooksNodes(self):
6411 """Build hooks nodes.
6414 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6417 def CheckPrereq(self):
6418 """Check prerequisites.
6420 This checks that the instance is in the cluster.
6423 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6424 assert self.instance is not None, \
6425 "Cannot retrieve locked instance %s" % self.op.instance_name
6426 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6427 _CheckNodeOnline(self, instance.primary_node)
6429 # check bridges existence
6430 _CheckInstanceBridgesExist(self, instance)
6432 def Exec(self, feedback_fn):
6433 """Reboot the instance.
6436 instance = self.instance
6437 ignore_secondaries = self.op.ignore_secondaries
6438 reboot_type = self.op.reboot_type
6440 remote_info = self.rpc.call_instance_info(instance.primary_node,
6442 instance.hypervisor)
6443 remote_info.Raise("Error checking node %s" % instance.primary_node)
6444 instance_running = bool(remote_info.payload)
6446 node_current = instance.primary_node
6448 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6449 constants.INSTANCE_REBOOT_HARD]:
6450 for disk in instance.disks:
6451 self.cfg.SetDiskID(disk, node_current)
6452 result = self.rpc.call_instance_reboot(node_current, instance,
6454 self.op.shutdown_timeout)
6455 result.Raise("Could not reboot instance")
6457 if instance_running:
6458 result = self.rpc.call_instance_shutdown(node_current, instance,
6459 self.op.shutdown_timeout)
6460 result.Raise("Could not shutdown instance for full reboot")
6461 _ShutdownInstanceDisks(self, instance)
6463 self.LogInfo("Instance %s was already stopped, starting now",
6465 _StartInstanceDisks(self, instance, ignore_secondaries)
6466 result = self.rpc.call_instance_start(node_current,
6467 (instance, None, None), False)
6468 msg = result.fail_msg
6470 _ShutdownInstanceDisks(self, instance)
6471 raise errors.OpExecError("Could not start instance for"
6472 " full reboot: %s" % msg)
6474 self.cfg.MarkInstanceUp(instance.name)
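
# Illustrative sketch of the decision taken in Exec above; the reboot-type
# strings are hypothetical stand-ins for the INSTANCE_REBOOT_* constants.
# Soft/hard reboots of a running instance are delegated to the node daemon,
# while a full reboot (or rebooting a stopped instance) is a stop/start
# cycle driven from the master, including disk deactivation/activation.
def _sketch_reboot_actions(running, reboot_type):
  if running and reboot_type in ("soft", "hard"):
    return ["node-side reboot"]
  actions = []
  if running:
    actions.append("shutdown instance and deactivate disks")
  actions.extend(["activate disks", "start instance"])
  return actions


assert _sketch_reboot_actions(True, "hard") == ["node-side reboot"]
assert _sketch_reboot_actions(False, "full") == ["activate disks",
                                                 "start instance"]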
6477 class LUInstanceShutdown(LogicalUnit):
6478 """Shutdown an instance.
6481 HPATH = "instance-stop"
6482 HTYPE = constants.HTYPE_INSTANCE
6485 def ExpandNames(self):
6486 self._ExpandAndLockInstance()
6488 def BuildHooksEnv(self):
6491 This runs on master, primary and secondary nodes of the instance.
6494 env = _BuildInstanceHookEnvByObject(self, self.instance)
6495 env["TIMEOUT"] = self.op.timeout
6498 def BuildHooksNodes(self):
6499 """Build hooks nodes.
6502 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6505 def CheckPrereq(self):
6506 """Check prerequisites.
6508 This checks that the instance is in the cluster.
6511 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6512 assert self.instance is not None, \
6513 "Cannot retrieve locked instance %s" % self.op.instance_name
6515 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6517 self.primary_offline = \
6518 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6520 if self.primary_offline and self.op.ignore_offline_nodes:
6521 self.proc.LogWarning("Ignoring offline primary node")
6523 _CheckNodeOnline(self, self.instance.primary_node)
6525 def Exec(self, feedback_fn):
6526 """Shutdown the instance.
6529 instance = self.instance
6530 node_current = instance.primary_node
6531 timeout = self.op.timeout
6533 if not self.op.no_remember:
6534 self.cfg.MarkInstanceDown(instance.name)
6536 if self.primary_offline:
6537 assert self.op.ignore_offline_nodes
6538 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6540 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6541 msg = result.fail_msg
6543 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6545 _ShutdownInstanceDisks(self, instance)
6548 class LUInstanceReinstall(LogicalUnit):
6549 """Reinstall an instance.
6552 HPATH = "instance-reinstall"
6553 HTYPE = constants.HTYPE_INSTANCE
6556 def ExpandNames(self):
6557 self._ExpandAndLockInstance()
6559 def BuildHooksEnv(self):
6562 This runs on master, primary and secondary nodes of the instance.
6565 return _BuildInstanceHookEnvByObject(self, self.instance)
6567 def BuildHooksNodes(self):
6568 """Build hooks nodes.
6571 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6574 def CheckPrereq(self):
6575 """Check prerequisites.
6577 This checks that the instance is in the cluster and is not running.
6580 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6581 assert instance is not None, \
6582 "Cannot retrieve locked instance %s" % self.op.instance_name
6583 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6584 " offline, cannot reinstall")
6585 for node in instance.secondary_nodes:
6586 _CheckNodeOnline(self, node, "Instance secondary node offline,"
6587 " cannot reinstall")
6589 if instance.disk_template == constants.DT_DISKLESS:
6590 raise errors.OpPrereqError("Instance '%s' has no disks" %
6591 self.op.instance_name,
6593 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
6595 if self.op.os_type is not None:
6597 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6598 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6599 instance_os = self.op.os_type
6601 instance_os = instance.os
6603 nodelist = list(instance.all_nodes)
6605 if self.op.osparams:
6606 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6607 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6608 self.os_inst = i_osdict # the new dict (without defaults)
6612 self.instance = instance
6614 def Exec(self, feedback_fn):
6615 """Reinstall the instance.
6618 inst = self.instance
6620 if self.op.os_type is not None:
6621 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6622 inst.os = self.op.os_type
6623 # Write to configuration
6624 self.cfg.Update(inst, feedback_fn)
6626 _StartInstanceDisks(self, inst, None)
6628 feedback_fn("Running the instance OS create scripts...")
6629 # FIXME: pass debug option from opcode to backend
6630 result = self.rpc.call_instance_os_add(inst.primary_node,
6631 (inst, self.os_inst), True,
6632 self.op.debug_level)
6633 result.Raise("Could not install OS for instance %s on node %s" %
6634 (inst.name, inst.primary_node))
6636 _ShutdownInstanceDisks(self, inst)
6639 class LUInstanceRecreateDisks(LogicalUnit):
6640 """Recreate an instance's missing disks.
6643 HPATH = "instance-recreate-disks"
6644 HTYPE = constants.HTYPE_INSTANCE
6647 def CheckArguments(self):
6648 # normalise the disk list
6649 self.op.disks = sorted(frozenset(self.op.disks))
6651 def ExpandNames(self):
6652 self._ExpandAndLockInstance()
6653 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6655 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6656 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6658 self.needed_locks[locking.LEVEL_NODE] = []
6660 def DeclareLocks(self, level):
6661 if level == locking.LEVEL_NODE:
6662 # if we replace the nodes, we only need to lock the old primary,
6663 # otherwise we need to lock all nodes for disk re-creation
6664 primary_only = bool(self.op.nodes)
6665 self._LockInstancesNodes(primary_only=primary_only)
6666 elif level == locking.LEVEL_NODE_RES:
6668 self.needed_locks[locking.LEVEL_NODE_RES] = \
6669 self.needed_locks[locking.LEVEL_NODE][:]
6671 def BuildHooksEnv(self):
6674 This runs on master, primary and secondary nodes of the instance.
6677 return _BuildInstanceHookEnvByObject(self, self.instance)
6679 def BuildHooksNodes(self):
6680 """Build hooks nodes.
6683 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6686 def CheckPrereq(self):
6687 """Check prerequisites.
6689 This checks that the instance is in the cluster and is not running.
6692 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6693 assert instance is not None, \
6694 "Cannot retrieve locked instance %s" % self.op.instance_name
6696 if len(self.op.nodes) != len(instance.all_nodes):
6697 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6698 " %d replacement nodes were specified" %
6699 (instance.name, len(instance.all_nodes),
6700 len(self.op.nodes)),
6702 assert instance.disk_template != constants.DT_DRBD8 or \
6703 len(self.op.nodes) == 2
6704 assert instance.disk_template != constants.DT_PLAIN or \
6705 len(self.op.nodes) == 1
6706 primary_node = self.op.nodes[0]
6708 primary_node = instance.primary_node
6709 _CheckNodeOnline(self, primary_node)
6711 if instance.disk_template == constants.DT_DISKLESS:
6712 raise errors.OpPrereqError("Instance '%s' has no disks" %
6713 self.op.instance_name, errors.ECODE_INVAL)
6714 # if we replace nodes *and* the old primary is offline, we don't
6716 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
6717 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
6718 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6719 if not (self.op.nodes and old_pnode.offline):
6720 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
6721 msg="cannot recreate disks")
6723 if not self.op.disks:
6724 self.op.disks = range(len(instance.disks))
6726 for idx in self.op.disks:
6727 if idx >= len(instance.disks):
6728 raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6730 if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6731 raise errors.OpPrereqError("Can't recreate disks partially and"
6732 " change the nodes at the same time",
6734 self.instance = instance
6736 def Exec(self, feedback_fn):
6737 """Recreate the disks.
6740 instance = self.instance
6742 assert (self.owned_locks(locking.LEVEL_NODE) ==
6743 self.owned_locks(locking.LEVEL_NODE_RES))
6746 mods = [] # keeps track of needed logical_id changes
6748 for idx, disk in enumerate(instance.disks):
6749 if idx not in self.op.disks: # disk idx has not been passed in
6752 # update secondaries for disks, if needed
6754 if disk.dev_type == constants.LD_DRBD8:
6755 # need to update the nodes and minors
6756 assert len(self.op.nodes) == 2
6757 assert len(disk.logical_id) == 6 # otherwise disk internals
6759 (_, _, old_port, _, _, old_secret) = disk.logical_id
6760 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6761 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6762 new_minors[0], new_minors[1], old_secret)
6763 assert len(disk.logical_id) == len(new_id)
6764 mods.append((idx, new_id))
6766 # now that we have passed all asserts above, we can apply the mods
6767 # in a single run (to avoid partial changes)
6768 for idx, new_id in mods:
6769 instance.disks[idx].logical_id = new_id
6771 # change primary node, if needed
6773 instance.primary_node = self.op.nodes[0]
6774 self.LogWarning("Changing the instance's nodes, you will have to"
6775 " remove any disks left on the older nodes manually")
6778 self.cfg.Update(instance, feedback_fn)
6780 _CreateDisks(self, instance, to_skip=to_skip)
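
# Illustrative sketch of the logical_id rewrite performed above when the
# instance is recreated on a new node pair: the DRBD port and shared secret
# are kept, while the node names and the per-node minors are replaced.  The
# _sketch_ helper and the sample values are hypothetical.
def _sketch_new_drbd_logical_id(old_id, new_nodes, new_minors):
  (_, _, old_port, _, _, old_secret) = old_id
  return (new_nodes[0], new_nodes[1], old_port,
          new_minors[0], new_minors[1], old_secret)


assert _sketch_new_drbd_logical_id(("nodeA", "nodeB", 11000, 0, 1, "secret"),
                                   ["nodeC", "nodeD"], [2, 3]) == \
    ("nodeC", "nodeD", 11000, 2, 3, "secret")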
6783 class LUInstanceRename(LogicalUnit):
6784 """Rename an instance.
6787 HPATH = "instance-rename"
6788 HTYPE = constants.HTYPE_INSTANCE
6790 def CheckArguments(self):
6794 if self.op.ip_check and not self.op.name_check:
6795 # TODO: make the ip check more flexible and not depend on the name check
6796 raise errors.OpPrereqError("IP address check requires a name check",
6799 def BuildHooksEnv(self):
6802 This runs on master, primary and secondary nodes of the instance.
6805 env = _BuildInstanceHookEnvByObject(self, self.instance)
6806 env["INSTANCE_NEW_NAME"] = self.op.new_name
6809 def BuildHooksNodes(self):
6810 """Build hooks nodes.
6813 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6816 def CheckPrereq(self):
6817 """Check prerequisites.
6819 This checks that the instance is in the cluster and is not running.
6822 self.op.instance_name = _ExpandInstanceName(self.cfg,
6823 self.op.instance_name)
6824 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6825 assert instance is not None
6826 _CheckNodeOnline(self, instance.primary_node)
6827 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
6828 msg="cannot rename")
6829 self.instance = instance
6831 new_name = self.op.new_name
6832 if self.op.name_check:
6833 hostname = netutils.GetHostname(name=new_name)
6834 if hostname.name != new_name:
6835 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6837 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6838 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6839 " same as given hostname '%s'") %
6840 (hostname.name, self.op.new_name),
6842 new_name = self.op.new_name = hostname.name
6843 if (self.op.ip_check and
6844 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6845 raise errors.OpPrereqError("IP %s of instance %s already in use" %
6846 (hostname.ip, new_name),
6847 errors.ECODE_NOTUNIQUE)
6849 instance_list = self.cfg.GetInstanceList()
6850 if new_name in instance_list and new_name != instance.name:
6851 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6852 new_name, errors.ECODE_EXISTS)
6854 def Exec(self, feedback_fn):
6855 """Rename the instance.
6858 inst = self.instance
6859 old_name = inst.name
6861 rename_file_storage = False
6862 if (inst.disk_template in constants.DTS_FILEBASED and
6863 self.op.new_name != inst.name):
6864 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6865 rename_file_storage = True
6867 self.cfg.RenameInstance(inst.name, self.op.new_name)
6868 # Change the instance lock. This is definitely safe while we hold the BGL.
6869 # Otherwise the new lock would have to be added in acquired mode.
6871 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6872 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6874 # re-read the instance from the configuration after rename
6875 inst = self.cfg.GetInstanceInfo(self.op.new_name)
6877 if rename_file_storage:
6878 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6879 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6880 old_file_storage_dir,
6881 new_file_storage_dir)
6882 result.Raise("Could not rename on node %s directory '%s' to '%s'"
6883 " (but the instance has been renamed in Ganeti)" %
6884 (inst.primary_node, old_file_storage_dir,
6885 new_file_storage_dir))
6887 _StartInstanceDisks(self, inst, None)
6889 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6890 old_name, self.op.debug_level)
6891 msg = result.fail_msg
6893 msg = ("Could not run OS rename script for instance %s on node %s"
6894 " (but the instance has been renamed in Ganeti): %s" %
6895 (inst.name, inst.primary_node, msg))
6896 self.proc.LogWarning(msg)
6898 _ShutdownInstanceDisks(self, inst)
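
# Illustrative sketch (hypothetical paths): for file-based disk templates the
# rename above also moves the storage directory; the old and new directories
# are derived from the first disk's logical_id path before and after the
# configuration rename, exactly as os.path.dirname is used in Exec.
import os.path  # already imported by this module; repeated so the sketch stands alone

def _sketch_storage_dir(disk_path):
  return os.path.dirname(disk_path)


assert _sketch_storage_dir("/srv/ganeti/file-storage/inst1/disk0") == \
    "/srv/ganeti/file-storage/inst1"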
6903 class LUInstanceRemove(LogicalUnit):
6904 """Remove an instance.
6907 HPATH = "instance-remove"
6908 HTYPE = constants.HTYPE_INSTANCE
6911 def ExpandNames(self):
6912 self._ExpandAndLockInstance()
6913 self.needed_locks[locking.LEVEL_NODE] = []
6914 self.needed_locks[locking.LEVEL_NODE_RES] = []
6915 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6917 def DeclareLocks(self, level):
6918 if level == locking.LEVEL_NODE:
6919 self._LockInstancesNodes()
6920 elif level == locking.LEVEL_NODE_RES:
6922 self.needed_locks[locking.LEVEL_NODE_RES] = \
6923 self.needed_locks[locking.LEVEL_NODE][:]
6925 def BuildHooksEnv(self):
6928 This runs on master, primary and secondary nodes of the instance.
6931 env = _BuildInstanceHookEnvByObject(self, self.instance)
6932 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6935 def BuildHooksNodes(self):
6936 """Build hooks nodes.
6939 nl = [self.cfg.GetMasterNode()]
6940 nl_post = list(self.instance.all_nodes) + nl
6941 return (nl, nl_post)
6943 def CheckPrereq(self):
6944 """Check prerequisites.
6946 This checks that the instance is in the cluster.
6949 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6950 assert self.instance is not None, \
6951 "Cannot retrieve locked instance %s" % self.op.instance_name
6953 def Exec(self, feedback_fn):
6954 """Remove the instance.
6957 instance = self.instance
6958 logging.info("Shutting down instance %s on node %s",
6959 instance.name, instance.primary_node)
6961 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6962 self.op.shutdown_timeout)
6963 msg = result.fail_msg
6965 if self.op.ignore_failures:
6966 feedback_fn("Warning: can't shutdown instance: %s" % msg)
6968 raise errors.OpExecError("Could not shutdown instance %s on"
6970 (instance.name, instance.primary_node, msg))
6972 assert (self.owned_locks(locking.LEVEL_NODE) ==
6973 self.owned_locks(locking.LEVEL_NODE_RES))
6974 assert not (set(instance.all_nodes) -
6975 self.owned_locks(locking.LEVEL_NODE)), \
6976 "Not owning correct locks"
6978 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6981 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6982 """Utility function to remove an instance.
6985 logging.info("Removing block devices for instance %s", instance.name)
6987 if not _RemoveDisks(lu, instance):
6988 if not ignore_failures:
6989 raise errors.OpExecError("Can't remove instance's disks")
6990 feedback_fn("Warning: can't remove instance's disks")
6992 logging.info("Removing instance %s out of cluster config", instance.name)
6994 lu.cfg.RemoveInstance(instance.name)
6996 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6997 "Instance lock removal conflict"
6999 # Remove lock for the instance
7000 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7003 class LUInstanceQuery(NoHooksLU):
7004 """Logical unit for querying instances.
7007 # pylint: disable=W0142
7010 def CheckArguments(self):
7011 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7012 self.op.output_fields, self.op.use_locking)
7014 def ExpandNames(self):
7015 self.iq.ExpandNames(self)
7017 def DeclareLocks(self, level):
7018 self.iq.DeclareLocks(self, level)
7020 def Exec(self, feedback_fn):
7021 return self.iq.OldStyleQuery(self)
7024 class LUInstanceFailover(LogicalUnit):
7025 """Failover an instance.
7028 HPATH = "instance-failover"
7029 HTYPE = constants.HTYPE_INSTANCE
7032 def CheckArguments(self):
7033 """Check the arguments.
7036 self.iallocator = getattr(self.op, "iallocator", None)
7037 self.target_node = getattr(self.op, "target_node", None)
7039 def ExpandNames(self):
7040 self._ExpandAndLockInstance()
7042 if self.op.target_node is not None:
7043 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7045 self.needed_locks[locking.LEVEL_NODE] = []
7046 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7048 ignore_consistency = self.op.ignore_consistency
7049 shutdown_timeout = self.op.shutdown_timeout
7050 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7053 ignore_consistency=ignore_consistency,
7054 shutdown_timeout=shutdown_timeout)
7055 self.tasklets = [self._migrater]
7057 def DeclareLocks(self, level):
7058 if level == locking.LEVEL_NODE:
7059 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7060 if instance.disk_template in constants.DTS_EXT_MIRROR:
7061 if self.op.target_node is None:
7062 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7064 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7065 self.op.target_node]
7066 del self.recalculate_locks[locking.LEVEL_NODE]
7068 self._LockInstancesNodes()
7070 def BuildHooksEnv(self):
7073 This runs on master, primary and secondary nodes of the instance.
7076 instance = self._migrater.instance
7077 source_node = instance.primary_node
7078 target_node = self.op.target_node
7080 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7081 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7082 "OLD_PRIMARY": source_node,
7083 "NEW_PRIMARY": target_node,
7086 if instance.disk_template in constants.DTS_INT_MIRROR:
7087 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7088 env["NEW_SECONDARY"] = source_node
7090 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7092 env.update(_BuildInstanceHookEnvByObject(self, instance))
7096 def BuildHooksNodes(self):
7097 """Build hooks nodes.
7100 instance = self._migrater.instance
7101 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7102 return (nl, nl + [instance.primary_node])
7105 class LUInstanceMigrate(LogicalUnit):
7106 """Migrate an instance.
7108 This is migration without shutting down, compared to the failover,
7109 which is done with shutdown.
7112 HPATH = "instance-migrate"
7113 HTYPE = constants.HTYPE_INSTANCE
7116 def ExpandNames(self):
7117 self._ExpandAndLockInstance()
7119 if self.op.target_node is not None:
7120 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7122 self.needed_locks[locking.LEVEL_NODE] = []
7123 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7125 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7126 cleanup=self.op.cleanup,
7128 fallback=self.op.allow_failover)
7129 self.tasklets = [self._migrater]
7131 def DeclareLocks(self, level):
7132 if level == locking.LEVEL_NODE:
7133 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7134 if instance.disk_template in constants.DTS_EXT_MIRROR:
7135 if self.op.target_node is None:
7136 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7138 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7139 self.op.target_node]
7140 del self.recalculate_locks[locking.LEVEL_NODE]
7142 self._LockInstancesNodes()
7144 def BuildHooksEnv(self):
7147 This runs on master, primary and secondary nodes of the instance.
7150 instance = self._migrater.instance
7151 source_node = instance.primary_node
7152 target_node = self.op.target_node
7153 env = _BuildInstanceHookEnvByObject(self, instance)
7155 "MIGRATE_LIVE": self._migrater.live,
7156 "MIGRATE_CLEANUP": self.op.cleanup,
7157 "OLD_PRIMARY": source_node,
7158 "NEW_PRIMARY": target_node,
7161 if instance.disk_template in constants.DTS_INT_MIRROR:
7162 env["OLD_SECONDARY"] = target_node
7163 env["NEW_SECONDARY"] = source_node
7165 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7169 def BuildHooksNodes(self):
7170 """Build hooks nodes.
7173 instance = self._migrater.instance
7174 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7175 return (nl, nl + [instance.primary_node])
7178 class LUInstanceMove(LogicalUnit):
7179 """Move an instance by data-copying.
7182 HPATH = "instance-move"
7183 HTYPE = constants.HTYPE_INSTANCE
7186 def ExpandNames(self):
7187 self._ExpandAndLockInstance()
7188 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7189 self.op.target_node = target_node
7190 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7191 self.needed_locks[locking.LEVEL_NODE_RES] = []
7192 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7194 def DeclareLocks(self, level):
7195 if level == locking.LEVEL_NODE:
7196 self._LockInstancesNodes(primary_only=True)
7197 elif level == locking.LEVEL_NODE_RES:
7199 self.needed_locks[locking.LEVEL_NODE_RES] = \
7200 self.needed_locks[locking.LEVEL_NODE][:]
7202 def BuildHooksEnv(self):
7205 This runs on master, primary and secondary nodes of the instance.
7209 "TARGET_NODE": self.op.target_node,
7210 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7212 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7215 def BuildHooksNodes(self):
7216 """Build hooks nodes.
7220 self.cfg.GetMasterNode(),
7221 self.instance.primary_node,
7222 self.op.target_node,
7226 def CheckPrereq(self):
7227 """Check prerequisites.
7229 This checks that the instance is in the cluster.
7232 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7233 assert self.instance is not None, \
7234 "Cannot retrieve locked instance %s" % self.op.instance_name
7236 node = self.cfg.GetNodeInfo(self.op.target_node)
7237 assert node is not None, \
7238 "Cannot retrieve locked node %s" % self.op.target_node
7240 self.target_node = target_node = node.name
7242 if target_node == instance.primary_node:
7243 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7244 (instance.name, target_node),
7247 bep = self.cfg.GetClusterInfo().FillBE(instance)
7249 for idx, dsk in enumerate(instance.disks):
7250 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7251 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7252 " cannot copy" % idx, errors.ECODE_STATE)
7254 _CheckNodeOnline(self, target_node)
7255 _CheckNodeNotDrained(self, target_node)
7256 _CheckNodeVmCapable(self, target_node)
7258 if instance.admin_state == constants.ADMINST_UP:
7259 # check memory requirements on the secondary node
7260 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7261 instance.name, bep[constants.BE_MAXMEM],
7262 instance.hypervisor)
7264 self.LogInfo("Not checking memory on the secondary node as"
7265 " instance will not be started")
7267   # check bridge existence
7268 _CheckInstanceBridgesExist(self, instance, node=target_node)
7270 def Exec(self, feedback_fn):
7271 """Move an instance.
7273 The move is done by shutting it down on its present node, copying
7274 the data over (slow) and starting it on the new node.
7277 instance = self.instance
7279 source_node = instance.primary_node
7280 target_node = self.target_node
7282 self.LogInfo("Shutting down instance %s on source node %s",
7283 instance.name, source_node)
7285 assert (self.owned_locks(locking.LEVEL_NODE) ==
7286 self.owned_locks(locking.LEVEL_NODE_RES))
7288 result = self.rpc.call_instance_shutdown(source_node, instance,
7289 self.op.shutdown_timeout)
7290 msg = result.fail_msg
7292 if self.op.ignore_consistency:
7293 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7294 " Proceeding anyway. Please make sure node"
7295 " %s is down. Error details: %s",
7296 instance.name, source_node, source_node, msg)
7298 raise errors.OpExecError("Could not shutdown instance %s on"
7300 (instance.name, source_node, msg))
7302 # create the target disks
7304 _CreateDisks(self, instance, target_node=target_node)
7305 except errors.OpExecError:
7306 self.LogWarning("Device creation failed, reverting...")
7308 _RemoveDisks(self, instance, target_node=target_node)
7310 self.cfg.ReleaseDRBDMinors(instance.name)
7313 cluster_name = self.cfg.GetClusterInfo().cluster_name
7316 # activate, get path, copy the data over
7317 for idx, disk in enumerate(instance.disks):
7318 self.LogInfo("Copying data for disk %d", idx)
7319 result = self.rpc.call_blockdev_assemble(target_node, disk,
7320 instance.name, True, idx)
7322 self.LogWarning("Can't assemble newly created disk %d: %s",
7323 idx, result.fail_msg)
7324 errs.append(result.fail_msg)
7326 dev_path = result.payload
7327 result = self.rpc.call_blockdev_export(source_node, disk,
7328 target_node, dev_path,
7331 self.LogWarning("Can't copy data over for disk %d: %s",
7332 idx, result.fail_msg)
7333 errs.append(result.fail_msg)
7337 self.LogWarning("Some disks failed to copy, aborting")
7339 _RemoveDisks(self, instance, target_node=target_node)
7341 self.cfg.ReleaseDRBDMinors(instance.name)
7342 raise errors.OpExecError("Errors during disk copy: %s" %
7345 instance.primary_node = target_node
7346 self.cfg.Update(instance, feedback_fn)
7348 self.LogInfo("Removing the disks on the original node")
7349 _RemoveDisks(self, instance, target_node=source_node)
7351 # Only start the instance if it's marked as up
7352 if instance.admin_state == constants.ADMINST_UP:
7353 self.LogInfo("Starting instance %s on node %s",
7354 instance.name, target_node)
7356 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7357 ignore_secondaries=True)
7359 _ShutdownInstanceDisks(self, instance)
7360 raise errors.OpExecError("Can't activate the instance's disks")
7362 result = self.rpc.call_instance_start(target_node,
7363 (instance, None, None), False)
7364 msg = result.fail_msg
7366 _ShutdownInstanceDisks(self, instance)
7367 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7368 (instance.name, target_node, msg))
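
# Illustrative sketch (stub callables, hypothetical names) of the copy-based
# move performed in Exec above: create disks on the target, copy each disk
# while collecting errors, and roll the new disks back if any copy failed.
def _sketch_copy_disks(disks, copy_fn, rollback_fn):
  """copy_fn(disk) returns an error message or None."""
  errs = []
  for disk in disks:
    err = copy_fn(disk)
    if err:
      errs.append(err)
  if errs:
    rollback_fn()
    raise RuntimeError("Errors during disk copy: %s" % "; ".join(errs))


_log = []
_sketch_copy_disks(["disk0", "disk1"], lambda disk: None,
                   lambda: _log.append("removed target disks"))
assert _log == []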
7371 class LUNodeMigrate(LogicalUnit):
7372 """Migrate all instances from a node.
7375 HPATH = "node-migrate"
7376 HTYPE = constants.HTYPE_NODE
7379 def CheckArguments(self):
7382 def ExpandNames(self):
7383 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7385 self.share_locks = _ShareAll()
7386 self.needed_locks = {
7387 locking.LEVEL_NODE: [self.op.node_name],
7390 def BuildHooksEnv(self):
7393 This runs on the master, the primary and all the secondaries.
7397 "NODE_NAME": self.op.node_name,
7400 def BuildHooksNodes(self):
7401 """Build hooks nodes.
7404 nl = [self.cfg.GetMasterNode()]
7407 def CheckPrereq(self):
7410 def Exec(self, feedback_fn):
7411 # Prepare jobs for migration instances
7413 [opcodes.OpInstanceMigrate(instance_name=inst.name,
7416 iallocator=self.op.iallocator,
7417 target_node=self.op.target_node)]
7418 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7421 # TODO: Run iallocator in this opcode and pass correct placement options to
7422 # OpInstanceMigrate. Since other jobs can modify the cluster between
7423 # running the iallocator and the actual migration, a good consistency model
7424 # will have to be found.
7426 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7427 frozenset([self.op.node_name]))
7429 return ResultWithJobs(jobs)
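
# Illustrative sketch (plain dictionaries instead of real opcode objects) of
# the job list shape returned above: one job per primary instance of the
# node, each job containing a single migration opcode, so every instance is
# migrated by an independent job.
def _sketch_migration_jobs(primary_instances, target_node):
  return [[{"OP_ID": "OP_INSTANCE_MIGRATE",
            "instance_name": name,
            "target_node": target_node}]
          for name in primary_instances]


_jobs = _sketch_migration_jobs(["inst1", "inst2"], None)
assert len(_jobs) == 2 and len(_jobs[0]) == 1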
7432 class TLMigrateInstance(Tasklet):
7433 """Tasklet class for instance migration.
7436 @ivar live: whether the migration will be done live or non-live;
7437 this variable is initialized only after CheckPrereq has run
7438 @type cleanup: boolean
7439 @ivar cleanup: Whether we clean up from a failed migration
7440 @type iallocator: string
7441 @ivar iallocator: The iallocator used to determine target_node
7442 @type target_node: string
7443 @ivar target_node: If given, the target_node to reallocate the instance to
7444 @type failover: boolean
7445 @ivar failover: Whether operation results in failover or migration
7446 @type fallback: boolean
7447 @ivar fallback: Whether fallback to failover is allowed if migration not
7449 @type ignore_consistency: boolean
7450 @ivar ignore_consistency: Whether we should ignore consistency between source
7452 @type shutdown_timeout: int
7453 @ivar shutdown_timeout: In case of failover timeout of the shutdown
7458 _MIGRATION_POLL_INTERVAL = 1 # seconds
7459 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7461 def __init__(self, lu, instance_name, cleanup=False,
7462 failover=False, fallback=False,
7463 ignore_consistency=False,
7464 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
7465 """Initializes this class.
7468 Tasklet.__init__(self, lu)
7471 self.instance_name = instance_name
7472 self.cleanup = cleanup
7473 self.live = False # will be overridden later
7474 self.failover = failover
7475 self.fallback = fallback
7476 self.ignore_consistency = ignore_consistency
7477 self.shutdown_timeout = shutdown_timeout
7479 def CheckPrereq(self):
7480 """Check prerequisites.
7482 This checks that the instance is in the cluster.
7485 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7486 instance = self.cfg.GetInstanceInfo(instance_name)
7487 assert instance is not None
7488 self.instance = instance
7490 if (not self.cleanup and
7491 not instance.admin_state == constants.ADMINST_UP and
7492 not self.failover and self.fallback):
7493 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7494 " switching to failover")
7495 self.failover = True
7497 if instance.disk_template not in constants.DTS_MIRRORED:
7502 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7503 " %s" % (instance.disk_template, text),
7506 if instance.disk_template in constants.DTS_EXT_MIRROR:
7507 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7509 if self.lu.op.iallocator:
7510 self._RunAllocator()
7512     # We set self.target_node as it is required by
7514 self.target_node = self.lu.op.target_node
7516 # self.target_node is already populated, either directly or by the
7518 target_node = self.target_node
7519 if self.target_node == instance.primary_node:
7520 raise errors.OpPrereqError("Cannot migrate instance %s"
7521 " to its primary (%s)" %
7522 (instance.name, instance.primary_node))
7524 if len(self.lu.tasklets) == 1:
7525 # It is safe to release locks only when we're the only tasklet
7527 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7528 keep=[instance.primary_node, self.target_node])
7531 secondary_nodes = instance.secondary_nodes
7532 if not secondary_nodes:
7533 raise errors.ConfigurationError("No secondary node but using"
7534 " %s disk template" %
7535 instance.disk_template)
7536 target_node = secondary_nodes[0]
7537 if self.lu.op.iallocator or (self.lu.op.target_node and
7538 self.lu.op.target_node != target_node):
7540 text = "failed over"
7543 raise errors.OpPrereqError("Instances with disk template %s cannot"
7544 " be %s to arbitrary nodes"
7545 " (neither an iallocator nor a target"
7546 " node can be passed)" %
7547 (instance.disk_template, text),
7550 i_be = self.cfg.GetClusterInfo().FillBE(instance)
7552 # check memory requirements on the secondary node
7553 if not self.failover or instance.admin_state == constants.ADMINST_UP:
7554 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7555 instance.name, i_be[constants.BE_MAXMEM],
7556 instance.hypervisor)
7558 self.lu.LogInfo("Not checking memory on the secondary node as"
7559 " instance will not be started")
7561 # check if failover must be forced instead of migration
7562 if (not self.cleanup and not self.failover and
7563 i_be[constants.BE_ALWAYS_FAILOVER]):
7565 self.lu.LogInfo("Instance configured to always failover; fallback"
7567 self.failover = True
7569 raise errors.OpPrereqError("This instance has been configured to"
7570 " always failover, please allow failover",
7573 # check bridge existence
7574 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7576 if not self.cleanup:
7577 _CheckNodeNotDrained(self.lu, target_node)
7578 if not self.failover:
7579 result = self.rpc.call_instance_migratable(instance.primary_node,
7581 if result.fail_msg and self.fallback:
7582 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7584 self.failover = True
7586 result.Raise("Can't migrate, please use failover",
7587 prereq=True, ecode=errors.ECODE_STATE)
7589 assert not (self.failover and self.cleanup)
7591 if not self.failover:
7592 if self.lu.op.live is not None and self.lu.op.mode is not None:
7593 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7594 " parameters are accepted",
7596 if self.lu.op.live is not None:
7598 self.lu.op.mode = constants.HT_MIGRATION_LIVE
7600 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7601 # reset the 'live' parameter to None so that repeated
7602 # invocations of CheckPrereq do not raise an exception
7603 self.lu.op.live = None
7604 elif self.lu.op.mode is None:
7605 # read the default value from the hypervisor
7606 i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7608 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7610 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7612 # Failover is never live
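# Summary of the branch above: an explicit 'live' flag is translated into the
# corresponding migration mode and then reset to None, an explicit 'mode' is
# used as given, and otherwise the default is read from the hypervisor's
# HV_MIGRATION_MODE parameter; a failover is never live.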
7615 def _RunAllocator(self):
7616 """Run the allocator based on input opcode.
7619 ial = IAllocator(self.cfg, self.rpc,
7620 mode=constants.IALLOCATOR_MODE_RELOC,
7621 name=self.instance_name,
7622 # TODO See why hail breaks with a single node below
7623 relocate_from=[self.instance.primary_node,
7624 self.instance.primary_node],
7627 ial.Run(self.lu.op.iallocator)
7630 raise errors.OpPrereqError("Can't compute nodes using"
7631 " iallocator '%s': %s" %
7632 (self.lu.op.iallocator, ial.info),
7634 if len(ial.result) != ial.required_nodes:
7635 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7636 " of nodes (%s), required %s" %
7637 (self.lu.op.iallocator, len(ial.result),
7638 ial.required_nodes), errors.ECODE_FAULT)
7639 self.target_node = ial.result[0]
7640 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7641 self.instance_name, self.lu.op.iallocator,
7642 utils.CommaJoin(ial.result))
7644 def _WaitUntilSync(self):
7645 """Poll with custom rpc for disk sync.
7647 This uses our own step-based rpc call.
7650 self.feedback_fn("* wait until resync is done")
7654 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7656 self.instance.disks)
7658 for node, nres in result.items():
7659 nres.Raise("Cannot resync disks on node %s" % node)
7660 node_done, node_percent = nres.payload
7661 all_done = all_done and node_done
7662 if node_percent is not None:
7663 min_percent = min(min_percent, node_percent)
7665 if min_percent < 100:
7666 self.feedback_fn(" - progress: %.1f%%" % min_percent)
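# Each node returns a (done, sync_percent) pair; the minimum percentage
# across nodes is reported as the overall progress, and the loop is meant to
# repeat until every node reports the resync as done.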
7669 def _EnsureSecondary(self, node):
7670 """Demote a node to secondary.
7673 self.feedback_fn("* switching node %s to secondary mode" % node)
7675 for dev in self.instance.disks:
7676 self.cfg.SetDiskID(dev, node)
7678 result = self.rpc.call_blockdev_close(node, self.instance.name,
7679 self.instance.disks)
7680 result.Raise("Cannot change disk to secondary on node %s" % node)
7682 def _GoStandalone(self):
7683 """Disconnect from the network.
7686 self.feedback_fn("* changing into standalone mode")
7687 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7688 self.instance.disks)
7689 for node, nres in result.items():
7690 nres.Raise("Cannot disconnect disks node %s" % node)
7692 def _GoReconnect(self, multimaster):
7693 """Reconnect to the network.
7699 msg = "single-master"
7700 self.feedback_fn("* changing disks into %s mode" % msg)
7701 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7702 self.instance.disks,
7703 self.instance.name, multimaster)
7704 for node, nres in result.items():
7705 nres.Raise("Cannot change disks config on node %s" % node)
7707 def _ExecCleanup(self):
7708 """Try to cleanup after a failed migration.
7710 The cleanup is done by:
7711 - check that the instance is running only on one node
7712 (and update the config if needed)
7713 - change disks on its secondary node to secondary
7714 - wait until disks are fully synchronized
7715 - disconnect from the network
7716 - change disks into single-master mode
7717 - wait again until disks are fully synchronized
7720 instance = self.instance
7721 target_node = self.target_node
7722 source_node = self.source_node
7724 # check running on only one node
7725 self.feedback_fn("* checking where the instance actually runs"
7726 " (if this hangs, the hypervisor might be in"
7728 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7729 for node, result in ins_l.items():
7730 result.Raise("Can't contact node %s" % node)
7732 runningon_source = instance.name in ins_l[source_node].payload
7733 runningon_target = instance.name in ins_l[target_node].payload
7735 if runningon_source and runningon_target:
7736 raise errors.OpExecError("Instance seems to be running on two nodes,"
7737 " or the hypervisor is confused; you will have"
7738 " to ensure manually that it runs only on one"
7739 " and restart this operation")
7741 if not (runningon_source or runningon_target):
7742 raise errors.OpExecError("Instance does not seem to be running at all;"
7743 " in this case it's safer to repair by"
7744 " running 'gnt-instance stop' to ensure disk"
7745 " shutdown, and then restarting it")
7747 if runningon_target:
7748 # the migration has actually succeeded, we need to update the config
7749 self.feedback_fn("* instance running on secondary node (%s),"
7750 " updating config" % target_node)
7751 instance.primary_node = target_node
7752 self.cfg.Update(instance, self.feedback_fn)
7753 demoted_node = source_node
7755 self.feedback_fn("* instance confirmed to be running on its"
7756 " primary node (%s)" % source_node)
7757 demoted_node = target_node
7759 if instance.disk_template in constants.DTS_INT_MIRROR:
7760 self._EnsureSecondary(demoted_node)
7762 self._WaitUntilSync()
7763 except errors.OpExecError:
7764 # we ignore errors here, since if the device is standalone, it
7765 # won't be able to sync
7767 self._GoStandalone()
7768 self._GoReconnect(False)
7769 self._WaitUntilSync()
7771 self.feedback_fn("* done")
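# The demote/reconnect/resync steps above are only needed for internally
# mirrored (DRBD-based) templates; externally mirrored templates have nothing
# to reconfigure at this point.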
7773 def _RevertDiskStatus(self):
7774 """Try to revert the disk status after a failed migration.
7777 target_node = self.target_node
7778 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7782 self._EnsureSecondary(target_node)
7783 self._GoStandalone()
7784 self._GoReconnect(False)
7785 self._WaitUntilSync()
7786 except errors.OpExecError, err:
7787 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7788 " please try to recover the instance manually;"
7789 " error '%s'" % str(err))
7791 def _AbortMigration(self):
7792 """Call the hypervisor code to abort a started migration.
7795 instance = self.instance
7796 target_node = self.target_node
7797 source_node = self.source_node
7798 migration_info = self.migration_info
7800 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
7804 abort_msg = abort_result.fail_msg
7806 logging.error("Aborting migration failed on target node %s: %s",
7807 target_node, abort_msg)
7808 # Don't raise an exception here, as we still have to try to revert the
7809 # disk status, even if this step failed.
7811 abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
7812 instance, False, self.live)
7813 abort_msg = abort_result.fail_msg
7815 logging.error("Aborting migration failed on source node %s: %s",
7816 source_node, abort_msg)
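# Failures while aborting are only logged, never raised: the caller still has
# to get a chance to revert the disk status via _RevertDiskStatus().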
7818 def _ExecMigration(self):
7819 """Migrate an instance.
7821 The migrate is done by:
7822 - change the disks into dual-master mode
7823 - wait until disks are fully synchronized again
7824 - migrate the instance
7825 - change disks on the new secondary node (the old primary) to secondary
7826 - wait until disks are fully synchronized
7827 - change disks into single-master mode
7830 instance = self.instance
7831 target_node = self.target_node
7832 source_node = self.source_node
7834 # Check for hypervisor version mismatch and warn the user.
7835 nodeinfo = self.rpc.call_node_info([source_node, target_node],
7836 None, [self.instance.hypervisor])
7837 for ninfo in nodeinfo.values():
7838 ninfo.Raise("Unable to retrieve node information from node '%s'" %
7840 (_, _, (src_info, )) = nodeinfo[source_node].payload
7841 (_, _, (dst_info, )) = nodeinfo[target_node].payload
7843 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
7844 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
7845 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
7846 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
7847 if src_version != dst_version:
7848 self.feedback_fn("* warning: hypervisor version mismatch between"
7849 " source (%s) and target (%s) node" %
7850 (src_version, dst_version))
7852 self.feedback_fn("* checking disk consistency between source and target")
7853 for dev in instance.disks:
7854 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7855 raise errors.OpExecError("Disk %s is degraded or not fully"
7856 " synchronized on target node,"
7857 " aborting migration" % dev.iv_name)
7859 # First get the migration information from the remote node
7860 result = self.rpc.call_migration_info(source_node, instance)
7861 msg = result.fail_msg
7863 log_err = ("Failed fetching source migration information from %s: %s" %
7865 logging.error(log_err)
7866 raise errors.OpExecError(log_err)
7868 self.migration_info = migration_info = result.payload
7870 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7871 # Then switch the disks to master/master mode
7872 self._EnsureSecondary(target_node)
7873 self._GoStandalone()
7874 self._GoReconnect(True)
7875 self._WaitUntilSync()
7877 self.feedback_fn("* preparing %s to accept the instance" % target_node)
7878 result = self.rpc.call_accept_instance(target_node,
7881 self.nodes_ip[target_node])
7883 msg = result.fail_msg
7885 logging.error("Instance pre-migration failed, trying to revert"
7886 " disk status: %s", msg)
7887 self.feedback_fn("Pre-migration failed, aborting")
7888 self._AbortMigration()
7889 self._RevertDiskStatus()
7890 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7891 (instance.name, msg))
7893 self.feedback_fn("* migrating instance to %s" % target_node)
7894 result = self.rpc.call_instance_migrate(source_node, instance,
7895 self.nodes_ip[target_node],
7897 msg = result.fail_msg
7899 logging.error("Instance migration failed, trying to revert"
7900 " disk status: %s", msg)
7901 self.feedback_fn("Migration failed, aborting")
7902 self._AbortMigration()
7903 self._RevertDiskStatus()
7904 raise errors.OpExecError("Could not migrate instance %s: %s" %
7905 (instance.name, msg))
7907 self.feedback_fn("* starting memory transfer")
7908 last_feedback = time.time()
7910 result = self.rpc.call_instance_get_migration_status(source_node,
7912 msg = result.fail_msg
7913 ms = result.payload # MigrationStatus instance
7914 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
7915 logging.error("Instance migration failed, trying to revert"
7916 " disk status: %s", msg)
7917 self.feedback_fn("Migration failed, aborting")
7918 self._AbortMigration()
7919 self._RevertDiskStatus()
7920 raise errors.OpExecError("Could not migrate instance %s: %s" %
7921 (instance.name, msg))
7923 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
7924 self.feedback_fn("* memory transfer complete")
7927 if (utils.TimeoutExpired(last_feedback,
7928 self._MIGRATION_FEEDBACK_INTERVAL) and
7929 ms.transferred_ram is not None):
7930 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
7931 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
7932 last_feedback = time.time()
7934 time.sleep(self._MIGRATION_POLL_INTERVAL)
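# The polling loop above queries the source node every
# _MIGRATION_POLL_INTERVAL (1 second) and emits a progress line at most every
# _MIGRATION_FEEDBACK_INTERVAL (10 seconds), computing the percentage from
# transferred_ram/total_ram when the hypervisor reports those values.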
7936 result = self.rpc.call_instance_finalize_migration_src(source_node,
7940 msg = result.fail_msg
7942 logging.error("Instance migration succeeded, but finalization failed"
7943 " on the source node: %s", msg)
7944 raise errors.OpExecError("Could not finalize instance migration: %s" %
7947 instance.primary_node = target_node
7949 # distribute new instance config to the other nodes
7950 self.cfg.Update(instance, self.feedback_fn)
7952 result = self.rpc.call_instance_finalize_migration_dst(target_node,
7956 msg = result.fail_msg
7958 logging.error("Instance migration succeeded, but finalization failed"
7959 " on the target node: %s", msg)
7960 raise errors.OpExecError("Could not finalize instance migration: %s" %
7963 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7964 self._EnsureSecondary(source_node)
7965 self._WaitUntilSync()
7966 self._GoStandalone()
7967 self._GoReconnect(False)
7968 self._WaitUntilSync()
7970 self.feedback_fn("* done")
7972 def _ExecFailover(self):
7973 """Failover an instance.
7975 The failover is done by shutting it down on its present node and
7976 starting it on the secondary.
7979 instance = self.instance
7980 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7982 source_node = instance.primary_node
7983 target_node = self.target_node
7985 if instance.admin_state == constants.ADMINST_UP:
7986 self.feedback_fn("* checking disk consistency between source and target")
7987 for dev in instance.disks:
7988 # for drbd, these are drbd over lvm
7989 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7990 if primary_node.offline:
7991 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7993 (primary_node.name, dev.iv_name, target_node))
7994 elif not self.ignore_consistency:
7995 raise errors.OpExecError("Disk %s is degraded on target node,"
7996 " aborting failover" % dev.iv_name)
7998 self.feedback_fn("* not checking disk consistency as instance is not"
8001 self.feedback_fn("* shutting down instance on source node")
8002 logging.info("Shutting down instance %s on node %s",
8003 instance.name, source_node)
8005 result = self.rpc.call_instance_shutdown(source_node, instance,
8006 self.shutdown_timeout)
8007 msg = result.fail_msg
8009 if self.ignore_consistency or primary_node.offline:
8010 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8011 " proceeding anyway; please make sure node"
8012 " %s is down; error details: %s",
8013 instance.name, source_node, source_node, msg)
8015 raise errors.OpExecError("Could not shutdown instance %s on"
8017 (instance.name, source_node, msg))
8019 self.feedback_fn("* deactivating the instance's disks on source node")
8020 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8021 raise errors.OpExecError("Can't shut down the instance's disks")
8023 instance.primary_node = target_node
8024 # distribute new instance config to the other nodes
8025 self.cfg.Update(instance, self.feedback_fn)
8027 # Only start the instance if it's marked as up
8028 if instance.admin_state == constants.ADMINST_UP:
8029 self.feedback_fn("* activating the instance's disks on target node %s" %
8031 logging.info("Starting instance %s on node %s",
8032 instance.name, target_node)
8034 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8035 ignore_secondaries=True)
8037 _ShutdownInstanceDisks(self.lu, instance)
8038 raise errors.OpExecError("Can't activate the instance's disks")
8040 self.feedback_fn("* starting the instance on the target node %s" %
8042 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8044 msg = result.fail_msg
8046 _ShutdownInstanceDisks(self.lu, instance)
8047 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8048 (instance.name, target_node, msg))
8050 def Exec(self, feedback_fn):
8051 """Perform the migration.
8054 self.feedback_fn = feedback_fn
8055 self.source_node = self.instance.primary_node
8057 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8058 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8059 self.target_node = self.instance.secondary_nodes[0]
8060 # Otherwise self.target_node has been populated either
8061 # directly, or through an iallocator.
8063 self.all_nodes = [self.source_node, self.target_node]
8064 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8065 in self.cfg.GetMultiNodeInfo(self.all_nodes))
8068 feedback_fn("Failover instance %s" % self.instance.name)
8069 self._ExecFailover()
8071 feedback_fn("Migrating instance %s" % self.instance.name)
8074 return self._ExecCleanup()
8076 return self._ExecMigration()
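# Summary of the dispatch above: failover requests run _ExecFailover(), while
# migration requests run _ExecCleanup() when invoked with cleanup=True (to
# recover from a previously failed migration) and _ExecMigration() otherwise.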
8079 def _CreateBlockDev(lu, node, instance, device, force_create,
8081 """Create a tree of block devices on a given node.
8083 If this device type has to be created on secondaries, create it and
8086 If not, just recurse to children keeping the same 'force' value.
8088 @param lu: the lu on whose behalf we execute
8089 @param node: the node on which to create the device
8090 @type instance: L{objects.Instance}
8091 @param instance: the instance which owns the device
8092 @type device: L{objects.Disk}
8093 @param device: the device to create
8094 @type force_create: boolean
8095 @param force_create: whether to force creation of this device; this
8096 will be changed to True whenever we find a device which has the
8097 CreateOnSecondary() attribute
8098 @param info: the extra 'metadata' we should attach to the device
8099 (this will be represented as a LVM tag)
8100 @type force_open: boolean
8101 @param force_open: this parameter will be passed to the
8102 L{backend.BlockdevCreate} function where it specifies
8103 whether we run on primary or not, and it affects both
8104 the child assembly and the device's own Open() execution
8107 if device.CreateOnSecondary():
8111 for child in device.children:
8112 _CreateBlockDev(lu, node, instance, child, force_create,
8115 if not force_create:
8118 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8121 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8122 """Create a single block device on a given node.
8124 This will not recurse over children of the device, so they must be
8127 @param lu: the lu on whose behalf we execute
8128 @param node: the node on which to create the device
8129 @type instance: L{objects.Instance}
8130 @param instance: the instance which owns the device
8131 @type device: L{objects.Disk}
8132 @param device: the device to create
8133 @param info: the extra 'metadata' we should attach to the device
8134 (this will be represented as a LVM tag)
8135 @type force_open: boolean
8136 @param force_open: this parameter will be passed to the
8137 L{backend.BlockdevCreate} function where it specifies
8138 whether we run on primary or not, and it affects both
8139 the child assembly and the device's own Open() execution
8142 lu.cfg.SetDiskID(device, node)
8143 result = lu.rpc.call_blockdev_create(node, device, device.size,
8144 instance.name, force_open, info)
8145 result.Raise("Can't create block device %s on"
8146 " node %s for instance %s" % (device, node, instance.name))
8147 if device.physical_id is None:
8148 device.physical_id = result.payload
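# The physical_id is filled in from the RPC payload only when it is not
# already known; the SetDiskID() call above ensures the node-specific device
# IDs are in place before the create RPC is issued.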
8151 def _GenerateUniqueNames(lu, exts):
8152 """Generate a suitable LV name.
8154 This will generate a logical volume name for the given instance.
8159 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8160 results.append("%s%s" % (new_id, val))
8164 def _ComputeLDParams(disk_template, disk_params):
8165 """Computes Logical Disk parameters from Disk Template parameters.
8167 @type disk_template: string
8168 @param disk_template: disk template, one of L{constants.DISK_TEMPLATES}
8169 @type disk_params: dict
8170 @param disk_params: disk template parameters; dict(template_name -> parameters)
8172 @return: a list of dicts, one for each node of the disk hierarchy. Each dict
8173 contains the LD parameters of the node. The tree is flattened in-order.
8176 if disk_template not in constants.DISK_TEMPLATES:
8177 raise errors.ProgrammerError("Unknown disk template %s" % disk_template)
8180 dt_params = disk_params[disk_template]
8181 if disk_template == constants.DT_DRBD8:
8183 constants.LDP_RESYNC_RATE: dt_params[constants.DRBD_RESYNC_RATE],
8184 constants.LDP_BARRIERS: dt_params[constants.DRBD_DISK_BARRIERS],
8185 constants.LDP_NO_META_FLUSH: dt_params[constants.DRBD_META_BARRIERS],
8186 constants.LDP_DEFAULT_METAVG: dt_params[constants.DRBD_DEFAULT_METAVG],
8187 constants.LDP_DISK_CUSTOM: dt_params[constants.DRBD_DISK_CUSTOM],
8188 constants.LDP_NET_CUSTOM: dt_params[constants.DRBD_NET_CUSTOM],
8192 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_DRBD8],
8195 result.append(drbd_params)
8199 constants.LDP_STRIPES: dt_params[constants.DRBD_DATA_STRIPES],
8202 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8204 result.append(data_params)
8208 constants.LDP_STRIPES: dt_params[constants.DRBD_META_STRIPES],
8211 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8213 result.append(meta_params)
8215 elif (disk_template == constants.DT_FILE or
8216 disk_template == constants.DT_SHARED_FILE):
8217 result.append(constants.DISK_LD_DEFAULTS[constants.LD_FILE])
8219 elif disk_template == constants.DT_PLAIN:
8221 constants.LDP_STRIPES: dt_params[constants.LV_STRIPES],
8224 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8226 result.append(params)
8228 elif disk_template == constants.DT_BLOCK:
8229 result.append(constants.DISK_LD_DEFAULTS[constants.LD_BLOCKDEV])
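# The returned list is the disk tree flattened in-order, as documented above:
# for DT_DRBD8 it is [drbd_params, data_lv_params, meta_lv_params], while the
# single-device templates yield exactly one entry.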
8234 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8235 iv_name, p_minor, s_minor, drbd_params, data_params,
8237 """Generate a drbd8 device complete with its children.
8240 assert len(vgnames) == len(names) == 2
8241 port = lu.cfg.AllocatePort()
8242 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8244 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8245 logical_id=(vgnames[0], names[0]),
8247 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8248 logical_id=(vgnames[1], names[1]),
8250 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8251 logical_id=(primary, secondary, port,
8254 children=[dev_data, dev_meta],
8255 iv_name=iv_name, params=drbd_params)
8259 def _GenerateDiskTemplate(lu, template_name,
8260 instance_name, primary_node,
8261 secondary_nodes, disk_info,
8262 file_storage_dir, file_driver,
8263 base_index, feedback_fn, disk_params):
8264 """Generate the entire disk layout for a given template type.
8267 #TODO: compute space requirements
8269 vgname = lu.cfg.GetVGName()
8270 disk_count = len(disk_info)
8272 ld_params = _ComputeLDParams(template_name, disk_params)
8273 if template_name == constants.DT_DISKLESS:
8275 elif template_name == constants.DT_PLAIN:
8276 if len(secondary_nodes) != 0:
8277 raise errors.ProgrammerError("Wrong template configuration")
8279 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8280 for i in range(disk_count)])
8281 for idx, disk in enumerate(disk_info):
8282 disk_index = idx + base_index
8283 vg = disk.get(constants.IDISK_VG, vgname)
8284 feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
8285 disk_dev = objects.Disk(dev_type=constants.LD_LV,
8286 size=disk[constants.IDISK_SIZE],
8287 logical_id=(vg, names[idx]),
8288 iv_name="disk/%d" % disk_index,
8289 mode=disk[constants.IDISK_MODE],
8290 params=ld_params[0])
8291 disks.append(disk_dev)
8292 elif template_name == constants.DT_DRBD8:
8293 drbd_params, data_params, meta_params = ld_params
8294 if len(secondary_nodes) != 1:
8295 raise errors.ProgrammerError("Wrong template configuration")
8296 remote_node = secondary_nodes[0]
8297 minors = lu.cfg.AllocateDRBDMinor(
8298 [primary_node, remote_node] * len(disk_info), instance_name)
8301 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8302 for i in range(disk_count)]):
8303 names.append(lv_prefix + "_data")
8304 names.append(lv_prefix + "_meta")
8305 for idx, disk in enumerate(disk_info):
8306 disk_index = idx + base_index
8307 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8308 data_vg = disk.get(constants.IDISK_VG, vgname)
8309 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8310 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8311 disk[constants.IDISK_SIZE],
8313 names[idx * 2:idx * 2 + 2],
8314 "disk/%d" % disk_index,
8315 minors[idx * 2], minors[idx * 2 + 1],
8316 drbd_params, data_params, meta_params)
8317 disk_dev.mode = disk[constants.IDISK_MODE]
8318 disks.append(disk_dev)
8319 elif template_name == constants.DT_FILE:
8320 if len(secondary_nodes) != 0:
8321 raise errors.ProgrammerError("Wrong template configuration")
8323 opcodes.RequireFileStorage()
8325 for idx, disk in enumerate(disk_info):
8326 disk_index = idx + base_index
8327 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8328 size=disk[constants.IDISK_SIZE],
8329 iv_name="disk/%d" % disk_index,
8330 logical_id=(file_driver,
8331 "%s/disk%d" % (file_storage_dir,
8333 mode=disk[constants.IDISK_MODE],
8334 params=ld_params[0])
8335 disks.append(disk_dev)
8336 elif template_name == constants.DT_SHARED_FILE:
8337 if len(secondary_nodes) != 0:
8338 raise errors.ProgrammerError("Wrong template configuration")
8340 opcodes.RequireSharedFileStorage()
8342 for idx, disk in enumerate(disk_info):
8343 disk_index = idx + base_index
8344 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8345 size=disk[constants.IDISK_SIZE],
8346 iv_name="disk/%d" % disk_index,
8347 logical_id=(file_driver,
8348 "%s/disk%d" % (file_storage_dir,
8350 mode=disk[constants.IDISK_MODE],
8351 params=ld_params[0])
8352 disks.append(disk_dev)
8353 elif template_name == constants.DT_BLOCK:
8354 if len(secondary_nodes) != 0:
8355 raise errors.ProgrammerError("Wrong template configuration")
8357 for idx, disk in enumerate(disk_info):
8358 disk_index = idx + base_index
8359 disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
8360 size=disk[constants.IDISK_SIZE],
8361 logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
8362 disk[constants.IDISK_ADOPT]),
8363 iv_name="disk/%d" % disk_index,
8364 mode=disk[constants.IDISK_MODE],
8365 params=ld_params[0])
8366 disks.append(disk_dev)
8369 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
8373 def _GetInstanceInfoText(instance):
8374 """Compute that text that should be added to the disk's metadata.
8377 return "originstname+%s" % instance.name
8380 def _CalcEta(time_taken, written, total_size):
8381 """Calculates the ETA based on size written and total size.
8383 @param time_taken: The time taken so far
8384 @param written: amount written so far
8385 @param total_size: The total size of data to be written
8386 @return: The remaining time in seconds
8389 avg_time = time_taken / float(written)
8390 return (total_size - written) * avg_time
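# Worked example: if 1024 MiB out of 4096 MiB were written in 60 seconds,
# avg_time is 60/1024 s per MiB and the ETA is (4096 - 1024) * 60/1024 = 180s.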
8393 def _WipeDisks(lu, instance):
8394 """Wipes instance disks.
8396 @type lu: L{LogicalUnit}
8397 @param lu: the logical unit on whose behalf we execute
8398 @type instance: L{objects.Instance}
8399 @param instance: the instance whose disks we should create
8400 @return: the success of the wipe
8403 node = instance.primary_node
8405 for device in instance.disks:
8406 lu.cfg.SetDiskID(device, node)
8408 logging.info("Pause sync of instance %s disks", instance.name)
8409 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
8411 for idx, success in enumerate(result.payload):
8413 logging.warn("pause-sync of instance %s for disks %d failed",
8417 for idx, device in enumerate(instance.disks):
8418 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
8419 # at most MAX_WIPE_CHUNK
8420 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8421 constants.MIN_WIPE_CHUNK_PERCENT)
8422 # we _must_ make this an int, otherwise rounding errors will
8424 wipe_chunk_size = int(wipe_chunk_size)
8426 lu.LogInfo("* Wiping disk %d", idx)
8427 logging.info("Wiping disk %d for instance %s, node %s using"
8428 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8433 start_time = time.time()
8435 while offset < size:
8436 wipe_size = min(wipe_chunk_size, size - offset)
8437 logging.debug("Wiping disk %d, offset %s, chunk %s",
8438 idx, offset, wipe_size)
8439 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
8440 result.Raise("Could not wipe disk %d at offset %d for size %d" %
8441 (idx, offset, wipe_size))
8444 if now - last_output >= 60:
8445 eta = _CalcEta(now - start_time, offset, size)
8446 lu.LogInfo(" - done: %.1f%% ETA: %s" %
8447 (offset / float(size) * 100, utils.FormatSeconds(eta)))
8450 logging.info("Resume sync of instance %s disks", instance.name)
8452 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
8454 for idx, success in enumerate(result.payload):
8456 lu.LogWarning("Resume sync of disk %d failed, please have a"
8457 " look at the status and troubleshoot the issue", idx)
8458 logging.warn("resume-sync of instance %s for disks %d failed",
8462 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8463 """Create all disks for an instance.
8465 This abstracts away some work from AddInstance.
8467 @type lu: L{LogicalUnit}
8468 @param lu: the logical unit on whose behalf we execute
8469 @type instance: L{objects.Instance}
8470 @param instance: the instance whose disks we should create
8472 @param to_skip: list of indices to skip
8473 @type target_node: string
8474 @param target_node: if passed, overrides the target node for creation
8476 @return: the success of the creation
8479 info = _GetInstanceInfoText(instance)
8480 if target_node is None:
8481 pnode = instance.primary_node
8482 all_nodes = instance.all_nodes
8487 if instance.disk_template in constants.DTS_FILEBASED:
8488 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8489 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8491 result.Raise("Failed to create directory '%s' on"
8492 " node %s" % (file_storage_dir, pnode))
8494 # Note: this needs to be kept in sync with adding of disks in
8495 # LUInstanceSetParams
8496 for idx, device in enumerate(instance.disks):
8497 if to_skip and idx in to_skip:
8499 logging.info("Creating volume %s for instance %s",
8500 device.iv_name, instance.name)
8502 for node in all_nodes:
8503 f_create = node == pnode
8504 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
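# f_create doubles as both force_create and force_open: devices are forced
# (and opened) only on the primary node, while on secondaries creation only
# propagates where CreateOnSecondary() requires it (see _CreateBlockDev).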
8507 def _RemoveDisks(lu, instance, target_node=None):
8508 """Remove all disks for an instance.
8510 This abstracts away some work from `AddInstance()` and
8511 `RemoveInstance()`. Note that in case some of the devices couldn't
8512 be removed, the removal will continue with the other ones (compare
8513 with `_CreateDisks()`).
8515 @type lu: L{LogicalUnit}
8516 @param lu: the logical unit on whose behalf we execute
8517 @type instance: L{objects.Instance}
8518 @param instance: the instance whose disks we should remove
8519 @type target_node: string
8520 @param target_node: used to override the node on which to remove the disks
8522 @return: the success of the removal
8525 logging.info("Removing block devices for instance %s", instance.name)
8528 for device in instance.disks:
8530 edata = [(target_node, device)]
8532 edata = device.ComputeNodeTree(instance.primary_node)
8533 for node, disk in edata:
8534 lu.cfg.SetDiskID(disk, node)
8535 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
8537 lu.LogWarning("Could not remove block device %s on node %s,"
8538 " continuing anyway: %s", device.iv_name, node, msg)
8541 # if this is a DRBD disk, return its port to the pool
8542 if device.dev_type in constants.LDS_DRBD:
8543 tcp_port = device.logical_id[2]
8544 lu.cfg.AddTcpUdpPort(tcp_port)
8546 if instance.disk_template == constants.DT_FILE:
8547 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8551 tgt = instance.primary_node
8552 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
8554 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
8555 file_storage_dir, instance.primary_node, result.fail_msg)
8561 def _ComputeDiskSizePerVG(disk_template, disks):
8562 """Compute disk size requirements in the volume group
8565 def _compute(disks, payload):
8566 """Universal algorithm.
8571 vgs[disk[constants.IDISK_VG]] = \
8572 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
8576 # Required free disk space as a function of disk and swap space
8578 constants.DT_DISKLESS: {},
8579 constants.DT_PLAIN: _compute(disks, 0),
8580 # 128 MB are added for drbd metadata for each disk
8581 constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
8582 constants.DT_FILE: {},
8583 constants.DT_SHARED_FILE: {},
8586 if disk_template not in req_size_dict:
8587 raise errors.ProgrammerError("Disk template '%s' size requirement"
8588 " is unknown" % disk_template)
8590 return req_size_dict[disk_template]
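# Worked example: two 10240 MiB DRBD8 disks in the (illustrative) volume
# group "xenvg" require 2 * (10240 + DRBD_META_SIZE) MiB there, i.e.
# 20736 MiB once the 128 MiB of DRBD metadata per disk is added.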
8593 def _ComputeDiskSize(disk_template, disks):
8594 """Compute disk size requirements in the volume group
8597 # Required free disk space as a function of disk and swap space
8599 constants.DT_DISKLESS: None,
8600 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
8601 # 128 MB are added for drbd metadata for each disk
8603 sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
8604 constants.DT_FILE: None,
8605 constants.DT_SHARED_FILE: 0,
8606 constants.DT_BLOCK: 0,
8609 if disk_template not in req_size_dict:
8610 raise errors.ProgrammerError("Disk template '%s' size requirement"
8611 " is unknown" % disk_template)
8613 return req_size_dict[disk_template]
8616 def _FilterVmNodes(lu, nodenames):
8617 """Filters out non-vm_capable nodes from a list.
8619 @type lu: L{LogicalUnit}
8620 @param lu: the logical unit for which we check
8621 @type nodenames: list
8622 @param nodenames: the list of nodes on which we should check
8624 @return: the list of vm-capable nodes
8627 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
8628 return [name for name in nodenames if name not in vm_nodes]
8631 def _CheckHVParams(lu, nodenames, hvname, hvparams):
8632 """Hypervisor parameter validation.
8634 This function abstract the hypervisor parameter validation to be
8635 used in both instance create and instance modify.
8637 @type lu: L{LogicalUnit}
8638 @param lu: the logical unit for which we check
8639 @type nodenames: list
8640 @param nodenames: the list of nodes on which we should check
8641 @type hvname: string
8642 @param hvname: the name of the hypervisor we should use
8643 @type hvparams: dict
8644 @param hvparams: the parameters which we need to check
8645 @raise errors.OpPrereqError: if the parameters are not valid
8648 nodenames = _FilterVmNodes(lu, nodenames)
8650 cluster = lu.cfg.GetClusterInfo()
8651 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
8653 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
8654 for node in nodenames:
8658 info.Raise("Hypervisor parameter validation failed on node %s" % node)
8661 def _CheckOSParams(lu, required, nodenames, osname, osparams):
8662 """OS parameters validation.
8664 @type lu: L{LogicalUnit}
8665 @param lu: the logical unit for which we check
8666 @type required: boolean
8667 @param required: whether the validation should fail if the OS is not
8669 @type nodenames: list
8670 @param nodenames: the list of nodes on which we should check
8671 @type osname: string
8672 @param osname: the name of the OS we should use
8673 @type osparams: dict
8674 @param osparams: the parameters which we need to check
8675 @raise errors.OpPrereqError: if the parameters are not valid
8678 nodenames = _FilterVmNodes(lu, nodenames)
8679 result = lu.rpc.call_os_validate(nodenames, required, osname,
8680 [constants.OS_VALIDATE_PARAMETERS],
8682 for node, nres in result.items():
8683 # we don't check for offline cases since this should be run only
8684 # against the master node and/or an instance's nodes
8685 nres.Raise("OS Parameters validation failed on node %s" % node)
8686 if not nres.payload:
8687 lu.LogInfo("OS %s not found on node %s, validation skipped",
8691 class LUInstanceCreate(LogicalUnit):
8692 """Create an instance.
8695 HPATH = "instance-add"
8696 HTYPE = constants.HTYPE_INSTANCE
8699 def CheckArguments(self):
8703 # do not require name_check to ease forward/backward compatibility
8705 if self.op.no_install and self.op.start:
8706 self.LogInfo("No-installation mode selected, disabling startup")
8707 self.op.start = False
8708 # validate/normalize the instance name
8709 self.op.instance_name = \
8710 netutils.Hostname.GetNormalizedName(self.op.instance_name)
8712 if self.op.ip_check and not self.op.name_check:
8713 # TODO: make the ip check more flexible and not depend on the name check
8714 raise errors.OpPrereqError("Cannot do IP address check without a name"
8715 " check", errors.ECODE_INVAL)
8717 # check nics' parameter names
8718 for nic in self.op.nics:
8719 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8721 # check disks. parameter names and consistent adopt/no-adopt strategy
8722 has_adopt = has_no_adopt = False
8723 for disk in self.op.disks:
8724 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8725 if constants.IDISK_ADOPT in disk:
8729 if has_adopt and has_no_adopt:
8730 raise errors.OpPrereqError("Either all disks are adopted or none is",
8733 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8734 raise errors.OpPrereqError("Disk adoption is not supported for the"
8735 " '%s' disk template" %
8736 self.op.disk_template,
8738 if self.op.iallocator is not None:
8739 raise errors.OpPrereqError("Disk adoption not allowed with an"
8740 " iallocator script", errors.ECODE_INVAL)
8741 if self.op.mode == constants.INSTANCE_IMPORT:
8742 raise errors.OpPrereqError("Disk adoption not allowed for"
8743 " instance import", errors.ECODE_INVAL)
8745 if self.op.disk_template in constants.DTS_MUST_ADOPT:
8746 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8747 " but no 'adopt' parameter given" %
8748 self.op.disk_template,
8751 self.adopt_disks = has_adopt
8753 # instance name verification
8754 if self.op.name_check:
8755 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8756 self.op.instance_name = self.hostname1.name
8757 # used in CheckPrereq for ip ping check
8758 self.check_ip = self.hostname1.ip
8760 self.check_ip = None
8762 # file storage checks
8763 if (self.op.file_driver and
8764 not self.op.file_driver in constants.FILE_DRIVER):
8765 raise errors.OpPrereqError("Invalid file driver name '%s'" %
8766 self.op.file_driver, errors.ECODE_INVAL)
8768 if self.op.disk_template == constants.DT_FILE:
8769 opcodes.RequireFileStorage()
8770 elif self.op.disk_template == constants.DT_SHARED_FILE:
8771 opcodes.RequireSharedFileStorage()
8773 ### Node/iallocator related checks
8774 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8776 if self.op.pnode is not None:
8777 if self.op.disk_template in constants.DTS_INT_MIRROR:
8778 if self.op.snode is None:
8779 raise errors.OpPrereqError("The networked disk templates need"
8780 " a mirror node", errors.ECODE_INVAL)
8782 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8784 self.op.snode = None
8786 self._cds = _GetClusterDomainSecret()
8788 if self.op.mode == constants.INSTANCE_IMPORT:
8789 # On import force_variant must be True, because if we forced it at
8790 # initial install, our only chance when importing it back is that it
8792 self.op.force_variant = True
8794 if self.op.no_install:
8795 self.LogInfo("No-installation mode has no effect during import")
8797 elif self.op.mode == constants.INSTANCE_CREATE:
8798 if self.op.os_type is None:
8799 raise errors.OpPrereqError("No guest OS specified",
8801 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
8802 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
8803 " installation" % self.op.os_type,
8805 if self.op.disk_template is None:
8806 raise errors.OpPrereqError("No disk template specified",
8809 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8810 # Check handshake to ensure both clusters have the same domain secret
8811 src_handshake = self.op.source_handshake
8812 if not src_handshake:
8813 raise errors.OpPrereqError("Missing source handshake",
8816 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
8819 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
8822 # Load and check source CA
8823 self.source_x509_ca_pem = self.op.source_x509_ca
8824 if not self.source_x509_ca_pem:
8825 raise errors.OpPrereqError("Missing source X509 CA",
8829 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
8831 except OpenSSL.crypto.Error, err:
8832 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
8833 (err, ), errors.ECODE_INVAL)
8835 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
8836 if errcode is not None:
8837 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
8840 self.source_x509_ca = cert
8842 src_instance_name = self.op.source_instance_name
8843 if not src_instance_name:
8844 raise errors.OpPrereqError("Missing source instance name",
8847 self.source_instance_name = \
8848 netutils.GetHostname(name=src_instance_name).name
8851 raise errors.OpPrereqError("Invalid instance creation mode %r" %
8852 self.op.mode, errors.ECODE_INVAL)
8854 def ExpandNames(self):
8855 """ExpandNames for CreateInstance.
8857 Figure out the right locks for instance creation.
8860 self.needed_locks = {}
8862 instance_name = self.op.instance_name
8863 # this is just a preventive check, but someone might still add this
8864 # instance in the meantime, and creation will fail at lock-add time
8865 if instance_name in self.cfg.GetInstanceList():
8866 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8867 instance_name, errors.ECODE_EXISTS)
8869 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
8871 if self.op.iallocator:
8872 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
8873 # specifying a group on instance creation and then selecting nodes from
8875 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8876 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
8878 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8879 nodelist = [self.op.pnode]
8880 if self.op.snode is not None:
8881 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8882 nodelist.append(self.op.snode)
8883 self.needed_locks[locking.LEVEL_NODE] = nodelist
8884 # Lock resources of instance's primary and secondary nodes (copy to
8885 # prevent accidental modification)
8886 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
8888 # in case of import lock the source node too
8889 if self.op.mode == constants.INSTANCE_IMPORT:
8890 src_node = self.op.src_node
8891 src_path = self.op.src_path
8893 if src_path is None:
8894 self.op.src_path = src_path = self.op.instance_name
8896 if src_node is None:
8897 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8898 self.op.src_node = None
8899 if os.path.isabs(src_path):
8900 raise errors.OpPrereqError("Importing an instance from a path"
8901 " requires a source node option",
8904 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8905 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8906 self.needed_locks[locking.LEVEL_NODE].append(src_node)
8907 if not os.path.isabs(src_path):
8908 self.op.src_path = src_path = \
8909 utils.PathJoin(constants.EXPORT_DIR, src_path)
8911 def _RunAllocator(self):
8912 """Run the allocator based on input opcode.
8915 nics = [n.ToDict() for n in self.nics]
8916 ial = IAllocator(self.cfg, self.rpc,
8917 mode=constants.IALLOCATOR_MODE_ALLOC,
8918 name=self.op.instance_name,
8919 disk_template=self.op.disk_template,
8922 vcpus=self.be_full[constants.BE_VCPUS],
8923 memory=self.be_full[constants.BE_MAXMEM],
8926 hypervisor=self.op.hypervisor,
8929 ial.Run(self.op.iallocator)
8932 raise errors.OpPrereqError("Can't compute nodes using"
8933 " iallocator '%s': %s" %
8934 (self.op.iallocator, ial.info),
8936 if len(ial.result) != ial.required_nodes:
8937 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8938 " of nodes (%s), required %s" %
8939 (self.op.iallocator, len(ial.result),
8940 ial.required_nodes), errors.ECODE_FAULT)
8941 self.op.pnode = ial.result[0]
8942 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8943 self.op.instance_name, self.op.iallocator,
8944 utils.CommaJoin(ial.result))
8945 if ial.required_nodes == 2:
8946 self.op.snode = ial.result[1]
8948 def BuildHooksEnv(self):
8951 This runs on master, primary and secondary nodes of the instance.
8955 "ADD_MODE": self.op.mode,
8957 if self.op.mode == constants.INSTANCE_IMPORT:
8958 env["SRC_NODE"] = self.op.src_node
8959 env["SRC_PATH"] = self.op.src_path
8960 env["SRC_IMAGES"] = self.src_images
8962 env.update(_BuildInstanceHookEnv(
8963 name=self.op.instance_name,
8964 primary_node=self.op.pnode,
8965 secondary_nodes=self.secondaries,
8966 status=self.op.start,
8967 os_type=self.op.os_type,
8968 minmem=self.be_full[constants.BE_MINMEM],
8969 maxmem=self.be_full[constants.BE_MAXMEM],
8970 vcpus=self.be_full[constants.BE_VCPUS],
8971 nics=_NICListToTuple(self, self.nics),
8972 disk_template=self.op.disk_template,
8973 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8974 for d in self.disks],
8977 hypervisor_name=self.op.hypervisor,
8983 def BuildHooksNodes(self):
8984 """Build hooks nodes.
8987 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8990 def _ReadExportInfo(self):
8991 """Reads the export information from disk.
8993 It will override the opcode source node and path with the actual
8994 information, if these two were not specified before.
8996 @return: the export information
8999 assert self.op.mode == constants.INSTANCE_IMPORT
9001 src_node = self.op.src_node
9002 src_path = self.op.src_path
9004 if src_node is None:
9005 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9006 exp_list = self.rpc.call_export_list(locked_nodes)
9008 for node in exp_list:
9009 if exp_list[node].fail_msg:
9011 if src_path in exp_list[node].payload:
9013 self.op.src_node = src_node = node
9014 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
9018 raise errors.OpPrereqError("No export found for relative path %s" %
9019 src_path, errors.ECODE_INVAL)
9021 _CheckNodeOnline(self, src_node)
9022 result = self.rpc.call_export_info(src_node, src_path)
9023 result.Raise("No export or invalid export found in dir %s" % src_path)
9025 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9026 if not export_info.has_section(constants.INISECT_EXP):
9027 raise errors.ProgrammerError("Corrupted export config",
9028 errors.ECODE_ENVIRON)
9030 ei_version = export_info.get(constants.INISECT_EXP, "version")
9031 if (int(ei_version) != constants.EXPORT_VERSION):
9032 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9033 (ei_version, constants.EXPORT_VERSION),
9034 errors.ECODE_ENVIRON)
9037 def _ReadExportParams(self, einfo):
9038 """Use export parameters as defaults.
9040 In case the opcode doesn't specify (as in override) some instance
9041 parameters, then try to use them from the export information, if
9045 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9047 if self.op.disk_template is None:
9048 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9049 self.op.disk_template = einfo.get(constants.INISECT_INS,
9051 if self.op.disk_template not in constants.DISK_TEMPLATES:
9052 raise errors.OpPrereqError("Disk template specified in configuration"
9053 " file is not one of the allowed values:"
9054 " %s" % " ".join(constants.DISK_TEMPLATES))
9056 raise errors.OpPrereqError("No disk template specified and the export"
9057 " is missing the disk_template information",
9060 if not self.op.disks:
9062 # TODO: import the disk iv_name too
9063 for idx in range(constants.MAX_DISKS):
9064 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9065 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9066 disks.append({constants.IDISK_SIZE: disk_sz})
9067 self.op.disks = disks
9068 if not disks and self.op.disk_template != constants.DT_DISKLESS:
9069 raise errors.OpPrereqError("No disk info specified and the export"
9070 " is missing the disk information",
9073 if not self.op.nics:
9075 for idx in range(constants.MAX_NICS):
9076 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9078 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9079 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9086 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9087 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9089 if (self.op.hypervisor is None and
9090 einfo.has_option(constants.INISECT_INS, "hypervisor")):
9091 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9093 if einfo.has_section(constants.INISECT_HYP):
9094 # use the export parameters but do not override the ones
9095 # specified by the user
9096 for name, value in einfo.items(constants.INISECT_HYP):
9097 if name not in self.op.hvparams:
9098 self.op.hvparams[name] = value
9100 if einfo.has_section(constants.INISECT_BEP):
9101 # use the parameters, without overriding
9102 for name, value in einfo.items(constants.INISECT_BEP):
9103 if name not in self.op.beparams:
9104 self.op.beparams[name] = value
9105 # Compatibility for the old "memory" be param
9106 if name == constants.BE_MEMORY:
9107 if constants.BE_MAXMEM not in self.op.beparams:
9108 self.op.beparams[constants.BE_MAXMEM] = value
9109 if constants.BE_MINMEM not in self.op.beparams:
9110 self.op.beparams[constants.BE_MINMEM] = value
9112 # try to read the parameters old style, from the main section
9113 for name in constants.BES_PARAMETERS:
9114 if (name not in self.op.beparams and
9115 einfo.has_option(constants.INISECT_INS, name)):
9116 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9118 if einfo.has_section(constants.INISECT_OSP):
9119 # use the parameters, without overriding
9120 for name, value in einfo.items(constants.INISECT_OSP):
9121 if name not in self.op.osparams:
9122 self.op.osparams[name] = value
9124 def _RevertToDefaults(self, cluster):
9125 """Revert the instance parameters to the default values.
9129 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9130 for name in self.op.hvparams.keys():
9131 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9132 del self.op.hvparams[name]
9134 be_defs = cluster.SimpleFillBE({})
9135 for name in self.op.beparams.keys():
9136 if name in be_defs and be_defs[name] == self.op.beparams[name]:
9137 del self.op.beparams[name]
9139 nic_defs = cluster.SimpleFillNIC({})
9140 for nic in self.op.nics:
9141 for name in constants.NICS_PARAMETERS:
9142 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9145 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9146 for name in self.op.osparams.keys():
9147 if name in os_defs and os_defs[name] == self.op.osparams[name]:
9148 del self.op.osparams[name]
9150 def _CalculateFileStorageDir(self):
9151 """Calculate final instance file storage dir.
9154 # file storage dir calculation/check
9155 self.instance_file_storage_dir = None
9156 if self.op.disk_template in constants.DTS_FILEBASED:
9157 # build the full file storage dir path
9160 if self.op.disk_template == constants.DT_SHARED_FILE:
9161 get_fsd_fn = self.cfg.GetSharedFileStorageDir
9163 get_fsd_fn = self.cfg.GetFileStorageDir
9165 cfg_storagedir = get_fsd_fn()
9166 if not cfg_storagedir:
9167 raise errors.OpPrereqError("Cluster file storage dir not defined")
9168 joinargs.append(cfg_storagedir)
9170 if self.op.file_storage_dir is not None:
9171 joinargs.append(self.op.file_storage_dir)
9173 joinargs.append(self.op.instance_name)
9175 # pylint: disable=W0142
9176 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
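# The resulting path is
#   <cluster (shared) file storage dir>[/<file_storage_dir from the opcode>]/<instance name>
# with the components joined in exactly that order.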
9178 def CheckPrereq(self):
9179 """Check prerequisites.
9182 self._CalculateFileStorageDir()
9184 if self.op.mode == constants.INSTANCE_IMPORT:
9185 export_info = self._ReadExportInfo()
9186 self._ReadExportParams(export_info)
9188 if (not self.cfg.GetVGName() and
9189 self.op.disk_template not in constants.DTS_NOT_LVM):
9190 raise errors.OpPrereqError("Cluster does not support lvm-based"
9191 " instances", errors.ECODE_STATE)
9193 if (self.op.hypervisor is None or
9194 self.op.hypervisor == constants.VALUE_AUTO):
9195 self.op.hypervisor = self.cfg.GetHypervisorType()
9197 cluster = self.cfg.GetClusterInfo()
9198 enabled_hvs = cluster.enabled_hypervisors
9199 if self.op.hypervisor not in enabled_hvs:
9200 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9201 " cluster (%s)" % (self.op.hypervisor,
9202 ",".join(enabled_hvs)),
9205 # Check tag validity
9206 for tag in self.op.tags:
9207 objects.TaggableObject.ValidateTag(tag)
9209 # check hypervisor parameter syntax (locally)
9210 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9211 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9213 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9214 hv_type.CheckParameterSyntax(filled_hvp)
9215 self.hv_full = filled_hvp
9216 # check that we don't specify global parameters on an instance
9217 _CheckGlobalHvParams(self.op.hvparams)
9219 # fill and remember the beparams dict
9220 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9221 for param, value in self.op.beparams.iteritems():
9222 if value == constants.VALUE_AUTO:
9223 self.op.beparams[param] = default_beparams[param]
9224 objects.UpgradeBeParams(self.op.beparams)
9225 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9226 self.be_full = cluster.SimpleFillBE(self.op.beparams)
9228 # build os parameters
9229 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9231 # now that hvp/bep are in final format, let's reset to defaults,
9233 if self.op.identify_defaults:
9234 self._RevertToDefaults(cluster)
9238 for idx, nic in enumerate(self.op.nics):
9239 nic_mode_req = nic.get(constants.INIC_MODE, None)
9240 nic_mode = nic_mode_req
9241 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9242 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9244 # in routed mode, for the first nic, the default ip is 'auto'
9245 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9246 default_ip_mode = constants.VALUE_AUTO
9248 default_ip_mode = constants.VALUE_NONE
9250 # ip validity checks
9251 ip = nic.get(constants.INIC_IP, default_ip_mode)
9252 if ip is None or ip.lower() == constants.VALUE_NONE:
9254 elif ip.lower() == constants.VALUE_AUTO:
9255 if not self.op.name_check:
9256 raise errors.OpPrereqError("IP address set to auto but name checks"
9257 " have been skipped",
9259 nic_ip = self.hostname1.ip
9261 if not netutils.IPAddress.IsValid(ip):
9262 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9266 # TODO: check the ip address for uniqueness
9267 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9268 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9271 # MAC address verification
9272 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9273 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9274 mac = utils.NormalizeAndValidateMac(mac)
9277 self.cfg.ReserveMAC(mac, self.proc.GetECId())
9278 except errors.ReservationError:
9279 raise errors.OpPrereqError("MAC address %s already in use"
9280 " in cluster" % mac,
9281 errors.ECODE_NOTUNIQUE)
9283 # Build nic parameters
9284 link = nic.get(constants.INIC_LINK, None)
9285 if link == constants.VALUE_AUTO:
9286 link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9289 nicparams[constants.NIC_MODE] = nic_mode
9291 nicparams[constants.NIC_LINK] = link
9293 check_params = cluster.SimpleFillNIC(nicparams)
9294 objects.NIC.CheckParameterSyntax(check_params)
9295 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9297 # disk checks/pre-build
9298 default_vg = self.cfg.GetVGName()
9300 for disk in self.op.disks:
9301 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9302 if mode not in constants.DISK_ACCESS_SET:
9303 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9304 mode, errors.ECODE_INVAL)
9305 size = disk.get(constants.IDISK_SIZE, None)
9307 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9310 except (TypeError, ValueError):
9311 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9314 data_vg = disk.get(constants.IDISK_VG, default_vg)
9316 constants.IDISK_SIZE: size,
9317 constants.IDISK_MODE: mode,
9318 constants.IDISK_VG: data_vg,
9320 if constants.IDISK_METAVG in disk:
9321 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9322 if constants.IDISK_ADOPT in disk:
9323 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9324 self.disks.append(new_disk)
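# Illustrative example (comments only): the normalized per-disk dict built by
# the loop above. Values are made up; the size is an integer (MiB, as used
# elsewhere in Ganeti).
#
#   new_disk = {constants.IDISK_SIZE: 10240,
#               constants.IDISK_MODE: constants.DISK_RDWR,
#               constants.IDISK_VG: "xenvg"}
#
# IDISK_METAVG and IDISK_ADOPT are copied through only when present in the
# input disk definition.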
9326 if self.op.mode == constants.INSTANCE_IMPORT:
9328 for idx in range(len(self.disks)):
9329 option = "disk%d_dump" % idx
9330 if export_info.has_option(constants.INISECT_INS, option):
9331 # FIXME: are the old os-es, disk sizes, etc. useful?
9332 export_name = export_info.get(constants.INISECT_INS, option)
9333 image = utils.PathJoin(self.op.src_path, export_name)
9334 disk_images.append(image)
9336 disk_images.append(False)
9338 self.src_images = disk_images
9340 old_name = export_info.get(constants.INISECT_INS, "name")
9341 if self.op.instance_name == old_name:
9342 for idx, nic in enumerate(self.nics):
9343 if nic.mac == constants.VALUE_AUTO:
9344 nic_mac_ini = "nic%d_mac" % idx
9345 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9347 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9349 # ip ping checks (we use the same ip that was resolved in ExpandNames)
9350 if self.op.ip_check:
9351 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9352 raise errors.OpPrereqError("IP %s of instance %s already in use" %
9353 (self.check_ip, self.op.instance_name),
9354 errors.ECODE_NOTUNIQUE)
9356 #### mac address generation
9357 # By generating the MAC address here, both the allocator and the hooks get
9358 # the real final MAC address rather than the 'auto' or 'generate' value.
9359 # There is a race condition between the generation and the instance object
9360 # creation, which means that we know the mac is valid now, but we're not
9361 # sure it will be when we actually add the instance. If things go bad
9362 # adding the instance will abort because of a duplicate mac, and the
9363 # creation job will fail.
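# Sketch of the window described above (comments only; the calls shown are
# the ones actually used in this LU):
#
#   nic.mac = self.cfg.GenerateMAC(ec_id)    # unique among known MACs *now*
#   ...                                      # other jobs may add/reserve MACs
#   self.cfg.AddInstance(iobj, ec_id)        # adding the instance re-checks
#                                            # uniqueness; a duplicate MAC
#                                            # makes the creation job fail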
9364 for nic in self.nics:
9365 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9366 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9370 if self.op.iallocator is not None:
9371 self._RunAllocator()
9373 # Release all unneeded node locks
9374 _ReleaseLocks(self, locking.LEVEL_NODE,
9375 keep=filter(None, [self.op.pnode, self.op.snode,
9378 #### node related checks
9380 # check primary node
9381 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9382 assert self.pnode is not None, \
9383 "Cannot retrieve locked node %s" % self.op.pnode
9385 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9386 pnode.name, errors.ECODE_STATE)
9388 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9389 pnode.name, errors.ECODE_STATE)
9390 if not pnode.vm_capable:
9391 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9392 " '%s'" % pnode.name, errors.ECODE_STATE)
9394 self.secondaries = []
9396 # mirror node verification
9397 if self.op.disk_template in constants.DTS_INT_MIRROR:
9398 if self.op.snode == pnode.name:
9399 raise errors.OpPrereqError("The secondary node cannot be the"
9400 " primary node", errors.ECODE_INVAL)
9401 _CheckNodeOnline(self, self.op.snode)
9402 _CheckNodeNotDrained(self, self.op.snode)
9403 _CheckNodeVmCapable(self, self.op.snode)
9404 self.secondaries.append(self.op.snode)
9406 snode = self.cfg.GetNodeInfo(self.op.snode)
9407 if pnode.group != snode.group:
9408 self.LogWarning("The primary and secondary nodes are in two"
9409 " different node groups; the disk parameters"
9410 " from the first disk's node group will be"
9413 nodenames = [pnode.name] + self.secondaries
9415 # disk parameters (not customizable at instance or node level)
9416 # just use the primary node parameters, ignoring the secondary.
9417 self.diskparams = self.cfg.GetNodeGroup(pnode.group).diskparams
9419 if not self.adopt_disks:
9420 # Check lv size requirements, if not adopting
9421 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9422 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9424 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9425 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9426 disk[constants.IDISK_ADOPT])
9427 for disk in self.disks])
9428 if len(all_lvs) != len(self.disks):
9429 raise errors.OpPrereqError("Duplicate volume names given for adoption",
9431 for lv_name in all_lvs:
9433 # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
9434 # to ReserveLV use the same syntax
9435 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9436 except errors.ReservationError:
9437 raise errors.OpPrereqError("LV named %s used by another instance" %
9438 lv_name, errors.ECODE_NOTUNIQUE)
9440 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9441 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9443 node_lvs = self.rpc.call_lv_list([pnode.name],
9444 vg_names.payload.keys())[pnode.name]
9445 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9446 node_lvs = node_lvs.payload
9448 delta = all_lvs.difference(node_lvs.keys())
9450 raise errors.OpPrereqError("Missing logical volume(s): %s" %
9451 utils.CommaJoin(delta),
9453 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9455 raise errors.OpPrereqError("Online logical volumes found, cannot"
9456 " adopt: %s" % utils.CommaJoin(online_lvs),
9458 # update the size of disk based on what is found
9459 for dsk in self.disks:
9460 dsk[constants.IDISK_SIZE] = \
9461 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9462 dsk[constants.IDISK_ADOPT])][0]))
9464 elif self.op.disk_template == constants.DT_BLOCK:
9465 # Normalize and de-duplicate device paths
9466 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9467 for disk in self.disks])
9468 if len(all_disks) != len(self.disks):
9469 raise errors.OpPrereqError("Duplicate disk names given for adoption",
9471 baddisks = [d for d in all_disks
9472 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9474 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9475 " cannot be adopted" %
9476 (", ".join(baddisks),
9477 constants.ADOPTABLE_BLOCKDEV_ROOT),
9480 node_disks = self.rpc.call_bdev_sizes([pnode.name],
9481 list(all_disks))[pnode.name]
9482 node_disks.Raise("Cannot get block device information from node %s" %
9484 node_disks = node_disks.payload
9485 delta = all_disks.difference(node_disks.keys())
9487 raise errors.OpPrereqError("Missing block device(s): %s" %
9488 utils.CommaJoin(delta),
9490 for dsk in self.disks:
9491 dsk[constants.IDISK_SIZE] = \
9492 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
9494 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
9496 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
9497 # check OS parameters (remotely)
9498 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
9500 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
9502 # memory check on primary node
9503 #TODO(dynmem): use MINMEM for checking
9505 _CheckNodeFreeMemory(self, self.pnode.name,
9506 "creating instance %s" % self.op.instance_name,
9507 self.be_full[constants.BE_MAXMEM],
9510 self.dry_run_result = list(nodenames)
9512 def Exec(self, feedback_fn):
9513 """Create and add the instance to the cluster.
9516 instance = self.op.instance_name
9517 pnode_name = self.pnode.name
9519 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
9520 self.owned_locks(locking.LEVEL_NODE)), \
9521 "Node locks differ from node resource locks"
9523 ht_kind = self.op.hypervisor
9524 if ht_kind in constants.HTS_REQ_PORT:
9525 network_port = self.cfg.AllocatePort()
9529 disks = _GenerateDiskTemplate(self,
9530 self.op.disk_template,
9531 instance, pnode_name,
9534 self.instance_file_storage_dir,
9535 self.op.file_driver,
9540 iobj = objects.Instance(name=instance, os=self.op.os_type,
9541 primary_node=pnode_name,
9542 nics=self.nics, disks=disks,
9543 disk_template=self.op.disk_template,
9544 admin_state=constants.ADMINST_DOWN,
9545 network_port=network_port,
9546 beparams=self.op.beparams,
9547 hvparams=self.op.hvparams,
9548 hypervisor=self.op.hypervisor,
9549 osparams=self.op.osparams,
9553 for tag in self.op.tags:
9556 if self.adopt_disks:
9557 if self.op.disk_template == constants.DT_PLAIN:
9558 # rename LVs to the newly-generated names; we need to construct
9559 # 'fake' LV disks with the old data, plus the new unique_id
9560 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
9562 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
9563 rename_to.append(t_dsk.logical_id)
9564 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
9565 self.cfg.SetDiskID(t_dsk, pnode_name)
9566 result = self.rpc.call_blockdev_rename(pnode_name,
9567 zip(tmp_disks, rename_to))
9568 result.Raise("Failed to rename adopted LVs")
9570 feedback_fn("* creating instance disks...")
9572 _CreateDisks(self, iobj)
9573 except errors.OpExecError:
9574 self.LogWarning("Device creation failed, reverting...")
9576 _RemoveDisks(self, iobj)
9578 self.cfg.ReleaseDRBDMinors(instance)
9581 feedback_fn("adding instance %s to cluster config" % instance)
9583 self.cfg.AddInstance(iobj, self.proc.GetECId())
9585 # Declare that we don't want to remove the instance lock anymore, as we've
9586 # added the instance to the config
9587 del self.remove_locks[locking.LEVEL_INSTANCE]
9589 if self.op.mode == constants.INSTANCE_IMPORT:
9590 # Release unused nodes
9591 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
9594 _ReleaseLocks(self, locking.LEVEL_NODE)
9597 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
9598 feedback_fn("* wiping instance disks...")
9600 _WipeDisks(self, iobj)
9601 except errors.OpExecError, err:
9602 logging.exception("Wiping disks failed")
9603 self.LogWarning("Wiping instance disks failed (%s)", err)
9607 # Something is already wrong with the disks, don't do anything else
9609 elif self.op.wait_for_sync:
9610 disk_abort = not _WaitForSync(self, iobj)
9611 elif iobj.disk_template in constants.DTS_INT_MIRROR:
9612 # make sure the disks are not degraded (still sync-ing is ok)
9613 feedback_fn("* checking mirrors status")
9614 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
9619 _RemoveDisks(self, iobj)
9620 self.cfg.RemoveInstance(iobj.name)
9621 # Make sure the instance lock gets removed
9622 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
9623 raise errors.OpExecError("There are some degraded disks for"
9626 # Release all node resource locks
9627 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
9629 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
9630 if self.op.mode == constants.INSTANCE_CREATE:
9631 if not self.op.no_install:
9632 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
9633 not self.op.wait_for_sync)
9635 feedback_fn("* pausing disk sync to install instance OS")
9636 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9638 for idx, success in enumerate(result.payload):
9640 logging.warn("pause-sync of instance %s for disk %d failed",
9643 feedback_fn("* running the instance OS create scripts...")
9644 # FIXME: pass debug option from opcode to backend
9646 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
9647 self.op.debug_level)
9649 feedback_fn("* resuming disk sync")
9650 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9652 for idx, success in enumerate(result.payload):
9654 logging.warn("resume-sync of instance %s for disk %d failed",
9657 os_add_result.Raise("Could not add os for instance %s"
9658 " on node %s" % (instance, pnode_name))
9660 elif self.op.mode == constants.INSTANCE_IMPORT:
9661 feedback_fn("* running the instance OS import scripts...")
9665 for idx, image in enumerate(self.src_images):
9669 # FIXME: pass debug option from opcode to backend
9670 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
9671 constants.IEIO_FILE, (image, ),
9672 constants.IEIO_SCRIPT,
9673 (iobj.disks[idx], idx),
9675 transfers.append(dt)
9678 masterd.instance.TransferInstanceData(self, feedback_fn,
9679 self.op.src_node, pnode_name,
9680 self.pnode.secondary_ip,
9682 if not compat.all(import_result):
9683 self.LogWarning("Some disks for instance %s on node %s were not"
9684 " imported successfully" % (instance, pnode_name))
9686 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9687 feedback_fn("* preparing remote import...")
9688 # The source cluster will stop the instance before attempting to make a
9689 # connection. In some cases stopping an instance can take a long time,
9690 # hence the shutdown timeout is added to the connection timeout.
9691 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
9692 self.op.source_shutdown_timeout)
9693 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9695 assert iobj.primary_node == self.pnode.name
9697 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
9698 self.source_x509_ca,
9699 self._cds, timeouts)
9700 if not compat.all(disk_results):
9701 # TODO: Should the instance still be started, even if some disks
9702 # failed to import (valid for local imports, too)?
9703 self.LogWarning("Some disks for instance %s on node %s were not"
9704 " imported successfully" % (instance, pnode_name))
9706 # Run rename script on newly imported instance
9707 assert iobj.name == instance
9708 feedback_fn("Running rename script for %s" % instance)
9709 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
9710 self.source_instance_name,
9711 self.op.debug_level)
9713 self.LogWarning("Failed to run rename script for %s on node"
9714 " %s: %s" % (instance, pnode_name, result.fail_msg))
9717 # also checked in the prereq part
9718 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
9721 assert not self.owned_locks(locking.LEVEL_NODE_RES)
9724 iobj.admin_state = constants.ADMINST_UP
9725 self.cfg.Update(iobj, feedback_fn)
9726 logging.info("Starting instance %s on node %s", instance, pnode_name)
9727 feedback_fn("* starting instance...")
9728 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
9730 result.Raise("Could not start instance")
9732 return list(iobj.all_nodes)
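# Example (illustrative, comments only): a minimal opcode a client might
# submit to exercise LUInstanceCreate. The exact OpInstanceCreate parameter
# set is version-dependent; all values here are made up.
#
#   op = opcodes.OpInstanceCreate(instance_name="inst1.example.com",
#                                 mode=constants.INSTANCE_CREATE,
#                                 disk_template=constants.DT_DRBD8,
#                                 disks=[{constants.IDISK_SIZE: 10240}],
#                                 nics=[{}],
#                                 os_type="debian-image",
#                                 pnode="node1.example.com",
#                                 snode="node2.example.com",
#                                 wait_for_sync=True)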
9735 class LUInstanceConsole(NoHooksLU):
9736 """Connect to an instance's console.
9738 This is somewhat special in that it returns the command line that
9739 you need to run on the master node in order to connect to the
9745 def ExpandNames(self):
9746 self.share_locks = _ShareAll()
9747 self._ExpandAndLockInstance()
9749 def CheckPrereq(self):
9750 """Check prerequisites.
9752 This checks that the instance is in the cluster.
9755 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9756 assert self.instance is not None, \
9757 "Cannot retrieve locked instance %s" % self.op.instance_name
9758 _CheckNodeOnline(self, self.instance.primary_node)
9760 def Exec(self, feedback_fn):
9761 """Connect to the console of an instance
9764 instance = self.instance
9765 node = instance.primary_node
9767 node_insts = self.rpc.call_instance_list([node],
9768 [instance.hypervisor])[node]
9769 node_insts.Raise("Can't get node information from %s" % node)
9771 if instance.name not in node_insts.payload:
9772 if instance.admin_state == constants.ADMINST_UP:
9773 state = constants.INSTST_ERRORDOWN
9774 elif instance.admin_state == constants.ADMINST_DOWN:
9775 state = constants.INSTST_ADMINDOWN
9777 state = constants.INSTST_ADMINOFFLINE
9778 raise errors.OpExecError("Instance %s is not running (state %s)" %
9779 (instance.name, state))
9781 logging.debug("Connecting to console of %s on %s", instance.name, node)
9783 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
9786 def _GetInstanceConsole(cluster, instance):
9787 """Returns console information for an instance.
9789 @type cluster: L{objects.Cluster}
9790 @type instance: L{objects.Instance}
9794 hyper = hypervisor.GetHypervisor(instance.hypervisor)
9795 # beparams and hvparams are passed separately, to avoid editing the
9796 # instance and then saving the defaults in the instance itself.
9797 hvparams = cluster.FillHV(instance)
9798 beparams = cluster.FillBE(instance)
9799 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
9801 assert console.instance == instance.name
9802 assert console.Validate()
9804 return console.ToDict()
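# Example (illustrative, comments only): how a client might use the console
# description returned above. OpInstanceConsole/cli.SubmitOpCode follow the
# usual client conventions; the "command" key is an assumption that applies
# to SSH-style consoles only.
#
#   data = cli.SubmitOpCode(opcodes.OpInstanceConsole(instance_name=name))
#   argv = data.get("command")   # command line to run on the master node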
9807 class LUInstanceReplaceDisks(LogicalUnit):
9808 """Replace the disks of an instance.
9811 HPATH = "mirrors-replace"
9812 HTYPE = constants.HTYPE_INSTANCE
9815 def CheckArguments(self):
9816 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
9819 def ExpandNames(self):
9820 self._ExpandAndLockInstance()
9822 assert locking.LEVEL_NODE not in self.needed_locks
9823 assert locking.LEVEL_NODE_RES not in self.needed_locks
9824 assert locking.LEVEL_NODEGROUP not in self.needed_locks
9826 assert self.op.iallocator is None or self.op.remote_node is None, \
9827 "Conflicting options"
9829 if self.op.remote_node is not None:
9830 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9832 # Warning: do not remove the locking of the new secondary here
9833 # unless DRBD8.AddChildren is changed to work in parallel;
9834 # currently it doesn't since parallel invocations of
9835 # FindUnusedMinor will conflict
9836 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
9837 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
9839 self.needed_locks[locking.LEVEL_NODE] = []
9840 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9842 if self.op.iallocator is not None:
9843 # iallocator will select a new node in the same group
9844 self.needed_locks[locking.LEVEL_NODEGROUP] = []
9846 self.needed_locks[locking.LEVEL_NODE_RES] = []
9848 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
9849 self.op.iallocator, self.op.remote_node,
9850 self.op.disks, False, self.op.early_release)
9852 self.tasklets = [self.replacer]
9854 def DeclareLocks(self, level):
9855 if level == locking.LEVEL_NODEGROUP:
9856 assert self.op.remote_node is None
9857 assert self.op.iallocator is not None
9858 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
9860 self.share_locks[locking.LEVEL_NODEGROUP] = 1
9861 # Lock all groups used by instance optimistically; this requires going
9862 # via the node before it's locked, requiring verification later on
9863 self.needed_locks[locking.LEVEL_NODEGROUP] = \
9864 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9866 elif level == locking.LEVEL_NODE:
9867 if self.op.iallocator is not None:
9868 assert self.op.remote_node is None
9869 assert not self.needed_locks[locking.LEVEL_NODE]
9871 # Lock member nodes of all locked groups
9872 self.needed_locks[locking.LEVEL_NODE] = [node_name
9873 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
9874 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
9876 self._LockInstancesNodes()
9877 elif level == locking.LEVEL_NODE_RES:
9879 self.needed_locks[locking.LEVEL_NODE_RES] = \
9880 self.needed_locks[locking.LEVEL_NODE]
9882 def BuildHooksEnv(self):
9885 This runs on the master, the primary and all the secondaries.
9888 instance = self.replacer.instance
9890 "MODE": self.op.mode,
9891 "NEW_SECONDARY": self.op.remote_node,
9892 "OLD_SECONDARY": instance.secondary_nodes[0],
9894 env.update(_BuildInstanceHookEnvByObject(self, instance))
9897 def BuildHooksNodes(self):
9898 """Build hooks nodes.
9901 instance = self.replacer.instance
9903 self.cfg.GetMasterNode(),
9904 instance.primary_node,
9906 if self.op.remote_node is not None:
9907 nl.append(self.op.remote_node)
9910 def CheckPrereq(self):
9911 """Check prerequisites.
9914 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
9915 self.op.iallocator is None)
9917 # Verify if node group locks are still correct
9918 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
9920 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
9922 return LogicalUnit.CheckPrereq(self)
9925 class TLReplaceDisks(Tasklet):
9926 """Replaces disks for an instance.
9928 Note: Locking is not within the scope of this class.
9931 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9932 disks, delay_iallocator, early_release):
9933 """Initializes this class.
9936 Tasklet.__init__(self, lu)
9939 self.instance_name = instance_name
9941 self.iallocator_name = iallocator_name
9942 self.remote_node = remote_node
9944 self.delay_iallocator = delay_iallocator
9945 self.early_release = early_release
9948 self.instance = None
9949 self.new_node = None
9950 self.target_node = None
9951 self.other_node = None
9952 self.remote_node_info = None
9953 self.node_secondary_ip = None
9956 def CheckArguments(mode, remote_node, iallocator):
9957 """Helper function for users of this class.
9960 # check for valid parameter combination
9961 if mode == constants.REPLACE_DISK_CHG:
9962 if remote_node is None and iallocator is None:
9963 raise errors.OpPrereqError("When changing the secondary either an"
9964 " iallocator script must be used or the"
9965 " new node given", errors.ECODE_INVAL)
9967 if remote_node is not None and iallocator is not None:
9968 raise errors.OpPrereqError("Give either the iallocator or the new"
9969 " secondary, not both", errors.ECODE_INVAL)
9971 elif remote_node is not None or iallocator is not None:
9972 # Not replacing the secondary
9973 raise errors.OpPrereqError("The iallocator and new node options can"
9974 " only be used when changing the"
9975 " secondary node", errors.ECODE_INVAL)
9978 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9979 """Compute a new secondary node using an IAllocator.
9982 ial = IAllocator(lu.cfg, lu.rpc,
9983 mode=constants.IALLOCATOR_MODE_RELOC,
9985 relocate_from=list(relocate_from))
9987 ial.Run(iallocator_name)
9990 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9991 " %s" % (iallocator_name, ial.info),
9994 if len(ial.result) != ial.required_nodes:
9995 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9996 " of nodes (%s), required %s" %
9998 len(ial.result), ial.required_nodes),
10001 remote_node_name = ial.result[0]
10003 lu.LogInfo("Selected new secondary for instance '%s': %s",
10004 instance_name, remote_node_name)
10006 return remote_node_name
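# Illustrative sketch (comments only) of the relocation request issued above:
# IALLOCATOR_MODE_RELOC names the instance plus the node(s) to move away
# from, and a successful run returns exactly ial.required_nodes node names,
# here the single new secondary. Example values:
#
#   relocate_from = ["node2.example.com"]   # current secondary
#   ial.result    = ["node3.example.com"]   # chosen replacement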
10008 def _FindFaultyDisks(self, node_name):
10009 """Wrapper for L{_FindFaultyInstanceDisks}.
10012 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10015 def _CheckDisksActivated(self, instance):
10016 """Checks if the instance disks are activated.
10018 @param instance: The instance to check disks
10019 @return: True if they are activated, False otherwise
10022 nodes = instance.all_nodes
10024 for idx, dev in enumerate(instance.disks):
10026 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10027 self.cfg.SetDiskID(dev, node)
10029 result = self.rpc.call_blockdev_find(node, dev)
10033 elif result.fail_msg or not result.payload:
10038 def CheckPrereq(self):
10039 """Check prerequisites.
10041 This checks that the instance is in the cluster.
10044 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10045 assert instance is not None, \
10046 "Cannot retrieve locked instance %s" % self.instance_name
10048 if instance.disk_template != constants.DT_DRBD8:
10049 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10050 " instances", errors.ECODE_INVAL)
10052 if len(instance.secondary_nodes) != 1:
10053 raise errors.OpPrereqError("The instance has a strange layout,"
10054 " expected one secondary but found %d" %
10055 len(instance.secondary_nodes),
10056 errors.ECODE_FAULT)
10058 if not self.delay_iallocator:
10059 self._CheckPrereq2()
10061 def _CheckPrereq2(self):
10062 """Check prerequisites, second part.
10064 This function should always be part of CheckPrereq. It was separated and is
10065 now called from Exec because during node evacuation iallocator was only
10066 called with an unmodified cluster model, not taking planned changes into
10070 instance = self.instance
10071 secondary_node = instance.secondary_nodes[0]
10073 if self.iallocator_name is None:
10074 remote_node = self.remote_node
10076 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10077 instance.name, instance.secondary_nodes)
10079 if remote_node is None:
10080 self.remote_node_info = None
10082 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10083 "Remote node '%s' is not locked" % remote_node
10085 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10086 assert self.remote_node_info is not None, \
10087 "Cannot retrieve locked node %s" % remote_node
10089 if remote_node == self.instance.primary_node:
10090 raise errors.OpPrereqError("The specified node is the primary node of"
10091 " the instance", errors.ECODE_INVAL)
10093 if remote_node == secondary_node:
10094 raise errors.OpPrereqError("The specified node is already the"
10095 " secondary node of the instance",
10096 errors.ECODE_INVAL)
10098 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10099 constants.REPLACE_DISK_CHG):
10100 raise errors.OpPrereqError("Cannot specify disks to be replaced",
10101 errors.ECODE_INVAL)
10103 if self.mode == constants.REPLACE_DISK_AUTO:
10104 if not self._CheckDisksActivated(instance):
10105 raise errors.OpPrereqError("Please run activate-disks on instance %s"
10106 " first" % self.instance_name,
10107 errors.ECODE_STATE)
10108 faulty_primary = self._FindFaultyDisks(instance.primary_node)
10109 faulty_secondary = self._FindFaultyDisks(secondary_node)
10111 if faulty_primary and faulty_secondary:
10112 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10113 " one node and can not be repaired"
10114 " automatically" % self.instance_name,
10115 errors.ECODE_STATE)
10118 self.disks = faulty_primary
10119 self.target_node = instance.primary_node
10120 self.other_node = secondary_node
10121 check_nodes = [self.target_node, self.other_node]
10122 elif faulty_secondary:
10123 self.disks = faulty_secondary
10124 self.target_node = secondary_node
10125 self.other_node = instance.primary_node
10126 check_nodes = [self.target_node, self.other_node]
10132 # Non-automatic modes
10133 if self.mode == constants.REPLACE_DISK_PRI:
10134 self.target_node = instance.primary_node
10135 self.other_node = secondary_node
10136 check_nodes = [self.target_node, self.other_node]
10138 elif self.mode == constants.REPLACE_DISK_SEC:
10139 self.target_node = secondary_node
10140 self.other_node = instance.primary_node
10141 check_nodes = [self.target_node, self.other_node]
10143 elif self.mode == constants.REPLACE_DISK_CHG:
10144 self.new_node = remote_node
10145 self.other_node = instance.primary_node
10146 self.target_node = secondary_node
10147 check_nodes = [self.new_node, self.other_node]
10149 _CheckNodeNotDrained(self.lu, remote_node)
10150 _CheckNodeVmCapable(self.lu, remote_node)
10152 old_node_info = self.cfg.GetNodeInfo(secondary_node)
10153 assert old_node_info is not None
10154 if old_node_info.offline and not self.early_release:
10155 # doesn't make sense to delay the release
10156 self.early_release = True
10157 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10158 " early-release mode", secondary_node)
10161 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10164 # If not specified all disks should be replaced
10166 self.disks = range(len(self.instance.disks))
10168 # TODO: compute disk parameters
10169 primary_node_info = self.cfg.GetNodeInfo(instance.primary_node)
10170 secondary_node_info = self.cfg.GetNodeInfo(secondary_node)
10171 if primary_node_info.group != secondary_node_info.group:
10172 self.lu.LogInfo("The instance primary and secondary nodes are in two"
10173 " different node groups; the disk parameters of the"
10174 " primary node's group will be applied.")
10176 self.diskparams = self.cfg.GetNodeGroup(primary_node_info.group).diskparams
10178 for node in check_nodes:
10179 _CheckNodeOnline(self.lu, node)
10181 touched_nodes = frozenset(node_name for node_name in [self.new_node,
10184 if node_name is not None)
10186 # Release unneeded node and node resource locks
10187 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10188 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10190 # Release any owned node group
10191 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10192 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10194 # Check whether disks are valid
10195 for disk_idx in self.disks:
10196 instance.FindDisk(disk_idx)
10198 # Get secondary node IP addresses
10199 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10200 in self.cfg.GetMultiNodeInfo(touched_nodes))
10202 def Exec(self, feedback_fn):
10203 """Execute disk replacement.
10205 This dispatches the disk replacement to the appropriate handler.
10208 if self.delay_iallocator:
10209 self._CheckPrereq2()
10212 # Verify owned locks before starting operation
10213 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10214 assert set(owned_nodes) == set(self.node_secondary_ip), \
10215 ("Incorrect node locks, owning %s, expected %s" %
10216 (owned_nodes, self.node_secondary_ip.keys()))
10217 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10218 self.lu.owned_locks(locking.LEVEL_NODE_RES))
10220 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10221 assert list(owned_instances) == [self.instance_name], \
10222 "Instance '%s' not locked" % self.instance_name
10224 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10225 "Should not own any node group lock at this point"
10228 feedback_fn("No disks need replacement")
10231 feedback_fn("Replacing disk(s) %s for %s" %
10232 (utils.CommaJoin(self.disks), self.instance.name))
10234 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10236 # Activate the instance disks if we're replacing them on a down instance
10238 _StartInstanceDisks(self.lu, self.instance, True)
10241 # Should we replace the secondary node?
10242 if self.new_node is not None:
10243 fn = self._ExecDrbd8Secondary
10245 fn = self._ExecDrbd8DiskOnly
10247 result = fn(feedback_fn)
10249 # Deactivate the instance disks if we're replacing them on a
10252 _SafeShutdownInstanceDisks(self.lu, self.instance)
10254 assert not self.lu.owned_locks(locking.LEVEL_NODE)
10257 # Verify owned locks
10258 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10259 nodes = frozenset(self.node_secondary_ip)
10260 assert ((self.early_release and not owned_nodes) or
10261 (not self.early_release and not (set(owned_nodes) - nodes))), \
10262 ("Not owning the correct locks, early_release=%s, owned=%r,"
10263 " nodes=%r" % (self.early_release, owned_nodes, nodes))
10267 def _CheckVolumeGroup(self, nodes):
10268 self.lu.LogInfo("Checking volume groups")
10270 vgname = self.cfg.GetVGName()
10272 # Make sure volume group exists on all involved nodes
10273 results = self.rpc.call_vg_list(nodes)
10275 raise errors.OpExecError("Can't list volume groups on the nodes")
10278 res = results[node]
10279 res.Raise("Error checking node %s" % node)
10280 if vgname not in res.payload:
10281 raise errors.OpExecError("Volume group '%s' not found on node %s" %
10284 def _CheckDisksExistence(self, nodes):
10285 # Check disk existence
10286 for idx, dev in enumerate(self.instance.disks):
10287 if idx not in self.disks:
10291 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10292 self.cfg.SetDiskID(dev, node)
10294 result = self.rpc.call_blockdev_find(node, dev)
10296 msg = result.fail_msg
10297 if msg or not result.payload:
10299 msg = "disk not found"
10300 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10303 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10304 for idx, dev in enumerate(self.instance.disks):
10305 if idx not in self.disks:
10308 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10311 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
10313 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10314 " replace disks for instance %s" %
10315 (node_name, self.instance.name))
10317 def _CreateNewStorage(self, node_name):
10318 """Create new storage on the primary or secondary node.
10320 This is only used for same-node replaces, not for changing the
10321 secondary node, hence we don't want to modify the existing disk.
10326 for idx, dev in enumerate(self.instance.disks):
10327 if idx not in self.disks:
10330 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10332 self.cfg.SetDiskID(dev, node_name)
10334 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10335 names = _GenerateUniqueNames(self.lu, lv_names)
10337 _, data_p, meta_p = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
10339 vg_data = dev.children[0].logical_id[0]
10340 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10341 logical_id=(vg_data, names[0]), params=data_p)
10342 vg_meta = dev.children[1].logical_id[0]
10343 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10344 logical_id=(vg_meta, names[1]), params=meta_p)
10346 new_lvs = [lv_data, lv_meta]
10347 old_lvs = [child.Copy() for child in dev.children]
10348 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10350 # we pass force_create=True to force the LVM creation
10351 for new_lv in new_lvs:
10352 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
10353 _GetInstanceInfoText(self.instance), False)
10357 def _CheckDevices(self, node_name, iv_names):
10358 for name, (dev, _, _) in iv_names.iteritems():
10359 self.cfg.SetDiskID(dev, node_name)
10361 result = self.rpc.call_blockdev_find(node_name, dev)
10363 msg = result.fail_msg
10364 if msg or not result.payload:
10366 msg = "disk not found"
10367 raise errors.OpExecError("Can't find DRBD device %s: %s" %
10370 if result.payload.is_degraded:
10371 raise errors.OpExecError("DRBD device %s is degraded!" % name)
10373 def _RemoveOldStorage(self, node_name, iv_names):
10374 for name, (_, old_lvs, _) in iv_names.iteritems():
10375 self.lu.LogInfo("Remove logical volumes for %s" % name)
10378 self.cfg.SetDiskID(lv, node_name)
10380 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10382 self.lu.LogWarning("Can't remove old LV: %s" % msg,
10383 hint="remove unused LVs manually")
10385 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10386 """Replace a disk on the primary or secondary for DRBD 8.
10388 The algorithm for replace is quite complicated:
10390 1. for each disk to be replaced:
10392 1. create new LVs on the target node with unique names
10393 1. detach old LVs from the drbd device
10394 1. rename old LVs to name_replaced.<time_t>
10395 1. rename new LVs to old LVs
10396 1. attach the new LVs (with the old names now) to the drbd device
10398 1. wait for sync across all devices
10400 1. for each modified disk:
10402 1. remove old LVs (which have the name name_replaced.<time_t>)
10404 Failures are not very well handled.
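# Condensed sketch (comments only) of the per-disk swap performed below; the
# temporary suffix matches ren_fn further down:
#
#   1. blockdev_removechildren: detach the old LVs from the DRBD device
#   2. blockdev_rename:         <old LV>  -> <old LV>_replaced-<time_t>
#   3. blockdev_rename:         <new LV>  -> <old LV's original name>
#   4. blockdev_addchildren:    attach the renamed new LVs to the DRBD device
#
# The renamed old LVs are deleted later by _RemoveOldStorage.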
10409 # Step: check device activation
10410 self.lu.LogStep(1, steps_total, "Check device existence")
10411 self._CheckDisksExistence([self.other_node, self.target_node])
10412 self._CheckVolumeGroup([self.target_node, self.other_node])
10414 # Step: check other node consistency
10415 self.lu.LogStep(2, steps_total, "Check peer consistency")
10416 self._CheckDisksConsistency(self.other_node,
10417 self.other_node == self.instance.primary_node,
10420 # Step: create new storage
10421 self.lu.LogStep(3, steps_total, "Allocate new storage")
10422 iv_names = self._CreateNewStorage(self.target_node)
10424 # Step: for each lv, detach+rename*2+attach
10425 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10426 for dev, old_lvs, new_lvs in iv_names.itervalues():
10427 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10429 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10431 result.Raise("Can't detach drbd from local storage on node"
10432 " %s for device %s" % (self.target_node, dev.iv_name))
10434 #cfg.Update(instance)
10436 # ok, we created the new LVs, so now we know we have the needed
10437 # storage; as such, we proceed on the target node to rename
10438 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
10439 # using the assumption that logical_id == physical_id (which in
10440 # turn is the unique_id on that node)
10442 # FIXME(iustin): use a better name for the replaced LVs
10443 temp_suffix = int(time.time())
10444 ren_fn = lambda d, suff: (d.physical_id[0],
10445 d.physical_id[1] + "_replaced-%s" % suff)
10447 # Build the rename list based on what LVs exist on the node
10448 rename_old_to_new = []
10449 for to_ren in old_lvs:
10450 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
10451 if not result.fail_msg and result.payload:
10453 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
10455 self.lu.LogInfo("Renaming the old LVs on the target node")
10456 result = self.rpc.call_blockdev_rename(self.target_node,
10458 result.Raise("Can't rename old LVs on node %s" % self.target_node)
10460 # Now we rename the new LVs to the old LVs
10461 self.lu.LogInfo("Renaming the new LVs on the target node")
10462 rename_new_to_old = [(new, old.physical_id)
10463 for old, new in zip(old_lvs, new_lvs)]
10464 result = self.rpc.call_blockdev_rename(self.target_node,
10466 result.Raise("Can't rename new LVs on node %s" % self.target_node)
10468 # Intermediate steps of in memory modifications
10469 for old, new in zip(old_lvs, new_lvs):
10470 new.logical_id = old.logical_id
10471 self.cfg.SetDiskID(new, self.target_node)
10473 # We need to modify old_lvs so that removal later removes the
10474 # right LVs, not the newly added ones; note that old_lvs is a
10476 for disk in old_lvs:
10477 disk.logical_id = ren_fn(disk, temp_suffix)
10478 self.cfg.SetDiskID(disk, self.target_node)
10480 # Now that the new lvs have the old name, we can add them to the device
10481 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
10482 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
10484 msg = result.fail_msg
10486 for new_lv in new_lvs:
10487 msg2 = self.rpc.call_blockdev_remove(self.target_node,
10490 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
10491 hint=("clean up manually the unused logical"
10493 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
10495 cstep = itertools.count(5)
10497 if self.early_release:
10498 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10499 self._RemoveOldStorage(self.target_node, iv_names)
10500 # TODO: Check if releasing locks early still makes sense
10501 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10503 # Release all resource locks except those used by the instance
10504 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10505 keep=self.node_secondary_ip.keys())
10507 # Release all node locks while waiting for sync
10508 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10510 # TODO: Can the instance lock be downgraded here? Take the optional disk
10511 # shutdown in the caller into consideration.
10514 # This can fail as the old devices are degraded and _WaitForSync
10515 # returns a combined result over all disks, so we don't check its return value
10516 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10517 _WaitForSync(self.lu, self.instance)
10519 # Check all devices manually
10520 self._CheckDevices(self.instance.primary_node, iv_names)
10522 # Step: remove old storage
10523 if not self.early_release:
10524 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10525 self._RemoveOldStorage(self.target_node, iv_names)
10527 def _ExecDrbd8Secondary(self, feedback_fn):
10528 """Replace the secondary node for DRBD 8.
10530 The algorithm for replace is quite complicated:
10531 - for all disks of the instance:
10532 - create new LVs on the new node with same names
10533 - shutdown the drbd device on the old secondary
10534 - disconnect the drbd network on the primary
10535 - create the drbd device on the new secondary
10536 - network attach the drbd on the primary, using an artifice:
10537 the drbd code for Attach() will connect to the network if it
10538 finds a device which is connected to the good local disks but
10539 not network enabled
10540 - wait for sync across all devices
10541 - remove all disks from the old secondary
10543 Failures are not very well handled.
10548 pnode = self.instance.primary_node
10550 # Step: check device activation
10551 self.lu.LogStep(1, steps_total, "Check device existence")
10552 self._CheckDisksExistence([self.instance.primary_node])
10553 self._CheckVolumeGroup([self.instance.primary_node])
10555 # Step: check other node consistency
10556 self.lu.LogStep(2, steps_total, "Check peer consistency")
10557 self._CheckDisksConsistency(self.instance.primary_node, True, True)
10559 # Step: create new storage
10560 self.lu.LogStep(3, steps_total, "Allocate new storage")
10561 for idx, dev in enumerate(self.instance.disks):
10562 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
10563 (self.new_node, idx))
10564 # we pass force_create=True to force LVM creation
10565 for new_lv in dev.children:
10566 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
10567 _GetInstanceInfoText(self.instance), False)
10569 # Step 4: drbd minors and drbd setup changes
10570 # after this, we must manually remove the drbd minors on both the
10571 # error and the success paths
10572 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10573 minors = self.cfg.AllocateDRBDMinor([self.new_node
10574 for dev in self.instance.disks],
10575 self.instance.name)
10576 logging.debug("Allocated minors %r", minors)
10579 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
10580 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
10581 (self.new_node, idx))
10582 # create new devices on new_node; note that we create two IDs:
10583 # one without port, so the drbd will be activated without
10584 # networking information on the new node at this stage, and one
10585 # with network, for the latter activation in step 4
10586 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
10587 if self.instance.primary_node == o_node1:
10590 assert self.instance.primary_node == o_node2, "Three-node instance?"
10593 new_alone_id = (self.instance.primary_node, self.new_node, None,
10594 p_minor, new_minor, o_secret)
10595 new_net_id = (self.instance.primary_node, self.new_node, o_port,
10596 p_minor, new_minor, o_secret)
10598 iv_names[idx] = (dev, dev.children, new_net_id)
10599 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
10601 drbd_params, _, _ = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
10602 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
10603 logical_id=new_alone_id,
10604 children=dev.children,
10606 params=drbd_params)
10608 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
10609 _GetInstanceInfoText(self.instance), False)
10610 except errors.GenericError:
10611 self.cfg.ReleaseDRBDMinors(self.instance.name)
10614 # We have new devices, shutdown the drbd on the old secondary
10615 for idx, dev in enumerate(self.instance.disks):
10616 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
10617 self.cfg.SetDiskID(dev, self.target_node)
10618 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
10620 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
10621 " node: %s" % (idx, msg),
10622 hint=("Please clean up this device manually as"
10623 " soon as possible"))
10625 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
10626 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
10627 self.instance.disks)[pnode]
10629 msg = result.fail_msg
10631 # detaches didn't succeed (unlikely)
10632 self.cfg.ReleaseDRBDMinors(self.instance.name)
10633 raise errors.OpExecError("Can't detach the disks from the network on"
10634 " old node: %s" % (msg,))
10636 # if we managed to detach at least one, we update all the disks of
10637 # the instance to point to the new secondary
10638 self.lu.LogInfo("Updating instance configuration")
10639 for dev, _, new_logical_id in iv_names.itervalues():
10640 dev.logical_id = new_logical_id
10641 self.cfg.SetDiskID(dev, self.instance.primary_node)
10643 self.cfg.Update(self.instance, feedback_fn)
10645 # Release all node locks (the configuration has been updated)
10646 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10648 # and now perform the drbd attach
10649 self.lu.LogInfo("Attaching primary drbds to new secondary"
10650 " (standalone => connected)")
10651 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
10653 self.node_secondary_ip,
10654 self.instance.disks,
10655 self.instance.name,
10657 for to_node, to_result in result.items():
10658 msg = to_result.fail_msg
10660 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
10662 hint=("please do a gnt-instance info to see the"
10663 " status of disks"))
10665 cstep = itertools.count(5)
10667 if self.early_release:
10668 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10669 self._RemoveOldStorage(self.target_node, iv_names)
10670 # TODO: Check if releasing locks early still makes sense
10671 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10673 # Release all resource locks except those used by the instance
10674 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10675 keep=self.node_secondary_ip.keys())
10677 # TODO: Can the instance lock be downgraded here? Take the optional disk
10678 # shutdown in the caller into consideration.
10681 # This can fail as the old devices are degraded and _WaitForSync
10682 # returns a combined result over all disks, so we don't check its return value
10683 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10684 _WaitForSync(self.lu, self.instance)
10686 # Check all devices manually
10687 self._CheckDevices(self.instance.primary_node, iv_names)
10689 # Step: remove old storage
10690 if not self.early_release:
10691 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10692 self._RemoveOldStorage(self.target_node, iv_names)
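# Example (illustrative, comments only): opcodes that end up in the tasklet
# above. For REPLACE_DISK_CHG either an iallocator or an explicit new
# secondary must be given (see TLReplaceDisks.CheckArguments); node and
# instance names are made up.
#
#   opcodes.OpInstanceReplaceDisks(instance_name="inst1.example.com",
#                                  mode=constants.REPLACE_DISK_CHG,
#                                  remote_node="node3.example.com",
#                                  early_release=True)
#   opcodes.OpInstanceReplaceDisks(instance_name="inst1.example.com",
#                                  mode=constants.REPLACE_DISK_AUTO)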
10695 class LURepairNodeStorage(NoHooksLU):
10696 """Repairs the volume group on a node.
10701 def CheckArguments(self):
10702 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10704 storage_type = self.op.storage_type
10706 if (constants.SO_FIX_CONSISTENCY not in
10707 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
10708 raise errors.OpPrereqError("Storage units of type '%s' can not be"
10709 " repaired" % storage_type,
10710 errors.ECODE_INVAL)
10712 def ExpandNames(self):
10713 self.needed_locks = {
10714 locking.LEVEL_NODE: [self.op.node_name],
10717 def _CheckFaultyDisks(self, instance, node_name):
10718 """Ensure faulty disks abort the opcode or at least warn."""
10720 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
10722 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
10723 " node '%s'" % (instance.name, node_name),
10724 errors.ECODE_STATE)
10725 except errors.OpPrereqError, err:
10726 if self.op.ignore_consistency:
10727 self.proc.LogWarning(str(err.args[0]))
10731 def CheckPrereq(self):
10732 """Check prerequisites.
10735 # Check whether any instance on this node has faulty disks
10736 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
10737 if inst.admin_state != constants.ADMINST_UP:
10739 check_nodes = set(inst.all_nodes)
10740 check_nodes.discard(self.op.node_name)
10741 for inst_node_name in check_nodes:
10742 self._CheckFaultyDisks(inst, inst_node_name)
10744 def Exec(self, feedback_fn):
10745 feedback_fn("Repairing storage unit '%s' on %s ..." %
10746 (self.op.name, self.op.node_name))
10748 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
10749 result = self.rpc.call_storage_execute(self.op.node_name,
10750 self.op.storage_type, st_args,
10752 constants.SO_FIX_CONSISTENCY)
10753 result.Raise("Failed to repair storage unit '%s' on %s" %
10754 (self.op.name, self.op.node_name))
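# Example (illustrative, comments only): the opcode handled by
# LURepairNodeStorage; only storage types that support SO_FIX_CONSISTENCY are
# accepted. Field names mirror the op.* attributes used above; the storage
# type constant is an assumption.
#
#   opcodes.OpRepairNodeStorage(node_name="node1.example.com",
#                               storage_type=constants.ST_LVM_VG,
#                               name="xenvg",
#                               ignore_consistency=False)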
10757 class LUNodeEvacuate(NoHooksLU):
10758 """Evacuates instances off a list of nodes.
10763 _MODE2IALLOCATOR = {
10764 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
10765 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
10766 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
10768 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
10769 assert (frozenset(_MODE2IALLOCATOR.values()) ==
10770 constants.IALLOCATOR_NEVAC_MODES)
10772 def CheckArguments(self):
10773 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10775 def ExpandNames(self):
10776 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10778 if self.op.remote_node is not None:
10779 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10780 assert self.op.remote_node
10782 if self.op.remote_node == self.op.node_name:
10783 raise errors.OpPrereqError("Can not use evacuated node as a new"
10784 " secondary node", errors.ECODE_INVAL)
10786 if self.op.mode != constants.NODE_EVAC_SEC:
10787 raise errors.OpPrereqError("Without the use of an iallocator only"
10788 " secondary instances can be evacuated",
10789 errors.ECODE_INVAL)
10792 self.share_locks = _ShareAll()
10793 self.needed_locks = {
10794 locking.LEVEL_INSTANCE: [],
10795 locking.LEVEL_NODEGROUP: [],
10796 locking.LEVEL_NODE: [],
10799 # Determine nodes (via group) optimistically, needs verification once locks
10800 # have been acquired
10801 self.lock_nodes = self._DetermineNodes()
10803 def _DetermineNodes(self):
10804 """Gets the list of nodes to operate on.
10807 if self.op.remote_node is None:
10808 # Iallocator will choose any node(s) in the same group
10809 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
10811 group_nodes = frozenset([self.op.remote_node])
10813 # Determine nodes to be locked
10814 return set([self.op.node_name]) | group_nodes
10816 def _DetermineInstances(self):
10817 """Builds list of instances to operate on.
10820 assert self.op.mode in constants.NODE_EVAC_MODES
10822 if self.op.mode == constants.NODE_EVAC_PRI:
10823 # Primary instances only
10824 inst_fn = _GetNodePrimaryInstances
10825 assert self.op.remote_node is None, \
10826 "Evacuating primary instances requires iallocator"
10827 elif self.op.mode == constants.NODE_EVAC_SEC:
10828 # Secondary instances only
10829 inst_fn = _GetNodeSecondaryInstances
10832 assert self.op.mode == constants.NODE_EVAC_ALL
10833 inst_fn = _GetNodeInstances
10834 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
10836 raise errors.OpPrereqError("Due to an issue with the iallocator"
10837 " interface it is not possible to evacuate"
10838 " all instances at once; specify explicitly"
10839 " whether to evacuate primary or secondary"
10841 errors.ECODE_INVAL)
10843 return inst_fn(self.cfg, self.op.node_name)
10845 def DeclareLocks(self, level):
10846 if level == locking.LEVEL_INSTANCE:
10847 # Lock instances optimistically, needs verification once node and group
10848 # locks have been acquired
10849 self.needed_locks[locking.LEVEL_INSTANCE] = \
10850 set(i.name for i in self._DetermineInstances())
10852 elif level == locking.LEVEL_NODEGROUP:
10853 # Lock node groups for all potential target nodes optimistically, needs
10854 # verification once nodes have been acquired
10855 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10856 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
10858 elif level == locking.LEVEL_NODE:
10859 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
10861 def CheckPrereq(self):
10863 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
10864 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
10865 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10867 need_nodes = self._DetermineNodes()
10869 if not owned_nodes.issuperset(need_nodes):
10870 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
10871 " locks were acquired, current nodes are"
10872 " '%s', used to be '%s'; retry the"
10874 (self.op.node_name,
10875 utils.CommaJoin(need_nodes),
10876 utils.CommaJoin(owned_nodes)),
10877 errors.ECODE_STATE)
10879 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
10880 if owned_groups != wanted_groups:
10881 raise errors.OpExecError("Node groups changed since locks were acquired,"
10882 " current groups are '%s', used to be '%s';"
10883 " retry the operation" %
10884 (utils.CommaJoin(wanted_groups),
10885 utils.CommaJoin(owned_groups)))
10887 # Determine affected instances
10888 self.instances = self._DetermineInstances()
10889 self.instance_names = [i.name for i in self.instances]
10891 if set(self.instance_names) != owned_instances:
10892 raise errors.OpExecError("Instances on node '%s' changed since locks"
10893 " were acquired, current instances are '%s',"
10894 " used to be '%s'; retry the operation" %
10895 (self.op.node_name,
10896 utils.CommaJoin(self.instance_names),
10897 utils.CommaJoin(owned_instances)))
10899 if self.instance_names:
10900 self.LogInfo("Evacuating instances from node '%s': %s",
10902 utils.CommaJoin(utils.NiceSort(self.instance_names)))
10904 self.LogInfo("No instances to evacuate from node '%s'",
10907 if self.op.remote_node is not None:
10908 for i in self.instances:
10909 if i.primary_node == self.op.remote_node:
10910 raise errors.OpPrereqError("Node %s is the primary node of"
10911 " instance %s, cannot use it as"
10913 (self.op.remote_node, i.name),
10914 errors.ECODE_INVAL)
10916 def Exec(self, feedback_fn):
10917 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10919 if not self.instance_names:
10920 # No instances to evacuate
10923 elif self.op.iallocator is not None:
10924 # TODO: Implement relocation to other group
10925 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10926 evac_mode=self._MODE2IALLOCATOR[self.op.mode],
10927 instances=list(self.instance_names))
10929 ial.Run(self.op.iallocator)
10931 if not ial.success:
10932 raise errors.OpPrereqError("Can't compute node evacuation using"
10933 " iallocator '%s': %s" %
10934 (self.op.iallocator, ial.info),
10935 errors.ECODE_NORES)
10937 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10939 elif self.op.remote_node is not None:
10940 assert self.op.mode == constants.NODE_EVAC_SEC
10942 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
10943 remote_node=self.op.remote_node,
10945 mode=constants.REPLACE_DISK_CHG,
10946 early_release=self.op.early_release)]
10947 for instance_name in self.instance_names
10951 raise errors.ProgrammerError("No iallocator or remote node")
10953 return ResultWithJobs(jobs)
10956 def _SetOpEarlyRelease(early_release, op):
10957 """Sets C{early_release} flag on opcodes if available.
10961 op.early_release = early_release
10962 except AttributeError:
10963 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
10968 def _NodeEvacDest(use_nodes, group, nodes):
10969 """Returns group or nodes depending on caller's choice.
10973 return utils.CommaJoin(nodes)
10978 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
10979 """Unpacks the result of change-group and node-evacuate iallocator requests.
10981 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
10982 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
10984 @type lu: L{LogicalUnit}
10985 @param lu: Logical unit instance
10986 @type alloc_result: tuple/list
10987 @param alloc_result: Result from iallocator
10988 @type early_release: bool
10989 @param early_release: Whether to release locks early if possible
10990 @type use_nodes: bool
10991 @param use_nodes: Whether to display node names instead of groups
10994 (moved, failed, jobs) = alloc_result
10997 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
10998 for (name, reason) in failed)
10999 lu.LogWarning("Unable to evacuate instances %s", failreason)
11000 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11003 lu.LogInfo("Instances to be moved: %s",
11004 utils.CommaJoin("%s (to %s)" %
11005 (name, _NodeEvacDest(use_nodes, group, nodes))
11006 for (name, group, nodes) in moved))
11008   return [map(compat.partial(_SetOpEarlyRelease, early_release),
11009               map(opcodes.OpCode.LoadOpCode, ops))
11010           for ops in jobs]
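  # Illustrative sketch (not part of the original module): the iallocator
  # result unpacked above is a three-element sequence; with hypothetical
  # values it may look like:
  #   alloc_result = (
  #     [("inst1.example.com", "group-uuid", ["node2.example.com"])],  # moved
  #     [("inst9.example.com", "not enough memory")],                  # failed
  #     [[opcode_dict, ...]],                                          # jobs
  #     )
  # Each entry in "jobs" is a list of serialized opcodes which this function
  # turns back into opcode objects and flags with the early_release setting.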
11013 class LUInstanceGrowDisk(LogicalUnit):
11014 """Grow a disk of an instance.
11017 HPATH = "disk-grow"
11018 HTYPE = constants.HTYPE_INSTANCE
11021 def ExpandNames(self):
11022 self._ExpandAndLockInstance()
11023 self.needed_locks[locking.LEVEL_NODE] = []
11024 self.needed_locks[locking.LEVEL_NODE_RES] = []
11025 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11027 def DeclareLocks(self, level):
11028 if level == locking.LEVEL_NODE:
11029 self._LockInstancesNodes()
11030 elif level == locking.LEVEL_NODE_RES:
11032 self.needed_locks[locking.LEVEL_NODE_RES] = \
11033 self.needed_locks[locking.LEVEL_NODE][:]
11035 def BuildHooksEnv(self):
11036 """Build hooks env.
11038 This runs on the master, the primary and all the secondaries.
11042 "DISK": self.op.disk,
11043 "AMOUNT": self.op.amount,
11045 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11048 def BuildHooksNodes(self):
11049 """Build hooks nodes.
11052 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11055 def CheckPrereq(self):
11056 """Check prerequisites.
11058 This checks that the instance is in the cluster.
11061 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11062 assert instance is not None, \
11063 "Cannot retrieve locked instance %s" % self.op.instance_name
11064 nodenames = list(instance.all_nodes)
11065 for node in nodenames:
11066 _CheckNodeOnline(self, node)
11068 self.instance = instance
11070 if instance.disk_template not in constants.DTS_GROWABLE:
11071 raise errors.OpPrereqError("Instance's disk layout does not support"
11072 " growing", errors.ECODE_INVAL)
11074 self.disk = instance.FindDisk(self.op.disk)
11076 if instance.disk_template not in (constants.DT_FILE,
11077 constants.DT_SHARED_FILE):
11078 # TODO: check the free disk space for file, when that feature will be
11080 _CheckNodesFreeDiskPerVG(self, nodenames,
11081 self.disk.ComputeGrowth(self.op.amount))
11083 def Exec(self, feedback_fn):
11084 """Execute disk grow.
11087     instance = self.instance
11088     disk = self.disk
11090     assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11091 assert (self.owned_locks(locking.LEVEL_NODE) ==
11092 self.owned_locks(locking.LEVEL_NODE_RES))
11094     disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
11095     if not disks_ok:
11096       raise errors.OpExecError("Cannot activate block device to grow")
11098 feedback_fn("Growing disk %s of instance '%s' by %s" %
11099 (self.op.disk, instance.name,
11100 utils.FormatUnit(self.op.amount, "h")))
11102 # First run all grow ops in dry-run mode
11103 for node in instance.all_nodes:
11104 self.cfg.SetDiskID(disk, node)
11105 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
11106 result.Raise("Grow request failed to node %s" % node)
11108 # We know that (as far as we can test) operations across different
11109 # nodes will succeed, time to run it for real
11110 for node in instance.all_nodes:
11111 self.cfg.SetDiskID(disk, node)
11112 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
11113 result.Raise("Grow request failed to node %s" % node)
11115 # TODO: Rewrite code to work properly
11116 # DRBD goes into sync mode for a short amount of time after executing the
11117 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
11118 # calling "resize" in sync mode fails. Sleeping for a short amount of
11119     # time is a work-around.
11120     time.sleep(5)
11122     disk.RecordGrow(self.op.amount)
11123 self.cfg.Update(instance, feedback_fn)
11125 # Changes have been recorded, release node lock
11126 _ReleaseLocks(self, locking.LEVEL_NODE)
11128 # Downgrade lock while waiting for sync
11129 self.glm.downgrade(locking.LEVEL_INSTANCE)
11131 if self.op.wait_for_sync:
11132 disk_abort = not _WaitForSync(self, instance, disks=[disk])
11133       if disk_abort:
11134         self.proc.LogWarning("Disk sync-ing has not returned a good"
11135                              " status; please check the instance")
11136 if instance.admin_state != constants.ADMINST_UP:
11137 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
11138 elif instance.admin_state != constants.ADMINST_UP:
11139 self.proc.LogWarning("Not shutting down the disk even if the instance is"
11140 " not supposed to be running because no wait for"
11141 " sync mode was requested")
11143 assert self.owned_locks(locking.LEVEL_NODE_RES)
11144 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
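    # Illustrative usage sketch (not part of the original module): this LU is
    # normally reached by submitting the corresponding opcode; hypothetical
    # values:
    #   op = opcodes.OpInstanceGrowDisk(instance_name="inst1.example.com",
    #                                   disk=0, amount=1024,  # amount in MiB
    #                                   wait_for_sync=True)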
11147 class LUInstanceQueryData(NoHooksLU):
11148 """Query runtime instance data.
11153 def ExpandNames(self):
11154 self.needed_locks = {}
11156 # Use locking if requested or when non-static information is wanted
11157 if not (self.op.static or self.op.use_locking):
11158 self.LogWarning("Non-static data requested, locks need to be acquired")
11159 self.op.use_locking = True
11161 if self.op.instances or not self.op.use_locking:
11162 # Expand instance names right here
11163 self.wanted_names = _GetWantedInstances(self, self.op.instances)
11164     else:
11165       # Will use acquired locks
11166       self.wanted_names = None
11168 if self.op.use_locking:
11169 self.share_locks = _ShareAll()
11171 if self.wanted_names is None:
11172 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
11173       else:
11174         self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
11176 self.needed_locks[locking.LEVEL_NODE] = []
11177 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11179 def DeclareLocks(self, level):
11180 if self.op.use_locking and level == locking.LEVEL_NODE:
11181 self._LockInstancesNodes()
11183 def CheckPrereq(self):
11184 """Check prerequisites.
11186 This only checks the optional instance list against the existing names.
11189 if self.wanted_names is None:
11190 assert self.op.use_locking, "Locking was not used"
11191 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
11193 self.wanted_instances = \
11194 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
11196 def _ComputeBlockdevStatus(self, node, instance_name, dev):
11197 """Returns the status of a block device
11200     if self.op.static or not node:
11201       return None
11203     self.cfg.SetDiskID(dev, node)
11205     result = self.rpc.call_blockdev_find(node, dev)
11206     if result.offline:
11207       return None
11209     result.Raise("Can't compute disk status for %s" % instance_name)
11211     status = result.payload
11212     if status is None:
11213       return None
11215 return (status.dev_path, status.major, status.minor,
11216 status.sync_percent, status.estimated_time,
11217 status.is_degraded, status.ldisk_status)
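    # Illustrative example (not part of the original module): for a healthy,
    # fully synced device the tuple returned above could look like
    # (hypothetical values):
    #   ("/dev/drbd0", 147, 0, 100.0, 0, False, constants.LDS_OKAY)
    # i.e. (dev_path, major, minor, sync_percent, estimated_time,
    #       is_degraded, ldisk_status)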
11219 def _ComputeDiskStatus(self, instance, snode, dev):
11220 """Compute block device status.
11223 if dev.dev_type in constants.LDS_DRBD:
11224 # we change the snode then (otherwise we use the one passed in)
11225 if dev.logical_id[0] == instance.primary_node:
11226 snode = dev.logical_id[1]
11227       else:
11228         snode = dev.logical_id[0]
11230 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
11231 instance.name, dev)
11232 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
11235 dev_children = map(compat.partial(self._ComputeDiskStatus,
11242 "iv_name": dev.iv_name,
11243 "dev_type": dev.dev_type,
11244 "logical_id": dev.logical_id,
11245 "physical_id": dev.physical_id,
11246 "pstatus": dev_pstatus,
11247 "sstatus": dev_sstatus,
11248 "children": dev_children,
11253 def Exec(self, feedback_fn):
11254 """Gather and return data"""
11257 cluster = self.cfg.GetClusterInfo()
11259 pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
11260 for i in self.wanted_instances)
11261 for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
11262 if self.op.static or pnode.offline:
11263         remote_state = None
11264         if pnode.offline:
11265           self.LogWarning("Primary node %s is marked offline, returning static"
11266                           " information only for instance %s" %
11267                           (pnode.name, instance.name))
11268       else:
11269         remote_info = self.rpc.call_instance_info(instance.primary_node,
11270                                                   instance.name,
11271                                                   instance.hypervisor)
11272         remote_info.Raise("Error checking node %s" % instance.primary_node)
11273         remote_info = remote_info.payload
11274         if remote_info and "state" in remote_info:
11275           remote_state = "up"
11276         else:
11277           if instance.admin_state == constants.ADMINST_UP:
11278             remote_state = "down"
11279           else:
11280             remote_state = instance.admin_state
11282       disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
11283                   instance.disks)
11285 result[instance.name] = {
11286 "name": instance.name,
11287 "config_state": instance.admin_state,
11288 "run_state": remote_state,
11289 "pnode": instance.primary_node,
11290 "snodes": instance.secondary_nodes,
11292 # this happens to be the same format used for hooks
11293 "nics": _NICListToTuple(self, instance.nics),
11294 "disk_template": instance.disk_template,
11296 "hypervisor": instance.hypervisor,
11297 "network_port": instance.network_port,
11298 "hv_instance": instance.hvparams,
11299 "hv_actual": cluster.FillHV(instance, skip_globals=True),
11300 "be_instance": instance.beparams,
11301 "be_actual": cluster.FillBE(instance),
11302 "os_instance": instance.osparams,
11303 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
11304 "serial_no": instance.serial_no,
11305 "mtime": instance.mtime,
11306 "ctime": instance.ctime,
11307 "uuid": instance.uuid,
11313 class LUInstanceSetParams(LogicalUnit):
11314 """Modifies an instances's parameters.
11317 HPATH = "instance-modify"
11318 HTYPE = constants.HTYPE_INSTANCE
11321 def CheckArguments(self):
11322 if not (self.op.nics or self.op.disks or self.op.disk_template or
11323 self.op.hvparams or self.op.beparams or self.op.os_name or
11324 self.op.online_inst or self.op.offline_inst):
11325 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
11327 if self.op.hvparams:
11328 _CheckGlobalHvParams(self.op.hvparams)
11331     disk_addremove = 0
11332     for disk_op, disk_dict in self.op.disks:
11333 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
11334 if disk_op == constants.DDM_REMOVE:
11335         disk_addremove += 1
11336         continue
11337       elif disk_op == constants.DDM_ADD:
11338         disk_addremove += 1
11339       else:
11340 if not isinstance(disk_op, int):
11341 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
11342 if not isinstance(disk_dict, dict):
11343 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
11344 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11346 if disk_op == constants.DDM_ADD:
11347 mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
11348 if mode not in constants.DISK_ACCESS_SET:
11349 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
11350 errors.ECODE_INVAL)
11351         size = disk_dict.get(constants.IDISK_SIZE, None)
11352         if size is None:
11353           raise errors.OpPrereqError("Required disk parameter size missing",
11354                                      errors.ECODE_INVAL)
11355         try:
11356           size = int(size)
11357         except (TypeError, ValueError), err:
11358 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
11359 str(err), errors.ECODE_INVAL)
11360 disk_dict[constants.IDISK_SIZE] = size
11361       else:
11362         # modification of disk
11363 if constants.IDISK_SIZE in disk_dict:
11364 raise errors.OpPrereqError("Disk size change not possible, use"
11365 " grow-disk", errors.ECODE_INVAL)
11367 if disk_addremove > 1:
11368 raise errors.OpPrereqError("Only one disk add or remove operation"
11369 " supported at a time", errors.ECODE_INVAL)
11371 if self.op.disks and self.op.disk_template is not None:
11372 raise errors.OpPrereqError("Disk template conversion and other disk"
11373 " changes not supported at the same time",
11374 errors.ECODE_INVAL)
11376 if (self.op.disk_template and
11377 self.op.disk_template in constants.DTS_INT_MIRROR and
11378 self.op.remote_node is None):
11379 raise errors.OpPrereqError("Changing the disk template to a mirrored"
11380 " one requires specifying a secondary node",
11381 errors.ECODE_INVAL)
11384     nic_addremove = 0
11385     for nic_op, nic_dict in self.op.nics:
11386 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
11387       if nic_op == constants.DDM_REMOVE:
11388         nic_addremove += 1
11389         continue
11390       elif nic_op == constants.DDM_ADD:
11391         nic_addremove += 1
11392       else:
11393         if not isinstance(nic_op, int):
11394 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
11395 if not isinstance(nic_dict, dict):
11396 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
11397 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11399 # nic_dict should be a dict
11400 nic_ip = nic_dict.get(constants.INIC_IP, None)
11401 if nic_ip is not None:
11402 if nic_ip.lower() == constants.VALUE_NONE:
11403 nic_dict[constants.INIC_IP] = None
11404         else:
11405           if not netutils.IPAddress.IsValid(nic_ip):
11406 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
11407 errors.ECODE_INVAL)
11409 nic_bridge = nic_dict.get("bridge", None)
11410 nic_link = nic_dict.get(constants.INIC_LINK, None)
11411 if nic_bridge and nic_link:
11412 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
11413 " at the same time", errors.ECODE_INVAL)
11414 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
11415 nic_dict["bridge"] = None
11416 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
11417 nic_dict[constants.INIC_LINK] = None
11419 if nic_op == constants.DDM_ADD:
11420 nic_mac = nic_dict.get(constants.INIC_MAC, None)
11421 if nic_mac is None:
11422 nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
11424 if constants.INIC_MAC in nic_dict:
11425 nic_mac = nic_dict[constants.INIC_MAC]
11426 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11427 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
11429 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
11430 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
11431 " modifying an existing nic",
11432 errors.ECODE_INVAL)
11434 if nic_addremove > 1:
11435 raise errors.OpPrereqError("Only one NIC add or remove operation"
11436 " supported at a time", errors.ECODE_INVAL)
11438 def ExpandNames(self):
11439 self._ExpandAndLockInstance()
11440 # Can't even acquire node locks in shared mode as upcoming changes in
11441 # Ganeti 2.6 will start to modify the node object on disk conversion
11442 self.needed_locks[locking.LEVEL_NODE] = []
11443 self.needed_locks[locking.LEVEL_NODE_RES] = []
11444 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11446 def DeclareLocks(self, level):
11447 if level == locking.LEVEL_NODE:
11448 self._LockInstancesNodes()
11449 if self.op.disk_template and self.op.remote_node:
11450 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11451 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
11452 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
11454 self.needed_locks[locking.LEVEL_NODE_RES] = \
11455 self.needed_locks[locking.LEVEL_NODE][:]
11457 def BuildHooksEnv(self):
11458 """Build hooks env.
11460 This runs on the master, primary and secondaries.
11464 if constants.BE_MINMEM in self.be_new:
11465 args["minmem"] = self.be_new[constants.BE_MINMEM]
11466 if constants.BE_MAXMEM in self.be_new:
11467 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
11468 if constants.BE_VCPUS in self.be_new:
11469 args["vcpus"] = self.be_new[constants.BE_VCPUS]
11470 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
11471 # information at all.
11474 nic_override = dict(self.op.nics)
11475 for idx, nic in enumerate(self.instance.nics):
11476 if idx in nic_override:
11477 this_nic_override = nic_override[idx]
11478         else:
11479           this_nic_override = {}
11480 if constants.INIC_IP in this_nic_override:
11481 ip = this_nic_override[constants.INIC_IP]
11484 if constants.INIC_MAC in this_nic_override:
11485 mac = this_nic_override[constants.INIC_MAC]
11488 if idx in self.nic_pnew:
11489 nicparams = self.nic_pnew[idx]
11490         else:
11491           nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
11492 mode = nicparams[constants.NIC_MODE]
11493 link = nicparams[constants.NIC_LINK]
11494 args["nics"].append((ip, mac, mode, link))
11495 if constants.DDM_ADD in nic_override:
11496 ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
11497 mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
11498 nicparams = self.nic_pnew[constants.DDM_ADD]
11499 mode = nicparams[constants.NIC_MODE]
11500 link = nicparams[constants.NIC_LINK]
11501 args["nics"].append((ip, mac, mode, link))
11502 elif constants.DDM_REMOVE in nic_override:
11503 del args["nics"][-1]
11505 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
11506 if self.op.disk_template:
11507 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
11511 def BuildHooksNodes(self):
11512 """Build hooks nodes.
11515 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11518 def CheckPrereq(self):
11519 """Check prerequisites.
11521 This only checks the instance list against the existing names.
11524 # checking the new params on the primary/secondary nodes
11526 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11527 cluster = self.cluster = self.cfg.GetClusterInfo()
11528 assert self.instance is not None, \
11529 "Cannot retrieve locked instance %s" % self.op.instance_name
11530 pnode = instance.primary_node
11531 nodelist = list(instance.all_nodes)
11532 pnode_info = self.cfg.GetNodeInfo(pnode)
11533 self.diskparams = self.cfg.GetNodeGroup(pnode_info.group).diskparams
11536 if self.op.os_name and not self.op.force:
11537 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
11538 self.op.force_variant)
11539 instance_os = self.op.os_name
11540     else:
11541       instance_os = instance.os
11543 if self.op.disk_template:
11544 if instance.disk_template == self.op.disk_template:
11545 raise errors.OpPrereqError("Instance already has disk template %s" %
11546 instance.disk_template, errors.ECODE_INVAL)
11548 if (instance.disk_template,
11549 self.op.disk_template) not in self._DISK_CONVERSIONS:
11550 raise errors.OpPrereqError("Unsupported disk template conversion from"
11551 " %s to %s" % (instance.disk_template,
11552 self.op.disk_template),
11553 errors.ECODE_INVAL)
11554 _CheckInstanceState(self, instance, INSTANCE_DOWN,
11555 msg="cannot change disk template")
11556 if self.op.disk_template in constants.DTS_INT_MIRROR:
11557 if self.op.remote_node == pnode:
11558 raise errors.OpPrereqError("Given new secondary node %s is the same"
11559 " as the primary node of the instance" %
11560 self.op.remote_node, errors.ECODE_STATE)
11561 _CheckNodeOnline(self, self.op.remote_node)
11562 _CheckNodeNotDrained(self, self.op.remote_node)
11563 # FIXME: here we assume that the old instance type is DT_PLAIN
11564 assert instance.disk_template == constants.DT_PLAIN
11565 disks = [{constants.IDISK_SIZE: d.size,
11566 constants.IDISK_VG: d.logical_id[0]}
11567 for d in instance.disks]
11568 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
11569 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
11571 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
11572 if pnode_info.group != snode_info.group:
11573 self.LogWarning("The primary and secondary nodes are in two"
11574 " different node groups; the disk parameters"
11575 " from the first disk's node group will be"
11578 # hvparams processing
11579 if self.op.hvparams:
11580 hv_type = instance.hypervisor
11581 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
11582 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
11583 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
11586 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
11587 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
11588 self.hv_proposed = self.hv_new = hv_new # the new actual values
11589 self.hv_inst = i_hvdict # the new dict (without defaults)
11590     else:
11591       self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
11592                                               instance.hvparams)
11593       self.hv_new = self.hv_inst = {}
11595 # beparams processing
11596 if self.op.beparams:
11597 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
11599 objects.UpgradeBeParams(i_bedict)
11600 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
11601 be_new = cluster.SimpleFillBE(i_bedict)
11602 self.be_proposed = self.be_new = be_new # the new actual values
11603 self.be_inst = i_bedict # the new dict (without defaults)
11604     else:
11605       self.be_new = self.be_inst = {}
11606 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
11607 be_old = cluster.FillBE(instance)
11609     # CPU param validation -- checking every time a parameter is
11610     # changed to cover all cases where either CPU mask or vcpus have
11611     # been changed
11612 if (constants.BE_VCPUS in self.be_proposed and
11613 constants.HV_CPU_MASK in self.hv_proposed):
11614       cpu_list = \
11615         utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
11616 # Verify mask is consistent with number of vCPUs. Can skip this
11617 # test if only 1 entry in the CPU mask, which means same mask
11618 # is applied to all vCPUs.
11619 if (len(cpu_list) > 1 and
11620 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
11621 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
11623 (self.be_proposed[constants.BE_VCPUS],
11624 self.hv_proposed[constants.HV_CPU_MASK]),
11625 errors.ECODE_INVAL)
11627 # Only perform this test if a new CPU mask is given
11628 if constants.HV_CPU_MASK in self.hv_new:
11629 # Calculate the largest CPU number requested
11630 max_requested_cpu = max(map(max, cpu_list))
11631 # Check that all of the instance's nodes have enough physical CPUs to
11632 # satisfy the requested CPU mask
11633 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
11634 max_requested_cpu + 1, instance.hypervisor)
11636 # osparams processing
11637 if self.op.osparams:
11638 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
11639 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
11640       self.os_inst = i_osdict # the new dict (without defaults)
11641     else:
11642       self.os_inst = {}
11644     self.warn = []
11646 #TODO(dynmem): do the appropriate check involving MINMEM
11647 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
11648 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
11649 mem_check_list = [pnode]
11650 if be_new[constants.BE_AUTO_BALANCE]:
11651 # either we changed auto_balance to yes or it was from before
11652 mem_check_list.extend(instance.secondary_nodes)
11653 instance_info = self.rpc.call_instance_info(pnode, instance.name,
11654 instance.hypervisor)
11655 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
11656 [instance.hypervisor])
11657 pninfo = nodeinfo[pnode]
11658       msg = pninfo.fail_msg
11659       if msg:
11660         # Assume the primary node is unreachable and go ahead
11661         self.warn.append("Can't get info from primary node %s: %s" %
11662                          (pnode, msg))
11663       else:
11664 (_, _, (pnhvinfo, )) = pninfo.payload
11665 if not isinstance(pnhvinfo.get("memory_free", None), int):
11666 self.warn.append("Node data from primary node %s doesn't contain"
11667 " free memory information" % pnode)
11668 elif instance_info.fail_msg:
11669 self.warn.append("Can't get instance runtime information: %s" %
11670 instance_info.fail_msg)
11671         else:
11672           if instance_info.payload:
11673             current_mem = int(instance_info.payload["memory"])
11674           else:
11675 # Assume instance not running
11676 # (there is a slight race condition here, but it's not very
11677 # probable, and we have no other way to check)
11678             # TODO: Describe race condition
11679             current_mem = 0
11680           #TODO(dynmem): do the appropriate check involving MINMEM
11681           miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
11682                       pnhvinfo["memory_free"])
11683           if miss_mem > 0:
11684 raise errors.OpPrereqError("This change will prevent the instance"
11685 " from starting, due to %d MB of memory"
11686 " missing on its primary node" %
11688 errors.ECODE_NORES)
11690 if be_new[constants.BE_AUTO_BALANCE]:
11691 for node, nres in nodeinfo.items():
11692           if node not in instance.secondary_nodes:
11693             continue
11694 nres.Raise("Can't get info from secondary node %s" % node,
11695 prereq=True, ecode=errors.ECODE_STATE)
11696 (_, _, (nhvinfo, )) = nres.payload
11697 if not isinstance(nhvinfo.get("memory_free", None), int):
11698 raise errors.OpPrereqError("Secondary node %s didn't return free"
11699 " memory information" % node,
11700 errors.ECODE_STATE)
11701 #TODO(dynmem): do the appropriate check involving MINMEM
11702 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
11703 raise errors.OpPrereqError("This change will prevent the instance"
11704 " from failover to its secondary node"
11705 " %s, due to not enough memory" % node,
11706 errors.ECODE_STATE)
11710 self.nic_pinst = {}
11711 for nic_op, nic_dict in self.op.nics:
11712 if nic_op == constants.DDM_REMOVE:
11713 if not instance.nics:
11714 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
11715                                      errors.ECODE_INVAL)
11716         continue
11717       if nic_op != constants.DDM_ADD:
11719 if not instance.nics:
11720 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
11721 " no NICs" % nic_op,
11722 errors.ECODE_INVAL)
11723 if nic_op < 0 or nic_op >= len(instance.nics):
11724 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
11726 (nic_op, len(instance.nics) - 1),
11727 errors.ECODE_INVAL)
11728 old_nic_params = instance.nics[nic_op].nicparams
11729 old_nic_ip = instance.nics[nic_op].ip
11730       else:
11731         old_nic_params = {}
11732         old_nic_ip = None
11734 update_params_dict = dict([(key, nic_dict[key])
11735 for key in constants.NICS_PARAMETERS
11736 if key in nic_dict])
11738 if "bridge" in nic_dict:
11739 update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
11741 new_nic_params = _GetUpdatedParams(old_nic_params,
11742 update_params_dict)
11743 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
11744 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
11745 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
11746 self.nic_pinst[nic_op] = new_nic_params
11747 self.nic_pnew[nic_op] = new_filled_nic_params
11748 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
11750 if new_nic_mode == constants.NIC_MODE_BRIDGED:
11751 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
11752 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
11754 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
11756 self.warn.append(msg)
11758 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
11759 if new_nic_mode == constants.NIC_MODE_ROUTED:
11760 if constants.INIC_IP in nic_dict:
11761 nic_ip = nic_dict[constants.INIC_IP]
11762           else:
11763             nic_ip = old_nic_ip
11764           if nic_ip is None:
11765 raise errors.OpPrereqError("Cannot set the nic ip to None"
11766 " on a routed nic", errors.ECODE_INVAL)
11767 if constants.INIC_MAC in nic_dict:
11768 nic_mac = nic_dict[constants.INIC_MAC]
11769 if nic_mac is None:
11770 raise errors.OpPrereqError("Cannot set the nic mac to None",
11771 errors.ECODE_INVAL)
11772 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11773 # otherwise generate the mac
11774 nic_dict[constants.INIC_MAC] = \
11775 self.cfg.GenerateMAC(self.proc.GetECId())
11776           else:
11777             # or validate/reserve the current one
11778             try:
11779 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
11780 except errors.ReservationError:
11781 raise errors.OpPrereqError("MAC address %s already in use"
11782 " in cluster" % nic_mac,
11783 errors.ECODE_NOTUNIQUE)
11786 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
11787 raise errors.OpPrereqError("Disk operations not supported for"
11788 " diskless instances",
11789 errors.ECODE_INVAL)
11790 for disk_op, _ in self.op.disks:
11791 if disk_op == constants.DDM_REMOVE:
11792 if len(instance.disks) == 1:
11793 raise errors.OpPrereqError("Cannot remove the last disk of"
11794 " an instance", errors.ECODE_INVAL)
11795 _CheckInstanceState(self, instance, INSTANCE_DOWN,
11796 msg="cannot remove disks")
11798 if (disk_op == constants.DDM_ADD and
11799 len(instance.disks) >= constants.MAX_DISKS):
11800 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
11801 " add more" % constants.MAX_DISKS,
11802 errors.ECODE_STATE)
11803 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
11805 if disk_op < 0 or disk_op >= len(instance.disks):
11806 raise errors.OpPrereqError("Invalid disk index %s, valid values"
11808 (disk_op, len(instance.disks)),
11809 errors.ECODE_INVAL)
11811 # disabling the instance
11812 if self.op.offline_inst:
11813 _CheckInstanceState(self, instance, INSTANCE_DOWN,
11814 msg="cannot change instance state to offline")
11816 # enabling the instance
11817 if self.op.online_inst:
11818 _CheckInstanceState(self, instance, INSTANCE_OFFLINE,
11819 msg="cannot make instance go online")
11821 def _ConvertPlainToDrbd(self, feedback_fn):
11822 """Converts an instance from plain to drbd.
11825 feedback_fn("Converting template to drbd")
11826 instance = self.instance
11827 pnode = instance.primary_node
11828 snode = self.op.remote_node
11830 assert instance.disk_template == constants.DT_PLAIN
11832 # create a fake disk info for _GenerateDiskTemplate
11833 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
11834 constants.IDISK_VG: d.logical_id[0]}
11835 for d in instance.disks]
11836 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
11837 instance.name, pnode, [snode],
11838                                       disk_info, None, None, 0, feedback_fn,
11839                                       self.diskparams)
11840 info = _GetInstanceInfoText(instance)
11841 feedback_fn("Creating aditional volumes...")
11842 # first, create the missing data and meta devices
11843 for disk in new_disks:
11844 # unfortunately this is... not too nice
11845       _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
11846                             info, True)
11847 for child in disk.children:
11848 _CreateSingleBlockDev(self, snode, instance, child, info, True)
11849 # at this stage, all new LVs have been created, we can rename the
11851 feedback_fn("Renaming original volumes...")
11852 rename_list = [(o, n.children[0].logical_id)
11853 for (o, n) in zip(instance.disks, new_disks)]
11854 result = self.rpc.call_blockdev_rename(pnode, rename_list)
11855 result.Raise("Failed to rename original LVs")
11857 feedback_fn("Initializing DRBD devices...")
11858 # all child devices are in place, we can now create the DRBD devices
11859 for disk in new_disks:
11860 for node in [pnode, snode]:
11861 f_create = node == pnode
11862 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
11864 # at this point, the instance has been modified
11865 instance.disk_template = constants.DT_DRBD8
11866 instance.disks = new_disks
11867 self.cfg.Update(instance, feedback_fn)
11869 # Release node locks while waiting for sync
11870 _ReleaseLocks(self, locking.LEVEL_NODE)
11872 # disks are created, waiting for sync
11873 disk_abort = not _WaitForSync(self, instance,
11874 oneshot=not self.op.wait_for_sync)
11876 raise errors.OpExecError("There are some degraded disks for"
11877 " this instance, please cleanup manually")
11879 # Node resource locks will be released by caller
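    # Illustrative sketch (not part of the original module): after the
    # conversion above, each disk is a DRBD8 device whose children are the
    # renamed original data LV and a newly created meta LV, roughly:
    #   drbd8 (new)
    #    +-- data LV   (the original plain LV, renamed)
    #    +-- meta LV   (created above on both nodes)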
11881 def _ConvertDrbdToPlain(self, feedback_fn):
11882 """Converts an instance from drbd to plain.
11885 instance = self.instance
11887 assert len(instance.secondary_nodes) == 1
11888 assert instance.disk_template == constants.DT_DRBD8
11890 pnode = instance.primary_node
11891 snode = instance.secondary_nodes[0]
11892 feedback_fn("Converting template to plain")
11894 old_disks = instance.disks
11895 new_disks = [d.children[0] for d in old_disks]
11897 # copy over size and mode
11898 for parent, child in zip(old_disks, new_disks):
11899 child.size = parent.size
11900 child.mode = parent.mode
11902 # update instance structure
11903 instance.disks = new_disks
11904 instance.disk_template = constants.DT_PLAIN
11905 self.cfg.Update(instance, feedback_fn)
11907 # Release locks in case removing disks takes a while
11908 _ReleaseLocks(self, locking.LEVEL_NODE)
11910 feedback_fn("Removing volumes on the secondary node...")
11911 for disk in old_disks:
11912 self.cfg.SetDiskID(disk, snode)
11913 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
11915 self.LogWarning("Could not remove block device %s on node %s,"
11916 " continuing anyway: %s", disk.iv_name, snode, msg)
11918 feedback_fn("Removing unneeded volumes on the primary node...")
11919 for idx, disk in enumerate(old_disks):
11920 meta = disk.children[1]
11921 self.cfg.SetDiskID(meta, pnode)
11922 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
11924 self.LogWarning("Could not remove metadata for disk %d on node %s,"
11925 " continuing anyway: %s", idx, pnode, msg)
11927 # this is a DRBD disk, return its port to the pool
11928 for disk in old_disks:
11929 tcp_port = disk.logical_id[2]
11930 self.cfg.AddTcpUdpPort(tcp_port)
11932 # Node resource locks will be released by caller
11934 def Exec(self, feedback_fn):
11935 """Modifies an instance.
11937 All parameters take effect only at the next restart of the instance.
11940 # Process here the warnings from CheckPrereq, as we don't have a
11941 # feedback_fn there.
11942 for warn in self.warn:
11943 feedback_fn("WARNING: %s" % warn)
11945 assert ((self.op.disk_template is None) ^
11946 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
11947 "Not owning any node resource locks"
11949     result = []
11950     instance = self.instance
11952 for disk_op, disk_dict in self.op.disks:
11953 if disk_op == constants.DDM_REMOVE:
11954 # remove the last disk
11955 device = instance.disks.pop()
11956 device_idx = len(instance.disks)
11957 for node, disk in device.ComputeNodeTree(instance.primary_node):
11958 self.cfg.SetDiskID(disk, node)
11959 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
11961 self.LogWarning("Could not remove disk/%d on node %s: %s,"
11962 " continuing anyway", device_idx, node, msg)
11963 result.append(("disk/%d" % device_idx, "remove"))
11965 # if this is a DRBD disk, return its port to the pool
11966 if device.dev_type in constants.LDS_DRBD:
11967 tcp_port = device.logical_id[2]
11968 self.cfg.AddTcpUdpPort(tcp_port)
11969 elif disk_op == constants.DDM_ADD:
11971 if instance.disk_template in (constants.DT_FILE,
11972 constants.DT_SHARED_FILE):
11973 file_driver, file_path = instance.disks[0].logical_id
11974 file_path = os.path.dirname(file_path)
11975         else:
11976           file_driver = file_path = None
11977 disk_idx_base = len(instance.disks)
11978 new_disk = _GenerateDiskTemplate(self,
11979 instance.disk_template,
11980 instance.name, instance.primary_node,
11981                                          instance.secondary_nodes,
11982                                          [disk_dict],
11983                                          file_path,
11984                                          file_driver,
11985                                          disk_idx_base,
11986                                          feedback_fn,
11987                                          self.diskparams)[0]
11988 instance.disks.append(new_disk)
11989 info = _GetInstanceInfoText(instance)
11991 logging.info("Creating volume %s for instance %s",
11992 new_disk.iv_name, instance.name)
11993 # Note: this needs to be kept in sync with _CreateDisks
11995 for node in instance.all_nodes:
11996           f_create = node == instance.primary_node
11997           try:
11998             _CreateBlockDev(self, node, instance, new_disk,
11999 f_create, info, f_create)
12000 except errors.OpExecError, err:
12001 self.LogWarning("Failed to create volume %s (%s) on"
12003 new_disk.iv_name, new_disk, node, err)
12004 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
12005 (new_disk.size, new_disk.mode)))
12006       else:
12007         # change a given disk
12008 instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
12009 result.append(("disk.mode/%d" % disk_op,
12010 disk_dict[constants.IDISK_MODE]))
12012 if self.op.disk_template:
12014 check_nodes = set(instance.all_nodes)
12015 if self.op.remote_node:
12016 check_nodes.add(self.op.remote_node)
12017 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
12018 owned = self.owned_locks(level)
12019 assert not (check_nodes - owned), \
12020 ("Not owning the correct locks, owning %r, expected at least %r" %
12021 (owned, check_nodes))
12023 r_shut = _ShutdownInstanceDisks(self, instance)
12025 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
12026 " proceed with disk template conversion")
12027 mode = (instance.disk_template, self.op.disk_template)
12028       try:
12029         self._DISK_CONVERSIONS[mode](self, feedback_fn)
12030       except:
12031         self.cfg.ReleaseDRBDMinors(instance.name)
12032         raise
12033 result.append(("disk_template", self.op.disk_template))
12035 assert instance.disk_template == self.op.disk_template, \
12036 ("Expected disk template '%s', found '%s'" %
12037 (self.op.disk_template, instance.disk_template))
12039 # Release node and resource locks if there are any (they might already have
12040 # been released during disk conversion)
12041 _ReleaseLocks(self, locking.LEVEL_NODE)
12042 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
12045 for nic_op, nic_dict in self.op.nics:
12046 if nic_op == constants.DDM_REMOVE:
12047 # remove the last nic
12048 del instance.nics[-1]
12049 result.append(("nic.%d" % len(instance.nics), "remove"))
12050 elif nic_op == constants.DDM_ADD:
12051 # mac and bridge should be set, by now
12052 mac = nic_dict[constants.INIC_MAC]
12053 ip = nic_dict.get(constants.INIC_IP, None)
12054 nicparams = self.nic_pinst[constants.DDM_ADD]
12055 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
12056 instance.nics.append(new_nic)
12057 result.append(("nic.%d" % (len(instance.nics) - 1),
12058 "add:mac=%s,ip=%s,mode=%s,link=%s" %
12059 (new_nic.mac, new_nic.ip,
12060 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
12061 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
12062                        )))
12063       else:
12064         for key in (constants.INIC_MAC, constants.INIC_IP):
12065 if key in nic_dict:
12066 setattr(instance.nics[nic_op], key, nic_dict[key])
12067 if nic_op in self.nic_pinst:
12068 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
12069 for key, val in nic_dict.iteritems():
12070 result.append(("nic.%s/%d" % (key, nic_op), val))
12073 if self.op.hvparams:
12074 instance.hvparams = self.hv_inst
12075 for key, val in self.op.hvparams.iteritems():
12076 result.append(("hv/%s" % key, val))
12079 if self.op.beparams:
12080 instance.beparams = self.be_inst
12081 for key, val in self.op.beparams.iteritems():
12082 result.append(("be/%s" % key, val))
12085 if self.op.os_name:
12086 instance.os = self.op.os_name
12089 if self.op.osparams:
12090 instance.osparams = self.os_inst
12091 for key, val in self.op.osparams.iteritems():
12092 result.append(("os/%s" % key, val))
12094 # online/offline instance
12095 if self.op.online_inst:
12096 self.cfg.MarkInstanceDown(instance.name)
12097 result.append(("admin_state", constants.ADMINST_DOWN))
12098 if self.op.offline_inst:
12099 self.cfg.MarkInstanceOffline(instance.name)
12100 result.append(("admin_state", constants.ADMINST_OFFLINE))
12102 self.cfg.Update(instance, feedback_fn)
12104 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
12105 self.owned_locks(locking.LEVEL_NODE)), \
12106 "All node locks should have been released by now"
12110 _DISK_CONVERSIONS = {
12111 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
12112     (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
12113     }
12116 class LUInstanceChangeGroup(LogicalUnit):
12117 HPATH = "instance-change-group"
12118 HTYPE = constants.HTYPE_INSTANCE
12121 def ExpandNames(self):
12122 self.share_locks = _ShareAll()
12123 self.needed_locks = {
12124 locking.LEVEL_NODEGROUP: [],
12125 locking.LEVEL_NODE: [],
12128 self._ExpandAndLockInstance()
12130 if self.op.target_groups:
12131 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12132 self.op.target_groups)
12133     else:
12134       self.req_target_uuids = None
12136 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12138 def DeclareLocks(self, level):
12139 if level == locking.LEVEL_NODEGROUP:
12140 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12142 if self.req_target_uuids:
12143 lock_groups = set(self.req_target_uuids)
12145 # Lock all groups used by instance optimistically; this requires going
12146 # via the node before it's locked, requiring verification later on
12147 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
12148 lock_groups.update(instance_groups)
12149       else:
12150         # No target groups, need to lock all of them
12151 lock_groups = locking.ALL_SET
12153 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12155 elif level == locking.LEVEL_NODE:
12156 if self.req_target_uuids:
12157 # Lock all nodes used by instances
12158 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12159 self._LockInstancesNodes()
12161 # Lock all nodes in all potential target groups
12162 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
12163 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
12164 member_nodes = [node_name
12165 for group in lock_groups
12166 for node_name in self.cfg.GetNodeGroup(group).members]
12167 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12168       else:
12169         # Lock all nodes as all groups are potential targets
12170 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12172 def CheckPrereq(self):
12173 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12174 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12175 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12177 assert (self.req_target_uuids is None or
12178 owned_groups.issuperset(self.req_target_uuids))
12179 assert owned_instances == set([self.op.instance_name])
12181 # Get instance information
12182 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12184 # Check if node groups for locked instance are still correct
12185 assert owned_nodes.issuperset(self.instance.all_nodes), \
12186 ("Instance %s's nodes changed while we kept the lock" %
12187 self.op.instance_name)
12189     inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
12190                                            owned_groups)
12192 if self.req_target_uuids:
12193 # User requested specific target groups
12194 self.target_uuids = self.req_target_uuids
12195     else:
12196       # All groups except those used by the instance are potential targets
12197 self.target_uuids = owned_groups - inst_groups
12199 conflicting_groups = self.target_uuids & inst_groups
12200 if conflicting_groups:
12201 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
12202 " used by the instance '%s'" %
12203 (utils.CommaJoin(conflicting_groups),
12204 self.op.instance_name),
12205 errors.ECODE_INVAL)
12207 if not self.target_uuids:
12208 raise errors.OpPrereqError("There are no possible target groups",
12209 errors.ECODE_INVAL)
12211 def BuildHooksEnv(self):
12212 """Build hooks env.
12215 assert self.target_uuids
12218 "TARGET_GROUPS": " ".join(self.target_uuids),
12221 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12225 def BuildHooksNodes(self):
12226 """Build hooks nodes.
12229 mn = self.cfg.GetMasterNode()
12230 return ([mn], [mn])
12232 def Exec(self, feedback_fn):
12233 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12235 assert instances == [self.op.instance_name], "Instance not locked"
12237 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12238 instances=instances, target_groups=list(self.target_uuids))
12240 ial.Run(self.op.iallocator)
12242 if not ial.success:
12243 raise errors.OpPrereqError("Can't compute solution for changing group of"
12244 " instance '%s' using iallocator '%s': %s" %
12245                                  (self.op.instance_name, self.op.iallocator,
12246                                   ial.info),
12247 errors.ECODE_NORES)
12249 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12251 self.LogInfo("Iallocator returned %s job(s) for changing group of"
12252 " instance '%s'", len(jobs), self.op.instance_name)
12254 return ResultWithJobs(jobs)
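    # Illustrative usage sketch (not part of the original module): moving an
    # instance to another node group via the opcode this LU implements
    # (hypothetical names):
    #   op = opcodes.OpInstanceChangeGroup(instance_name="inst1.example.com",
    #                                      target_groups=["group2"],
    #                                      early_release=False)
    # The iallocator-computed jobs are then submitted by the master processor
    # via the ResultWithJobs wrapper returned above.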
12257 class LUBackupQuery(NoHooksLU):
12258 """Query the exports list
12263 def ExpandNames(self):
12264 self.needed_locks = {}
12265 self.share_locks[locking.LEVEL_NODE] = 1
12266 if not self.op.nodes:
12267 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12268     else:
12269       self.needed_locks[locking.LEVEL_NODE] = \
12270 _GetWantedNodes(self, self.op.nodes)
12272 def Exec(self, feedback_fn):
12273 """Compute the list of all the exported system images.
12276 @return: a dictionary with the structure node->(export-list)
12277 where export-list is a list of the instances exported on
12281 self.nodes = self.owned_locks(locking.LEVEL_NODE)
12282 rpcresult = self.rpc.call_export_list(self.nodes)
12283     result = {}
12284     for node in rpcresult:
12285       if rpcresult[node].fail_msg:
12286         result[node] = False
12287       else:
12288         result[node] = rpcresult[node].payload
12290     return result
12293 class LUBackupPrepare(NoHooksLU):
12294 """Prepares an instance for an export and returns useful information.
12299 def ExpandNames(self):
12300 self._ExpandAndLockInstance()
12302 def CheckPrereq(self):
12303 """Check prerequisites.
12306 instance_name = self.op.instance_name
12308 self.instance = self.cfg.GetInstanceInfo(instance_name)
12309 assert self.instance is not None, \
12310 "Cannot retrieve locked instance %s" % self.op.instance_name
12311 _CheckNodeOnline(self, self.instance.primary_node)
12313 self._cds = _GetClusterDomainSecret()
12315 def Exec(self, feedback_fn):
12316 """Prepares an instance for an export.
12319 instance = self.instance
12321 if self.op.mode == constants.EXPORT_MODE_REMOTE:
12322 salt = utils.GenerateSecret(8)
12324 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
12325 result = self.rpc.call_x509_cert_create(instance.primary_node,
12326 constants.RIE_CERT_VALIDITY)
12327 result.Raise("Can't create X509 key and certificate on %s" % result.node)
12329 (name, cert_pem) = result.payload
12331       cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
12332                                              cert_pem)
12334       return {
12335         "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
12336         "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
12337                           salt),
12338         "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
12339         }
12341     return None
12344 class LUBackupExport(LogicalUnit):
12345 """Export an instance to an image in the cluster.
12348 HPATH = "instance-export"
12349 HTYPE = constants.HTYPE_INSTANCE
12352 def CheckArguments(self):
12353 """Check the arguments.
12356 self.x509_key_name = self.op.x509_key_name
12357 self.dest_x509_ca_pem = self.op.destination_x509_ca
12359 if self.op.mode == constants.EXPORT_MODE_REMOTE:
12360 if not self.x509_key_name:
12361 raise errors.OpPrereqError("Missing X509 key name for encryption",
12362 errors.ECODE_INVAL)
12364 if not self.dest_x509_ca_pem:
12365 raise errors.OpPrereqError("Missing destination X509 CA",
12366 errors.ECODE_INVAL)
12368 def ExpandNames(self):
12369 self._ExpandAndLockInstance()
12371 # Lock all nodes for local exports
12372 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12373 # FIXME: lock only instance primary and destination node
12375       # Sad but true, for now we have to lock all nodes, as we don't know where
12376 # the previous export might be, and in this LU we search for it and
12377 # remove it from its current node. In the future we could fix this by:
12378 # - making a tasklet to search (share-lock all), then create the
12379 # new one, then one to remove, after
12380 # - removing the removal operation altogether
12381 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12383 def DeclareLocks(self, level):
12384 """Last minute lock declaration."""
12385 # All nodes are locked anyway, so nothing to do here.
12387 def BuildHooksEnv(self):
12388 """Build hooks env.
12390 This will run on the master, primary node and target node.
12394 "EXPORT_MODE": self.op.mode,
12395 "EXPORT_NODE": self.op.target_node,
12396 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
12397 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
12398 # TODO: Generic function for boolean env variables
12399 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
12402 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12406 def BuildHooksNodes(self):
12407 """Build hooks nodes.
12410 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
12412 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12413 nl.append(self.op.target_node)
12417 def CheckPrereq(self):
12418 """Check prerequisites.
12420 This checks that the instance and node names are valid.
12423 instance_name = self.op.instance_name
12425 self.instance = self.cfg.GetInstanceInfo(instance_name)
12426 assert self.instance is not None, \
12427 "Cannot retrieve locked instance %s" % self.op.instance_name
12428 _CheckNodeOnline(self, self.instance.primary_node)
12430 if (self.op.remove_instance and
12431 self.instance.admin_state == constants.ADMINST_UP and
12432 not self.op.shutdown):
12433 raise errors.OpPrereqError("Can not remove instance without shutting it"
12436 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12437 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
12438 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
12439 assert self.dst_node is not None
12441 _CheckNodeOnline(self, self.dst_node.name)
12442 _CheckNodeNotDrained(self, self.dst_node.name)
12445 self.dest_disk_info = None
12446 self.dest_x509_ca = None
12448 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
12449 self.dst_node = None
12451 if len(self.op.target_node) != len(self.instance.disks):
12452 raise errors.OpPrereqError(("Received destination information for %s"
12453 " disks, but instance %s has %s disks") %
12454 (len(self.op.target_node), instance_name,
12455 len(self.instance.disks)),
12456 errors.ECODE_INVAL)
12458 cds = _GetClusterDomainSecret()
12460 # Check X509 key name
12461       try:
12462         (key_name, hmac_digest, hmac_salt) = self.x509_key_name
12463 except (TypeError, ValueError), err:
12464 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
12466 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
12467 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
12468 errors.ECODE_INVAL)
12470 # Load and verify CA
12471       try:
12472         (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
12473 except OpenSSL.crypto.Error, err:
12474 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
12475 (err, ), errors.ECODE_INVAL)
12477 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
12478 if errcode is not None:
12479 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
12480 (msg, ), errors.ECODE_INVAL)
12482 self.dest_x509_ca = cert
12484 # Verify target information
12485       disk_info = []
12486       for idx, disk_data in enumerate(self.op.target_node):
12487         try:
12488           (host, port, magic) = \
12489 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
12490 except errors.GenericError, err:
12491 raise errors.OpPrereqError("Target info for disk %s: %s" %
12492 (idx, err), errors.ECODE_INVAL)
12494 disk_info.append((host, port, magic))
12496 assert len(disk_info) == len(self.op.target_node)
12497 self.dest_disk_info = disk_info
12499     else:
12500       raise errors.ProgrammerError("Unhandled export mode %r" %
12501                                    self.op.mode)
12503 # instance disk type verification
12504 # TODO: Implement export support for file-based disks
12505 for disk in self.instance.disks:
12506 if disk.dev_type == constants.LD_FILE:
12507 raise errors.OpPrereqError("Export not supported for instances with"
12508 " file-based disks", errors.ECODE_INVAL)
12510 def _CleanupExports(self, feedback_fn):
12511 """Removes exports of current instance from all other nodes.
12513 If an instance in a cluster with nodes A..D was exported to node C, its
12514 exports will be removed from the nodes A, B and D.
12517 assert self.op.mode != constants.EXPORT_MODE_REMOTE
12519 nodelist = self.cfg.GetNodeList()
12520 nodelist.remove(self.dst_node.name)
12522 # on one-node clusters nodelist will be empty after the removal
12523 # if we proceed the backup would be removed because OpBackupQuery
12524 # substitutes an empty list with the full cluster node list.
12525 iname = self.instance.name
12527 feedback_fn("Removing old exports for instance %s" % iname)
12528 exportlist = self.rpc.call_export_list(nodelist)
12529 for node in exportlist:
12530       if exportlist[node].fail_msg:
12531         continue
12532 if iname in exportlist[node].payload:
12533 msg = self.rpc.call_export_remove(node, iname).fail_msg
12535 self.LogWarning("Could not remove older export for instance %s"
12536 " on node %s: %s", iname, node, msg)
12538 def Exec(self, feedback_fn):
12539 """Export an instance to an image in the cluster.
12542 assert self.op.mode in constants.EXPORT_MODES
12544 instance = self.instance
12545 src_node = instance.primary_node
12547 if self.op.shutdown:
12548 # shutdown the instance, but not the disks
12549 feedback_fn("Shutting down instance %s" % instance.name)
12550 result = self.rpc.call_instance_shutdown(src_node, instance,
12551 self.op.shutdown_timeout)
12552 # TODO: Maybe ignore failures if ignore_remove_failures is set
12553 result.Raise("Could not shutdown instance %s on"
12554 " node %s" % (instance.name, src_node))
12556 # set the disks ID correctly since call_instance_start needs the
12557 # correct drbd minor to create the symlinks
12558 for disk in instance.disks:
12559 self.cfg.SetDiskID(disk, src_node)
12561 activate_disks = (instance.admin_state != constants.ADMINST_UP)
12563     if activate_disks:
12564       # Activate the instance disks if we're exporting a stopped instance
12565 feedback_fn("Activating disks for %s" % instance.name)
12566 _StartInstanceDisks(self, instance, None)
12569 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
12572 helper.CreateSnapshots()
12574 if (self.op.shutdown and
12575 instance.admin_state == constants.ADMINST_UP and
12576 not self.op.remove_instance):
12577 assert not activate_disks
12578 feedback_fn("Starting instance %s" % instance.name)
12579 result = self.rpc.call_instance_start(src_node,
12580 (instance, None, None), False)
12581 msg = result.fail_msg
12583 feedback_fn("Failed to start instance: %s" % msg)
12584 _ShutdownInstanceDisks(self, instance)
12585 raise errors.OpExecError("Could not start instance: %s" % msg)
12587 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12588 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
12589 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
12590 connect_timeout = constants.RIE_CONNECT_TIMEOUT
12591 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
12593 (key_name, _, _) = self.x509_key_name
12595         dest_ca_pem = \
12596           OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
12597                                           self.dest_x509_ca)
12599 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
12600 key_name, dest_ca_pem,
12605 # Check for backwards compatibility
12606 assert len(dresults) == len(instance.disks)
12607 assert compat.all(isinstance(i, bool) for i in dresults), \
12608 "Not all results are boolean: %r" % dresults
12612 feedback_fn("Deactivating disks for %s" % instance.name)
12613 _ShutdownInstanceDisks(self, instance)
12615     if not (compat.all(dresults) and fin_resu):
12616       failures = []
12617       if not fin_resu:
12618         failures.append("export finalization")
12619       if not compat.all(dresults):
12620         fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
12621                                if not dsk)
12622         failures.append("disk export: disk(s) %s" % fdsk)
12624 raise errors.OpExecError("Export failed, errors in %s" %
12625 utils.CommaJoin(failures))
12627 # At this point, the export was successful, we can cleanup/finish
12629 # Remove instance if requested
12630 if self.op.remove_instance:
12631 feedback_fn("Removing instance %s" % instance.name)
12632 _RemoveInstance(self, feedback_fn, instance,
12633 self.op.ignore_remove_failures)
12635 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12636 self._CleanupExports(feedback_fn)
12638 return fin_resu, dresults
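    # Illustrative example (not part of the original module): per the
    # assertions above, the value returned to the caller is a pair of the
    # finalization status and one boolean per exported disk, e.g.
    # (hypothetical): (True, [True, True])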
12641 class LUBackupRemove(NoHooksLU):
12642 """Remove exports related to the named instance.
12647 def ExpandNames(self):
12648 self.needed_locks = {}
12649 # We need all nodes to be locked in order for RemoveExport to work, but we
12650 # don't need to lock the instance itself, as nothing will happen to it (and
12651 # we can remove exports also for a removed instance)
12652 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12654 def Exec(self, feedback_fn):
12655 """Remove any export.
12658 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
12659 # If the instance was not found we'll try with the name that was passed in.
12660 # This will only work if it was an FQDN, though.
12661     fqdn_warn = False
12662     if not instance_name:
12663       fqdn_warn = True
12664       instance_name = self.op.instance_name
12666 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
12667     exportlist = self.rpc.call_export_list(locked_nodes)
12668     found = False
12669     for node in exportlist:
12670       msg = exportlist[node].fail_msg
12671       if msg:
12672         self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
12673         continue
12674       if instance_name in exportlist[node].payload:
12675         found = True
12676         result = self.rpc.call_export_remove(node, instance_name)
12677         msg = result.fail_msg
12678         if msg:
12679           logging.error("Could not remove export for instance %s"
12680 " on node %s: %s", instance_name, node, msg)
12682 if fqdn_warn and not found:
12683 feedback_fn("Export not found. If trying to remove an export belonging"
12684 " to a deleted instance please use its Fully Qualified"
12688 class LUGroupAdd(LogicalUnit):
12689 """Logical unit for creating node groups.
12692 HPATH = "group-add"
12693 HTYPE = constants.HTYPE_GROUP
12696 def ExpandNames(self):
12697 # We need the new group's UUID here so that we can create and acquire the
12698 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
12699 # that it should not check whether the UUID exists in the configuration.
12700 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
12701 self.needed_locks = {}
12702 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12704 def CheckPrereq(self):
12705 """Check prerequisites.
12707 This checks that the given group name is not an existing node group
12712 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12713 except errors.OpPrereqError:
12716 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
12717 " node group (UUID: %s)" %
12718 (self.op.group_name, existing_uuid),
12719 errors.ECODE_EXISTS)
12721 if self.op.ndparams:
12722 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12724 if self.op.diskparams:
12725 for templ in constants.DISK_TEMPLATES:
12726 if templ not in self.op.diskparams:
12727 self.op.diskparams[templ] = {}
12728 utils.ForceDictType(self.op.diskparams[templ], constants.DISK_DT_TYPES)
12730 self.op.diskparams = self.cfg.GetClusterInfo().diskparams
12732 def BuildHooksEnv(self):
12733 """Build hooks env.
12737 "GROUP_NAME": self.op.group_name,
12740 def BuildHooksNodes(self):
12741 """Build hooks nodes.
12744 mn = self.cfg.GetMasterNode()
12745 return ([mn], [mn])
12747 def Exec(self, feedback_fn):
12748 """Add the node group to the cluster.
12751 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
12752 uuid=self.group_uuid,
12753 alloc_policy=self.op.alloc_policy,
12754 ndparams=self.op.ndparams,
12755 diskparams=self.op.diskparams)
12757 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
12758 del self.remove_locks[locking.LEVEL_NODEGROUP]
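# Illustrative sketch (hypothetical helper, plain dicts only): the same
# "fill in empty defaults, then type-check" pattern that LUGroupAdd.CheckPrereq
# applies above to self.op.diskparams, but without the ganeti-specific
# utils.ForceDictType / constants dependencies.
def _ExampleFillDiskParamDefaults(diskparams, templates, allowed_types):
  """Ensure every disk template has a (possibly empty) parameter dict.

  @param diskparams: dict of template name -> parameter dict
  @param templates: iterable of all known template names
  @param allowed_types: dict of parameter name -> expected Python type

  """
  filled = dict(diskparams)
  for templ in templates:
    filled.setdefault(templ, {})
    for (name, value) in filled[templ].items():
      expected = allowed_types.get(name)
      if expected is not None and not isinstance(value, expected):
        raise TypeError("Parameter '%s' of template '%s' is not a %s" %
                        (name, templ, expected.__name__))
  return filled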
12761 class LUGroupAssignNodes(NoHooksLU):
12762 """Logical unit for assigning nodes to groups.
12767 def ExpandNames(self):
12768 # These raise errors.OpPrereqError on their own:
12769 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12770 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
12772 # We want to lock all the affected nodes and groups. We have readily
12773 # available the list of nodes, and the *destination* group. To gather the
12774 # list of "source" groups, we need to fetch node information later on.
12775 self.needed_locks = {
12776 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
12777 locking.LEVEL_NODE: self.op.nodes,
12780 def DeclareLocks(self, level):
12781 if level == locking.LEVEL_NODEGROUP:
12782 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
12784 # Try to get all affected nodes' groups without having the group or node
12785 # lock yet. Needs verification later in the code flow.
12786 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
12788 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
12790 def CheckPrereq(self):
12791 """Check prerequisites.
12794 assert self.needed_locks[locking.LEVEL_NODEGROUP]
12795 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
12796 frozenset(self.op.nodes))
12798 expected_locks = (set([self.group_uuid]) |
12799 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
12800 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
12801 if actual_locks != expected_locks:
12802 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
12803 " current groups are '%s', used to be '%s'" %
12804 (utils.CommaJoin(expected_locks),
12805 utils.CommaJoin(actual_locks)))
12807 self.node_data = self.cfg.GetAllNodesInfo()
12808 self.group = self.cfg.GetNodeGroup(self.group_uuid)
12809 instance_data = self.cfg.GetAllInstancesInfo()
12811 if self.group is None:
12812 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12813 (self.op.group_name, self.group_uuid))
12815 (new_splits, previous_splits) = \
12816 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
12817 for node in self.op.nodes],
12818 self.node_data, instance_data)
12821 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
12823 if not self.op.force:
12824 raise errors.OpExecError("The following instances get split by this"
12825 " change and --force was not given: %s" %
12828 self.LogWarning("This operation will split the following instances: %s",
12831 if previous_splits:
12832 self.LogWarning("In addition, these already-split instances continue"
12833 " to be split across groups: %s",
12834 utils.CommaJoin(utils.NiceSort(previous_splits)))
12836 def Exec(self, feedback_fn):
12837 """Assign nodes to a new group.
12840 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
12842 self.cfg.AssignGroupNodes(mods)
12845 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
12846 """Check for split instances after a node assignment.
12848 This method considers a series of node assignments as an atomic operation,
12849 and returns information about split instances after applying the set of
12852 In particular, it returns information about newly split instances, and
12853 about instances that were already split and remain so after the change.
12855 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
12858 @type changes: list of (node_name, new_group_uuid) pairs.
12859 @param changes: list of node assignments to consider.
12860 @param node_data: a dict with data for all nodes
12861 @param instance_data: a dict with all instances to consider
12862 @rtype: a two-tuple
12863 @return: a list of instances that were previously whole and become split as a
12864 consequence of this change, and a list of instances that were previously
12865 split and that this change does not fix.
12868 changed_nodes = dict((node, group) for node, group in changes
12869 if node_data[node].group != group)
12871 all_split_instances = set()
12872 previously_split_instances = set()
12874 def InstanceNodes(instance):
12875 return [instance.primary_node] + list(instance.secondary_nodes)
12877 for inst in instance_data.values():
12878 if inst.disk_template not in constants.DTS_INT_MIRROR:
12881 instance_nodes = InstanceNodes(inst)
12883 if len(set(node_data[node].group for node in instance_nodes)) > 1:
12884 previously_split_instances.add(inst.name)
12886 if len(set(changed_nodes.get(node, node_data[node].group)
12887 for node in instance_nodes)) > 1:
12888 all_split_instances.add(inst.name)
12890 return (list(all_split_instances - previously_split_instances),
12891 list(previously_split_instances & all_split_instances))
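# Illustrative sketch (hypothetical data, not part of the original module): a
# worked example of the split-detection logic above, using minimal stand-ins
# for the node and instance configuration objects. Only instances with an
# internally mirrored disk template (e.g. drbd) are relevant; an instance is
# "split" when its nodes end up in more than one group.
def _ExampleSplitDetection():
  import collections
  Node = collections.namedtuple("Node", ["group"])
  Inst = collections.namedtuple("Inst",
                                ["name", "primary_node", "secondary_nodes"])

  node_groups = {"node1": Node("g1"), "node2": Node("g1")}
  inst = Inst("inst1", "node1", ["node2"])

  # Proposed change: move node2 into group g2
  changes = {"node2": "g2"}

  inst_nodes = [inst.primary_node] + list(inst.secondary_nodes)
  groups_after = set(changes.get(n, node_groups[n].group) for n in inst_nodes)

  # len(groups_after) > 1, so inst1 would become newly split by this change
  return len(groups_after) > 1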
12894 class _GroupQuery(_QueryBase):
12895 FIELDS = query.GROUP_FIELDS
12897 def ExpandNames(self, lu):
12898 lu.needed_locks = {}
12900 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
12901 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
12904 self.wanted = [name_to_uuid[name]
12905 for name in utils.NiceSort(name_to_uuid.keys())]
12907 # Accept names to be either names or UUIDs.
12910 all_uuid = frozenset(self._all_groups.keys())
12912 for name in self.names:
12913 if name in all_uuid:
12914 self.wanted.append(name)
12915 elif name in name_to_uuid:
12916 self.wanted.append(name_to_uuid[name])
12918 missing.append(name)
12921 raise errors.OpPrereqError("Some groups do not exist: %s" %
12922 utils.CommaJoin(missing),
12923 errors.ECODE_NOENT)
12925 def DeclareLocks(self, lu, level):
12928 def _GetQueryData(self, lu):
12929 """Computes the list of node groups and their attributes.
12932 do_nodes = query.GQ_NODE in self.requested_data
12933 do_instances = query.GQ_INST in self.requested_data
12935 group_to_nodes = None
12936 group_to_instances = None
12938 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
12939 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
12940 # latter GetAllInstancesInfo() is not enough, for we have to go through
12941 # instance->node. Hence, we will need to process nodes even if we only need
12942 # instance information.
12943 if do_nodes or do_instances:
12944 all_nodes = lu.cfg.GetAllNodesInfo()
12945 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
12948 for node in all_nodes.values():
12949 if node.group in group_to_nodes:
12950 group_to_nodes[node.group].append(node.name)
12951 node_to_group[node.name] = node.group
12954 all_instances = lu.cfg.GetAllInstancesInfo()
12955 group_to_instances = dict((uuid, []) for uuid in self.wanted)
12957 for instance in all_instances.values():
12958 node = instance.primary_node
12959 if node in node_to_group:
12960 group_to_instances[node_to_group[node]].append(instance.name)
12963 # Do not pass on node information if it was not requested.
12964 group_to_nodes = None
12966 return query.GroupQueryData([self._all_groups[uuid]
12967 for uuid in self.wanted],
12968 group_to_nodes, group_to_instances)
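# Illustrative sketch (hypothetical helper, plain dicts only): the mapping
# built in _GroupQuery._GetQueryData above. Nodes map directly to their group;
# instances are attached to the group of their primary node, which is why node
# information has to be processed even for instance-only queries.
def _ExampleGroupMaps(wanted_groups, node_to_group, instance_to_pnode):
  """Return (group_to_nodes, group_to_instances) for the wanted groups.

  @param wanted_groups: iterable of group UUIDs to report on
  @param node_to_group: dict of node name -> group UUID
  @param instance_to_pnode: dict of instance name -> primary node name

  """
  group_to_nodes = dict((uuid, []) for uuid in wanted_groups)
  group_to_instances = dict((uuid, []) for uuid in wanted_groups)

  for (node, group) in node_to_group.items():
    if group in group_to_nodes:
      group_to_nodes[group].append(node)

  for (instance, pnode) in instance_to_pnode.items():
    group = node_to_group.get(pnode)
    if group in group_to_instances:
      group_to_instances[group].append(instance)

  return (group_to_nodes, group_to_instances)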
12971 class LUGroupQuery(NoHooksLU):
12972 """Logical unit for querying node groups.
12977 def CheckArguments(self):
12978 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
12979 self.op.output_fields, False)
12981 def ExpandNames(self):
12982 self.gq.ExpandNames(self)
12984 def DeclareLocks(self, level):
12985 self.gq.DeclareLocks(self, level)
12987 def Exec(self, feedback_fn):
12988 return self.gq.OldStyleQuery(self)
12991 class LUGroupSetParams(LogicalUnit):
12992 """Modifies the parameters of a node group.
12995 HPATH = "group-modify"
12996 HTYPE = constants.HTYPE_GROUP
12999 def CheckArguments(self):
13002 self.op.diskparams,
13003 self.op.alloc_policy,
13008 if all_changes.count(None) == len(all_changes):
13009 raise errors.OpPrereqError("Please pass at least one modification",
13010 errors.ECODE_INVAL)
13012 def ExpandNames(self):
13013 # This raises errors.OpPrereqError on its own:
13014 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13016 self.needed_locks = {
13017 locking.LEVEL_NODEGROUP: [self.group_uuid],
13020 def CheckPrereq(self):
13021 """Check prerequisites.
13024 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13026 if self.group is None:
13027 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13028 (self.op.group_name, self.group_uuid))
13030 if self.op.ndparams:
13031 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
13032 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13033 self.new_ndparams = new_ndparams
13035 if self.op.diskparams:
13036 self.new_diskparams = dict()
13037 for templ in constants.DISK_TEMPLATES:
13038 if templ not in self.op.diskparams:
13039 self.op.diskparams[templ] = {}
13040 new_templ_params = _GetUpdatedParams(self.group.diskparams[templ],
13041 self.op.diskparams[templ])
13042 utils.ForceDictType(new_templ_params, constants.DISK_DT_TYPES)
13043 self.new_diskparams[templ] = new_templ_params
13045 if self.op.hv_state:
13046 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
13047 self.group.hv_state_static)
13049 if self.op.disk_state:
13050 self.new_disk_state = \
13051 _MergeAndVerifyDiskState(self.op.disk_state,
13052 self.group.disk_state_static)
13054 def BuildHooksEnv(self):
13055 """Build hooks env.
13059 "GROUP_NAME": self.op.group_name,
13060 "NEW_ALLOC_POLICY": self.op.alloc_policy,
13063 def BuildHooksNodes(self):
13064 """Build hooks nodes.
13067 mn = self.cfg.GetMasterNode()
13068 return ([mn], [mn])
13070 def Exec(self, feedback_fn):
13071 """Modifies the node group.
13076 if self.op.ndparams:
13077 self.group.ndparams = self.new_ndparams
13078 result.append(("ndparams", str(self.group.ndparams)))
13080 if self.op.diskparams:
13081 self.group.diskparams = self.new_diskparams
13082 result.append(("diskparams", str(self.group.diskparams)))
13084 if self.op.alloc_policy:
13085 self.group.alloc_policy = self.op.alloc_policy
13087 if self.op.hv_state:
13088 self.group.hv_state_static = self.new_hv_state
13090 if self.op.disk_state:
13091 self.group.disk_state_static = self.new_disk_state
13093 self.cfg.Update(self.group, feedback_fn)
13097 class LUGroupRemove(LogicalUnit):
13098 HPATH = "group-remove"
13099 HTYPE = constants.HTYPE_GROUP
13102 def ExpandNames(self):
13103 # This raises errors.OpPrereqError on its own:
13104 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13105 self.needed_locks = {
13106 locking.LEVEL_NODEGROUP: [self.group_uuid],
13109 def CheckPrereq(self):
13110 """Check prerequisites.
13112 This checks that the given group name exists as a node group, that it is
13113 empty (i.e., contains no nodes), and that it is not the last group of the
13117 # Verify that the group is empty.
13118 group_nodes = [node.name
13119 for node in self.cfg.GetAllNodesInfo().values()
13120 if node.group == self.group_uuid]
13123 raise errors.OpPrereqError("Group '%s' not empty, has the following"
13125 (self.op.group_name,
13126 utils.CommaJoin(utils.NiceSort(group_nodes))),
13127 errors.ECODE_STATE)
13129 # Verify the cluster would not be left group-less.
13130 if len(self.cfg.GetNodeGroupList()) == 1:
13131 raise errors.OpPrereqError("Group '%s' is the only group,"
13132 " cannot be removed" %
13133 self.op.group_name,
13134 errors.ECODE_STATE)
13136 def BuildHooksEnv(self):
13137 """Build hooks env.
13141 "GROUP_NAME": self.op.group_name,
13144 def BuildHooksNodes(self):
13145 """Build hooks nodes.
13148 mn = self.cfg.GetMasterNode()
13149 return ([mn], [mn])
13151 def Exec(self, feedback_fn):
13152 """Remove the node group.
13156 self.cfg.RemoveNodeGroup(self.group_uuid)
13157 except errors.ConfigurationError:
13158 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
13159 (self.op.group_name, self.group_uuid))
13161 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13164 class LUGroupRename(LogicalUnit):
13165 HPATH = "group-rename"
13166 HTYPE = constants.HTYPE_GROUP
13169 def ExpandNames(self):
13170 # This raises errors.OpPrereqError on its own:
13171 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13173 self.needed_locks = {
13174 locking.LEVEL_NODEGROUP: [self.group_uuid],
13177 def CheckPrereq(self):
13178 """Check prerequisites.
13180 Ensures requested new name is not yet used.
13184 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
13185 except errors.OpPrereqError:
13188 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
13189 " node group (UUID: %s)" %
13190 (self.op.new_name, new_name_uuid),
13191 errors.ECODE_EXISTS)
13193 def BuildHooksEnv(self):
13194 """Build hooks env.
13198 "OLD_NAME": self.op.group_name,
13199 "NEW_NAME": self.op.new_name,
13202 def BuildHooksNodes(self):
13203 """Build hooks nodes.
13206 mn = self.cfg.GetMasterNode()
13208 all_nodes = self.cfg.GetAllNodesInfo()
13209 all_nodes.pop(mn, None)
13212 run_nodes.extend(node.name for node in all_nodes.values()
13213 if node.group == self.group_uuid)
13215 return (run_nodes, run_nodes)
13217 def Exec(self, feedback_fn):
13218 """Rename the node group.
13221 group = self.cfg.GetNodeGroup(self.group_uuid)
13224 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13225 (self.op.group_name, self.group_uuid))
13227 group.name = self.op.new_name
13228 self.cfg.Update(group, feedback_fn)
13230 return self.op.new_name
13233 class LUGroupEvacuate(LogicalUnit):
13234 HPATH = "group-evacuate"
13235 HTYPE = constants.HTYPE_GROUP
13238 def ExpandNames(self):
13239 # This raises errors.OpPrereqError on its own:
13240 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13242 if self.op.target_groups:
13243 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
13244 self.op.target_groups)
13246 self.req_target_uuids = []
13248 if self.group_uuid in self.req_target_uuids:
13249 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
13250 " as a target group (targets are %s)" %
13252 utils.CommaJoin(self.req_target_uuids)),
13253 errors.ECODE_INVAL)
13255 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
13257 self.share_locks = _ShareAll()
13258 self.needed_locks = {
13259 locking.LEVEL_INSTANCE: [],
13260 locking.LEVEL_NODEGROUP: [],
13261 locking.LEVEL_NODE: [],
13264 def DeclareLocks(self, level):
13265 if level == locking.LEVEL_INSTANCE:
13266 assert not self.needed_locks[locking.LEVEL_INSTANCE]
13268 # Lock instances optimistically, needs verification once node and group
13269 # locks have been acquired
13270 self.needed_locks[locking.LEVEL_INSTANCE] = \
13271 self.cfg.GetNodeGroupInstances(self.group_uuid)
13273 elif level == locking.LEVEL_NODEGROUP:
13274 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13276 if self.req_target_uuids:
13277 lock_groups = set([self.group_uuid] + self.req_target_uuids)
13279 # Lock all groups used by instances optimistically; this requires going
13280 # via the node before it's locked, requiring verification later on
13281 lock_groups.update(group_uuid
13282 for instance_name in
13283 self.owned_locks(locking.LEVEL_INSTANCE)
13285 self.cfg.GetInstanceNodeGroups(instance_name))
13287 # No target groups, need to lock all of them
13288 lock_groups = locking.ALL_SET
13290 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13292 elif level == locking.LEVEL_NODE:
13293 # This will only lock the nodes in the group to be evacuated which
13294 # contain actual instances
13295 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13296 self._LockInstancesNodes()
13298 # Lock all nodes in group to be evacuated and target groups
13299 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13300 assert self.group_uuid in owned_groups
13301 member_nodes = [node_name
13302 for group in owned_groups
13303 for node_name in self.cfg.GetNodeGroup(group).members]
13304 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13306 def CheckPrereq(self):
13307 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13308 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13309 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13311 assert owned_groups.issuperset(self.req_target_uuids)
13312 assert self.group_uuid in owned_groups
13314 # Check if locked instances are still correct
13315 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
13317 # Get instance information
13318 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
13320 # Check if node groups for locked instances are still correct
13321 for instance_name in owned_instances:
13322 inst = self.instances[instance_name]
13323 assert owned_nodes.issuperset(inst.all_nodes), \
13324 "Instance %s's nodes changed while we kept the lock" % instance_name
13326 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
13329 assert self.group_uuid in inst_groups, \
13330 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
13332 if self.req_target_uuids:
13333 # User requested specific target groups
13334 self.target_uuids = self.req_target_uuids
13336 # All groups except the one to be evacuated are potential targets
13337 self.target_uuids = [group_uuid for group_uuid in owned_groups
13338 if group_uuid != self.group_uuid]
13340 if not self.target_uuids:
13341 raise errors.OpPrereqError("There are no possible target groups",
13342 errors.ECODE_INVAL)
13344 def BuildHooksEnv(self):
13345 """Build hooks env.
13349 "GROUP_NAME": self.op.group_name,
13350 "TARGET_GROUPS": " ".join(self.target_uuids),
13353 def BuildHooksNodes(self):
13354 """Build hooks nodes.
13357 mn = self.cfg.GetMasterNode()
13359 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
13361 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
13363 return (run_nodes, run_nodes)
13365 def Exec(self, feedback_fn):
13366 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13368 assert self.group_uuid not in self.target_uuids
13370 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
13371 instances=instances, target_groups=self.target_uuids)
13373 ial.Run(self.op.iallocator)
13375 if not ial.success:
13376 raise errors.OpPrereqError("Can't compute group evacuation using"
13377 " iallocator '%s': %s" %
13378 (self.op.iallocator, ial.info),
13379 errors.ECODE_NORES)
13381 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13383 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
13384 len(jobs), self.op.group_name)
13386 return ResultWithJobs(jobs)
13389 class TagsLU(NoHooksLU): # pylint: disable=W0223
13390 """Generic tags LU.
13392 This is an abstract class which is the parent of all the other tags LUs.
13395 def ExpandNames(self):
13396 self.group_uuid = None
13397 self.needed_locks = {}
13398 if self.op.kind == constants.TAG_NODE:
13399 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
13400 self.needed_locks[locking.LEVEL_NODE] = self.op.name
13401 elif self.op.kind == constants.TAG_INSTANCE:
13402 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
13403 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
13404 elif self.op.kind == constants.TAG_NODEGROUP:
13405 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
13407 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
13408 # not possible to acquire the BGL based on opcode parameters)
13410 def CheckPrereq(self):
13411 """Check prerequisites.
13414 if self.op.kind == constants.TAG_CLUSTER:
13415 self.target = self.cfg.GetClusterInfo()
13416 elif self.op.kind == constants.TAG_NODE:
13417 self.target = self.cfg.GetNodeInfo(self.op.name)
13418 elif self.op.kind == constants.TAG_INSTANCE:
13419 self.target = self.cfg.GetInstanceInfo(self.op.name)
13420 elif self.op.kind == constants.TAG_NODEGROUP:
13421 self.target = self.cfg.GetNodeGroup(self.group_uuid)
13423 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
13424 str(self.op.kind), errors.ECODE_INVAL)
13427 class LUTagsGet(TagsLU):
13428 """Returns the tags of a given object.
13433 def ExpandNames(self):
13434 TagsLU.ExpandNames(self)
13436 # Share locks as this is only a read operation
13437 self.share_locks = _ShareAll()
13439 def Exec(self, feedback_fn):
13440 """Returns the tag list.
13443 return list(self.target.GetTags())
13446 class LUTagsSearch(NoHooksLU):
13447 """Searches the tags for a given pattern.
13452 def ExpandNames(self):
13453 self.needed_locks = {}
13455 def CheckPrereq(self):
13456 """Check prerequisites.
13458 This checks the pattern passed for validity by compiling it.
13462 self.re = re.compile(self.op.pattern)
13463 except re.error, err:
13464 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
13465 (self.op.pattern, err), errors.ECODE_INVAL)
13467 def Exec(self, feedback_fn):
13468 """Returns the tag list.
13472 tgts = [("/cluster", cfg.GetClusterInfo())]
13473 ilist = cfg.GetAllInstancesInfo().values()
13474 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
13475 nlist = cfg.GetAllNodesInfo().values()
13476 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
13477 tgts.extend(("/nodegroup/%s" % n.name, n)
13478 for n in cfg.GetAllNodeGroupsInfo().values())
13480 for path, target in tgts:
13481 for tag in target.GetTags():
13482 if self.re.search(tag):
13483 results.append((path, tag))
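# Illustrative sketch (hypothetical data): the tag-search pattern used by
# LUTagsSearch.Exec above, reduced to plain data structures. Every taggable
# object contributes a (path, tag) pair for each of its tags that matches the
# compiled pattern.
def _ExampleTagSearch(pattern, tagged_objects):
  """Return (path, tag) pairs for all tags matching a regular expression.

  @param pattern: search pattern (string)
  @param tagged_objects: iterable of (path, set_of_tags) pairs

  """
  import re
  regex = re.compile(pattern)
  results = []
  for (path, tags) in tagged_objects:
    for tag in tags:
      if regex.search(tag):
        results.append((path, tag))
  return results

# Example: _ExampleTagSearch("^env:", [("/instances/web1", set(["env:prod"]))])
# returns [("/instances/web1", "env:prod")].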
13487 class LUTagsSet(TagsLU):
13488 """Sets a tag on a given object.
13493 def CheckPrereq(self):
13494 """Check prerequisites.
13496 This checks the type and length of the tag name and value.
13499 TagsLU.CheckPrereq(self)
13500 for tag in self.op.tags:
13501 objects.TaggableObject.ValidateTag(tag)
13503 def Exec(self, feedback_fn):
13508 for tag in self.op.tags:
13509 self.target.AddTag(tag)
13510 except errors.TagError, err:
13511 raise errors.OpExecError("Error while setting tag: %s" % str(err))
13512 self.cfg.Update(self.target, feedback_fn)
13515 class LUTagsDel(TagsLU):
13516 """Delete a list of tags from a given object.
13521 def CheckPrereq(self):
13522 """Check prerequisites.
13524 This checks that we have the given tag.
13527 TagsLU.CheckPrereq(self)
13528 for tag in self.op.tags:
13529 objects.TaggableObject.ValidateTag(tag)
13530 del_tags = frozenset(self.op.tags)
13531 cur_tags = self.target.GetTags()
13533 diff_tags = del_tags - cur_tags
13535 diff_names = ("'%s'" % i for i in sorted(diff_tags))
13536 raise errors.OpPrereqError("Tag(s) %s not found" %
13537 (utils.CommaJoin(diff_names), ),
13538 errors.ECODE_NOENT)
13540 def Exec(self, feedback_fn):
13541 """Remove the tag from the object.
13544 for tag in self.op.tags:
13545 self.target.RemoveTag(tag)
13546 self.cfg.Update(self.target, feedback_fn)
13549 class LUTestDelay(NoHooksLU):
13550 """Sleep for a specified amount of time.
13552 This LU sleeps on the master and/or nodes for a specified amount of
13558 def ExpandNames(self):
13559 """Expand names and set required locks.
13561 This expands the node list, if any.
13564 self.needed_locks = {}
13565 if self.op.on_nodes:
13566 # _GetWantedNodes can be used here, but is not always appropriate to use
13567 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
13568 # more information.
13569 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
13570 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
13572 def _TestDelay(self):
13573 """Do the actual sleep.
13576 if self.op.on_master:
13577 if not utils.TestDelay(self.op.duration):
13578 raise errors.OpExecError("Error during master delay test")
13579 if self.op.on_nodes:
13580 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
13581 for node, node_result in result.items():
13582 node_result.Raise("Failure during rpc call to node %s" % node)
13584 def Exec(self, feedback_fn):
13585 """Execute the test delay opcode, with the wanted repetitions.
13588 if self.op.repeat == 0:
13591 top_value = self.op.repeat - 1
13592 for i in range(self.op.repeat):
13593 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
13597 class LUTestJqueue(NoHooksLU):
13598 """Utility LU to test some aspects of the job queue.
13603 # Must be lower than default timeout for WaitForJobChange to see whether it
13604 # notices changed jobs
13605 _CLIENT_CONNECT_TIMEOUT = 20.0
13606 _CLIENT_CONFIRM_TIMEOUT = 60.0
13609 def _NotifyUsingSocket(cls, cb, errcls):
13610 """Opens a Unix socket and waits for another program to connect.
13613 @param cb: Callback to send socket name to client
13614 @type errcls: class
13615 @param errcls: Exception class to use for errors
13618 # Using a temporary directory as there's no easy way to create temporary
13619 # sockets without writing a custom loop around tempfile.mktemp and
13621 tmpdir = tempfile.mkdtemp()
13623 tmpsock = utils.PathJoin(tmpdir, "sock")
13625 logging.debug("Creating temporary socket at %s", tmpsock)
13626 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
13631 # Send details to client
13634 # Wait for client to connect before continuing
13635 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
13637 (conn, _) = sock.accept()
13638 except socket.error, err:
13639 raise errcls("Client didn't connect in time (%s)" % err)
13643 # Remove as soon as client is connected
13644 shutil.rmtree(tmpdir)
13646 # Wait for client to close
13649 # pylint: disable=E1101
13650 # Instance of '_socketobject' has no ... member
13651 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
13653 except socket.error, err:
13654 raise errcls("Client failed to confirm notification (%s)" % err)
13658 def _SendNotification(self, test, arg, sockname):
13659 """Sends a notification to the client.
13662 @param test: Test name
13663 @param arg: Test argument (depends on test)
13664 @type sockname: string
13665 @param sockname: Socket path
13668 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
13670 def _Notify(self, prereq, test, arg):
13671 """Notifies the client of a test.
13674 @param prereq: Whether this is a prereq-phase test
13676 @param test: Test name
13677 @param arg: Test argument (depends on test)
13681 errcls = errors.OpPrereqError
13683 errcls = errors.OpExecError
13685 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
13689 def CheckArguments(self):
13690 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
13691 self.expandnames_calls = 0
13693 def ExpandNames(self):
13694 checkargs_calls = getattr(self, "checkargs_calls", 0)
13695 if checkargs_calls < 1:
13696 raise errors.ProgrammerError("CheckArguments was not called")
13698 self.expandnames_calls += 1
13700 if self.op.notify_waitlock:
13701 self._Notify(True, constants.JQT_EXPANDNAMES, None)
13703 self.LogInfo("Expanding names")
13705 # Get lock on master node (just to get a lock, not for a particular reason)
13706 self.needed_locks = {
13707 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
13710 def Exec(self, feedback_fn):
13711 if self.expandnames_calls < 1:
13712 raise errors.ProgrammerError("ExpandNames was not called")
13714 if self.op.notify_exec:
13715 self._Notify(False, constants.JQT_EXEC, None)
13717 self.LogInfo("Executing")
13719 if self.op.log_messages:
13720 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
13721 for idx, msg in enumerate(self.op.log_messages):
13722 self.LogInfo("Sending log message %s", idx + 1)
13723 feedback_fn(constants.JQT_MSGPREFIX + msg)
13724 # Report how many test messages have been sent
13725 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
13728 raise errors.OpExecError("Opcode failure was requested")
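# Illustrative sketch (simplified, hypothetical helper): the notification
# handshake used by LUTestJqueue._NotifyUsingSocket above. A Unix socket is
# created in a private temporary directory, its path is handed to the peer via
# a callback, and the server then waits (with a timeout) for the peer to
# connect before cleaning up. Error handling and the final confirmation step
# are omitted here.
def _ExampleWaitForPeer(callback, connect_timeout=20.0):
  import os
  import shutil
  import socket
  import tempfile

  tmpdir = tempfile.mkdtemp()
  try:
    sockpath = os.path.join(tmpdir, "sock")
    sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    try:
      sock.bind(sockpath)
      sock.listen(1)
      callback(sockpath)            # tell the peer where to connect
      sock.settimeout(connect_timeout)
      (conn, _) = sock.accept()     # raises socket.timeout if the peer is late
      conn.close()
    finally:
      sock.close()
  finally:
    shutil.rmtree(tmpdir)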
13733 class IAllocator(object):
13734 """IAllocator framework.
13736 An IAllocator instance has four sets of attributes:
13737 - cfg that is needed to query the cluster
13738 - input data (all members of the _KEYS class attribute are required)
13739 - four buffer attributes (in|out_data|text), that represent the
13740 input (to the external script) in text and data structure format,
13741 and the output from it, again in two formats
13742 - the result variables from the script (success, info, nodes) for
13746 # pylint: disable=R0902
13747 # lots of instance attributes
13749 def __init__(self, cfg, rpc_runner, mode, **kwargs):
13751 self.rpc = rpc_runner
13752 # init buffer variables
13753 self.in_text = self.out_text = self.in_data = self.out_data = None
13754 # init all input fields so that pylint is happy
13756 self.memory = self.disks = self.disk_template = None
13757 self.os = self.tags = self.nics = self.vcpus = None
13758 self.hypervisor = None
13759 self.relocate_from = None
13761 self.instances = None
13762 self.evac_mode = None
13763 self.target_groups = []
13765 self.required_nodes = None
13766 # init result fields
13767 self.success = self.info = self.result = None
13770 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
13772 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
13773 " IAllocator" % self.mode)
13775 keyset = [n for (n, _) in keydata]
13778 if key not in keyset:
13779 raise errors.ProgrammerError("Invalid input parameter '%s' to"
13780 " IAllocator" % key)
13781 setattr(self, key, kwargs[key])
13784 if key not in kwargs:
13785 raise errors.ProgrammerError("Missing input parameter '%s' to"
13786 " IAllocator" % key)
13787 self._BuildInputData(compat.partial(fn, self), keydata)
13789 def _ComputeClusterData(self):
13790 """Compute the generic allocator input data.
13792 This is the data that is independent of the actual operation.
13796 cluster_info = cfg.GetClusterInfo()
13799 "version": constants.IALLOCATOR_VERSION,
13800 "cluster_name": cfg.GetClusterName(),
13801 "cluster_tags": list(cluster_info.GetTags()),
13802 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
13803 # we don't have job IDs
13805 ninfo = cfg.GetAllNodesInfo()
13806 iinfo = cfg.GetAllInstancesInfo().values()
13807 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
13810 node_list = [n.name for n in ninfo.values() if n.vm_capable]
13812 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
13813 hypervisor_name = self.hypervisor
13814 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
13815 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
13817 hypervisor_name = cluster_info.primary_hypervisor
13819 node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
13822 self.rpc.call_all_instances_info(node_list,
13823 cluster_info.enabled_hypervisors)
13825 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
13827 config_ndata = self._ComputeBasicNodeData(ninfo)
13828 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
13829 i_list, config_ndata)
13830 assert len(data["nodes"]) == len(ninfo), \
13831 "Incomplete node data computed"
13833 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
13835 self.in_data = data
13838 def _ComputeNodeGroupData(cfg):
13839 """Compute node groups data.
13842 ng = dict((guuid, {
13843 "name": gdata.name,
13844 "alloc_policy": gdata.alloc_policy,
13846 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
13851 def _ComputeBasicNodeData(node_cfg):
13852 """Compute global node data.
13855 @returns: a dict of name: (node dict, node config)
13858 # fill in static (config-based) values
13859 node_results = dict((ninfo.name, {
13860 "tags": list(ninfo.GetTags()),
13861 "primary_ip": ninfo.primary_ip,
13862 "secondary_ip": ninfo.secondary_ip,
13863 "offline": ninfo.offline,
13864 "drained": ninfo.drained,
13865 "master_candidate": ninfo.master_candidate,
13866 "group": ninfo.group,
13867 "master_capable": ninfo.master_capable,
13868 "vm_capable": ninfo.vm_capable,
13870 for ninfo in node_cfg.values())
13872 return node_results
13875 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
13877 """Compute global node data.
13879 @param node_results: the basic node structures as filled from the config
13882 #TODO(dynmem): compute the right data on MAX and MIN memory
13883 # make a copy of the current dict
13884 node_results = dict(node_results)
13885 for nname, nresult in node_data.items():
13886 assert nname in node_results, "Missing basic data for node %s" % nname
13887 ninfo = node_cfg[nname]
13889 if not (ninfo.offline or ninfo.drained):
13890 nresult.Raise("Can't get data for node %s" % nname)
13891 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
13893 remote_info = _MakeLegacyNodeInfo(nresult.payload)
13895 for attr in ["memory_total", "memory_free", "memory_dom0",
13896 "vg_size", "vg_free", "cpu_total"]:
13897 if attr not in remote_info:
13898 raise errors.OpExecError("Node '%s' didn't return attribute"
13899 " '%s'" % (nname, attr))
13900 if not isinstance(remote_info[attr], int):
13901 raise errors.OpExecError("Node '%s' returned invalid value"
13903 (nname, attr, remote_info[attr]))
13904 # compute memory used by primary instances
13905 i_p_mem = i_p_up_mem = 0
13906 for iinfo, beinfo in i_list:
13907 if iinfo.primary_node == nname:
13908 i_p_mem += beinfo[constants.BE_MAXMEM]
13909 if iinfo.name not in node_iinfo[nname].payload:
13912 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
13913 i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
13914 remote_info["memory_free"] -= max(0, i_mem_diff)
13916 if iinfo.admin_state == constants.ADMINST_UP:
13917 i_p_up_mem += beinfo[constants.BE_MAXMEM]
13919 # compute memory used by instances
13921 "total_memory": remote_info["memory_total"],
13922 "reserved_memory": remote_info["memory_dom0"],
13923 "free_memory": remote_info["memory_free"],
13924 "total_disk": remote_info["vg_size"],
13925 "free_disk": remote_info["vg_free"],
13926 "total_cpus": remote_info["cpu_total"],
13927 "i_pri_memory": i_p_mem,
13928 "i_pri_up_memory": i_p_up_mem,
13930 pnr_dyn.update(node_results[nname])
13931 node_results[nname] = pnr_dyn
13933 return node_results
13936 def _ComputeInstanceData(cluster_info, i_list):
13937 """Compute global instance data.
13941 for iinfo, beinfo in i_list:
13943 for nic in iinfo.nics:
13944 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
13948 "mode": filled_params[constants.NIC_MODE],
13949 "link": filled_params[constants.NIC_LINK],
13951 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
13952 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
13953 nic_data.append(nic_dict)
13955 "tags": list(iinfo.GetTags()),
13956 "admin_state": iinfo.admin_state,
13957 "vcpus": beinfo[constants.BE_VCPUS],
13958 "memory": beinfo[constants.BE_MAXMEM],
13960 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
13962 "disks": [{constants.IDISK_SIZE: dsk.size,
13963 constants.IDISK_MODE: dsk.mode}
13964 for dsk in iinfo.disks],
13965 "disk_template": iinfo.disk_template,
13966 "hypervisor": iinfo.hypervisor,
13968 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
13970 instance_data[iinfo.name] = pir
13972 return instance_data
13974 def _AddNewInstance(self):
13975 """Add new instance data to allocator structure.
13977 This in combination with _ComputeClusterData will create the
13978 correct structure needed as input for the allocator.
13980 The checks for the completeness of the opcode must have already been
13984 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
13986 if self.disk_template in constants.DTS_INT_MIRROR:
13987 self.required_nodes = 2
13989 self.required_nodes = 1
13993 "disk_template": self.disk_template,
13996 "vcpus": self.vcpus,
13997 "memory": self.memory,
13998 "disks": self.disks,
13999 "disk_space_total": disk_space,
14001 "required_nodes": self.required_nodes,
14002 "hypervisor": self.hypervisor,
14007 def _AddRelocateInstance(self):
14008 """Add relocate instance data to allocator structure.
14010 This in combination with _ComputeClusterData will create the
14011 correct structure needed as input for the allocator.
14013 The checks for the completeness of the opcode must have already been
14017 instance = self.cfg.GetInstanceInfo(self.name)
14018 if instance is None:
14019 raise errors.ProgrammerError("Unknown instance '%s' passed to"
14020 " IAllocator" % self.name)
14022 if instance.disk_template not in constants.DTS_MIRRORED:
14023 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
14024 errors.ECODE_INVAL)
14026 if instance.disk_template in constants.DTS_INT_MIRROR and \
14027 len(instance.secondary_nodes) != 1:
14028 raise errors.OpPrereqError("Instance has not exactly one secondary node",
14029 errors.ECODE_STATE)
14031 self.required_nodes = 1
14032 disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
14033 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
14037 "disk_space_total": disk_space,
14038 "required_nodes": self.required_nodes,
14039 "relocate_from": self.relocate_from,
14043 def _AddNodeEvacuate(self):
14044 """Get data for node-evacuate requests.
14048 "instances": self.instances,
14049 "evac_mode": self.evac_mode,
14052 def _AddChangeGroup(self):
14053 """Get data for node-evacuate requests.
14057 "instances": self.instances,
14058 "target_groups": self.target_groups,
14061 def _BuildInputData(self, fn, keydata):
14062 """Build input data structures.
14065 self._ComputeClusterData()
14068 request["type"] = self.mode
14069 for keyname, keytype in keydata:
14070 if keyname not in request:
14071 raise errors.ProgrammerError("Request parameter %s is missing" %
14073 val = request[keyname]
14074 if not keytype(val):
14075 raise errors.ProgrammerError("Request parameter %s doesn't pass"
14076 " validation, value %s, expected"
14077 " type %s" % (keyname, val, keytype))
14078 self.in_data["request"] = request
14080 self.in_text = serializer.Dump(self.in_data)
14082 _STRING_LIST = ht.TListOf(ht.TString)
14083 _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
14084 # pylint: disable=E1101
14085 # Class '...' has no 'OP_ID' member
14086 "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
14087 opcodes.OpInstanceMigrate.OP_ID,
14088 opcodes.OpInstanceReplaceDisks.OP_ID])
14092 ht.TListOf(ht.TAnd(ht.TIsLength(3),
14093 ht.TItems([ht.TNonEmptyString,
14094 ht.TNonEmptyString,
14095 ht.TListOf(ht.TNonEmptyString),
14098 ht.TListOf(ht.TAnd(ht.TIsLength(2),
14099 ht.TItems([ht.TNonEmptyString,
14102 _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
14103 ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
14106 constants.IALLOCATOR_MODE_ALLOC:
14109 ("name", ht.TString),
14110 ("memory", ht.TInt),
14111 ("disks", ht.TListOf(ht.TDict)),
14112 ("disk_template", ht.TString),
14113 ("os", ht.TString),
14114 ("tags", _STRING_LIST),
14115 ("nics", ht.TListOf(ht.TDict)),
14116 ("vcpus", ht.TInt),
14117 ("hypervisor", ht.TString),
14119 constants.IALLOCATOR_MODE_RELOC:
14120 (_AddRelocateInstance,
14121 [("name", ht.TString), ("relocate_from", _STRING_LIST)],
14123 constants.IALLOCATOR_MODE_NODE_EVAC:
14124 (_AddNodeEvacuate, [
14125 ("instances", _STRING_LIST),
14126 ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
14128 constants.IALLOCATOR_MODE_CHG_GROUP:
14129 (_AddChangeGroup, [
14130 ("instances", _STRING_LIST),
14131 ("target_groups", _STRING_LIST),
14135 def Run(self, name, validate=True, call_fn=None):
14136 """Run an instance allocator and return the results.
14139 if call_fn is None:
14140 call_fn = self.rpc.call_iallocator_runner
14142 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
14143 result.Raise("Failure while running the iallocator script")
14145 self.out_text = result.payload
14147 self._ValidateResult()
14149 def _ValidateResult(self):
14150 """Process the allocator results.
14152 This will process and if successful save the result in
14153 self.out_data and the other parameters.
14157 rdict = serializer.Load(self.out_text)
14158 except Exception, err:
14159 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
14161 if not isinstance(rdict, dict):
14162 raise errors.OpExecError("Can't parse iallocator results: not a dict")
14164 # TODO: remove backwards compatibility in later versions
14165 if "nodes" in rdict and "result" not in rdict:
14166 rdict["result"] = rdict["nodes"]
14169 for key in "success", "info", "result":
14170 if key not in rdict:
14171 raise errors.OpExecError("Can't parse iallocator results:"
14172 " missing key '%s'" % key)
14173 setattr(self, key, rdict[key])
14175 if not self._result_check(self.result):
14176 raise errors.OpExecError("Iallocator returned invalid result,"
14177 " expected %s, got %s" %
14178 (self._result_check, self.result),
14179 errors.ECODE_INVAL)
14181 if self.mode == constants.IALLOCATOR_MODE_RELOC:
14182 assert self.relocate_from is not None
14183 assert self.required_nodes == 1
14185 node2group = dict((name, ndata["group"])
14186 for (name, ndata) in self.in_data["nodes"].items())
14188 fn = compat.partial(self._NodesToGroups, node2group,
14189 self.in_data["nodegroups"])
14191 instance = self.cfg.GetInstanceInfo(self.name)
14192 request_groups = fn(self.relocate_from + [instance.primary_node])
14193 result_groups = fn(rdict["result"] + [instance.primary_node])
14195 if self.success and not set(result_groups).issubset(request_groups):
14196 raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
14197 " differ from original groups (%s)" %
14198 (utils.CommaJoin(result_groups),
14199 utils.CommaJoin(request_groups)))
14201 elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
14202 assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
14204 self.out_data = rdict
14207 def _NodesToGroups(node2group, groups, nodes):
14208 """Returns a list of unique group names for a list of nodes.
14210 @type node2group: dict
14211 @param node2group: Map from node name to group UUID
14213 @param groups: Group information
14215 @param nodes: Node names
14222 group_uuid = node2group[node]
14224 # Ignore unknown node
14228 group = groups[group_uuid]
14230 # Can't find group, let's use UUID
14231 group_name = group_uuid
14233 group_name = group["name"]
14235 result.add(group_name)
14237 return sorted(result)
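# Illustrative sketch (hypothetical data): how _NodesToGroups above resolves
# node names to a sorted list of unique group names, falling back to the group
# UUID when the group is unknown and silently skipping unknown nodes.
def _ExampleNodesToGroups():
  node2group = {"node1": "uuid-a", "node2": "uuid-a", "node3": "uuid-b"}
  groups = {"uuid-a": {"name": "default"}}  # uuid-b intentionally missing

  result = set()
  for node in ["node1", "node3", "node4"]:   # node4 is unknown and ignored
    if node not in node2group:
      continue
    group_uuid = node2group[node]
    group = groups.get(group_uuid)
    result.add(group["name"] if group else group_uuid)
  return sorted(result)   # ["default", "uuid-b"]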
14240 class LUTestAllocator(NoHooksLU):
14241 """Run allocator tests.
14243 This LU runs the allocator tests
14246 def CheckPrereq(self):
14247 """Check prerequisites.
14249 This checks the opcode parameters depending on the direction and mode of the test.
14252 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
14253 for attr in ["memory", "disks", "disk_template",
14254 "os", "tags", "nics", "vcpus"]:
14255 if not hasattr(self.op, attr):
14256 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
14257 attr, errors.ECODE_INVAL)
14258 iname = self.cfg.ExpandInstanceName(self.op.name)
14259 if iname is not None:
14260 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
14261 iname, errors.ECODE_EXISTS)
14262 if not isinstance(self.op.nics, list):
14263 raise errors.OpPrereqError("Invalid parameter 'nics'",
14264 errors.ECODE_INVAL)
14265 if not isinstance(self.op.disks, list):
14266 raise errors.OpPrereqError("Invalid parameter 'disks'",
14267 errors.ECODE_INVAL)
14268 for row in self.op.disks:
14269 if (not isinstance(row, dict) or
14270 constants.IDISK_SIZE not in row or
14271 not isinstance(row[constants.IDISK_SIZE], int) or
14272 constants.IDISK_MODE not in row or
14273 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
14274 raise errors.OpPrereqError("Invalid contents of the 'disks'"
14275 " parameter", errors.ECODE_INVAL)
14276 if self.op.hypervisor is None:
14277 self.op.hypervisor = self.cfg.GetHypervisorType()
14278 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
14279 fname = _ExpandInstanceName(self.cfg, self.op.name)
14280 self.op.name = fname
14281 self.relocate_from = \
14282 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
14283 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
14284 constants.IALLOCATOR_MODE_NODE_EVAC):
14285 if not self.op.instances:
14286 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
14287 self.op.instances = _GetWantedInstances(self, self.op.instances)
14289 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
14290 self.op.mode, errors.ECODE_INVAL)
14292 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
14293 if self.op.allocator is None:
14294 raise errors.OpPrereqError("Missing allocator name",
14295 errors.ECODE_INVAL)
14296 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
14297 raise errors.OpPrereqError("Wrong allocator test '%s'" %
14298 self.op.direction, errors.ECODE_INVAL)
14300 def Exec(self, feedback_fn):
14301 """Run the allocator test.
14304 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
14305 ial = IAllocator(self.cfg, self.rpc,
14308 memory=self.op.memory,
14309 disks=self.op.disks,
14310 disk_template=self.op.disk_template,
14314 vcpus=self.op.vcpus,
14315 hypervisor=self.op.hypervisor,
14317 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
14318 ial = IAllocator(self.cfg, self.rpc,
14321 relocate_from=list(self.relocate_from),
14323 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
14324 ial = IAllocator(self.cfg, self.rpc,
14326 instances=self.op.instances,
14327 target_groups=self.op.target_groups)
14328 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
14329 ial = IAllocator(self.cfg, self.rpc,
14331 instances=self.op.instances,
14332 evac_mode=self.op.evac_mode)
14334 raise errors.ProgrammerError("Uncatched mode %s in"
14335 " LUTestAllocator.Exec", self.op.mode)
14337 if self.op.direction == constants.IALLOCATOR_DIR_IN:
14338 result = ial.in_text
14340 ial.Run(self.op.allocator, validate=False)
14341 result = ial.out_text
14345 #: Query type implementations
14347 constants.QR_INSTANCE: _InstanceQuery,
14348 constants.QR_NODE: _NodeQuery,
14349 constants.QR_GROUP: _GroupQuery,
14350 constants.QR_OS: _OsQuery,
14353 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
14356 def _GetQueryImplementation(name):
14357 """Returns the implemtnation for a query type.
14359 @param name: Query type, must be one of L{constants.QR_VIA_OP}
14363 return _QUERY_IMPL[name]
14365 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
14366 errors.ECODE_INVAL)
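# Illustrative sketch (hypothetical mapping and resource name): the
# dictionary-based dispatch used by _GetQueryImplementation above, with an
# explicit error for unknown resource names instead of a bare KeyError.
def _ExampleDispatch(name, implementations):
  """Look up a query implementation, raising ValueError if unknown.

  @param name: query resource name
  @param implementations: dict of resource name -> implementation

  """
  try:
    return implementations[name]
  except KeyError:
    raise ValueError("Unknown query resource '%s'" % name)

# Example: _ExampleDispatch("instance", {"instance": object}) returns object;
# _ExampleDispatch("bogus", {}) raises ValueError.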