4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
45 from ganeti import ssh
46 from ganeti import utils
47 from ganeti import errors
48 from ganeti import hypervisor
49 from ganeti import locking
50 from ganeti import constants
51 from ganeti import objects
52 from ganeti import serializer
53 from ganeti import ssconf
54 from ganeti import uidpool
55 from ganeti import compat
56 from ganeti import masterd
57 from ganeti import netutils
58 from ganeti import query
59 from ganeti import qlang
60 from ganeti import opcodes
62 from ganeti import rpc
64 import ganeti.masterd.instance # pylint: disable=W0611
67 #: Size of DRBD meta block device
71 INSTANCE_UP = [constants.ADMINST_UP]
72 INSTANCE_DOWN = [constants.ADMINST_DOWN]
73 INSTANCE_OFFLINE = [constants.ADMINST_OFFLINE]
74 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
75 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
79 """Data container for LU results with jobs.
81 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
82 by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
83 contained in the C{jobs} attribute and include the job IDs in the opcode
87 def __init__(self, jobs, **kwargs):
88 """Initializes this class.
90 Additional return values can be specified as keyword arguments.
92 @type jobs: list of lists of L{opcode.OpCode}
93 @param jobs: A list of lists of opcode objects
100 class LogicalUnit(object):
101 """Logical Unit base class.
103 Subclasses must follow these rules:
104 - implement ExpandNames
105 - implement CheckPrereq (except when tasklets are used)
106 - implement Exec (except when tasklets are used)
107 - implement BuildHooksEnv
108 - implement BuildHooksNodes
109 - redefine HPATH and HTYPE
110 - optionally redefine their run requirements:
111 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
113 Note that all commands require root permissions.
115 @ivar dry_run_result: the value (if any) that will be returned to the caller
116 in dry-run mode (signalled by opcode dry_run parameter)
123 def __init__(self, processor, op, context, rpc_runner):
124 """Constructor for LogicalUnit.
126 This needs to be overridden in derived classes in order to check op
130 self.proc = processor
132 self.cfg = context.cfg
133 self.glm = context.glm
135 self.owned_locks = context.glm.list_owned
136 self.context = context
137 self.rpc = rpc_runner
138 # Dicts used to declare locking needs to mcpu
139 self.needed_locks = None
140 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
142 self.remove_locks = {}
143 # Used to force good behavior when calling helper functions
144 self.recalculate_locks = {}
146 self.Log = processor.Log # pylint: disable=C0103
147 self.LogWarning = processor.LogWarning # pylint: disable=C0103
148 self.LogInfo = processor.LogInfo # pylint: disable=C0103
149 self.LogStep = processor.LogStep # pylint: disable=C0103
150 # support for dry-run
151 self.dry_run_result = None
152 # support for generic debug attribute
153 if (not hasattr(self.op, "debug_level") or
154 not isinstance(self.op.debug_level, int)):
155 self.op.debug_level = 0
160 # Validate opcode parameters and set defaults
161 self.op.Validate(True)
163 self.CheckArguments()
165 def CheckArguments(self):
166 """Check syntactic validity for the opcode arguments.
168 This method is for doing a simple syntactic check and ensure
169 validity of opcode parameters, without any cluster-related
170 checks. While the same can be accomplished in ExpandNames and/or
171 CheckPrereq, doing these separate is better because:
173 - ExpandNames is left as as purely a lock-related function
174 - CheckPrereq is run after we have acquired locks (and possible
177 The function is allowed to change the self.op attribute so that
178 later methods can no longer worry about missing parameters.
183 def ExpandNames(self):
184 """Expand names for this LU.
186 This method is called before starting to execute the opcode, and it should
187 update all the parameters of the opcode to their canonical form (e.g. a
188 short node name must be fully expanded after this method has successfully
189 completed). This way locking, hooks, logging, etc. can work correctly.
191 LUs which implement this method must also populate the self.needed_locks
192 member, as a dict with lock levels as keys, and a list of needed lock names
195 - use an empty dict if you don't need any lock
196 - if you don't need any lock at a particular level omit that level
197 - don't put anything for the BGL level
198 - if you want all locks at a level use locking.ALL_SET as a value
200 If you need to share locks (rather than acquire them exclusively) at one
201 level you can modify self.share_locks, setting a true value (usually 1) for
202 that level. By default locks are not shared.
204 This function can also define a list of tasklets, which then will be
205 executed in order instead of the usual LU-level CheckPrereq and Exec
206 functions, if those are not defined by the LU.
210 # Acquire all nodes and one instance
211 self.needed_locks = {
212 locking.LEVEL_NODE: locking.ALL_SET,
213 locking.LEVEL_INSTANCE: ['instance1.example.com'],
215 # Acquire just two nodes
216 self.needed_locks = {
217 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
220 self.needed_locks = {} # No, you can't leave it to the default value None
223 # The implementation of this method is mandatory only if the new LU is
224 # concurrent, so that old LUs don't need to be changed all at the same
227 self.needed_locks = {} # Exclusive LUs don't need locks.
229 raise NotImplementedError
231 def DeclareLocks(self, level):
232 """Declare LU locking needs for a level
234 While most LUs can just declare their locking needs at ExpandNames time,
235 sometimes there's the need to calculate some locks after having acquired
236 the ones before. This function is called just before acquiring locks at a
237 particular level, but after acquiring the ones at lower levels, and permits
238 such calculations. It can be used to modify self.needed_locks, and by
239 default it does nothing.
241 This function is only called if you have something already set in
242 self.needed_locks for the level.
244 @param level: Locking level which is going to be locked
245 @type level: member of ganeti.locking.LEVELS
249 def CheckPrereq(self):
250 """Check prerequisites for this LU.
252 This method should check that the prerequisites for the execution
253 of this LU are fulfilled. It can do internode communication, but
254 it should be idempotent - no cluster or system changes are
257 The method should raise errors.OpPrereqError in case something is
258 not fulfilled. Its return value is ignored.
260 This method should also update all the parameters of the opcode to
261 their canonical form if it hasn't been done by ExpandNames before.
264 if self.tasklets is not None:
265 for (idx, tl) in enumerate(self.tasklets):
266 logging.debug("Checking prerequisites for tasklet %s/%s",
267 idx + 1, len(self.tasklets))
272 def Exec(self, feedback_fn):
275 This method should implement the actual work. It should raise
276 errors.OpExecError for failures that are somewhat dealt with in
280 if self.tasklets is not None:
281 for (idx, tl) in enumerate(self.tasklets):
282 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
285 raise NotImplementedError
287 def BuildHooksEnv(self):
288 """Build hooks environment for this LU.
291 @return: Dictionary containing the environment that will be used for
292 running the hooks for this LU. The keys of the dict must not be prefixed
293 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
294 will extend the environment with additional variables. If no environment
295 should be defined, an empty dictionary should be returned (not C{None}).
296 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
300 raise NotImplementedError
302 def BuildHooksNodes(self):
303 """Build list of nodes to run LU's hooks.
305 @rtype: tuple; (list, list)
306 @return: Tuple containing a list of node names on which the hook
307 should run before the execution and a list of node names on which the
308 hook should run after the execution. No nodes should be returned as an
309 empty list (and not None).
310 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
314 raise NotImplementedError
316 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
317 """Notify the LU about the results of its hooks.
319 This method is called every time a hooks phase is executed, and notifies
320 the Logical Unit about the hooks' result. The LU can then use it to alter
321 its result based on the hooks. By default the method does nothing and the
322 previous result is passed back unchanged but any LU can define it if it
323 wants to use the local cluster hook-scripts somehow.
325 @param phase: one of L{constants.HOOKS_PHASE_POST} or
326 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
327 @param hook_results: the results of the multi-node hooks rpc call
328 @param feedback_fn: function used send feedback back to the caller
329 @param lu_result: the previous Exec result this LU had, or None
331 @return: the new Exec result, based on the previous result
335 # API must be kept, thus we ignore the unused argument and could
336 # be a function warnings
337 # pylint: disable=W0613,R0201
340 def _ExpandAndLockInstance(self):
341 """Helper function to expand and lock an instance.
343 Many LUs that work on an instance take its name in self.op.instance_name
344 and need to expand it and then declare the expanded name for locking. This
345 function does it, and then updates self.op.instance_name to the expanded
346 name. It also initializes needed_locks as a dict, if this hasn't been done
350 if self.needed_locks is None:
351 self.needed_locks = {}
353 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
354 "_ExpandAndLockInstance called with instance-level locks set"
355 self.op.instance_name = _ExpandInstanceName(self.cfg,
356 self.op.instance_name)
357 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
359 def _LockInstancesNodes(self, primary_only=False,
360 level=locking.LEVEL_NODE):
361 """Helper function to declare instances' nodes for locking.
363 This function should be called after locking one or more instances to lock
364 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
365 with all primary or secondary nodes for instances already locked and
366 present in self.needed_locks[locking.LEVEL_INSTANCE].
368 It should be called from DeclareLocks, and for safety only works if
369 self.recalculate_locks[locking.LEVEL_NODE] is set.
371 In the future it may grow parameters to just lock some instance's nodes, or
372 to just lock primaries or secondary nodes, if needed.
374 If should be called in DeclareLocks in a way similar to::
376 if level == locking.LEVEL_NODE:
377 self._LockInstancesNodes()
379 @type primary_only: boolean
380 @param primary_only: only lock primary nodes of locked instances
381 @param level: Which lock level to use for locking nodes
384 assert level in self.recalculate_locks, \
385 "_LockInstancesNodes helper function called with no nodes to recalculate"
387 # TODO: check if we're really been called with the instance locks held
389 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
390 # future we might want to have different behaviors depending on the value
391 # of self.recalculate_locks[locking.LEVEL_NODE]
393 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
394 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
395 wanted_nodes.append(instance.primary_node)
397 wanted_nodes.extend(instance.secondary_nodes)
399 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
400 self.needed_locks[level] = wanted_nodes
401 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
402 self.needed_locks[level].extend(wanted_nodes)
404 raise errors.ProgrammerError("Unknown recalculation mode")
406 del self.recalculate_locks[level]
409 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
410 """Simple LU which runs no hooks.
412 This LU is intended as a parent for other LogicalUnits which will
413 run no hooks, in order to reduce duplicate code.
419 def BuildHooksEnv(self):
420 """Empty BuildHooksEnv for NoHooksLu.
422 This just raises an error.
425 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
427 def BuildHooksNodes(self):
428 """Empty BuildHooksNodes for NoHooksLU.
431 raise AssertionError("BuildHooksNodes called for NoHooksLU")
435 """Tasklet base class.
437 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
438 they can mix legacy code with tasklets. Locking needs to be done in the LU,
439 tasklets know nothing about locks.
441 Subclasses must follow these rules:
442 - Implement CheckPrereq
446 def __init__(self, lu):
453 def CheckPrereq(self):
454 """Check prerequisites for this tasklets.
456 This method should check whether the prerequisites for the execution of
457 this tasklet are fulfilled. It can do internode communication, but it
458 should be idempotent - no cluster or system changes are allowed.
460 The method should raise errors.OpPrereqError in case something is not
461 fulfilled. Its return value is ignored.
463 This method should also update all parameters to their canonical form if it
464 hasn't been done before.
469 def Exec(self, feedback_fn):
470 """Execute the tasklet.
472 This method should implement the actual work. It should raise
473 errors.OpExecError for failures that are somewhat dealt with in code, or
477 raise NotImplementedError
481 """Base for query utility classes.
484 #: Attribute holding field definitions
487 def __init__(self, qfilter, fields, use_locking):
488 """Initializes this class.
491 self.use_locking = use_locking
493 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
495 self.requested_data = self.query.RequestedData()
496 self.names = self.query.RequestedNames()
498 # Sort only if no names were requested
499 self.sort_by_name = not self.names
501 self.do_locking = None
504 def _GetNames(self, lu, all_names, lock_level):
505 """Helper function to determine names asked for in the query.
509 names = lu.owned_locks(lock_level)
513 if self.wanted == locking.ALL_SET:
514 assert not self.names
515 # caller didn't specify names, so ordering is not important
516 return utils.NiceSort(names)
518 # caller specified names and we must keep the same order
520 assert not self.do_locking or lu.glm.is_owned(lock_level)
522 missing = set(self.wanted).difference(names)
524 raise errors.OpExecError("Some items were removed before retrieving"
525 " their data: %s" % missing)
527 # Return expanded names
530 def ExpandNames(self, lu):
531 """Expand names for this query.
533 See L{LogicalUnit.ExpandNames}.
536 raise NotImplementedError()
538 def DeclareLocks(self, lu, level):
539 """Declare locks for this query.
541 See L{LogicalUnit.DeclareLocks}.
544 raise NotImplementedError()
546 def _GetQueryData(self, lu):
547 """Collects all data for this query.
549 @return: Query data object
552 raise NotImplementedError()
554 def NewStyleQuery(self, lu):
555 """Collect data and execute query.
558 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
559 sort_by_name=self.sort_by_name)
561 def OldStyleQuery(self, lu):
562 """Collect data and execute query.
565 return self.query.OldStyleQuery(self._GetQueryData(lu),
566 sort_by_name=self.sort_by_name)
570 """Returns a dict declaring all lock levels shared.
573 return dict.fromkeys(locking.LEVELS, 1)
576 def _MakeLegacyNodeInfo(data):
577 """Formats the data returned by L{rpc.RpcRunner.call_node_info}.
579 Converts the data into a single dictionary. This is fine for most use cases,
580 but some require information from more than one volume group or hypervisor.
583 (bootid, (vg_info, ), (hv_info, )) = data
585 return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
590 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
591 """Checks if the owned node groups are still correct for an instance.
593 @type cfg: L{config.ConfigWriter}
594 @param cfg: The cluster configuration
595 @type instance_name: string
596 @param instance_name: Instance name
597 @type owned_groups: set or frozenset
598 @param owned_groups: List of currently owned node groups
601 inst_groups = cfg.GetInstanceNodeGroups(instance_name)
603 if not owned_groups.issuperset(inst_groups):
604 raise errors.OpPrereqError("Instance %s's node groups changed since"
605 " locks were acquired, current groups are"
606 " are '%s', owning groups '%s'; retry the"
609 utils.CommaJoin(inst_groups),
610 utils.CommaJoin(owned_groups)),
616 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
617 """Checks if the instances in a node group are still correct.
619 @type cfg: L{config.ConfigWriter}
620 @param cfg: The cluster configuration
621 @type group_uuid: string
622 @param group_uuid: Node group UUID
623 @type owned_instances: set or frozenset
624 @param owned_instances: List of currently owned instances
627 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
628 if owned_instances != wanted_instances:
629 raise errors.OpPrereqError("Instances in node group '%s' changed since"
630 " locks were acquired, wanted '%s', have '%s';"
631 " retry the operation" %
633 utils.CommaJoin(wanted_instances),
634 utils.CommaJoin(owned_instances)),
637 return wanted_instances
640 def _SupportsOob(cfg, node):
641 """Tells if node supports OOB.
643 @type cfg: L{config.ConfigWriter}
644 @param cfg: The cluster configuration
645 @type node: L{objects.Node}
646 @param node: The node
647 @return: The OOB script if supported or an empty string otherwise
650 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
653 def _GetWantedNodes(lu, nodes):
654 """Returns list of checked and expanded node names.
656 @type lu: L{LogicalUnit}
657 @param lu: the logical unit on whose behalf we execute
659 @param nodes: list of node names or None for all nodes
661 @return: the list of nodes, sorted
662 @raise errors.ProgrammerError: if the nodes parameter is wrong type
666 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
668 return utils.NiceSort(lu.cfg.GetNodeList())
671 def _GetWantedInstances(lu, instances):
672 """Returns list of checked and expanded instance names.
674 @type lu: L{LogicalUnit}
675 @param lu: the logical unit on whose behalf we execute
676 @type instances: list
677 @param instances: list of instance names or None for all instances
679 @return: the list of instances, sorted
680 @raise errors.OpPrereqError: if the instances parameter is wrong type
681 @raise errors.OpPrereqError: if any of the passed instances is not found
685 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
687 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
691 def _GetUpdatedParams(old_params, update_dict,
692 use_default=True, use_none=False):
693 """Return the new version of a parameter dictionary.
695 @type old_params: dict
696 @param old_params: old parameters
697 @type update_dict: dict
698 @param update_dict: dict containing new parameter values, or
699 constants.VALUE_DEFAULT to reset the parameter to its default
701 @param use_default: boolean
702 @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
703 values as 'to be deleted' values
704 @param use_none: boolean
705 @type use_none: whether to recognise C{None} values as 'to be
708 @return: the new parameter dictionary
711 params_copy = copy.deepcopy(old_params)
712 for key, val in update_dict.iteritems():
713 if ((use_default and val == constants.VALUE_DEFAULT) or
714 (use_none and val is None)):
720 params_copy[key] = val
724 def _UpdateAndVerifySubDict(base, updates, type_check):
725 """Updates and verifies a dict with sub dicts of the same type.
727 @param base: The dict with the old data
728 @param updates: The dict with the new data
729 @param type_check: Dict suitable to ForceDictType to verify correct types
730 @returns: A new dict with updated and verified values
734 new = _GetUpdatedParams(old, value)
735 utils.ForceDictType(new, type_check)
738 ret = copy.deepcopy(base)
739 ret.update(dict((key, fn(base.get(key, {}), value))
740 for key, value in updates.items()))
744 def _MergeAndVerifyHvState(op_input, obj_input):
745 """Combines the hv state from an opcode with the one of the object
747 @param op_input: The input dict from the opcode
748 @param obj_input: The input dict from the objects
749 @return: The verified and updated dict
753 invalid_hvs = set(op_input) - constants.HYPER_TYPES
755 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
756 " %s" % utils.CommaJoin(invalid_hvs),
758 if obj_input is None:
760 type_check = constants.HVSTS_PARAMETER_TYPES
761 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
766 def _MergeAndVerifyDiskState(op_input, obj_input):
767 """Combines the disk state from an opcode with the one of the object
769 @param op_input: The input dict from the opcode
770 @param obj_input: The input dict from the objects
771 @return: The verified and updated dict
774 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
776 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
777 utils.CommaJoin(invalid_dst),
779 type_check = constants.DSS_PARAMETER_TYPES
780 if obj_input is None:
782 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
784 for key, value in op_input.items())
789 def _ReleaseLocks(lu, level, names=None, keep=None):
790 """Releases locks owned by an LU.
792 @type lu: L{LogicalUnit}
793 @param level: Lock level
794 @type names: list or None
795 @param names: Names of locks to release
796 @type keep: list or None
797 @param keep: Names of locks to retain
800 assert not (keep is not None and names is not None), \
801 "Only one of the 'names' and the 'keep' parameters can be given"
803 if names is not None:
804 should_release = names.__contains__
806 should_release = lambda name: name not in keep
808 should_release = None
810 owned = lu.owned_locks(level)
812 # Not owning any lock at this level, do nothing
819 # Determine which locks to release
821 if should_release(name):
826 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
828 # Release just some locks
829 lu.glm.release(level, names=release)
831 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
834 lu.glm.release(level)
836 assert not lu.glm.is_owned(level), "No locks should be owned"
839 def _MapInstanceDisksToNodes(instances):
840 """Creates a map from (node, volume) to instance name.
842 @type instances: list of L{objects.Instance}
843 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
846 return dict(((node, vol), inst.name)
847 for inst in instances
848 for (node, vols) in inst.MapLVsByNode().items()
852 def _RunPostHook(lu, node_name):
853 """Runs the post-hook for an opcode on a single node.
856 hm = lu.proc.BuildHooksManager(lu)
858 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
860 # pylint: disable=W0702
861 lu.LogWarning("Errors occurred running hooks on %s" % node_name)
864 def _CheckOutputFields(static, dynamic, selected):
865 """Checks whether all selected fields are valid.
867 @type static: L{utils.FieldSet}
868 @param static: static fields set
869 @type dynamic: L{utils.FieldSet}
870 @param dynamic: dynamic fields set
877 delta = f.NonMatching(selected)
879 raise errors.OpPrereqError("Unknown output fields selected: %s"
880 % ",".join(delta), errors.ECODE_INVAL)
883 def _CheckGlobalHvParams(params):
884 """Validates that given hypervisor params are not global ones.
886 This will ensure that instances don't get customised versions of
890 used_globals = constants.HVC_GLOBALS.intersection(params)
892 msg = ("The following hypervisor parameters are global and cannot"
893 " be customized at instance level, please modify them at"
894 " cluster level: %s" % utils.CommaJoin(used_globals))
895 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
898 def _CheckNodeOnline(lu, node, msg=None):
899 """Ensure that a given node is online.
901 @param lu: the LU on behalf of which we make the check
902 @param node: the node to check
903 @param msg: if passed, should be a message to replace the default one
904 @raise errors.OpPrereqError: if the node is offline
908 msg = "Can't use offline node"
909 if lu.cfg.GetNodeInfo(node).offline:
910 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
913 def _CheckNodeNotDrained(lu, node):
914 """Ensure that a given node is not drained.
916 @param lu: the LU on behalf of which we make the check
917 @param node: the node to check
918 @raise errors.OpPrereqError: if the node is drained
921 if lu.cfg.GetNodeInfo(node).drained:
922 raise errors.OpPrereqError("Can't use drained node %s" % node,
926 def _CheckNodeVmCapable(lu, node):
927 """Ensure that a given node is vm capable.
929 @param lu: the LU on behalf of which we make the check
930 @param node: the node to check
931 @raise errors.OpPrereqError: if the node is not vm capable
934 if not lu.cfg.GetNodeInfo(node).vm_capable:
935 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
939 def _CheckNodeHasOS(lu, node, os_name, force_variant):
940 """Ensure that a node supports a given OS.
942 @param lu: the LU on behalf of which we make the check
943 @param node: the node to check
944 @param os_name: the OS to query about
945 @param force_variant: whether to ignore variant errors
946 @raise errors.OpPrereqError: if the node is not supporting the OS
949 result = lu.rpc.call_os_get(node, os_name)
950 result.Raise("OS '%s' not in supported OS list for node %s" %
952 prereq=True, ecode=errors.ECODE_INVAL)
953 if not force_variant:
954 _CheckOSVariant(result.payload, os_name)
957 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
958 """Ensure that a node has the given secondary ip.
960 @type lu: L{LogicalUnit}
961 @param lu: the LU on behalf of which we make the check
963 @param node: the node to check
964 @type secondary_ip: string
965 @param secondary_ip: the ip to check
966 @type prereq: boolean
967 @param prereq: whether to throw a prerequisite or an execute error
968 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
969 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
972 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
973 result.Raise("Failure checking secondary ip on node %s" % node,
974 prereq=prereq, ecode=errors.ECODE_ENVIRON)
975 if not result.payload:
976 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
977 " please fix and re-run this command" % secondary_ip)
979 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
981 raise errors.OpExecError(msg)
984 def _GetClusterDomainSecret():
985 """Reads the cluster domain secret.
988 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
992 def _CheckInstanceState(lu, instance, req_states, msg=None):
993 """Ensure that an instance is in one of the required states.
995 @param lu: the LU on behalf of which we make the check
996 @param instance: the instance to check
997 @param msg: if passed, should be a message to replace the default one
998 @raise errors.OpPrereqError: if the instance is not in the required state
1002 msg = "can't use instance from outside %s states" % ", ".join(req_states)
1003 if instance.admin_state not in req_states:
1004 raise errors.OpPrereqError("Instance %s is marked to be %s, %s" %
1005 (instance, instance.admin_state, msg),
1008 if constants.ADMINST_UP not in req_states:
1009 pnode = instance.primary_node
1010 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1011 ins_l.Raise("Can't contact node %s for instance information" % pnode,
1012 prereq=True, ecode=errors.ECODE_ENVIRON)
1014 if instance.name in ins_l.payload:
1015 raise errors.OpPrereqError("Instance %s is running, %s" %
1016 (instance.name, msg), errors.ECODE_STATE)
1019 def _CheckMinMaxSpecs(name, ipolicy, value):
1020 """Checks if value is in the desired range.
1022 @param name: name of the parameter for which we perform the check
1023 @param ipolicy: dictionary containing min, max and std values
1024 @param value: actual value that we want to use
1025 @return: None or element not meeting the criteria
1029 if value in [None, constants.VALUE_AUTO]:
1031 max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1032 min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1033 if value > max_v or min_v > value:
1034 return ("%s value %s is not in range [%s, %s]" %
1035 (name, value, min_v, max_v))
1039 def _ExpandItemName(fn, name, kind):
1040 """Expand an item name.
1042 @param fn: the function to use for expansion
1043 @param name: requested item name
1044 @param kind: text description ('Node' or 'Instance')
1045 @return: the resolved (full) name
1046 @raise errors.OpPrereqError: if the item is not found
1049 full_name = fn(name)
1050 if full_name is None:
1051 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1056 def _ExpandNodeName(cfg, name):
1057 """Wrapper over L{_ExpandItemName} for nodes."""
1058 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1061 def _ExpandInstanceName(cfg, name):
1062 """Wrapper over L{_ExpandItemName} for instance."""
1063 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
1066 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1067 minmem, maxmem, vcpus, nics, disk_template, disks,
1068 bep, hvp, hypervisor_name, tags):
1069 """Builds instance related env variables for hooks
1071 This builds the hook environment from individual variables.
1074 @param name: the name of the instance
1075 @type primary_node: string
1076 @param primary_node: the name of the instance's primary node
1077 @type secondary_nodes: list
1078 @param secondary_nodes: list of secondary nodes as strings
1079 @type os_type: string
1080 @param os_type: the name of the instance's OS
1081 @type status: string
1082 @param status: the desired status of the instance
1083 @type minmem: string
1084 @param minmem: the minimum memory size of the instance
1085 @type maxmem: string
1086 @param maxmem: the maximum memory size of the instance
1088 @param vcpus: the count of VCPUs the instance has
1090 @param nics: list of tuples (ip, mac, mode, link) representing
1091 the NICs the instance has
1092 @type disk_template: string
1093 @param disk_template: the disk template of the instance
1095 @param disks: the list of (size, mode) pairs
1097 @param bep: the backend parameters for the instance
1099 @param hvp: the hypervisor parameters for the instance
1100 @type hypervisor_name: string
1101 @param hypervisor_name: the hypervisor for the instance
1103 @param tags: list of instance tags as strings
1105 @return: the hook environment for this instance
1110 "INSTANCE_NAME": name,
1111 "INSTANCE_PRIMARY": primary_node,
1112 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1113 "INSTANCE_OS_TYPE": os_type,
1114 "INSTANCE_STATUS": status,
1115 "INSTANCE_MINMEM": minmem,
1116 "INSTANCE_MAXMEM": maxmem,
1117 # TODO(2.7) remove deprecated "memory" value
1118 "INSTANCE_MEMORY": maxmem,
1119 "INSTANCE_VCPUS": vcpus,
1120 "INSTANCE_DISK_TEMPLATE": disk_template,
1121 "INSTANCE_HYPERVISOR": hypervisor_name,
1124 nic_count = len(nics)
1125 for idx, (ip, mac, mode, link) in enumerate(nics):
1128 env["INSTANCE_NIC%d_IP" % idx] = ip
1129 env["INSTANCE_NIC%d_MAC" % idx] = mac
1130 env["INSTANCE_NIC%d_MODE" % idx] = mode
1131 env["INSTANCE_NIC%d_LINK" % idx] = link
1132 if mode == constants.NIC_MODE_BRIDGED:
1133 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1137 env["INSTANCE_NIC_COUNT"] = nic_count
1140 disk_count = len(disks)
1141 for idx, (size, mode) in enumerate(disks):
1142 env["INSTANCE_DISK%d_SIZE" % idx] = size
1143 env["INSTANCE_DISK%d_MODE" % idx] = mode
1147 env["INSTANCE_DISK_COUNT"] = disk_count
1152 env["INSTANCE_TAGS"] = " ".join(tags)
1154 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1155 for key, value in source.items():
1156 env["INSTANCE_%s_%s" % (kind, key)] = value
1161 def _NICListToTuple(lu, nics):
1162 """Build a list of nic information tuples.
1164 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1165 value in LUInstanceQueryData.
1167 @type lu: L{LogicalUnit}
1168 @param lu: the logical unit on whose behalf we execute
1169 @type nics: list of L{objects.NIC}
1170 @param nics: list of nics to convert to hooks tuples
1174 cluster = lu.cfg.GetClusterInfo()
1178 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1179 mode = filled_params[constants.NIC_MODE]
1180 link = filled_params[constants.NIC_LINK]
1181 hooks_nics.append((ip, mac, mode, link))
1185 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1186 """Builds instance related env variables for hooks from an object.
1188 @type lu: L{LogicalUnit}
1189 @param lu: the logical unit on whose behalf we execute
1190 @type instance: L{objects.Instance}
1191 @param instance: the instance for which we should build the
1193 @type override: dict
1194 @param override: dictionary with key/values that will override
1197 @return: the hook environment dictionary
1200 cluster = lu.cfg.GetClusterInfo()
1201 bep = cluster.FillBE(instance)
1202 hvp = cluster.FillHV(instance)
1204 "name": instance.name,
1205 "primary_node": instance.primary_node,
1206 "secondary_nodes": instance.secondary_nodes,
1207 "os_type": instance.os,
1208 "status": instance.admin_state,
1209 "maxmem": bep[constants.BE_MAXMEM],
1210 "minmem": bep[constants.BE_MINMEM],
1211 "vcpus": bep[constants.BE_VCPUS],
1212 "nics": _NICListToTuple(lu, instance.nics),
1213 "disk_template": instance.disk_template,
1214 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1217 "hypervisor_name": instance.hypervisor,
1218 "tags": instance.tags,
1221 args.update(override)
1222 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1225 def _AdjustCandidatePool(lu, exceptions):
1226 """Adjust the candidate pool after node operations.
1229 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1231 lu.LogInfo("Promoted nodes to master candidate role: %s",
1232 utils.CommaJoin(node.name for node in mod_list))
1233 for name in mod_list:
1234 lu.context.ReaddNode(name)
1235 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1237 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1241 def _DecideSelfPromotion(lu, exceptions=None):
1242 """Decide whether I should promote myself as a master candidate.
1245 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1246 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1247 # the new node will increase mc_max with one, so:
1248 mc_should = min(mc_should + 1, cp_size)
1249 return mc_now < mc_should
1252 def _CalculateGroupIPolicy(cfg, group):
1253 """Calculate instance policy for group.
1256 cluster = cfg.GetClusterInfo()
1257 return cluster.SimpleFillIPolicy(group.ipolicy)
1260 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1261 """Check that the brigdes needed by a list of nics exist.
1264 cluster = lu.cfg.GetClusterInfo()
1265 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1266 brlist = [params[constants.NIC_LINK] for params in paramslist
1267 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1269 result = lu.rpc.call_bridges_exist(target_node, brlist)
1270 result.Raise("Error checking bridges on destination node '%s'" %
1271 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1274 def _CheckInstanceBridgesExist(lu, instance, node=None):
1275 """Check that the brigdes needed by an instance exist.
1279 node = instance.primary_node
1280 _CheckNicsBridgesExist(lu, instance.nics, node)
1283 def _CheckOSVariant(os_obj, name):
1284 """Check whether an OS name conforms to the os variants specification.
1286 @type os_obj: L{objects.OS}
1287 @param os_obj: OS object to check
1289 @param name: OS name passed by the user, to check for validity
1292 variant = objects.OS.GetVariant(name)
1293 if not os_obj.supported_variants:
1295 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1296 " passed)" % (os_obj.name, variant),
1300 raise errors.OpPrereqError("OS name must include a variant",
1303 if variant not in os_obj.supported_variants:
1304 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1307 def _GetNodeInstancesInner(cfg, fn):
1308 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1311 def _GetNodeInstances(cfg, node_name):
1312 """Returns a list of all primary and secondary instances on a node.
1316 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1319 def _GetNodePrimaryInstances(cfg, node_name):
1320 """Returns primary instances on a node.
1323 return _GetNodeInstancesInner(cfg,
1324 lambda inst: node_name == inst.primary_node)
1327 def _GetNodeSecondaryInstances(cfg, node_name):
1328 """Returns secondary instances on a node.
1331 return _GetNodeInstancesInner(cfg,
1332 lambda inst: node_name in inst.secondary_nodes)
1335 def _GetStorageTypeArgs(cfg, storage_type):
1336 """Returns the arguments for a storage type.
1339 # Special case for file storage
1340 if storage_type == constants.ST_FILE:
1341 # storage.FileStorage wants a list of storage directories
1342 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1347 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1350 for dev in instance.disks:
1351 cfg.SetDiskID(dev, node_name)
1353 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
1354 result.Raise("Failed to get disk status from node %s" % node_name,
1355 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1357 for idx, bdev_status in enumerate(result.payload):
1358 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1364 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1365 """Check the sanity of iallocator and node arguments and use the
1366 cluster-wide iallocator if appropriate.
1368 Check that at most one of (iallocator, node) is specified. If none is
1369 specified, then the LU's opcode's iallocator slot is filled with the
1370 cluster-wide default iallocator.
1372 @type iallocator_slot: string
1373 @param iallocator_slot: the name of the opcode iallocator slot
1374 @type node_slot: string
1375 @param node_slot: the name of the opcode target node slot
1378 node = getattr(lu.op, node_slot, None)
1379 iallocator = getattr(lu.op, iallocator_slot, None)
1381 if node is not None and iallocator is not None:
1382 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1384 elif node is None and iallocator is None:
1385 default_iallocator = lu.cfg.GetDefaultIAllocator()
1386 if default_iallocator:
1387 setattr(lu.op, iallocator_slot, default_iallocator)
1389 raise errors.OpPrereqError("No iallocator or node given and no"
1390 " cluster-wide default iallocator found;"
1391 " please specify either an iallocator or a"
1392 " node, or set a cluster-wide default"
1396 def _GetDefaultIAllocator(cfg, iallocator):
1397 """Decides on which iallocator to use.
1399 @type cfg: L{config.ConfigWriter}
1400 @param cfg: Cluster configuration object
1401 @type iallocator: string or None
1402 @param iallocator: Iallocator specified in opcode
1404 @return: Iallocator name
1408 # Use default iallocator
1409 iallocator = cfg.GetDefaultIAllocator()
1412 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1413 " opcode nor as a cluster-wide default",
1419 class LUClusterPostInit(LogicalUnit):
1420 """Logical unit for running hooks after cluster initialization.
1423 HPATH = "cluster-init"
1424 HTYPE = constants.HTYPE_CLUSTER
1426 def BuildHooksEnv(self):
1431 "OP_TARGET": self.cfg.GetClusterName(),
1434 def BuildHooksNodes(self):
1435 """Build hooks nodes.
1438 return ([], [self.cfg.GetMasterNode()])
1440 def Exec(self, feedback_fn):
1447 class LUClusterDestroy(LogicalUnit):
1448 """Logical unit for destroying the cluster.
1451 HPATH = "cluster-destroy"
1452 HTYPE = constants.HTYPE_CLUSTER
1454 def BuildHooksEnv(self):
1459 "OP_TARGET": self.cfg.GetClusterName(),
1462 def BuildHooksNodes(self):
1463 """Build hooks nodes.
1468 def CheckPrereq(self):
1469 """Check prerequisites.
1471 This checks whether the cluster is empty.
1473 Any errors are signaled by raising errors.OpPrereqError.
1476 master = self.cfg.GetMasterNode()
1478 nodelist = self.cfg.GetNodeList()
1479 if len(nodelist) != 1 or nodelist[0] != master:
1480 raise errors.OpPrereqError("There are still %d node(s) in"
1481 " this cluster." % (len(nodelist) - 1),
1483 instancelist = self.cfg.GetInstanceList()
1485 raise errors.OpPrereqError("There are still %d instance(s) in"
1486 " this cluster." % len(instancelist),
1489 def Exec(self, feedback_fn):
1490 """Destroys the cluster.
1493 master_params = self.cfg.GetMasterNetworkParameters()
1495 # Run post hooks on master node before it's removed
1496 _RunPostHook(self, master_params.name)
1498 ems = self.cfg.GetUseExternalMipScript()
1499 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1501 result.Raise("Could not disable the master role")
1503 return master_params.name
1506 def _VerifyCertificate(filename):
1507 """Verifies a certificate for L{LUClusterVerifyConfig}.
1509 @type filename: string
1510 @param filename: Path to PEM file
1514 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1515 utils.ReadFile(filename))
1516 except Exception, err: # pylint: disable=W0703
1517 return (LUClusterVerifyConfig.ETYPE_ERROR,
1518 "Failed to load X509 certificate %s: %s" % (filename, err))
1521 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1522 constants.SSL_CERT_EXPIRATION_ERROR)
1525 fnamemsg = "While verifying %s: %s" % (filename, msg)
1530 return (None, fnamemsg)
1531 elif errcode == utils.CERT_WARNING:
1532 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1533 elif errcode == utils.CERT_ERROR:
1534 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1536 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1539 def _GetAllHypervisorParameters(cluster, instances):
1540 """Compute the set of all hypervisor parameters.
1542 @type cluster: L{objects.Cluster}
1543 @param cluster: the cluster object
1544 @param instances: list of L{objects.Instance}
1545 @param instances: additional instances from which to obtain parameters
1546 @rtype: list of (origin, hypervisor, parameters)
1547 @return: a list with all parameters found, indicating the hypervisor they
1548 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1553 for hv_name in cluster.enabled_hypervisors:
1554 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1556 for os_name, os_hvp in cluster.os_hvp.items():
1557 for hv_name, hv_params in os_hvp.items():
1559 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1560 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1562 # TODO: collapse identical parameter values in a single one
1563 for instance in instances:
1564 if instance.hvparams:
1565 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1566 cluster.FillHV(instance)))
1571 class _VerifyErrors(object):
1572 """Mix-in for cluster/group verify LUs.
1574 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1575 self.op and self._feedback_fn to be available.)
1579 ETYPE_FIELD = "code"
1580 ETYPE_ERROR = "ERROR"
1581 ETYPE_WARNING = "WARNING"
1583 def _Error(self, ecode, item, msg, *args, **kwargs):
1584 """Format an error message.
1586 Based on the opcode's error_codes parameter, either format a
1587 parseable error code, or a simpler error string.
1589 This must be called only from Exec and functions called from Exec.
1592 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1593 itype, etxt, _ = ecode
1594 # first complete the msg
1597 # then format the whole message
1598 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1599 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1605 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1606 # and finally report it via the feedback_fn
1607 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
1609 def _ErrorIf(self, cond, ecode, *args, **kwargs):
1610 """Log an error message if the passed condition is True.
1614 or self.op.debug_simulate_errors) # pylint: disable=E1101
1616 # If the error code is in the list of ignored errors, demote the error to a
1618 (_, etxt, _) = ecode
1619 if etxt in self.op.ignore_errors: # pylint: disable=E1101
1620 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1623 self._Error(ecode, *args, **kwargs)
1625 # do not mark the operation as failed for WARN cases only
1626 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1627 self.bad = self.bad or cond
1630 class LUClusterVerify(NoHooksLU):
1631 """Submits all jobs necessary to verify the cluster.
1636 def ExpandNames(self):
1637 self.needed_locks = {}
1639 def Exec(self, feedback_fn):
1642 if self.op.group_name:
1643 groups = [self.op.group_name]
1644 depends_fn = lambda: None
1646 groups = self.cfg.GetNodeGroupList()
1648 # Verify global configuration
1650 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1653 # Always depend on global verification
1654 depends_fn = lambda: [(-len(jobs), [])]
1656 jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1657 ignore_errors=self.op.ignore_errors,
1658 depends=depends_fn())]
1659 for group in groups)
1661 # Fix up all parameters
1662 for op in itertools.chain(*jobs): # pylint: disable=W0142
1663 op.debug_simulate_errors = self.op.debug_simulate_errors
1664 op.verbose = self.op.verbose
1665 op.error_codes = self.op.error_codes
1667 op.skip_checks = self.op.skip_checks
1668 except AttributeError:
1669 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1671 return ResultWithJobs(jobs)
1674 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1675 """Verifies the cluster config.
1680 def _VerifyHVP(self, hvp_data):
1681 """Verifies locally the syntax of the hypervisor parameters.
1684 for item, hv_name, hv_params in hvp_data:
1685 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1688 hv_class = hypervisor.GetHypervisor(hv_name)
1689 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1690 hv_class.CheckParameterSyntax(hv_params)
1691 except errors.GenericError, err:
1692 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1694 def ExpandNames(self):
1695 # Information can be safely retrieved as the BGL is acquired in exclusive
1697 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
1698 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1699 self.all_node_info = self.cfg.GetAllNodesInfo()
1700 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1701 self.needed_locks = {}
1703 def Exec(self, feedback_fn):
1704 """Verify integrity of cluster, performing various test on nodes.
1708 self._feedback_fn = feedback_fn
1710 feedback_fn("* Verifying cluster config")
1712 for msg in self.cfg.VerifyConfig():
1713 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1715 feedback_fn("* Verifying cluster certificate files")
1717 for cert_filename in constants.ALL_CERT_FILES:
1718 (errcode, msg) = _VerifyCertificate(cert_filename)
1719 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1721 feedback_fn("* Verifying hypervisor parameters")
1723 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1724 self.all_inst_info.values()))
1726 feedback_fn("* Verifying all nodes belong to an existing group")
1728 # We do this verification here because, should this bogus circumstance
1729 # occur, it would never be caught by VerifyGroup, which only acts on
1730 # nodes/instances reachable from existing node groups.
1732 dangling_nodes = set(node.name for node in self.all_node_info.values()
1733 if node.group not in self.all_group_info)
1735 dangling_instances = {}
1736 no_node_instances = []
1738 for inst in self.all_inst_info.values():
1739 if inst.primary_node in dangling_nodes:
1740 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1741 elif inst.primary_node not in self.all_node_info:
1742 no_node_instances.append(inst.name)
1747 utils.CommaJoin(dangling_instances.get(node.name,
1749 for node in dangling_nodes]
1751 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
1753 "the following nodes (and their instances) belong to a non"
1754 " existing group: %s", utils.CommaJoin(pretty_dangling))
1756 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
1758 "the following instances have a non-existing primary-node:"
1759 " %s", utils.CommaJoin(no_node_instances))
1764 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1765 """Verifies the status of a node group.
1768 HPATH = "cluster-verify"
1769 HTYPE = constants.HTYPE_CLUSTER
1772 _HOOKS_INDENT_RE = re.compile("^", re.M)
1774 class NodeImage(object):
1775 """A class representing the logical and physical status of a node.
1778 @ivar name: the node name to which this object refers
1779 @ivar volumes: a structure as returned from
1780 L{ganeti.backend.GetVolumeList} (runtime)
1781 @ivar instances: a list of running instances (runtime)
1782 @ivar pinst: list of configured primary instances (config)
1783 @ivar sinst: list of configured secondary instances (config)
1784 @ivar sbp: dictionary of {primary-node: list of instances} for all
1785 instances for which this node is secondary (config)
1786 @ivar mfree: free memory, as reported by hypervisor (runtime)
1787 @ivar dfree: free disk, as reported by the node (runtime)
1788 @ivar offline: the offline status (config)
1789 @type rpc_fail: boolean
1790 @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1791 not whether the individual keys were correct) (runtime)
1792 @type lvm_fail: boolean
1793 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1794 @type hyp_fail: boolean
1795 @ivar hyp_fail: whether the RPC call didn't return the instance list
1796 @type ghost: boolean
1797 @ivar ghost: whether this is a known node or not (config)
1798 @type os_fail: boolean
1799 @ivar os_fail: whether the RPC call didn't return valid OS data
1801 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1802 @type vm_capable: boolean
1803 @ivar vm_capable: whether the node can host instances
1806 def __init__(self, offline=False, name=None, vm_capable=True):
1815 self.offline = offline
1816 self.vm_capable = vm_capable
1817 self.rpc_fail = False
1818 self.lvm_fail = False
1819 self.hyp_fail = False
1821 self.os_fail = False
1824 def ExpandNames(self):
1825 # This raises errors.OpPrereqError on its own:
1826 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1828 # Get instances in node group; this is unsafe and needs verification later
1829 inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
1831 self.needed_locks = {
1832 locking.LEVEL_INSTANCE: inst_names,
1833 locking.LEVEL_NODEGROUP: [self.group_uuid],
1834 locking.LEVEL_NODE: [],
1837 self.share_locks = _ShareAll()
1839 def DeclareLocks(self, level):
1840 if level == locking.LEVEL_NODE:
1841 # Get members of node group; this is unsafe and needs verification later
1842 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1844 all_inst_info = self.cfg.GetAllInstancesInfo()
1846 # In Exec(), we warn about mirrored instances that have primary and
1847 # secondary living in separate node groups. To fully verify that
1848 # volumes for these instances are healthy, we will need to do an
1849 # extra call to their secondaries. We ensure here those nodes will
1851 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
1852 # Important: access only the instances whose lock is owned
1853 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1854 nodes.update(all_inst_info[inst].secondary_nodes)
1856 self.needed_locks[locking.LEVEL_NODE] = nodes
1858 def CheckPrereq(self):
1859 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
1860 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
1862 group_nodes = set(self.group_info.members)
1863 group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
1866 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1868 unlocked_instances = \
1869 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
1872 raise errors.OpPrereqError("Missing lock for nodes: %s" %
1873 utils.CommaJoin(unlocked_nodes))
1875 if unlocked_instances:
1876 raise errors.OpPrereqError("Missing lock for instances: %s" %
1877 utils.CommaJoin(unlocked_instances))
1879 self.all_node_info = self.cfg.GetAllNodesInfo()
1880 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1882 self.my_node_names = utils.NiceSort(group_nodes)
1883 self.my_inst_names = utils.NiceSort(group_instances)
1885 self.my_node_info = dict((name, self.all_node_info[name])
1886 for name in self.my_node_names)
1888 self.my_inst_info = dict((name, self.all_inst_info[name])
1889 for name in self.my_inst_names)
1891 # We detect here the nodes that will need the extra RPC calls for verifying
1892 # split LV volumes; they should be locked.
1893 extra_lv_nodes = set()
1895 for inst in self.my_inst_info.values():
1896 if inst.disk_template in constants.DTS_INT_MIRROR:
1897 group = self.my_node_info[inst.primary_node].group
1898 for nname in inst.secondary_nodes:
1899 if self.all_node_info[nname].group != group:
1900 extra_lv_nodes.add(nname)
1902 unlocked_lv_nodes = \
1903 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1905 if unlocked_lv_nodes:
1906 raise errors.OpPrereqError("these nodes could be locked: %s" %
1907 utils.CommaJoin(unlocked_lv_nodes))
1908 self.extra_lv_nodes = list(extra_lv_nodes)
1910 def _VerifyNode(self, ninfo, nresult):
1911 """Perform some basic validation on data returned from a node.
1913 - check the result data structure is well formed and has all the
1915 - check ganeti version
1917 @type ninfo: L{objects.Node}
1918 @param ninfo: the node to check
1919 @param nresult: the results from the node
1921 @return: whether overall this call was successful (and we can expect
1922 reasonable values in the respose)
1926 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1928 # main result, nresult should be a non-empty dict
1929 test = not nresult or not isinstance(nresult, dict)
1930 _ErrorIf(test, constants.CV_ENODERPC, node,
1931 "unable to verify node: no data returned")
1935 # compares ganeti version
1936 local_version = constants.PROTOCOL_VERSION
1937 remote_version = nresult.get("version", None)
1938 test = not (remote_version and
1939 isinstance(remote_version, (list, tuple)) and
1940 len(remote_version) == 2)
1941 _ErrorIf(test, constants.CV_ENODERPC, node,
1942 "connection to node returned invalid data")
1946 test = local_version != remote_version[0]
1947 _ErrorIf(test, constants.CV_ENODEVERSION, node,
1948 "incompatible protocol versions: master %s,"
1949 " node %s", local_version, remote_version[0])
1953 # node seems compatible, we can actually try to look into its results
1955 # full package version
1956 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1957 constants.CV_ENODEVERSION, node,
1958 "software version mismatch: master %s, node %s",
1959 constants.RELEASE_VERSION, remote_version[1],
1960 code=self.ETYPE_WARNING)
1962 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1963 if ninfo.vm_capable and isinstance(hyp_result, dict):
1964 for hv_name, hv_result in hyp_result.iteritems():
1965 test = hv_result is not None
1966 _ErrorIf(test, constants.CV_ENODEHV, node,
1967 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1969 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1970 if ninfo.vm_capable and isinstance(hvp_result, list):
1971 for item, hv_name, hv_result in hvp_result:
1972 _ErrorIf(True, constants.CV_ENODEHV, node,
1973 "hypervisor %s parameter verify failure (source %s): %s",
1974 hv_name, item, hv_result)
1976 test = nresult.get(constants.NV_NODESETUP,
1977 ["Missing NODESETUP results"])
    _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True
1983 def _VerifyNodeTime(self, ninfo, nresult,
1984 nvinfo_starttime, nvinfo_endtime):
1985 """Check the node time.
1987 @type ninfo: L{objects.Node}
1988 @param ninfo: the node to check
1989 @param nresult: the remote results for the node
1990 @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1997 ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, constants.CV_ENODETIME, node,
               "Node returned invalid time")
      return
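    # The node's merged time must fall inside the RPC window widened by
    # constants.NODE_MAX_CLOCK_SKEW on both sides. Hypothetical example:
    # for an RPC window of 10:00:01..10:00:03 and a 150s skew allowance,
    # a node reporting 10:00:30 passes, while 09:57:00 is flagged below.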
2004 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2005 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2006 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

    _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)
2015 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2016 """Check the node LVM results.
2018 @type ninfo: L{objects.Node}
2019 @param ninfo: the node to check
2020 @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    if vg_name is None:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2030 # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = not vglist
    _ErrorIf(test, constants.CV_ENODELVM, node,
             "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2040 pvlist = nresult.get(constants.NV_PVLIST, None)
2041 test = pvlist is None
    _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
    if not test:
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PV)
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, constants.CV_ENODELVM, node,
                 "Invalid character ':' in PV '%s' of VG '%s'",
                 pvname, owner_vg)
2053 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2054 """Check the node bridges.
2056 @type ninfo: L{objects.Node}
2057 @param ninfo: the node to check
2058 @param nresult: the remote results for the node
    @param bridges: the expected list of bridges

    """
    if not bridges:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2068 missing = nresult.get(constants.NV_BRIDGES, None)
2069 test = not isinstance(missing, list)
2070 _ErrorIf(test, constants.CV_ENODENET, node,
2071 "did not return valid bridge information")
    if not test:
      _ErrorIf(bool(missing), constants.CV_ENODENET, node,
               "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2076 def _VerifyNodeUserScripts(self, ninfo, nresult):
2077 """Check the results of user scripts presence and executability on the node
2079 @type ninfo: L{objects.Node}
2080 @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name

    test = constants.NV_USERSCRIPTS not in nresult
2087 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2088 "did not return user scripts information")
    broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
    if broken_scripts:
      self._ErrorIf(True, constants.CV_ENODEUSERSCRIPTS, node,
                    "user scripts not present or not executable: %s" %
                    utils.CommaJoin(sorted(broken_scripts)))
2096 def _VerifyNodeNetwork(self, ninfo, nresult):
2097 """Check the node network connectivity results.
2099 @type ninfo: L{objects.Node}
2100 @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2107 test = constants.NV_NODELIST not in nresult
2108 _ErrorIf(test, constants.CV_ENODESSH, node,
2109 "node hasn't returned node ssh connectivity data")
    if not test:
      if nresult[constants.NV_NODELIST]:
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
          _ErrorIf(True, constants.CV_ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)
2116 test = constants.NV_NODENETTEST not in nresult
2117 _ErrorIf(test, constants.CV_ENODENET, node,
2118 "node hasn't returned node tcp connectivity data")
    if not test:
      if nresult[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, constants.CV_ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, nresult[constants.NV_NODENETTEST][anode])
2127 test = constants.NV_MASTERIP not in nresult
2128 _ErrorIf(test, constants.CV_ENODENET, node,
2129 "node hasn't returned node master IP reachability data")
    if not test:
      if not nresult[constants.NV_MASTERIP]:
        if node == self.master_node:
          msg = "the master node cannot reach the master IP (not configured?)"
        else:
          msg = "cannot reach the master IP"
        _ErrorIf(True, constants.CV_ENODENET, node, msg)
2138 def _VerifyInstancePolicy(self, instance):
2139 """Verify instance specs against instance policy set on node group level.
2143 cluster = self.cfg.GetClusterInfo()
2144 full_beparams = cluster.FillBE(instance)
2145 ipolicy = cluster.SimpleFillIPolicy(self.group_info.ipolicy)
2147 mem_size = full_beparams.get(constants.BE_MAXMEM, None)
2148 cpu_count = full_beparams.get(constants.BE_VCPUS, None)
2149 disk_count = len(instance.disks)
2150 disk_sizes = [disk.size for disk in instance.disks]
2151 nic_count = len(instance.nics)
    test_settings = [
      (constants.ISPEC_MEM_SIZE, mem_size),
2155 (constants.ISPEC_CPU_COUNT, cpu_count),
2156 (constants.ISPEC_DISK_COUNT, disk_count),
2157 (constants.ISPEC_NIC_COUNT, nic_count),
2158 ] + map((lambda d: (constants.ISPEC_DISK_SIZE, d)), disk_sizes)
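    # Disk sizes are checked one by one: each entry of disk_sizes becomes its
    # own (ISPEC_DISK_SIZE, size) pair, so e.g. disk_sizes = [512, 2048]
    # (hypothetical values) yields two independent policy checks.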
2160 for (name, value) in test_settings:
2161 test_result = _CheckMinMaxSpecs(name, ipolicy, value)
2162 self._ErrorIf(test_result is not None,
                    constants.CV_EINSTANCEPOLICY, instance.name,
                    test_result)
  def _VerifyInstance(self, instance, instanceconfig, node_image,
                      diskstatus):
2168 """Verify an instance.
2170 This function checks to see if the required block devices are
2171 available on the instance's node.
2174 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2175 node_current = instanceconfig.primary_node
2177 node_vol_should = {}
2178 instanceconfig.MapLVsByNode(node_vol_should)
2180 self._VerifyInstancePolicy(instanceconfig)
2182 for node in node_vol_should:
2183 n_img = node_image[node]
2184 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
      for volume in node_vol_should[node]:
2188 test = volume not in n_img.volumes
2189 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2190 "volume %s missing on node %s", volume, node)
2192 if instanceconfig.admin_state == constants.ADMINST_UP:
2193 pri_img = node_image[node_current]
2194 test = instance not in pri_img.instances and not pri_img.offline
2195 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)
2199 diskdata = [(nname, success, status, idx)
2200 for (nname, disks) in diskstatus.items()
2201 for idx, (success, status) in enumerate(disks)]
2203 for nname, success, bdev_status, idx in diskdata:
      # the 'ghost node' construction in Exec() ensures that we have a
      # node here
2206 snode = node_image[nname]
2207 bad_snode = snode.ghost or snode.offline
2208 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2209 not success and not bad_snode,
2210 constants.CV_EINSTANCEFAULTYDISK, instance,
2211 "couldn't retrieve status for disk/%s on %s: %s",
2212 idx, nname, bdev_status)
2213 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2214 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2215 constants.CV_EINSTANCEFAULTYDISK, instance,
2216 "disk/%s on %s is faulty", idx, nname)
2218 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2219 """Verify if there are any unknown volumes in the cluster.
2221 The .os, .swap and backup volumes are ignored. All other volumes are
2222 reported as unknown.
2224 @type reserved: L{ganeti.utils.FieldSet}
2225 @param reserved: a FieldSet of reserved volume names
2228 for node, n_img in node_image.items():
2229 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # skip non-healthy nodes
        continue
2232 for volume in n_img.volumes:
2233 test = ((node not in node_vol_should or
2234 volume not in node_vol_should[node]) and
2235 not reserved.Matches(volume))
2236 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2237 "volume %s is unknown", volume)
2239 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2240 """Verify N+1 Memory Resilience.
2242 Check that if one single node dies we can still start all the
2243 instances it was primary for.
2246 cluster_info = self.cfg.GetClusterInfo()
2247 for node, n_img in node_image.items():
2248 # This code checks that every node which is now listed as
2249 # secondary has enough memory to host all instances it is
2250 # supposed to should a single other node in the cluster fail.
2251 # FIXME: not ready for failover to an arbitrary node
2252 # FIXME: does not support file-backed instances
2253 # WARNING: we currently take into account down instances as well
2254 # as up ones, considering that even if they're down someone
2255 # might want to start them even in the event of a node failure.
      if n_img.offline:
        # we're skipping offline nodes from the N+1 warning, since
        # most likely we don't have good memory information from them;
        # we already list instances living on such nodes, and that's
        # enough
        continue
2262 #TODO(dynmem): use MINMEM for checking
2263 #TODO(dynmem): also consider ballooning out other instances
2264 for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
2267 bep = cluster_info.FillBE(instance_cfg[instance])
2268 if bep[constants.BE_AUTO_BALANCE]:
2269 needed_mem += bep[constants.BE_MAXMEM]
2270 test = n_img.mfree < needed_mem
2271 self._ErrorIf(test, constants.CV_ENODEN1, node,
                    "not enough memory to accommodate instance failovers"
2273 " should node %s fail (%dMiB needed, %dMiB available)",
2274 prinode, needed_mem, n_img.mfree)
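      # Worked example (hypothetical numbers): if this node is secondary for
      # two auto-balanced instances with BE_MAXMEM of 512 and 1024 MiB whose
      # primaries are both on "prinode", it needs 1536 MiB free to absorb a
      # failover of "prinode"; with mfree below that, CV_ENODEN1 is raised.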
  @classmethod
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2278 (files_all, files_opt, files_mc, files_vm)):
2279 """Verifies file checksums collected from all nodes.
2281 @param errorif: Callback for reporting errors
2282 @param nodeinfo: List of L{objects.Node} objects
2283 @param master_node: Name of master node
    @param all_nvinfo: RPC results

    """
2287 # Define functions determining which nodes to consider for a file
    files2nodefn = [
      (files_all, None),
      (files_mc, lambda node: (node.master_candidate or
2291 node.name == master_node)),
      (files_vm, lambda node: node.vm_capable),
      ]
2295 # Build mapping from filename to list of nodes which should have the file
    nodefiles = {}
    for (files, fn) in files2nodefn:
      if fn is None:
        filenodes = nodeinfo
      else:
        filenodes = filter(fn, nodeinfo)
2302 nodefiles.update((filename,
2303 frozenset(map(operator.attrgetter("name"), filenodes)))
2304 for filename in files)
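    # nodefiles now maps each filename to the frozenset of node names that
    # are expected to have it, e.g. (hypothetical):
    #   {"/var/lib/ganeti/config.data": frozenset(["node1", "node2"])}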
2306 assert set(nodefiles) == (files_all | files_mc | files_vm)
2308 fileinfo = dict((filename, {}) for filename in nodefiles)
2309 ignore_nodes = set()
    for node in nodeinfo:
      if node.offline:
        ignore_nodes.add(node.name)
        continue

      nresult = all_nvinfo[node.name]

      if nresult.fail_msg or not nresult.payload:
        node_files = None
      else:
        node_files = nresult.payload.get(constants.NV_FILELIST, None)
2323 test = not (node_files and isinstance(node_files, dict))
2324 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2325 "Node did not return file checksum data")
      if test:
        ignore_nodes.add(node.name)
        continue
2330 # Build per-checksum mapping from filename to nodes having it
2331 for (filename, checksum) in node_files.items():
2332 assert filename in nodefiles
2333 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2335 for (filename, checksums) in fileinfo.items():
2336 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2338 # Nodes having the file
2339 with_file = frozenset(node_name
2340 for nodes in fileinfo[filename].values()
2341 for node_name in nodes) - ignore_nodes
2343 expected_nodes = nodefiles[filename] - ignore_nodes
2345 # Nodes missing file
2346 missing_file = expected_nodes - with_file
2348 if filename in files_opt:
2350 errorif(missing_file and missing_file != expected_nodes,
2351 constants.CV_ECLUSTERFILECHECK, None,
2352 "File %s is optional, but it must exist on all or no"
2353 " nodes (not found on %s)",
2354 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
      else:
        errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
                "File %s is missing from node(s) %s", filename,
                utils.CommaJoin(utils.NiceSort(missing_file)))
2360 # Warn if a node has a file it shouldn't
2361 unexpected = with_file - expected_nodes
      errorif(unexpected,
              constants.CV_ECLUSTERFILECHECK, None,
2364 "File %s should not exist on node(s) %s",
2365 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2367 # See if there are multiple versions of the file
2368 test = len(checksums) > 1
      if test:
        variants = ["variant %s on %s" %
                    (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
                    for (idx, (checksum, nodes)) in
                    enumerate(sorted(checksums.items()))]
      else:
        variants = []
2377 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2378 "File %s found with %s different checksums (%s)",
2379 filename, len(checksums), "; ".join(variants))
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
                      drbd_map):
    """Verifies the node DRBD status.
2385 @type ninfo: L{objects.Node}
2386 @param ninfo: the node to check
2387 @param nresult: the remote results for the node
2388 @param instanceinfo: the dict of instances
2389 @param drbd_helper: the configured DRBD usermode helper
2390 @param drbd_map: the DRBD map as returned by
      L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    if drbd_helper:
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
      test = (helper_result is None)
      _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
               "no drbd usermode helper returned")
      if helper_result:
        status, payload = helper_result
        test = not status
        _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
                 "drbd usermode helper check unsuccessful: %s", payload)
        test = status and (payload != drbd_helper)
        _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
                 "wrong drbd usermode helper: %s", payload)
    # compute the DRBD minors
    node_drbd = {}
    for minor, instance in drbd_map[node].items():
2414 test = instance not in instanceinfo
2415 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2416 "ghost instance '%s' in temporary DRBD map", instance)
2417 # ghost instance should not be running, but otherwise we
2418 # don't give double warnings (both ghost instance and
2419 # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name,
2425 instance.admin_state == constants.ADMINST_UP)
2427 # and now check them
2428 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2429 test = not isinstance(used_minors, (tuple, list))
2430 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2431 "cannot parse drbd status file: %s", str(used_minors))
    if test:
      # we cannot check drbd status
      return
2436 for minor, (iname, must_exist) in node_drbd.items():
2437 test = minor not in used_minors and must_exist
2438 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2439 "drbd minor %d of instance %s is not active", minor, iname)
2440 for minor in used_minors:
2441 test = minor not in node_drbd
2442 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2443 "unallocated drbd minor %d is in use", minor)
2445 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2446 """Builds the node OS structures.
2448 @type ninfo: L{objects.Node}
2449 @param ninfo: the node to check
2450 @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2457 remote_os = nresult.get(constants.NV_OSLIST, None)
2458 test = (not isinstance(remote_os, list) or
2459 not compat.all(isinstance(v, list) and len(v) == 7
2460 for v in remote_os))
2462 _ErrorIf(test, constants.CV_ENODEOS, node,
             "node hasn't returned valid OS data")

    nimg.os_fail = test

    if test:
      # other checks are not possible, simply skip
      return

    os_dict = {}
2472 for (name, os_path, status, diagnose,
2473 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
      if name not in os_dict:
        os_dict[name] = []
2478 # parameters is a list of lists instead of list of tuples due to
2479 # JSON lacking a real tuple type, fix it:
2480 parameters = [tuple(v) for v in parameters]
2481 os_dict[name].append((os_path, status, diagnose,
2482 set(variants), set(parameters), set(api_ver)))
2484 nimg.oslist = os_dict
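    # nimg.oslist maps an OS name to the list of its occurrences on this
    # node; a well-behaved node has exactly one entry per OS, e.g.
    # (hypothetical):
    #   {"debian-img": [("/srv/ganeti/os/debian-img", True, "",
    #                    set(["default"]), set(), set([20]))]}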
2486 def _VerifyNodeOS(self, ninfo, nimg, base):
2487 """Verifies the node OS list.
2489 @type ninfo: L{objects.Node}
2490 @param ninfo: the node to check
2491 @param nimg: the node image object
    @param base: the 'template' node we match against (e.g. from the master)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2498 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2500 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2501 for os_name, os_data in nimg.oslist.items():
2502 assert os_data, "Empty OS status for OS %s?!" % os_name
2503 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2504 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2505 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2506 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2507 "OS '%s' has multiple entries (first one shadows the rest): %s",
2508 os_name, utils.CommaJoin([v[0] for v in os_data]))
2509 # comparisons with the 'base' image
2510 test = os_name not in base.oslist
2511 _ErrorIf(test, constants.CV_ENODEOS, node,
               "Extra OS %s not present on reference node (%s)",
               os_name, base.name)
      if test:
        continue
2516 assert base.oslist[os_name], "Base node has empty OS status?"
2517 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
      if not b_status:
        # base OS is invalid, skipping
        continue
2521 for kind, a, b in [("API version", f_api, b_api),
2522 ("variants list", f_var, b_var),
2523 ("parameters", beautify_params(f_param),
2524 beautify_params(b_param))]:
2525 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2526 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2527 kind, os_name, base.name,
2528 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2530 # check any missing OSes
2531 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2532 _ErrorIf(missing, constants.CV_ENODEOS, node,
2533 "OSes present on reference node %s but missing on this node: %s",
2534 base.name, utils.CommaJoin(missing))
2536 def _VerifyOob(self, ninfo, nresult):
2537 """Verifies out of band functionality of a node.
2539 @type ninfo: L{objects.Node}
2540 @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
2545 # We just have to verify the paths on master and/or master candidates
2546 # as the oob helper is invoked on the master
2547 if ((ninfo.master_candidate or ninfo.master_capable) and
2548 constants.NV_OOB_PATHS in nresult):
2549 for path_result in nresult[constants.NV_OOB_PATHS]:
2550 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2552 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2553 """Verifies and updates the node volume data.
2555 This function will update a L{NodeImage}'s internal structures
2556 with data from the remote call.
2558 @type ninfo: L{objects.Node}
2559 @param ninfo: the node to check
2560 @param nresult: the remote results for the node
2561 @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2568 nimg.lvm_fail = True
2569 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    if vg_name is None:
      pass
    elif isinstance(lvdata, basestring):
2573 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2574 utils.SafeEncode(lvdata))
2575 elif not isinstance(lvdata, dict):
2576 _ErrorIf(True, constants.CV_ENODELVM, node,
2577 "rpc call to node failed (lvlist)")
    else:
      nimg.volumes = lvdata
      nimg.lvm_fail = False
2582 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2583 """Verifies and updates the node instance list.
2585 If the listing was successful, then updates this node's instance
    list. Otherwise, it marks the RPC call as failed for the instance
    list.
2589 @type ninfo: L{objects.Node}
2590 @param ninfo: the node to check
2591 @param nresult: the remote results for the node
2592 @param nimg: the node image object
2595 idata = nresult.get(constants.NV_INSTANCELIST, None)
2596 test = not isinstance(idata, list)
2597 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2598 "rpc call to node failed (instancelist): %s",
2599 utils.SafeEncode(str(idata)))
    if test:
      nimg.hyp_fail = True
    else:
      nimg.instances = idata
2605 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2606 """Verifies and computes a node information map
2608 @type ninfo: L{objects.Node}
2609 @param ninfo: the node to check
2610 @param nresult: the remote results for the node
2611 @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2618 # try to read free memory (from the hypervisor)
2619 hv_info = nresult.get(constants.NV_HVINFO, None)
2620 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2621 _ErrorIf(test, constants.CV_ENODEHV, node,
2622 "rpc call to node failed (hvinfo)")
    if not test:
      try:
        nimg.mfree = int(hv_info["memory_free"])
2626 except (ValueError, TypeError):
2627 _ErrorIf(True, constants.CV_ENODERPC, node,
2628 "node returned invalid nodeinfo, check hypervisor")
2630 # FIXME: devise a free space model for file based instances as well
2631 if vg_name is not None:
2632 test = (constants.NV_VGLIST not in nresult or
2633 vg_name not in nresult[constants.NV_VGLIST])
2634 _ErrorIf(test, constants.CV_ENODELVM, node,
2635 "node didn't return data for the volume group '%s'"
2636 " - it is either missing or broken", vg_name)
      if not test:
        try:
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2640 except (ValueError, TypeError):
2641 _ErrorIf(True, constants.CV_ENODERPC, node,
2642 "node returned invalid LVM info, check LVM status")
2644 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2645 """Gets per-disk status information for all instances.
2647 @type nodelist: list of strings
2648 @param nodelist: Node names
2649 @type node_image: dict of (name, L{objects.Node})
2650 @param node_image: Node objects
2651 @type instanceinfo: dict of (name, L{objects.Instance})
2652 @param instanceinfo: Instance objects
    @rtype: {instance: {node: [(success, payload)]}}
2654 @return: a dictionary of per-instance dictionaries with nodes as
2655 keys and disk information as values; the disk information is a
      list of tuples (success, payload)

    """
2659 _ErrorIf = self._ErrorIf # pylint: disable=C0103
    node_disks = {}
    node_disks_devonly = {}
2663 diskless_instances = set()
2664 diskless = constants.DT_DISKLESS
2666 for nname in nodelist:
2667 node_instances = list(itertools.chain(node_image[nname].pinst,
2668 node_image[nname].sinst))
2669 diskless_instances.update(inst for inst in node_instances
2670 if instanceinfo[inst].disk_template == diskless)
2671 disks = [(inst, disk)
2672 for inst in node_instances
2673 for disk in instanceinfo[inst].disks]
      if not disks:
        # No need to collect data
        continue
2679 node_disks[nname] = disks
2681 # Creating copies as SetDiskID below will modify the objects and that can
2682 # lead to incorrect data returned from nodes
2683 devonly = [dev.Copy() for (_, dev) in disks]
      for dev in devonly:
        self.cfg.SetDiskID(dev, nname)
2688 node_disks_devonly[nname] = devonly
2690 assert len(node_disks) == len(node_disks_devonly)
2692 # Collect data from all nodes with disks
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
                                                          node_disks_devonly)
2696 assert len(result) == len(node_disks)
    instdisk = {}

    for (nname, nres) in result.items():
      disks = node_disks[nname]

      if nres.offline:
        # No data from this node
        data = len(disks) * [(False, "node offline")]
      else:
        msg = nres.fail_msg
        _ErrorIf(msg, constants.CV_ENODERPC, nname,
                 "while getting disk information: %s", msg)
        if msg:
          # No data from this node
          data = len(disks) * [(False, msg)]
        else:
          data = []
          for idx, i in enumerate(nres.payload):
            if isinstance(i, (tuple, list)) and len(i) == 2:
              data.append(i)
            else:
              logging.warning("Invalid result from node %s, entry %d: %s",
                              nname, idx, i)
              data.append((False, "Invalid result from the remote node"))
2723 for ((inst, _), status) in zip(disks, data):
2724 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
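      # instdisk is filled incrementally; after this loop it looks like
      # (hypothetical example):
      #   {"inst1": {"node1": [(True, status0), (False, "rpc timeout")]}}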
2726 # Add empty entries for diskless instances.
2727 for inst in diskless_instances:
      assert inst not in instdisk
      instdisk[inst] = {}
2731 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2732 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2733 compat.all(isinstance(s, (tuple, list)) and
2734 len(s) == 2 for s in statuses)
2735 for inst, nnames in instdisk.items()
2736 for nname, statuses in nnames.items())
2737 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
  @staticmethod
  def _SshNodeSelector(group_uuid, all_nodes):
    """Create endless iterators for all potential SSH check hosts.

    """
    nodes = [node for node in all_nodes
             if (node.group != group_uuid and
                 not node.offline)]
2749 keyfunc = operator.attrgetter("group")
2751 return map(itertools.cycle,
2752 [sorted(map(operator.attrgetter("name"), names))
                for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
                                                  keyfunc)])
  @classmethod
  def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2758 """Choose which nodes should talk to which other nodes.
    We will make nodes contact all nodes in their group, and one node from
    every other group.
2763 @warning: This algorithm has a known issue if one node group is much
2764 smaller than others (e.g. just one node). In such a case all other
      nodes will talk to the single node.

    """
    online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2769 sel = cls._SshNodeSelector(group_uuid, all_nodes)
2771 return (online_nodes,
2772 dict((name, sorted([i.next() for i in sel]))
2773 for name in online_nodes))
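    # Example (hypothetical): with groups A = {n1, n2} and B = {n3}, each
    # online node of A gets its A-peers plus one node drawn round-robin
    # from B's endless iterator, so SSH checks also cross group boundaries.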
  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks are run in the post phase; their failure is logged
    in the verify output and makes the verification fail.

    """
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags()),
      }
2786 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
               for node in self.my_node_info.values())

    return env
2791 def BuildHooksNodes(self):
2792 """Build hooks nodes.
2795 return ([], self.my_node_names)
2797 def Exec(self, feedback_fn):
2798 """Verify integrity of the node group, performing various test on nodes.
2801 # This method has too many local variables. pylint: disable=R0914
2802 feedback_fn("* Verifying group '%s'" % self.group_info.name)
2804 if not self.my_node_names:
      # empty node group
      feedback_fn("* Empty node group, skipping verification")
      return True
2810 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2811 verbose = self.op.verbose
2812 self._feedback_fn = feedback_fn
2814 vg_name = self.cfg.GetVGName()
2815 drbd_helper = self.cfg.GetDRBDHelper()
2816 cluster = self.cfg.GetClusterInfo()
2817 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2818 hypervisors = cluster.enabled_hypervisors
2819 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2821 i_non_redundant = [] # Non redundant instances
2822 i_non_a_balanced = [] # Non auto-balanced instances
2823 i_offline = 0 # Count of offline instances
2824 n_offline = 0 # Count of offline nodes
2825 n_drained = 0 # Count of nodes being drained
2826 node_vol_should = {}
    # FIXME: verify OS list

    # File verification
    filemap = _ComputeAncillaryFiles(cluster, False)
2833 # do local checksums
2834 master_node = self.master_node = self.cfg.GetMasterNode()
2835 master_ip = self.cfg.GetMasterIP()
2837 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
    user_scripts = []
    if self.cfg.GetUseExternalMipScript():
2841 user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
2843 node_verify_param = {
2844 constants.NV_FILELIST:
2845 utils.UniqueSequence(filename
2846 for files in filemap
2847 for filename in files),
2848 constants.NV_NODELIST:
2849 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
2850 self.all_node_info.values()),
2851 constants.NV_HYPERVISOR: hypervisors,
2852 constants.NV_HVPARAMS:
2853 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2854 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2855 for node in node_data_list
2856 if not node.offline],
2857 constants.NV_INSTANCELIST: hypervisors,
2858 constants.NV_VERSION: None,
2859 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2860 constants.NV_NODESETUP: None,
2861 constants.NV_TIME: None,
2862 constants.NV_MASTERIP: (master_node, master_ip),
2863 constants.NV_OSLIST: None,
2864 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2865 constants.NV_USERSCRIPTS: user_scripts,
2868 if vg_name is not None:
2869 node_verify_param[constants.NV_VGLIST] = None
2870 node_verify_param[constants.NV_LVLIST] = vg_name
2871 node_verify_param[constants.NV_PVLIST] = [vg_name]
2872 node_verify_param[constants.NV_DRBDLIST] = None
    if drbd_helper:
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2878 # FIXME: this needs to be changed per node-group, not cluster-wide
    bridges = set()
    default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2881 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2882 bridges.add(default_nicpp[constants.NIC_LINK])
2883 for instance in self.my_inst_info.values():
2884 for nic in instance.nics:
2885 full_nic = cluster.SimpleFillNIC(nic.nicparams)
2886 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2887 bridges.add(full_nic[constants.NIC_LINK])
    if bridges:
      node_verify_param[constants.NV_BRIDGES] = list(bridges)
2892 # Build our expected cluster state
2893 node_image = dict((node.name, self.NodeImage(offline=node.offline,
                                                  name=node.name,
                                                  vm_capable=node.vm_capable))
2896 for node in node_data_list)
    oob_paths = []
    for node in self.all_node_info.values():
2901 path = _SupportsOob(self.cfg, node)
2902 if path and path not in oob_paths:
2903 oob_paths.append(path)
    if oob_paths:
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2908 for instance in self.my_inst_names:
2909 inst_config = self.my_inst_info[instance]
2911 for nname in inst_config.all_nodes:
2912 if nname not in node_image:
2913 gnode = self.NodeImage(name=nname)
2914 gnode.ghost = (nname not in self.all_node_info)
2915 node_image[nname] = gnode
2917 inst_config.MapLVsByNode(node_vol_should)
2919 pnode = inst_config.primary_node
2920 node_image[pnode].pinst.append(instance)
2922 for snode in inst_config.secondary_nodes:
2923 nimg = node_image[snode]
2924 nimg.sinst.append(instance)
2925 if pnode not in nimg.sbp:
2926 nimg.sbp[pnode] = []
        nimg.sbp[pnode].append(instance)
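      # nimg.sbp maps a primary node name to the instances for which this
      # node acts as secondary, e.g. {"nodeA": ["inst1", "inst2"]}
      # (hypothetical); it is what the N+1 memory check iterates over.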
2929 # At this point, we have the in-memory data structures complete,
2930 # except for the runtime information, which we'll gather next
2932 # Due to the way our RPC system works, exact response times cannot be
2933 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
2936 nvinfo_starttime = time.time()
2937 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
                                           node_verify_param,
                                           self.cfg.GetClusterName())
2940 nvinfo_endtime = time.time()
2942 if self.extra_lv_nodes and vg_name is not None:
      extra_lv_nvinfo = \
          self.rpc.call_node_verify(self.extra_lv_nodes,
2945 {constants.NV_LVLIST: vg_name},
2946 self.cfg.GetClusterName())
    else:
      extra_lv_nvinfo = {}
2950 all_drbd_map = self.cfg.ComputeDRBDMap()
2952 feedback_fn("* Gathering disk information (%s nodes)" %
2953 len(self.my_node_names))
    instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
                                     self.my_inst_info)
2957 feedback_fn("* Verifying configuration file consistency")
2959 # If not all nodes are being checked, we need to make sure the master node
2960 # and a non-checked vm_capable node are in the list.
2961 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
    if absent_nodes:
      vf_nvinfo = all_nvinfo.copy()
2964 vf_node_info = list(self.my_node_info.values())
2965 additional_nodes = []
2966 if master_node not in self.my_node_info:
2967 additional_nodes.append(master_node)
2968 vf_node_info.append(self.all_node_info[master_node])
2969 # Add the first vm_capable node we find which is not included
2970 for node in absent_nodes:
2971 nodeinfo = self.all_node_info[node]
2972 if nodeinfo.vm_capable and not nodeinfo.offline:
2973 additional_nodes.append(node)
          vf_node_info.append(self.all_node_info[node])
          break
2976 key = constants.NV_FILELIST
2977 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2978 {key: node_verify_param[key]},
2979 self.cfg.GetClusterName()))
    else:
      vf_nvinfo = all_nvinfo
2982 vf_node_info = self.my_node_info.values()
2984 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2986 feedback_fn("* Verifying node status")
    refos_img = None
    for node_i in node_data_list:
      node = node_i.name
      nimg = node_image[node]

      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline += 1
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained += 1
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3012 msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
               msg)
      if msg:
        nimg.rpc_fail = True
        continue
3019 nresult = all_nvinfo[node].payload
3021 nimg.call_ok = self._VerifyNode(node_i, nresult)
3022 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3023 self._VerifyNodeNetwork(node_i, nresult)
3024 self._VerifyNodeUserScripts(node_i, nresult)
3025 self._VerifyOob(node_i, nresult)
      if nimg.vm_capable:
        self._VerifyNodeLVM(node_i, nresult, vg_name)
        self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
                             all_drbd_map)
3032 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3033 self._UpdateNodeInstances(node_i, nresult, nimg)
3034 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3035 self._UpdateNodeOS(node_i, nresult, nimg)
3037 if not nimg.os_fail:
          if refos_img is None:
            refos_img = nimg
          self._VerifyNodeOS(node_i, nimg, refos_img)
3041 self._VerifyNodeBridges(node_i, nresult, bridges)
      # Check whether all running instances are primary for the node. (This
3044 # can no longer be done from _VerifyInstance below, since some of the
3045 # wrong instances could be from other node groups.)
3046 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3048 for inst in non_primary_inst:
3049 # FIXME: investigate best way to handle offline insts
3050 if inst.admin_state == constants.ADMINST_OFFLINE:
          if verbose:
            feedback_fn("* Skipping offline instance %s" % inst.name)
          i_offline += 1
          continue
3055 test = inst in self.all_inst_info
3056 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3057 "instance should not run on node %s", node_i.name)
3058 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3059 "node is running unknown instance %s", inst)
3061 for node, result in extra_lv_nvinfo.items():
3062 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3063 node_image[node], vg_name)
3065 feedback_fn("* Verifying instance status")
3066 for instance in self.my_inst_names:
      if verbose:
        feedback_fn("* Verifying instance %s" % instance)
3069 inst_config = self.my_inst_info[instance]
      self._VerifyInstance(instance, inst_config, node_image,
                           instdisk[instance])
3072 inst_nodes_offline = []
3074 pnode = inst_config.primary_node
3075 pnode_img = node_image[pnode]
3076 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3077 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3078 " primary node failed", instance)
      _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
               pnode_img.offline,
3082 constants.CV_EINSTANCEBADNODE, instance,
3083 "instance is marked as running and lives on offline node %s",
3084 inst_config.primary_node)
3086 # If the instance is non-redundant we cannot survive losing its primary
3087 # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported currently.
3090 # FIXME: does not support file-backed instances
3091 if not inst_config.secondary_nodes:
3092 i_non_redundant.append(instance)
3094 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3095 constants.CV_EINSTANCELAYOUT,
3096 instance, "instance has multiple secondary nodes: %s",
3097 utils.CommaJoin(inst_config.secondary_nodes),
3098 code=self.ETYPE_WARNING)
3100 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3101 pnode = inst_config.primary_node
3102 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3103 instance_groups = {}
3105 for node in instance_nodes:
          instance_groups.setdefault(self.all_node_info[node].group,
                                     []).append(node)

        pretty_list = [
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
          # Sort so that we always list the primary node first.
          for group, nodes in sorted(instance_groups.items(),
                                     key=lambda (_, nodes): pnode in nodes,
                                     reverse=True)]
3116 self._ErrorIf(len(instance_groups) > 1,
3117 constants.CV_EINSTANCESPLITGROUPS,
3118 instance, "instance has primary and secondary nodes in"
3119 " different groups: %s", utils.CommaJoin(pretty_list),
3120 code=self.ETYPE_WARNING)
3122 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3123 i_non_a_balanced.append(instance)
3125 for snode in inst_config.secondary_nodes:
3126 s_img = node_image[snode]
3127 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
                 snode, "instance %s, connection to secondary node failed",
                 instance)

        if s_img.offline:
          inst_nodes_offline.append(snode)
3134 # warn that the instance lives on offline nodes
3135 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3136 "instance has offline secondary node(s) %s",
3137 utils.CommaJoin(inst_nodes_offline))
3138 # ... or ghost/non-vm_capable nodes
3139 for node in inst_config.all_nodes:
3140 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3141 instance, "instance lives on ghost node %s", node)
3142 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3143 instance, "instance lives on non-vm_capable node %s", node)
3145 feedback_fn("* Verifying orphan volumes")
3146 reserved = utils.FieldSet(*cluster.reserved_lvs)
3148 # We will get spurious "unknown volume" warnings if any node of this group
3149 # is secondary for an instance whose primary is in another group. To avoid
3150 # them, we find these instances and add their volumes to node_vol_should.
3151 for inst in self.all_inst_info.values():
3152 for secondary in inst.secondary_nodes:
3153 if (secondary in self.my_node_info
3154 and inst.name not in self.my_inst_info):
3155 inst.MapLVsByNode(node_vol_should)
3158 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3160 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3161 feedback_fn("* Verifying N+1 Memory redundancy")
3162 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3164 feedback_fn("* Other Notes")
    if i_non_redundant:
      feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
                  % len(i_non_redundant))
3169 if i_non_a_balanced:
3170 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3171 % len(i_non_a_balanced))
    if i_offline:
      feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
    if n_offline:
      feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
    if n_drained:
      feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)

    return not self.bad
3184 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3185 """Analyze the post-hooks' result
3187 This method analyses the hook result, handles it, and sends some
3188 nicely-formatted feedback back to the user.
3190 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3191 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3192 @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
3194 @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
3199 # We only really run POST phase hooks, only for non-empty groups,
3200 # and are only interested in their results
    if not self.my_node_names:
      # empty node group
      pass
    elif phase == constants.HOOKS_PHASE_POST:
3205 # Used to change hooks' output to proper indentation
3206 feedback_fn("* Hooks Results")
3207 assert hooks_results, "invalid result from hooks"
3209 for node_name in hooks_results:
        res = hooks_results[node_name]
        msg = res.fail_msg
        test = msg and not res.offline
3213 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3214 "Communication failure in hooks execution: %s", msg)
3215 if res.offline or msg:
          # No need to investigate payload if node is offline or gave
          # an error
          continue
3219 for script, hkr, output in res.payload:
3220 test = hkr == constants.HKR_FAIL
3221 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3222 "Script %s failed, output:", script)
          if test:
            output = self._HOOKS_INDENT_RE.sub(" ", output)
            feedback_fn("%s" % output)
            lu_result = False

    return lu_result
3231 class LUClusterVerifyDisks(NoHooksLU):
3232 """Verifies the cluster disks status.
3237 def ExpandNames(self):
3238 self.share_locks = _ShareAll()
3239 self.needed_locks = {
3240 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3243 def Exec(self, feedback_fn):
3244 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3246 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3247 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3248 for group in group_names])
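    # One OpGroupVerifyDisks job is submitted per node group, so large
    # clusters are checked in parallel and a failure in one group does not
    # block verification of the others.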
3251 class LUGroupVerifyDisks(NoHooksLU):
3252 """Verifies the status of all disks in a node group.
3257 def ExpandNames(self):
3258 # Raises errors.OpPrereqError on its own if group can't be found
3259 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3261 self.share_locks = _ShareAll()
3262 self.needed_locks = {
3263 locking.LEVEL_INSTANCE: [],
3264 locking.LEVEL_NODEGROUP: [],
3265 locking.LEVEL_NODE: [],
3268 def DeclareLocks(self, level):
3269 if level == locking.LEVEL_INSTANCE:
3270 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3272 # Lock instances optimistically, needs verification once node and group
3273 # locks have been acquired
3274 self.needed_locks[locking.LEVEL_INSTANCE] = \
3275 self.cfg.GetNodeGroupInstances(self.group_uuid)
3277 elif level == locking.LEVEL_NODEGROUP:
3278 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3280 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3281 set([self.group_uuid] +
3282 # Lock all groups used by instances optimistically; this requires
            # going via the node before it's locked, requiring verification
            # later on
            [group_uuid
3286 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3287 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3289 elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be verified which contain
      # actual instances
3292 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3293 self._LockInstancesNodes()
3295 # Lock all nodes in group to be verified
3296 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3297 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3298 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3300 def CheckPrereq(self):
3301 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3302 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3303 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3305 assert self.group_uuid in owned_groups
3307 # Check if locked instances are still correct
3308 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3310 # Get instance information
3311 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3313 # Check if node groups for locked instances are still correct
3314 for (instance_name, inst) in self.instances.items():
3315 assert owned_nodes.issuperset(inst.all_nodes), \
3316 "Instance %s's nodes changed while we kept the lock" % instance_name
      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
                                             owned_groups)
3321 assert self.group_uuid in inst_groups, \
3322 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3324 def Exec(self, feedback_fn):
3325 """Verify integrity of cluster disks.
3327 @rtype: tuple of three items
3328 @return: a tuple of (dict of node-to-node_error, list of instances
      which need activate-disks, dict of instance: (node, volume) for
      missing volumes

    """
    res_nodes = {}
    res_instances = set()
    res_missing = {}
3337 nv_dict = _MapInstanceDisksToNodes([inst
3338 for inst in self.instances.values()
3339 if inst.admin_state == constants.ADMINST_UP])
3342 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3343 set(self.cfg.GetVmCapableNodeList()))
3345 node_lvs = self.rpc.call_lv_list(nodes, [])
3347 for (node, node_res) in node_lvs.items():
      if node_res.offline:
        continue

      msg = node_res.fail_msg
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        res_nodes[node] = msg
        continue
3357 for lv_name, (_, _, lv_online) in node_res.payload.items():
3358 inst = nv_dict.pop((node, lv_name), None)
3359 if not (lv_online or inst is None):
3360 res_instances.add(inst)
    # any leftover items in nv_dict are missing LVs, let's arrange the data
    # better
3364 for key, inst in nv_dict.iteritems():
3365 res_missing.setdefault(inst, []).append(list(key))
3367 return (res_nodes, list(res_instances), res_missing)
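    # Hypothetical example of the return value:
    #   ({"node3": "Error enumerating LVs"}, ["inst1"],
    #    {"inst2": [["node1", "xenvg/disk0"]]})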
3370 class LUClusterRepairDiskSizes(NoHooksLU):
3371 """Verifies the cluster disks sizes.
3376 def ExpandNames(self):
3377 if self.op.instances:
3378 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3379 self.needed_locks = {
3380 locking.LEVEL_NODE_RES: [],
3381 locking.LEVEL_INSTANCE: self.wanted_names,
3383 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3385 self.wanted_names = None
3386 self.needed_locks = {
3387 locking.LEVEL_NODE_RES: locking.ALL_SET,
3388 locking.LEVEL_INSTANCE: locking.ALL_SET,
3390 self.share_locks = {
3391 locking.LEVEL_NODE_RES: 1,
3392 locking.LEVEL_INSTANCE: 0,
3395 def DeclareLocks(self, level):
3396 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3397 self._LockInstancesNodes(primary_only=True, level=level)
3399 def CheckPrereq(self):
3400 """Check prerequisites.
3402 This only checks the optional instance list against the existing names.
3405 if self.wanted_names is None:
3406 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3408 self.wanted_instances = \
3409 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3411 def _EnsureChildSizes(self, disk):
3412 """Ensure children of the disk have the needed disk size.
3414 This is valid mainly for DRBD8 and fixes an issue where the
3415 children have smaller disk size.
3417 @param disk: an L{ganeti.objects.Disk} object
3420 if disk.dev_type == constants.LD_DRBD8:
3421 assert disk.children, "Empty children for DRBD8?"
3422 fchild = disk.children[0]
3423 mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
3426 fchild.size, disk.size)
3427 fchild.size = disk.size
3429 # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False
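  # Usage sketch (assumed sizes): for a DRBD8 disk of 10240 MiB whose data
  # child was created with only 10112 MiB, _EnsureChildSizes grows the child
  # to 10240 and returns True, telling the caller to write out the updated
  # configuration.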
3434 def Exec(self, feedback_fn):
3435 """Verify the size of cluster disks.
3438 # TODO: check child disks too
3439 # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
3442 pnode = instance.primary_node
3443 if pnode not in per_node_disks:
3444 per_node_disks[pnode] = []
3445 for idx, disk in enumerate(instance.disks):
3446 per_node_disks[pnode].append((instance, idx, disk))
3448 assert not (frozenset(per_node_disks.keys()) -
3449 self.owned_locks(locking.LEVEL_NODE_RES)), \
3450 "Not owning correct locks"
3451 assert not self.owned_locks(locking.LEVEL_NODE)
    changed = []
    for node, dskl in per_node_disks.items():
3455 newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
3458 result = self.rpc.call_blockdev_getsize(node, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsize call to node"
                        " %s, ignoring", node)
        continue
3463 if len(result.payload) != len(dskl):
3464 logging.warning("Invalid result from node %s: len(dksl)=%d,"
3465 " result.payload=%s", node, len(dskl), result.payload)
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
3469 for ((instance, idx, disk), size) in zip(dskl, result.payload):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
3475 self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
3479 if size != disk.size:
3480 self.LogInfo("Disk %d of instance %s has mismatched size,"
3481 " correcting: recorded %d, actual %d", idx,
3482 instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
3485 changed.append((instance.name, idx, size))
3486 if self._EnsureChildSizes(disk):
3487 self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))

    return changed
3492 class LUClusterRename(LogicalUnit):
3493 """Rename the cluster.
3496 HPATH = "cluster-rename"
3497 HTYPE = constants.HTYPE_CLUSTER
  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }
3508 def BuildHooksNodes(self):
3509 """Build hooks nodes.
3512 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3514 def CheckPrereq(self):
3515 """Verify that the passed name is a valid one.
3518 hostname = netutils.GetHostname(name=self.op.name,
3519 family=self.cfg.GetPrimaryIPFamily())
3521 new_name = hostname.name
3522 self.ip = new_ip = hostname.ip
3523 old_name = self.cfg.GetClusterName()
3524 old_ip = self.cfg.GetMasterIP()
3525 if new_name == old_name and new_ip == old_ip:
3526 raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)
3529 if new_ip != old_ip:
3530 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3531 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3532 " reachable on the network" %
3533 new_ip, errors.ECODE_NOTUNIQUE)
3535 self.op.name = new_name
3537 def Exec(self, feedback_fn):
3538 """Rename the cluster.
    clustername = self.op.name
    new_ip = self.ip
3544 # shutdown the master IP
3545 master_params = self.cfg.GetMasterNetworkParameters()
3546 ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
3549 result.Raise("Could not disable the master role")
    try:
      cluster = self.cfg.GetClusterInfo()
3553 cluster.cluster_name = clustername
3554 cluster.master_ip = new_ip
3555 self.cfg.Update(cluster, feedback_fn)
3557 # update the known hosts file
3558 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3559 node_list = self.cfg.GetOnlineNodeList()
      try:
        node_list.remove(master_params.name)
      except ValueError:
        pass
3564 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
    finally:
      master_params.ip = new_ip
      result = self.rpc.call_node_activate_master_ip(master_params.name,
                                                     master_params, ems)
3569 msg = result.fail_msg
3571 self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)

    return clustername
3577 def _ValidateNetmask(cfg, netmask):
3578 """Checks if a netmask is valid.
3580 @type cfg: L{config.ConfigWriter}
3581 @param cfg: The cluster configuration
    @type netmask: int
    @param netmask: the netmask to be verified
3584 @raise errors.OpPrereqError: if the validation fails
3587 ip_family = cfg.GetPrimaryIPFamily()
  try:
    ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3590 except errors.ProgrammerError:
    raise errors.OpPrereqError("Invalid primary ip family: %s." %
                               ip_family, errors.ECODE_INVAL)
3593 if not ipcls.ValidateNetmask(netmask):
    raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
                               netmask, errors.ECODE_INVAL)
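# The netmask is a CIDR prefix length validated against the cluster's
# primary IP family; e.g. (hypothetical) 24 is accepted on an IPv4
# cluster, while 33 raises OpPrereqError.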
3598 class LUClusterSetParams(LogicalUnit):
3599 """Change the parameters of the cluster.
3602 HPATH = "cluster-modify"
3603 HTYPE = constants.HTYPE_CLUSTER
3606 def CheckArguments(self):
3610 if self.op.uid_pool:
3611 uidpool.CheckUidPool(self.op.uid_pool)
3613 if self.op.add_uids:
3614 uidpool.CheckUidPool(self.op.add_uids)
3616 if self.op.remove_uids:
3617 uidpool.CheckUidPool(self.op.remove_uids)
3619 if self.op.master_netmask is not None:
3620 _ValidateNetmask(self.cfg, self.op.master_netmask)
3622 if self.op.diskparams:
3623 for dt_params in self.op.diskparams.values():
3624 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3626 def ExpandNames(self):
3627 # FIXME: in the future maybe other cluster params won't require checking on
3628 # all nodes to be modified.
3629 self.needed_locks = {
3630 locking.LEVEL_NODE: locking.ALL_SET,
3632 self.share_locks[locking.LEVEL_NODE] = 1
  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_VG_NAME": self.op.vg_name,
      }
3643 def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])
3650 def CheckPrereq(self):
3651 """Check prerequisites.
3653 This checks whether the given params don't conflict and
3654 if the given volume group is valid.
3657 if self.op.vg_name is not None and not self.op.vg_name:
3658 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3659 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3660 " instances exist", errors.ECODE_INVAL)
3662 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3663 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3664 raise errors.OpPrereqError("Cannot disable drbd helper while"
                                   " drbd-based instances exist",
                                   errors.ECODE_INVAL)
3668 node_list = self.owned_locks(locking.LEVEL_NODE)
3670 # if vg_name not None, checks given volume group on all nodes
    if self.op.vg_name:
      vglist = self.rpc.call_vg_list(node_list)
3673 for node in node_list:
3674 msg = vglist[node].fail_msg
        if msg:
          # ignoring down node
          self.LogWarning("Error while gathering data on node %s"
                          " (ignoring node): %s", node, msg)
          continue
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
                                              self.op.vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          raise errors.OpPrereqError("Error on node '%s': %s" %
3685 (node, vgstatus), errors.ECODE_ENVIRON)
3687 if self.op.drbd_helper:
3688 # checks given drbd helper on all nodes
3689 helpers = self.rpc.call_drbd_helper(node_list)
3690 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
        if ninfo.offline:
          self.LogInfo("Not checking drbd helper on offline node %s", node)
          continue
        msg = helpers[node].fail_msg
        if msg:
          raise errors.OpPrereqError("Error checking drbd helper on node"
3697 " '%s': %s" % (node, msg),
3698 errors.ECODE_ENVIRON)
3699 node_helper = helpers[node].payload
3700 if node_helper != self.op.drbd_helper:
3701 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3702 (node, node_helper), errors.ECODE_ENVIRON)
3704 self.cluster = cluster = self.cfg.GetClusterInfo()
3705 # validate params changes
3706 if self.op.beparams:
3707 objects.UpgradeBeParams(self.op.beparams)
3708 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3709 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3711 if self.op.ndparams:
3712 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3713 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3715 # TODO: we need a more general way to handle resetting
3716 # cluster-level parameters to default values
3717 if self.new_ndparams["oob_program"] == "":
3718 self.new_ndparams["oob_program"] = \
3719 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3721 if self.op.hv_state:
3722 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3723 self.cluster.hv_state_static)
3724 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3725 for hv, values in new_hv_state.items())
3727 if self.op.disk_state:
3728 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3729 self.cluster.disk_state_static)
3730 self.new_disk_state = \
3731 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3732 for name, values in svalues.items()))
3733 for storage, svalues in new_disk_state.items())
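      # self.new_disk_state is keyed by storage type, then by storage entity
      # name, e.g. (hypothetical): {constants.LD_LV: {"xenvg": {...}}}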
    if self.op.ipolicy:
      ipolicy = {}
      for key, value in self.op.ipolicy.items():
3738 utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
        ipolicy[key] = _GetUpdatedParams(cluster.ipolicy.get(key, {}),
                                         value)
3741 objects.InstancePolicy.CheckParameterSyntax(ipolicy)
3742 self.new_ipolicy = ipolicy
3744 if self.op.nicparams:
3745 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3746 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
      nic_errors = []
3750 # check all instances for consistency
3751 for instance in self.cfg.GetAllInstancesInfo().values():
3752 for nic_idx, nic in enumerate(instance.nics):
3753 params_copy = copy.deepcopy(nic.nicparams)
3754 params_filled = objects.FillDict(self.new_nicparams, params_copy)
3756 # check parameter syntax
3757 try:
3758 objects.NIC.CheckParameterSyntax(params_filled)
3759 except errors.ConfigurationError, err:
3760 nic_errors.append("Instance %s, nic/%d: %s" %
3761 (instance.name, nic_idx, err))
3763 # if we're moving instances to routed, check that they have an ip
3764 target_mode = params_filled[constants.NIC_MODE]
3765 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3766 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3767 " address" % (instance.name, nic_idx))
3769 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3770 "\n".join(nic_errors))
3772 # hypervisor list/parameters
3773 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3774 if self.op.hvparams:
3775 for hv_name, hv_dict in self.op.hvparams.items():
3776 if hv_name not in self.new_hvparams:
3777 self.new_hvparams[hv_name] = hv_dict
3778 else:
3779 self.new_hvparams[hv_name].update(hv_dict)
3781 # disk template parameters
3782 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
3783 if self.op.diskparams:
3784 for dt_name, dt_params in self.op.diskparams.items():
3785 if dt_name not in self.new_diskparams:
3786 self.new_diskparams[dt_name] = dt_params
3787 else:
3788 self.new_diskparams[dt_name].update(dt_params)
3790 # os hypervisor parameters
3791 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3792 if self.op.os_hvp:
3793 for os_name, hvs in self.op.os_hvp.items():
3794 if os_name not in self.new_os_hvp:
3795 self.new_os_hvp[os_name] = hvs
3796 else:
3797 for hv_name, hv_dict in hvs.items():
3798 if hv_name not in self.new_os_hvp[os_name]:
3799 self.new_os_hvp[os_name][hv_name] = hv_dict
3800 else:
3801 self.new_os_hvp[os_name][hv_name].update(hv_dict)
3803 # os parameters
3804 self.new_osp = objects.FillDict(cluster.osparams, {})
3805 if self.op.osparams:
3806 for os_name, osp in self.op.osparams.items():
3807 if os_name not in self.new_osp:
3808 self.new_osp[os_name] = {}
3810 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3811 use_none=True)
3813 if not self.new_osp[os_name]:
3814 # we removed all parameters
3815 del self.new_osp[os_name]
3816 else:
3817 # check the parameter validity (remote check)
3818 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3819 os_name, self.new_osp[os_name])
3821 # changes to the hypervisor list
3822 if self.op.enabled_hypervisors is not None:
3823 self.hv_list = self.op.enabled_hypervisors
3824 for hv in self.hv_list:
3825 # if the hypervisor doesn't already exist in the cluster
3826 # hvparams, we initialize it to empty, and then (in both
3827 # cases) we make sure to fill the defaults, as we might not
3828 # have a complete defaults list if the hypervisor wasn't
3829 # enabled before
3830 if hv not in new_hvp:
3831 new_hvp[hv] = {}
3832 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3833 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3834 else:
3835 self.hv_list = cluster.enabled_hypervisors
3837 if self.op.hvparams or self.op.enabled_hypervisors is not None:
3838 # either the enabled list has changed, or the parameters have, validate
3839 for hv_name, hv_params in self.new_hvparams.items():
3840 if ((self.op.hvparams and hv_name in self.op.hvparams) or
3841 (self.op.enabled_hypervisors and
3842 hv_name in self.op.enabled_hypervisors)):
3843 # either this is a new hypervisor, or its parameters have changed
3844 hv_class = hypervisor.GetHypervisor(hv_name)
3845 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3846 hv_class.CheckParameterSyntax(hv_params)
3847 _CheckHVParams(self, node_list, hv_name, hv_params)
3849 if self.op.os_hvp:
3850 # no need to check any newly-enabled hypervisors, since the
3851 # defaults have already been checked in the above code-block
3852 for os_name, os_hvp in self.new_os_hvp.items():
3853 for hv_name, hv_params in os_hvp.items():
3854 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3855 # we need to fill in the new os_hvp on top of the actual hv_p
3856 cluster_defaults = self.new_hvparams.get(hv_name, {})
3857 new_osp = objects.FillDict(cluster_defaults, hv_params)
3858 hv_class = hypervisor.GetHypervisor(hv_name)
3859 hv_class.CheckParameterSyntax(new_osp)
3860 _CheckHVParams(self, node_list, hv_name, new_osp)
3862 if self.op.default_iallocator:
3863 alloc_script = utils.FindFile(self.op.default_iallocator,
3864 constants.IALLOCATOR_SEARCH_PATH,
3865 os.path.isfile)
3866 if alloc_script is None:
3867 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3868 " specified" % self.op.default_iallocator,
3869 errors.ECODE_INVAL)
3871 def Exec(self, feedback_fn):
3872 """Change the parameters of the cluster.
3874 """
3875 if self.op.vg_name is not None:
3876 new_volume = self.op.vg_name
3877 if not new_volume:
3878 new_volume = None
3879 if new_volume != self.cfg.GetVGName():
3880 self.cfg.SetVGName(new_volume)
3881 else:
3882 feedback_fn("Cluster LVM configuration already in desired"
3883 " state, not changing")
3884 if self.op.drbd_helper is not None:
3885 new_helper = self.op.drbd_helper
3886 if not new_helper:
3887 new_helper = None
3888 if new_helper != self.cfg.GetDRBDHelper():
3889 self.cfg.SetDRBDHelper(new_helper)
3890 else:
3891 feedback_fn("Cluster DRBD helper already in desired state,"
3892 " not changing")
3893 if self.op.hvparams:
3894 self.cluster.hvparams = self.new_hvparams
3895 if self.op.os_hvp:
3896 self.cluster.os_hvp = self.new_os_hvp
3897 if self.op.enabled_hypervisors is not None:
3898 self.cluster.hvparams = self.new_hvparams
3899 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3900 if self.op.beparams:
3901 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3902 if self.op.nicparams:
3903 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3904 if self.op.ipolicy:
3905 self.cluster.ipolicy = self.new_ipolicy
3906 if self.op.osparams:
3907 self.cluster.osparams = self.new_osp
3908 if self.op.ndparams:
3909 self.cluster.ndparams = self.new_ndparams
3910 if self.op.diskparams:
3911 self.cluster.diskparams = self.new_diskparams
3912 if self.op.hv_state:
3913 self.cluster.hv_state_static = self.new_hv_state
3914 if self.op.disk_state:
3915 self.cluster.disk_state_static = self.new_disk_state
3917 if self.op.candidate_pool_size is not None:
3918 self.cluster.candidate_pool_size = self.op.candidate_pool_size
3919 # we need to update the pool size here, otherwise the save will fail
3920 _AdjustCandidatePool(self, [])
3922 if self.op.maintain_node_health is not None:
3923 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
3924 feedback_fn("Note: CONFD was disabled at build time, node health"
3925 " maintenance is not useful (still enabling it)")
3926 self.cluster.maintain_node_health = self.op.maintain_node_health
3928 if self.op.prealloc_wipe_disks is not None:
3929 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3931 if self.op.add_uids is not None:
3932 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3934 if self.op.remove_uids is not None:
3935 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3937 if self.op.uid_pool is not None:
3938 self.cluster.uid_pool = self.op.uid_pool
3940 if self.op.default_iallocator is not None:
3941 self.cluster.default_iallocator = self.op.default_iallocator
3943 if self.op.reserved_lvs is not None:
3944 self.cluster.reserved_lvs = self.op.reserved_lvs
3946 if self.op.use_external_mip_script is not None:
3947 self.cluster.use_external_mip_script = self.op.use_external_mip_script
3949 def helper_os(aname, mods, desc):
3950 desc += " OS list"
3951 lst = getattr(self.cluster, aname)
3952 for key, val in mods:
3953 if key == constants.DDM_ADD:
3954 if val in lst:
3955 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3956 else:
3957 lst.append(val)
3958 elif key == constants.DDM_REMOVE:
3959 if val in lst:
3960 lst.remove(val)
3961 else:
3962 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3963 else:
3964 raise errors.ProgrammerError("Invalid modification '%s'" % key)
3966 if self.op.hidden_os:
3967 helper_os("hidden_os", self.op.hidden_os, "hidden")
3969 if self.op.blacklisted_os:
3970 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3972 if self.op.master_netdev:
3973 master_params = self.cfg.GetMasterNetworkParameters()
3974 ems = self.cfg.GetUseExternalMipScript()
3975 feedback_fn("Shutting down master ip on the current netdev (%s)" %
3976 self.cluster.master_netdev)
3977 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3978 master_params, ems)
3979 result.Raise("Could not disable the master ip")
3980 feedback_fn("Changing master_netdev from %s to %s" %
3981 (master_params.netdev, self.op.master_netdev))
3982 self.cluster.master_netdev = self.op.master_netdev
3984 if self.op.master_netmask:
3985 master_params = self.cfg.GetMasterNetworkParameters()
3986 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
3987 result = self.rpc.call_node_change_master_netmask(master_params.name,
3988 master_params.netmask,
3989 self.op.master_netmask,
3990 master_params.ip,
3991 master_params.netdev)
3992 if result.fail_msg:
3993 msg = "Could not change the master IP netmask: %s" % result.fail_msg
3994 feedback_fn(msg)
3996 self.cluster.master_netmask = self.op.master_netmask
3998 self.cfg.Update(self.cluster, feedback_fn)
4000 if self.op.master_netdev:
4001 master_params = self.cfg.GetMasterNetworkParameters()
4002 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4003 self.op.master_netdev)
4004 ems = self.cfg.GetUseExternalMipScript()
4005 result = self.rpc.call_node_activate_master_ip(master_params.name,
4006 master_params, ems)
4007 if result.fail_msg:
4008 self.LogWarning("Could not re-enable the master ip on"
4009 " the master, please restart manually: %s",
4010 result.fail_msg)
4013 def _UploadHelper(lu, nodes, fname):
4014 """Helper for uploading a file and showing warnings.
4017 if os.path.exists(fname):
4018 result = lu.rpc.call_upload_file(nodes, fname)
4019 for to_node, to_result in result.items():
4020 msg = to_result.fail_msg
4021 if msg:
4022 msg = ("Copy of file %s to node %s failed: %s" %
4023 (fname, to_node, msg))
4024 lu.proc.LogWarning(msg)
4027 def _ComputeAncillaryFiles(cluster, redist):
4028 """Compute files external to Ganeti which need to be consistent.
4030 @type redist: boolean
4031 @param redist: Whether to include files which need to be redistributed
4033 """
4034 # Compute files for all nodes
4035 files_all = set([
4036 constants.SSH_KNOWN_HOSTS_FILE,
4037 constants.CONFD_HMAC_KEY,
4038 constants.CLUSTER_DOMAIN_SECRET_FILE,
4039 constants.SPICE_CERT_FILE,
4040 constants.SPICE_CACERT_FILE,
4041 constants.RAPI_USERS_FILE,
4042 ])
4044 if not redist:
4045 files_all.update(constants.ALL_CERT_FILES)
4046 files_all.update(ssconf.SimpleStore().GetFileList())
4047 else:
4048 # we need to ship at least the RAPI certificate
4049 files_all.add(constants.RAPI_CERT_FILE)
4051 if cluster.modify_etc_hosts:
4052 files_all.add(constants.ETC_HOSTS)
4054 # Files which are optional, these must:
4055 # - be present in one other category as well
4056 # - either exist or not exist on all nodes of that category (mc, vm all)
4057 files_opt = set([
4058 constants.RAPI_USERS_FILE,
4059 ])
4061 # Files which should only be on master candidates
4062 files_mc = set()
4064 if not redist:
4065 files_mc.add(constants.CLUSTER_CONF_FILE)
4067 # FIXME: this should also be replicated but Ganeti doesn't support files_mc
4068 # replication
4069 files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)
4071 # Files which should only be on VM-capable nodes
4072 files_vm = set(filename
4073 for hv_name in cluster.enabled_hypervisors
4074 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4076 files_opt |= set(filename
4077 for hv_name in cluster.enabled_hypervisors
4078 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4080 # Filenames in each category must be unique
4081 all_files_set = files_all | files_mc | files_vm
4082 assert (len(all_files_set) ==
4083 sum(map(len, [files_all, files_mc, files_vm]))), \
4084 "Found file listed in more than one file list"
4086 # Optional files must be present in one other category
4087 assert all_files_set.issuperset(files_opt), \
4088 "Optional file not in a different required list"
4090 return (files_all, files_opt, files_mc, files_vm)
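# Editorial sketch (not part of the original module): the two asserts above
# encode set invariants that can be checked in isolation. The values below
# are toy stand-ins, not the real constants:
#
#   files_all = set(["a", "b"])
#   files_mc = set(["c"])
#   files_vm = set(["d"])
#   files_opt = set(["b"])
#   all_files_set = files_all | files_mc | files_vm
#   assert len(all_files_set) == sum(map(len, [files_all, files_mc, files_vm]))
#   assert all_files_set.issuperset(files_opt)
#
# The first assert fails iff a filename is listed in more than one category;
# the second fails iff an optional file is not also in some required list.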
4093 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4094 """Distribute additional files which are part of the cluster configuration.
4096 ConfigWriter takes care of distributing the config and ssconf files, but
4097 there are more files which should be distributed to all nodes. This function
4098 makes sure those are copied.
4100 @param lu: calling logical unit
4101 @param additional_nodes: list of nodes not in the config to distribute to
4102 @type additional_vm: boolean
4103 @param additional_vm: whether the additional nodes are vm-capable or not
4105 """
4106 # Gather target nodes
4107 cluster = lu.cfg.GetClusterInfo()
4108 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4110 online_nodes = lu.cfg.GetOnlineNodeList()
4111 vm_nodes = lu.cfg.GetVmCapableNodeList()
4113 if additional_nodes is not None:
4114 online_nodes.extend(additional_nodes)
4115 if additional_vm:
4116 vm_nodes.extend(additional_nodes)
4118 # Never distribute to master node
4119 for nodelist in [online_nodes, vm_nodes]:
4120 if master_info.name in nodelist:
4121 nodelist.remove(master_info.name)
4123 # Gather file lists
4124 (files_all, _, files_mc, files_vm) = \
4125 _ComputeAncillaryFiles(cluster, True)
4127 # Never re-distribute configuration file from here
4128 assert not (constants.CLUSTER_CONF_FILE in files_all or
4129 constants.CLUSTER_CONF_FILE in files_vm)
4130 assert not files_mc, "Master candidates not handled in this function"
4132 filemap = [
4133 (online_nodes, files_all),
4134 (vm_nodes, files_vm),
4135 ]
4137 # Upload the files
4138 for (node_list, files) in filemap:
4139 for fname in files:
4140 _UploadHelper(lu, node_list, fname)
4143 class LUClusterRedistConf(NoHooksLU):
4144 """Force the redistribution of cluster configuration.
4146 This is a very simple LU.
4151 def ExpandNames(self):
4152 self.needed_locks = {
4153 locking.LEVEL_NODE: locking.ALL_SET,
4155 self.share_locks[locking.LEVEL_NODE] = 1
4157 def Exec(self, feedback_fn):
4158 """Redistribute the configuration.
4161 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4162 _RedistributeAncillaryFiles(self)
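# Usage sketch (editorial addition): this LU backs the cluster-wide config
# push; cfg.Update() on the cluster object distributes config and ssconf
# files, after which _RedistributeAncillaryFiles() copies everything computed
# by _ComputeAncillaryFiles(cluster, True). From the command line:
#
#   $ gnt-cluster redist-conf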
4165 class LUClusterActivateMasterIp(NoHooksLU):
4166 """Activate the master IP on the master node.
4169 def Exec(self, feedback_fn):
4170 """Activate the master IP.
4173 master_params = self.cfg.GetMasterNetworkParameters()
4174 ems = self.cfg.GetUseExternalMipScript()
4175 result = self.rpc.call_node_activate_master_ip(master_params.name,
4177 result.Raise("Could not activate the master IP")
4180 class LUClusterDeactivateMasterIp(NoHooksLU):
4181 """Deactivate the master IP on the master node.
4184 def Exec(self, feedback_fn):
4185 """Deactivate the master IP.
4188 master_params = self.cfg.GetMasterNetworkParameters()
4189 ems = self.cfg.GetUseExternalMipScript()
4190 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4192 result.Raise("Could not deactivate the master IP")
4195 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4196 """Sleep and poll for an instance's disk to sync.
4199 if not instance.disks or disks is not None and not disks:
4202 disks = _ExpandCheckDisks(instance, disks)
4205 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4207 node = instance.primary_node
4210 lu.cfg.SetDiskID(dev, node)
4212 # TODO: Convert to utils.Retry
4215 degr_retries = 10 # in seconds, as we sleep 1 second each time
4219 cumul_degraded = False
4220 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
4221 msg = rstats.fail_msg
4222 if msg:
4223 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4224 retries += 1
4225 if retries >= 10:
4226 raise errors.RemoteError("Can't contact node %s for mirror data,"
4227 " aborting." % node)
4228 time.sleep(6)
4229 continue
4230 rstats = rstats.payload
4231 retries = 0
4232 for i, mstat in enumerate(rstats):
4233 if mstat is None:
4234 lu.LogWarning("Can't compute data for node %s/%s",
4235 node, disks[i].iv_name)
4236 continue
4238 cumul_degraded = (cumul_degraded or
4239 (mstat.is_degraded and mstat.sync_percent is None))
4240 if mstat.sync_percent is not None:
4241 done = False
4242 if mstat.estimated_time is not None:
4243 rem_time = ("%s remaining (estimated)" %
4244 utils.FormatSeconds(mstat.estimated_time))
4245 max_time = mstat.estimated_time
4246 else:
4247 rem_time = "no time estimate"
4248 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4249 (disks[i].iv_name, mstat.sync_percent, rem_time))
4251 # if we're done but degraded, let's do a few small retries, to
4252 # make sure we see a stable and not transient situation; therefore
4253 # we force restart of the loop
4254 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4255 logging.info("Degraded disks found, %d retries left", degr_retries)
4263 time.sleep(min(60, max_time))
4266 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4267 return not cumul_degraded
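# Editorial sketch (not part of the original module): the TODO above suggests
# converting this loop to utils.Retry. Assuming utils.Retry's usual contract
# (the callable raises utils.RetryAgain to request another attempt), the poll
# could be restructured roughly as below; "_CheckSync" and "_StillSyncing"
# are hypothetical names:
#
#   def _CheckSync():
#     if _StillSyncing():       # re-query blockdev mirror status here
#       raise utils.RetryAgain()
#     return True
#
#   utils.Retry(_CheckSync, delay=5.0, timeout=max_time)
#
# This would replace the hand-rolled retries/degr_retries counters.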
4270 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
4271 """Check that mirrors are not degraded.
4273 The ldisk parameter, if True, will change the test from the
4274 is_degraded attribute (which represents overall non-ok status for
4275 the device(s)) to the ldisk (representing the local storage status).
4277 """
4278 lu.cfg.SetDiskID(dev, node)
4280 result = True
4282 if on_primary or dev.AssembleOnSecondary():
4283 rstats = lu.rpc.call_blockdev_find(node, dev)
4284 msg = rstats.fail_msg
4285 if msg:
4286 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4287 result = False
4288 elif not rstats.payload:
4289 lu.LogWarning("Can't find disk on node %s", node)
4290 result = False
4291 else:
4292 if ldisk:
4293 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4294 else:
4295 result = result and not rstats.payload.is_degraded
4297 if dev.children:
4298 for child in dev.children:
4299 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
4301 return result
4304 class LUOobCommand(NoHooksLU):
4305 """Logical unit for OOB handling.
4309 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4311 def ExpandNames(self):
4312 """Gather locks we need.
4315 if self.op.node_names:
4316 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4317 lock_names = self.op.node_names
4319 lock_names = locking.ALL_SET
4321 self.needed_locks = {
4322 locking.LEVEL_NODE: lock_names,
4325 def CheckPrereq(self):
4326 """Check prerequisites.
4329 - the node exists in the configuration
4332 Any errors are signaled by raising errors.OpPrereqError.
4336 self.master_node = self.cfg.GetMasterNode()
4338 assert self.op.power_delay >= 0.0
4340 if self.op.node_names:
4341 if (self.op.command in self._SKIP_MASTER and
4342 self.master_node in self.op.node_names):
4343 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4344 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4346 if master_oob_handler:
4347 additional_text = ("run '%s %s %s' if you want to operate on the"
4348 " master regardless") % (master_oob_handler,
4352 additional_text = "it does not support out-of-band operations"
4354 raise errors.OpPrereqError(("Operating on the master node %s is not"
4355 " allowed for %s; %s") %
4356 (self.master_node, self.op.command,
4357 additional_text), errors.ECODE_INVAL)
4359 self.op.node_names = self.cfg.GetNodeList()
4360 if self.op.command in self._SKIP_MASTER:
4361 self.op.node_names.remove(self.master_node)
4363 if self.op.command in self._SKIP_MASTER:
4364 assert self.master_node not in self.op.node_names
4366 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4367 if node is None:
4368 raise errors.OpPrereqError("Node %s not found" % node_name,
4369 errors.ECODE_NOENT)
4370 else:
4371 self.nodes.append(node)
4373 if (not self.op.ignore_status and
4374 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4375 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4376 " not marked offline") % node_name,
4377 errors.ECODE_STATE)
4379 def Exec(self, feedback_fn):
4380 """Execute OOB and return result if we expect any.
4383 master_node = self.master_node
4386 for idx, node in enumerate(utils.NiceSort(self.nodes,
4387 key=lambda node: node.name)):
4388 node_entry = [(constants.RS_NORMAL, node.name)]
4389 ret.append(node_entry)
4391 oob_program = _SupportsOob(self.cfg, node)
4394 node_entry.append((constants.RS_UNAVAIL, None))
4397 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4398 self.op.command, oob_program, node.name)
4399 result = self.rpc.call_run_oob(master_node, oob_program,
4400 self.op.command, node.name,
4401 self.op.timeout)
4403 if result.fail_msg:
4404 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4405 node.name, result.fail_msg)
4406 node_entry.append((constants.RS_NODATA, None))
4407 else:
4408 try:
4409 self._CheckPayload(result)
4410 except errors.OpExecError, err:
4411 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4412 node.name, err)
4413 node_entry.append((constants.RS_NODATA, None))
4414 else:
4415 if self.op.command == constants.OOB_HEALTH:
4416 # For health we should log important events
4417 for item, status in result.payload:
4418 if status in [constants.OOB_STATUS_WARNING,
4419 constants.OOB_STATUS_CRITICAL]:
4420 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4421 item, node.name, status)
4423 if self.op.command == constants.OOB_POWER_ON:
4424 node.powered = True
4425 elif self.op.command == constants.OOB_POWER_OFF:
4426 node.powered = False
4427 elif self.op.command == constants.OOB_POWER_STATUS:
4428 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4429 if powered != node.powered:
4430 logging.warning(("Recorded power state (%s) of node '%s' does not"
4431 " match actual power state (%s)"), node.powered,
4434 # For configuration changing commands we should update the node
4435 if self.op.command in (constants.OOB_POWER_ON,
4436 constants.OOB_POWER_OFF):
4437 self.cfg.Update(node, feedback_fn)
4439 node_entry.append((constants.RS_NORMAL, result.payload))
4441 if (self.op.command == constants.OOB_POWER_ON and
4442 idx < len(self.nodes) - 1):
4443 time.sleep(self.op.power_delay)
4445 return ret
4447 def _CheckPayload(self, result):
4448 """Checks if the payload is valid.
4450 @param result: RPC result
4451 @raises errors.OpExecError: If payload is not valid
4453 """
4454 errs = []
4455 if self.op.command == constants.OOB_HEALTH:
4456 if not isinstance(result.payload, list):
4457 errs.append("command 'health' is expected to return a list but got %s" %
4458 type(result.payload))
4459 else:
4460 for item, status in result.payload:
4461 if status not in constants.OOB_STATUSES:
4462 errs.append("health item '%s' has invalid status '%s'" %
4463 (item, status))
4465 if self.op.command == constants.OOB_POWER_STATUS:
4466 if not isinstance(result.payload, dict):
4467 errs.append("power-status is expected to return a dict but got %s" %
4468 type(result.payload))
4470 if self.op.command in [
4471 constants.OOB_POWER_ON,
4472 constants.OOB_POWER_OFF,
4473 constants.OOB_POWER_CYCLE,
4474 ]:
4475 if result.payload is not None:
4476 errs.append("%s is expected to not return payload but got '%s'" %
4477 (self.op.command, result.payload))
4479 if errs:
4480 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4481 utils.CommaJoin(errs))
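# Editorial example (not part of the original module): for an OOB power
# status query on two nodes, Exec above returns per-node entries shaped
# roughly like the following (node names are hypothetical):
#
#   [[(constants.RS_NORMAL, "node1"),
#     (constants.RS_NORMAL, {constants.OOB_POWER_STATUS_POWERED: True})],
#    [(constants.RS_NORMAL, "node2"),
#     (constants.RS_UNAVAIL, None)]]   # node2 has no OOB program configured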
4484 class _OsQuery(_QueryBase):
4485 FIELDS = query.OS_FIELDS
4487 def ExpandNames(self, lu):
4488 # Lock all nodes in shared mode
4489 # Temporary removal of locks, should be reverted later
4490 # TODO: reintroduce locks when they are lighter-weight
4491 lu.needed_locks = {}
4492 #self.share_locks[locking.LEVEL_NODE] = 1
4493 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4495 # The following variables interact with _QueryBase._GetNames
4496 if self.names:
4497 self.wanted = self.names
4498 else:
4499 self.wanted = locking.ALL_SET
4501 self.do_locking = self.use_locking
4503 def DeclareLocks(self, lu, level):
4504 pass
4506 @staticmethod
4507 def _DiagnoseByOS(rlist):
4508 """Remaps a per-node return list into a per-os per-node dictionary
4510 @param rlist: a map with node names as keys and OS objects as values
4512 @rtype: dict
4513 @return: a dictionary with osnames as keys and as value another
4514 map, with nodes as keys and tuples of (path, status, diagnose,
4515 variants, parameters, api_versions) as values, eg::
4517 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4518 (/srv/..., False, "invalid api")],
4519 "node2": [(/srv/..., True, "", [], [])]}
4524 # we build here the list of nodes that didn't fail the RPC (at RPC
4525 # level), so that nodes with a non-responding node daemon don't
4526 # make all OSes invalid
4527 good_nodes = [node_name for node_name in rlist
4528 if not rlist[node_name].fail_msg]
4529 for node_name, nr in rlist.items():
4530 if nr.fail_msg or not nr.payload:
4531 continue
4532 for (name, path, status, diagnose, variants,
4533 params, api_versions) in nr.payload:
4534 if name not in all_os:
4535 # build a list of nodes for this os containing empty lists
4536 # for each node in node_list
4537 all_os[name] = {}
4538 for nname in good_nodes:
4539 all_os[name][nname] = []
4540 # convert params from [name, help] to (name, help)
4541 params = [tuple(v) for v in params]
4542 all_os[name][node_name].append((path, status, diagnose,
4543 variants, params, api_versions))
4544 return all_os
4546 def _GetQueryData(self, lu):
4547 """Computes the list of nodes and their attributes.
4550 # Locking is not used
4551 assert not (compat.any(lu.glm.is_owned(level)
4552 for level in locking.LEVELS
4553 if level != locking.LEVEL_CLUSTER) or
4554 self.do_locking or self.use_locking)
4556 valid_nodes = [node.name
4557 for node in lu.cfg.GetAllNodesInfo().values()
4558 if not node.offline and node.vm_capable]
4559 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4560 cluster = lu.cfg.GetClusterInfo()
4562 data = {}
4564 for (os_name, os_data) in pol.items():
4565 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4566 hidden=(os_name in cluster.hidden_os),
4567 blacklisted=(os_name in cluster.blacklisted_os))
4569 variants = set()
4570 parameters = set()
4571 api_versions = set()
4573 for idx, osl in enumerate(os_data.values()):
4574 info.valid = bool(info.valid and osl and osl[0][1])
4575 if not info.valid:
4576 break
4578 (node_variants, node_params, node_api) = osl[0][3:6]
4579 if idx == 0:
4580 # First entry
4581 variants.update(node_variants)
4582 parameters.update(node_params)
4583 api_versions.update(node_api)
4584 else:
4585 # Filter out inconsistent values
4586 variants.intersection_update(node_variants)
4587 parameters.intersection_update(node_params)
4588 api_versions.intersection_update(node_api)
4590 info.variants = list(variants)
4591 info.parameters = list(parameters)
4592 info.api_versions = list(api_versions)
4594 data[os_name] = info
4596 # Prepare data in requested order
4597 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4598 if name in data]
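# Editorial sketch (not part of the original module): the variant/parameter
# merging above keeps only values reported by *every* valid node, e.g.:
#
#   variants = set(["default", "plain"])       # from the first node
#   variants.intersection_update(["default"])  # a later node lacks "plain"
#   assert variants == set(["default"])
#
# so an OS is advertised with a variant only if all its nodes agree on it.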
4601 class LUOsDiagnose(NoHooksLU):
4602 """Logical unit for OS diagnose/query.
4608 def _BuildFilter(fields, names):
4609 """Builds a filter for querying OSes.
4612 name_filter = qlang.MakeSimpleFilter("name", names)
4614 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4615 # respective field is not requested
4616 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4617 for fname in ["hidden", "blacklisted"]
4618 if fname not in fields]
4619 if "valid" not in fields:
4620 status_filter.append([qlang.OP_TRUE, "valid"])
4622 if status_filter:
4623 status_filter.insert(0, qlang.OP_AND)
4624 else:
4625 status_filter = None
4627 if name_filter and status_filter:
4628 return [qlang.OP_AND, name_filter, status_filter]
4629 elif name_filter:
4630 return name_filter
4631 else:
4632 return status_filter
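# Editorial example (not part of the original module): for fields=["name"]
# and names=["lenny-image"], _BuildFilter returns a structure equivalent to:
#
#   [qlang.OP_AND,
#    [qlang.OP_OR, [qlang.OP_EQUAL, "name", "lenny-image"]],
#    [qlang.OP_AND,
#     [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
#     [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
#     [qlang.OP_TRUE, "valid"]]]
#
# The exact shape of the name part is whatever qlang.MakeSimpleFilter
# produces; the nesting shown here is an assumption for illustration.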
4634 def CheckArguments(self):
4635 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4636 self.op.output_fields, False)
4638 def ExpandNames(self):
4639 self.oq.ExpandNames(self)
4641 def Exec(self, feedback_fn):
4642 return self.oq.OldStyleQuery(self)
4645 class LUNodeRemove(LogicalUnit):
4646 """Logical unit for removing a node.
4649 HPATH = "node-remove"
4650 HTYPE = constants.HTYPE_NODE
4652 def BuildHooksEnv(self):
4655 This doesn't run on the target node in the pre phase as a failed
4656 node would then be impossible to remove.
4660 "OP_TARGET": self.op.node_name,
4661 "NODE_NAME": self.op.node_name,
4664 def BuildHooksNodes(self):
4665 """Build hooks nodes.
4668 all_nodes = self.cfg.GetNodeList()
4670 all_nodes.remove(self.op.node_name)
4672 logging.warning("Node '%s', which is about to be removed, was not found"
4673 " in the list of all nodes", self.op.node_name)
4674 return (all_nodes, all_nodes)
4676 def CheckPrereq(self):
4677 """Check prerequisites.
4680 - the node exists in the configuration
4681 - it does not have primary or secondary instances
4682 - it's not the master
4684 Any errors are signaled by raising errors.OpPrereqError.
4687 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4688 node = self.cfg.GetNodeInfo(self.op.node_name)
4689 assert node is not None
4691 masternode = self.cfg.GetMasterNode()
4692 if node.name == masternode:
4693 raise errors.OpPrereqError("Node is the master node, failover to another"
4694 " node is required", errors.ECODE_INVAL)
4696 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4697 if node.name in instance.all_nodes:
4698 raise errors.OpPrereqError("Instance %s is still running on the node,"
4699 " please remove first" % instance_name,
4701 self.op.node_name = node.name
4704 def Exec(self, feedback_fn):
4705 """Removes the node from the cluster.
4709 logging.info("Stopping the node daemon and removing configs from node %s",
4712 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4714 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
4717 # Promote nodes to master candidate as needed
4718 _AdjustCandidatePool(self, exceptions=[node.name])
4719 self.context.RemoveNode(node.name)
4721 # Run post hooks on the node before it's removed
4722 _RunPostHook(self, node.name)
4724 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4725 msg = result.fail_msg
4726 if msg:
4727 self.LogWarning("Errors encountered on the remote node while leaving"
4728 " the cluster: %s", msg)
4730 # Remove node from our /etc/hosts
4731 if self.cfg.GetClusterInfo().modify_etc_hosts:
4732 master_node = self.cfg.GetMasterNode()
4733 result = self.rpc.call_etc_hosts_modify(master_node,
4734 constants.ETC_HOSTS_REMOVE,
4735 node.name, None)
4736 result.Raise("Can't update hosts file with new host data")
4737 _RedistributeAncillaryFiles(self)
4740 class _NodeQuery(_QueryBase):
4741 FIELDS = query.NODE_FIELDS
4743 def ExpandNames(self, lu):
4744 lu.needed_locks = {}
4745 lu.share_locks = _ShareAll()
4747 if self.names:
4748 self.wanted = _GetWantedNodes(lu, self.names)
4749 else:
4750 self.wanted = locking.ALL_SET
4752 self.do_locking = (self.use_locking and
4753 query.NQ_LIVE in self.requested_data)
4755 if self.do_locking:
4756 # If any non-static field is requested we need to lock the nodes
4757 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4759 def DeclareLocks(self, lu, level):
4760 pass
4762 def _GetQueryData(self, lu):
4763 """Computes the list of nodes and their attributes.
4765 """
4766 all_info = lu.cfg.GetAllNodesInfo()
4768 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4770 # Gather data as requested
4771 if query.NQ_LIVE in self.requested_data:
4772 # filter out non-vm_capable nodes
4773 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4775 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
4776 [lu.cfg.GetHypervisorType()])
4777 live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
4778 for (name, nresult) in node_data.items()
4779 if not nresult.fail_msg and nresult.payload)
4780 else:
4781 live_data = None
4783 if query.NQ_INST in self.requested_data:
4784 node_to_primary = dict([(name, set()) for name in nodenames])
4785 node_to_secondary = dict([(name, set()) for name in nodenames])
4787 inst_data = lu.cfg.GetAllInstancesInfo()
4789 for inst in inst_data.values():
4790 if inst.primary_node in node_to_primary:
4791 node_to_primary[inst.primary_node].add(inst.name)
4792 for secnode in inst.secondary_nodes:
4793 if secnode in node_to_secondary:
4794 node_to_secondary[secnode].add(inst.name)
4795 else:
4796 node_to_primary = None
4797 node_to_secondary = None
4799 if query.NQ_OOB in self.requested_data:
4800 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4801 for name, node in all_info.iteritems())
4802 else:
4803 oob_support = None
4805 if query.NQ_GROUP in self.requested_data:
4806 groups = lu.cfg.GetAllNodeGroupsInfo()
4807 else:
4808 groups = {}
4810 return query.NodeQueryData([all_info[name] for name in nodenames],
4811 live_data, lu.cfg.GetMasterNode(),
4812 node_to_primary, node_to_secondary, groups,
4813 oob_support, lu.cfg.GetClusterInfo())
4816 class LUNodeQuery(NoHooksLU):
4817 """Logical unit for querying nodes.
4820 # pylint: disable=W0142
4823 def CheckArguments(self):
4824 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4825 self.op.output_fields, self.op.use_locking)
4827 def ExpandNames(self):
4828 self.nq.ExpandNames(self)
4830 def DeclareLocks(self, level):
4831 self.nq.DeclareLocks(self, level)
4833 def Exec(self, feedback_fn):
4834 return self.nq.OldStyleQuery(self)
4837 class LUNodeQueryvols(NoHooksLU):
4838 """Logical unit for getting volumes on node(s).
4842 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4843 _FIELDS_STATIC = utils.FieldSet("node")
4845 def CheckArguments(self):
4846 _CheckOutputFields(static=self._FIELDS_STATIC,
4847 dynamic=self._FIELDS_DYNAMIC,
4848 selected=self.op.output_fields)
4850 def ExpandNames(self):
4851 self.share_locks = _ShareAll()
4852 self.needed_locks = {}
4854 if not self.op.nodes:
4855 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4856 else:
4857 self.needed_locks[locking.LEVEL_NODE] = \
4858 _GetWantedNodes(self, self.op.nodes)
4860 def Exec(self, feedback_fn):
4861 """Computes the list of nodes and their attributes.
4863 """
4864 nodenames = self.owned_locks(locking.LEVEL_NODE)
4865 volumes = self.rpc.call_node_volumes(nodenames)
4867 ilist = self.cfg.GetAllInstancesInfo()
4868 vol2inst = _MapInstanceDisksToNodes(ilist.values())
4870 output = []
4871 for node in nodenames:
4872 nresult = volumes[node]
4873 if nresult.offline:
4874 continue
4875 msg = nresult.fail_msg
4876 if msg:
4877 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
4878 continue
4880 node_vols = sorted(nresult.payload,
4881 key=operator.itemgetter("dev"))
4883 for vol in node_vols:
4884 node_output = []
4885 for field in self.op.output_fields:
4886 if field == "node":
4887 val = node
4888 elif field == "phys":
4889 val = vol["dev"]
4890 elif field == "vg":
4891 val = vol["vg"]
4892 elif field == "name":
4893 val = vol["name"]
4894 elif field == "size":
4895 val = int(float(vol["size"]))
4896 elif field == "instance":
4897 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
4898 else:
4899 raise errors.ParameterError(field)
4900 node_output.append(str(val))
4902 output.append(node_output)
4904 return output
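# Editorial example (not part of the original module): with output_fields
# ["node", "phys", "vg", "name", "size", "instance"], each row of the
# returned list looks roughly like (all values are hypothetical and
# stringified via str(val) above, including sizes):
#
#   ["node1.example.com", "/dev/sdb1", "xenvg",
#    "instance1.example.com-sda", "2048", "instance1.example.com"]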
4907 class LUNodeQueryStorage(NoHooksLU):
4908 """Logical unit for getting information on storage units on node(s).
4911 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4914 def CheckArguments(self):
4915 _CheckOutputFields(static=self._FIELDS_STATIC,
4916 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4917 selected=self.op.output_fields)
4919 def ExpandNames(self):
4920 self.share_locks = _ShareAll()
4921 self.needed_locks = {}
4923 if self.op.nodes:
4924 self.needed_locks[locking.LEVEL_NODE] = \
4925 _GetWantedNodes(self, self.op.nodes)
4926 else:
4927 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4929 def Exec(self, feedback_fn):
4930 """Computes the list of nodes and their attributes.
4932 """
4933 self.nodes = self.owned_locks(locking.LEVEL_NODE)
4935 # Always get name to sort by
4936 if constants.SF_NAME in self.op.output_fields:
4937 fields = self.op.output_fields[:]
4938 else:
4939 fields = [constants.SF_NAME] + self.op.output_fields
4941 # Never ask for node or type as it's only known to the LU
4942 for extra in [constants.SF_NODE, constants.SF_TYPE]:
4943 while extra in fields:
4944 fields.remove(extra)
4946 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4947 name_idx = field_idx[constants.SF_NAME]
4949 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4950 data = self.rpc.call_storage_list(self.nodes,
4951 self.op.storage_type, st_args,
4952 self.op.name, fields)
4954 result = []
4956 for node in utils.NiceSort(self.nodes):
4957 nresult = data[node]
4958 if nresult.offline:
4959 continue
4961 msg = nresult.fail_msg
4962 if msg:
4963 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4964 continue
4966 rows = dict([(row[name_idx], row) for row in nresult.payload])
4968 for name in utils.NiceSort(rows.keys()):
4969 row = rows[name]
4971 out = []
4973 for field in self.op.output_fields:
4974 if field == constants.SF_NODE:
4975 val = node
4976 elif field == constants.SF_TYPE:
4977 val = self.op.storage_type
4978 elif field in field_idx:
4979 val = row[field_idx[field]]
4980 else:
4981 raise errors.ParameterError(field)
4983 out.append(val)
4985 result.append(out)
4987 return result
4990 class _InstanceQuery(_QueryBase):
4991 FIELDS = query.INSTANCE_FIELDS
4993 def ExpandNames(self, lu):
4994 lu.needed_locks = {}
4995 lu.share_locks = _ShareAll()
4997 if self.names:
4998 self.wanted = _GetWantedInstances(lu, self.names)
4999 else:
5000 self.wanted = locking.ALL_SET
5002 self.do_locking = (self.use_locking and
5003 query.IQ_LIVE in self.requested_data)
5004 if self.do_locking:
5005 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5006 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5007 lu.needed_locks[locking.LEVEL_NODE] = []
5008 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5010 self.do_grouplocks = (self.do_locking and
5011 query.IQ_NODES in self.requested_data)
5013 def DeclareLocks(self, lu, level):
5014 if self.do_locking:
5015 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5016 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5018 # Lock all groups used by instances optimistically; this requires going
5019 # via the node before it's locked, requiring verification later on
5020 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5021 set(group_uuid
5022 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5023 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5024 elif level == locking.LEVEL_NODE:
5025 lu._LockInstancesNodes() # pylint: disable=W0212
5027 @staticmethod
5028 def _CheckGroupLocks(lu):
5029 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5030 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5032 # Check if node groups for locked instances are still correct
5033 for instance_name in owned_instances:
5034 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5036 def _GetQueryData(self, lu):
5037 """Computes the list of instances and their attributes.
5040 if self.do_grouplocks:
5041 self._CheckGroupLocks(lu)
5043 cluster = lu.cfg.GetClusterInfo()
5044 all_info = lu.cfg.GetAllInstancesInfo()
5046 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5048 instance_list = [all_info[name] for name in instance_names]
5049 nodes = frozenset(itertools.chain(*(inst.all_nodes
5050 for inst in instance_list)))
5051 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5052 bad_nodes = []
5053 offline_nodes = []
5054 wrongnode_inst = set()
5056 # Gather data as requested
5057 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5058 live_data = {}
5059 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5060 for name in nodes:
5061 result = node_data[name]
5062 if result.offline:
5063 # offline nodes will be in both lists
5064 assert result.fail_msg
5065 offline_nodes.append(name)
5066 if result.fail_msg:
5067 bad_nodes.append(name)
5068 elif result.payload:
5069 for inst in result.payload:
5070 if inst in all_info:
5071 if all_info[inst].primary_node == name:
5072 live_data.update(result.payload)
5073 else:
5074 wrongnode_inst.add(inst)
5075 else:
5076 # orphan instance; we don't list it here as we don't
5077 # handle this case yet in the output of instance listing
5078 logging.warning("Orphan instance '%s' found on node %s",
5079 inst, name)
5080 # else no instance is alive
5081 else:
5082 live_data = {}
5084 if query.IQ_DISKUSAGE in self.requested_data:
5085 disk_usage = dict((inst.name,
5086 _ComputeDiskSize(inst.disk_template,
5087 [{constants.IDISK_SIZE: disk.size}
5088 for disk in inst.disks]))
5089 for inst in instance_list)
5090 else:
5091 disk_usage = None
5093 if query.IQ_CONSOLE in self.requested_data:
5094 consinfo = {}
5095 for inst in instance_list:
5096 if inst.name in live_data:
5097 # Instance is running
5098 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5099 else:
5100 consinfo[inst.name] = None
5101 assert set(consinfo.keys()) == set(instance_names)
5102 else:
5103 consinfo = None
5105 if query.IQ_NODES in self.requested_data:
5106 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5107 instance_list)))
5108 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5109 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5110 for uuid in set(map(operator.attrgetter("group"),
5111 nodes.values())))
5112 else:
5113 nodes = None
5114 groups = None
5116 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5117 disk_usage, offline_nodes, bad_nodes,
5118 live_data, wrongnode_inst, consinfo,
5119 nodes, groups)
5122 class LUQuery(NoHooksLU):
5123 """Query for resources/items of a certain kind.
5126 # pylint: disable=W0142
5129 def CheckArguments(self):
5130 qcls = _GetQueryImplementation(self.op.what)
5132 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5134 def ExpandNames(self):
5135 self.impl.ExpandNames(self)
5137 def DeclareLocks(self, level):
5138 self.impl.DeclareLocks(self, level)
5140 def Exec(self, feedback_fn):
5141 return self.impl.NewStyleQuery(self)
5144 class LUQueryFields(NoHooksLU):
5145 """Query for resources/items of a certain kind.
5148 # pylint: disable=W0142
5151 def CheckArguments(self):
5152 self.qcls = _GetQueryImplementation(self.op.what)
5154 def ExpandNames(self):
5155 self.needed_locks = {}
5157 def Exec(self, feedback_fn):
5158 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5161 class LUNodeModifyStorage(NoHooksLU):
5162 """Logical unit for modifying a storage volume on a node.
5167 def CheckArguments(self):
5168 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5170 storage_type = self.op.storage_type
5173 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5175 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5176 " modified" % storage_type,
5179 diff = set(self.op.changes.keys()) - modifiable
5181 raise errors.OpPrereqError("The following fields can not be modified for"
5182 " storage units of type '%s': %r" %
5183 (storage_type, list(diff)),
5186 def ExpandNames(self):
5187 self.needed_locks = {
5188 locking.LEVEL_NODE: self.op.node_name,
5191 def Exec(self, feedback_fn):
5192 """Computes the list of nodes and their attributes.
5195 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5196 result = self.rpc.call_storage_modify(self.op.node_name,
5197 self.op.storage_type, st_args,
5198 self.op.name, self.op.changes)
5199 result.Raise("Failed to modify storage unit '%s' on %s" %
5200 (self.op.name, self.op.node_name))
5203 class LUNodeAdd(LogicalUnit):
5204 """Logical unit for adding node to the cluster.
5208 HTYPE = constants.HTYPE_NODE
5209 _NFLAGS = ["master_capable", "vm_capable"]
5211 def CheckArguments(self):
5212 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5213 # validate/normalize the node name
5214 self.hostname = netutils.GetHostname(name=self.op.node_name,
5215 family=self.primary_ip_family)
5216 self.op.node_name = self.hostname.name
5218 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5219 raise errors.OpPrereqError("Cannot readd the master node",
5222 if self.op.readd and self.op.group:
5223 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5224 " being readded", errors.ECODE_INVAL)
5226 def BuildHooksEnv(self):
5227 """Build hooks env.
5229 This will run on all nodes before, and on all nodes + the new node after.
5231 """
5232 return {
5233 "OP_TARGET": self.op.node_name,
5234 "NODE_NAME": self.op.node_name,
5235 "NODE_PIP": self.op.primary_ip,
5236 "NODE_SIP": self.op.secondary_ip,
5237 "MASTER_CAPABLE": str(self.op.master_capable),
5238 "VM_CAPABLE": str(self.op.vm_capable),
5241 def BuildHooksNodes(self):
5242 """Build hooks nodes.
5245 # Exclude added node
5246 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5247 post_nodes = pre_nodes + [self.op.node_name, ]
5249 return (pre_nodes, post_nodes)
5251 def CheckPrereq(self):
5252 """Check prerequisites.
5255 - the new node is not already in the config
5257 - its parameters (single/dual homed) matches the cluster
5259 Any errors are signaled by raising errors.OpPrereqError.
5263 hostname = self.hostname
5264 node = hostname.name
5265 primary_ip = self.op.primary_ip = hostname.ip
5266 if self.op.secondary_ip is None:
5267 if self.primary_ip_family == netutils.IP6Address.family:
5268 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5269 " IPv4 address must be given as secondary",
5271 self.op.secondary_ip = primary_ip
5273 secondary_ip = self.op.secondary_ip
5274 if not netutils.IP4Address.IsValid(secondary_ip):
5275 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5276 " address" % secondary_ip, errors.ECODE_INVAL)
5278 node_list = cfg.GetNodeList()
5279 if not self.op.readd and node in node_list:
5280 raise errors.OpPrereqError("Node %s is already in the configuration" %
5281 node, errors.ECODE_EXISTS)
5282 elif self.op.readd and node not in node_list:
5283 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5286 self.changed_primary_ip = False
5288 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5289 if self.op.readd and node == existing_node_name:
5290 if existing_node.secondary_ip != secondary_ip:
5291 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5292 " address configuration as before",
5294 if existing_node.primary_ip != primary_ip:
5295 self.changed_primary_ip = True
5299 if (existing_node.primary_ip == primary_ip or
5300 existing_node.secondary_ip == primary_ip or
5301 existing_node.primary_ip == secondary_ip or
5302 existing_node.secondary_ip == secondary_ip):
5303 raise errors.OpPrereqError("New node ip address(es) conflict with"
5304 " existing node %s" % existing_node.name,
5305 errors.ECODE_NOTUNIQUE)
5307 # After this 'if' block, None is no longer a valid value for the
5308 # _capable op attributes
5309 if self.op.readd:
5310 old_node = self.cfg.GetNodeInfo(node)
5311 assert old_node is not None, "Can't retrieve locked node %s" % node
5312 for attr in self._NFLAGS:
5313 if getattr(self.op, attr) is None:
5314 setattr(self.op, attr, getattr(old_node, attr))
5315 else:
5316 for attr in self._NFLAGS:
5317 if getattr(self.op, attr) is None:
5318 setattr(self.op, attr, True)
5320 if self.op.readd and not self.op.vm_capable:
5321 pri, sec = cfg.GetNodeInstances(node)
5322 if pri or sec:
5323 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5324 " flag set to false, but it already holds"
5325 " instances" % node,
5326 errors.ECODE_STATE)
5328 # check that the type of the node (single versus dual homed) is the
5329 # same as for the master
5330 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5331 master_singlehomed = myself.secondary_ip == myself.primary_ip
5332 newbie_singlehomed = secondary_ip == primary_ip
5333 if master_singlehomed != newbie_singlehomed:
5334 if master_singlehomed:
5335 raise errors.OpPrereqError("The master has no secondary ip but the"
5336 " new node has one",
5339 raise errors.OpPrereqError("The master has a secondary ip but the"
5340 " new node doesn't have one",
5343 # checks reachability
5344 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5345 raise errors.OpPrereqError("Node not reachable by ping",
5346 errors.ECODE_ENVIRON)
5348 if not newbie_singlehomed:
5349 # check reachability from my secondary ip to newbie's secondary ip
5350 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5351 source=myself.secondary_ip):
5352 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5353 " based ping to node daemon port",
5354 errors.ECODE_ENVIRON)
5356 if self.op.readd:
5357 exceptions = [node]
5358 else:
5359 exceptions = []
5361 if self.op.master_capable:
5362 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5363 else:
5364 self.master_candidate = False
5366 if self.op.readd:
5367 self.new_node = old_node
5368 else:
5369 node_group = cfg.LookupNodeGroup(self.op.group)
5370 self.new_node = objects.Node(name=node,
5371 primary_ip=primary_ip,
5372 secondary_ip=secondary_ip,
5373 master_candidate=self.master_candidate,
5374 offline=False, drained=False,
5375 group=node_group)
5377 if self.op.ndparams:
5378 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5380 if self.op.hv_state:
5381 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5383 if self.op.disk_state:
5384 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5386 def Exec(self, feedback_fn):
5387 """Adds the new node to the cluster.
5390 new_node = self.new_node
5391 node = new_node.name
5393 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5396 # We adding a new node so we assume it's powered
5397 new_node.powered = True
5399 # for re-adds, reset the offline/drained/master-candidate flags;
5400 # we need to reset here, otherwise offline would prevent RPC calls
5401 # later in the procedure; this also means that if the re-add
5402 # fails, we are left with a non-offlined, broken node
5403 if self.op.readd:
5404 new_node.drained = new_node.offline = False # pylint: disable=W0201
5405 self.LogInfo("Readding a node, the offline/drained flags were reset")
5406 # if we demote the node, we do cleanup later in the procedure
5407 new_node.master_candidate = self.master_candidate
5408 if self.changed_primary_ip:
5409 new_node.primary_ip = self.op.primary_ip
5411 # copy the master/vm_capable flags
5412 for attr in self._NFLAGS:
5413 setattr(new_node, attr, getattr(self.op, attr))
5415 # notify the user about any possible mc promotion
5416 if new_node.master_candidate:
5417 self.LogInfo("Node will be a master candidate")
5419 if self.op.ndparams:
5420 new_node.ndparams = self.op.ndparams
5421 else:
5422 new_node.ndparams = {}
5424 if self.op.hv_state:
5425 new_node.hv_state_static = self.new_hv_state
5427 if self.op.disk_state:
5428 new_node.disk_state_static = self.new_disk_state
5430 # check connectivity
5431 result = self.rpc.call_version([node])[node]
5432 result.Raise("Can't get version information from node %s" % node)
5433 if constants.PROTOCOL_VERSION == result.payload:
5434 logging.info("Communication to node %s fine, sw version %s match",
5435 node, result.payload)
5436 else:
5437 raise errors.OpExecError("Version mismatch master version %s,"
5438 " node version %s" %
5439 (constants.PROTOCOL_VERSION, result.payload))
5441 # Add node to our /etc/hosts, and add key to known_hosts
5442 if self.cfg.GetClusterInfo().modify_etc_hosts:
5443 master_node = self.cfg.GetMasterNode()
5444 result = self.rpc.call_etc_hosts_modify(master_node,
5445 constants.ETC_HOSTS_ADD,
5446 self.hostname.name,
5447 self.hostname.ip)
5448 result.Raise("Can't update hosts file with new host data")
5450 if new_node.secondary_ip != new_node.primary_ip:
5451 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5452 False)
5454 node_verify_list = [self.cfg.GetMasterNode()]
5455 node_verify_param = {
5456 constants.NV_NODELIST: ([node], {}),
5457 # TODO: do a node-net-test as well?
5458 }
5460 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5461 self.cfg.GetClusterName())
5462 for verifier in node_verify_list:
5463 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5464 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5465 if nl_payload:
5466 for failed in nl_payload:
5467 feedback_fn("ssh/hostname verification failed"
5468 " (checking from %s): %s" %
5469 (verifier, nl_payload[failed]))
5470 raise errors.OpExecError("ssh/hostname verification failed")
5473 _RedistributeAncillaryFiles(self)
5474 self.context.ReaddNode(new_node)
5475 # make sure we redistribute the config
5476 self.cfg.Update(new_node, feedback_fn)
5477 # and make sure the new node will not have old files around
5478 if not new_node.master_candidate:
5479 result = self.rpc.call_node_demote_from_mc(new_node.name)
5480 msg = result.fail_msg
5481 if msg:
5482 self.LogWarning("Node failed to demote itself from master"
5483 " candidate status: %s" % msg)
5484 else:
5485 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5486 additional_vm=self.op.vm_capable)
5487 self.context.AddNode(new_node, self.proc.GetECId())
5490 class LUNodeSetParams(LogicalUnit):
5491 """Modifies the parameters of a node.
5493 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5494 to the node role (as _ROLE_*)
5495 @cvar _R2F: a dictionary from node role to tuples of flags
5496 @cvar _FLAGS: a list of attribute names corresponding to the flags
5498 """
5499 HPATH = "node-modify"
5500 HTYPE = constants.HTYPE_NODE
5502 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5503 _F2R = {
5504 (True, False, False): _ROLE_CANDIDATE,
5505 (False, True, False): _ROLE_DRAINED,
5506 (False, False, True): _ROLE_OFFLINE,
5507 (False, False, False): _ROLE_REGULAR,
5508 }
5509 _R2F = dict((v, k) for k, v in _F2R.items())
5510 _FLAGS = ["master_candidate", "drained", "offline"]
5512 def CheckArguments(self):
5513 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5514 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5515 self.op.master_capable, self.op.vm_capable,
5516 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5517 self.op.disk_state]
5518 if all_mods.count(None) == len(all_mods):
5519 raise errors.OpPrereqError("Please pass at least one modification",
5520 errors.ECODE_INVAL)
5521 if all_mods.count(True) > 1:
5522 raise errors.OpPrereqError("Can't set the node into more than one"
5523 " state at the same time",
5524 errors.ECODE_INVAL)
5526 # Boolean value that tells us whether we might be demoting from MC
5527 self.might_demote = (self.op.master_candidate == False or
5528 self.op.offline == True or
5529 self.op.drained == True or
5530 self.op.master_capable == False)
5532 if self.op.secondary_ip:
5533 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5534 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5535 " address" % self.op.secondary_ip,
5538 self.lock_all = self.op.auto_promote and self.might_demote
5539 self.lock_instances = self.op.secondary_ip is not None
5541 def _InstanceFilter(self, instance):
5542 """Filter for getting affected instances.
5545 return (instance.disk_template in constants.DTS_INT_MIRROR and
5546 self.op.node_name in instance.all_nodes)
5548 def ExpandNames(self):
5550 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5552 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5554 # Since modifying a node can have severe effects on currently running
5555 # operations the resource lock is at least acquired in shared mode
5556 self.needed_locks[locking.LEVEL_NODE_RES] = \
5557 self.needed_locks[locking.LEVEL_NODE]
5559 # Get node resource and instance locks in shared mode; they are not used
5560 # for anything but read-only access
5561 self.share_locks[locking.LEVEL_NODE_RES] = 1
5562 self.share_locks[locking.LEVEL_INSTANCE] = 1
5564 if self.lock_instances:
5565 self.needed_locks[locking.LEVEL_INSTANCE] = \
5566 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5568 def BuildHooksEnv(self):
5569 """Build hooks env.
5571 This runs on the master node.
5573 """
5574 return {
5575 "OP_TARGET": self.op.node_name,
5576 "MASTER_CANDIDATE": str(self.op.master_candidate),
5577 "OFFLINE": str(self.op.offline),
5578 "DRAINED": str(self.op.drained),
5579 "MASTER_CAPABLE": str(self.op.master_capable),
5580 "VM_CAPABLE": str(self.op.vm_capable),
5583 def BuildHooksNodes(self):
5584 """Build hooks nodes.
5587 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5590 def CheckPrereq(self):
5591 """Check prerequisites.
5593 This only checks the instance list against the existing names.
5595 """
5596 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5598 if self.lock_instances:
5599 affected_instances = \
5600 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5602 # Verify instance locks
5603 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5604 wanted_instances = frozenset(affected_instances.keys())
5605 if wanted_instances - owned_instances:
5606 raise errors.OpPrereqError("Instances affected by changing node %s's"
5607 " secondary IP address have changed since"
5608 " locks were acquired, wanted '%s', have"
5609 " '%s'; retry the operation" %
5611 utils.CommaJoin(wanted_instances),
5612 utils.CommaJoin(owned_instances)),
5615 affected_instances = None
5617 if (self.op.master_candidate is not None or
5618 self.op.drained is not None or
5619 self.op.offline is not None):
5620 # we can't change the master's node flags
5621 if self.op.node_name == self.cfg.GetMasterNode():
5622 raise errors.OpPrereqError("The master role can be changed"
5623 " only via master-failover",
5626 if self.op.master_candidate and not node.master_capable:
5627 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5628 " it a master candidate" % node.name,
5631 if self.op.vm_capable == False:
5632 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5634 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5635 " the vm_capable flag" % node.name,
5638 if node.master_candidate and self.might_demote and not self.lock_all:
5639 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5640 # check if after removing the current node, we're missing master
5641 # candidates
5642 (mc_remaining, mc_should, _) = \
5643 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5644 if mc_remaining < mc_should:
5645 raise errors.OpPrereqError("Not enough master candidates, please"
5646 " pass auto promote option to allow"
5647 " promotion", errors.ECODE_STATE)
5649 self.old_flags = old_flags = (node.master_candidate,
5650 node.drained, node.offline)
5651 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5652 self.old_role = old_role = self._F2R[old_flags]
5654 # Check for ineffective changes
5655 for attr in self._FLAGS:
5656 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5657 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5658 setattr(self.op, attr, None)
5660 # Past this point, any flag change to False means a transition
5661 # away from the respective state, as only real changes are kept
5663 # TODO: We might query the real power state if it supports OOB
5664 if _SupportsOob(self.cfg, node):
5665 if self.op.offline is False and not (node.powered or
5666 self.op.powered == True):
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
                                    " offline status can be reset") %
                                   self.op.node_name)
5670 elif self.op.powered is not None:
5671 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5672 " as it does not support out-of-band"
5673 " handling") % self.op.node_name)
5675 # If we're being deofflined/drained, we'll MC ourself if needed
5676 if (self.op.drained == False or self.op.offline == False or
5677 (self.op.master_capable and not node.master_capable)):
5678 if _DecideSelfPromotion(self):
5679 self.op.master_candidate = True
5680 self.LogInfo("Auto-promoting node to master candidate")
5682 # If we're no longer master capable, we'll demote ourselves from MC
5683 if self.op.master_capable == False and node.master_candidate:
5684 self.LogInfo("Demoting from master candidate")
5685 self.op.master_candidate = False
5688 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5689 if self.op.master_candidate:
5690 new_role = self._ROLE_CANDIDATE
5691 elif self.op.drained:
5692 new_role = self._ROLE_DRAINED
5693 elif self.op.offline:
5694 new_role = self._ROLE_OFFLINE
5695 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
      # False is still in new flags, which means we're un-setting (the
      # respective) flag
      new_role = self._ROLE_REGULAR
    else: # no new flags, nothing, keep old role
      new_role = old_role

    self.new_role = new_role
5704 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5705 # Trying to transition out of offline status
5706 # TODO: Use standard RPC runner, but make sure it works when the node is
5707 # still marked offline
      result = rpc.BootstrapRunner().call_version([node.name])[node.name]
      if result.fail_msg:
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
                                   " to report its version: %s" %
                                   (node.name, result.fail_msg),
                                   errors.ECODE_STATE)
      else:
        self.LogWarning("Transitioning node from offline to online state"
                        " without using re-add. Please make sure the node"
                        " is healthy!")
5719 if self.op.secondary_ip:
5720 # Ok even without locking, because this can't be changed by any LU
5721 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5722 master_singlehomed = master.secondary_ip == master.primary_ip
5723 if master_singlehomed and self.op.secondary_ip:
5724 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5725 " homed cluster", errors.ECODE_INVAL)
5727 assert not (frozenset(affected_instances) -
                  self.owned_locks(locking.LEVEL_INSTANCE))

      if node.offline:
        if affected_instances:
          raise errors.OpPrereqError("Cannot change secondary IP address:"
                                     " offline node has instances (%s)"
                                     " configured to use it" %
                                     utils.CommaJoin(affected_instances.keys()))
      else:
5737 # On online nodes, check that no instances are running, and that
5738 # the node has the new ip and we can reach it.
5739 for instance in affected_instances.values():
5740 _CheckInstanceState(self, instance, INSTANCE_DOWN,
5741 msg="cannot change secondary ip")
5743 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5744 if master.name != node.name:
5745 # check reachability from master secondary ip to new secondary ip
5746 if not netutils.TcpPing(self.op.secondary_ip,
5747 constants.DEFAULT_NODED_PORT,
5748 source=master.secondary_ip):
5749 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5750 " based ping to node daemon port",
5751 errors.ECODE_ENVIRON)
5753 if self.op.ndparams:
5754 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5755 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5756 self.new_ndparams = new_ndparams
5758 if self.op.hv_state:
5759 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
5760 self.node.hv_state_static)
5762 if self.op.disk_state:
5763 self.new_disk_state = \
5764 _MergeAndVerifyDiskState(self.op.disk_state,
5765 self.node.disk_state_static)
  def Exec(self, feedback_fn):
    """Modifies a node.

    """
    node = self.node
    old_role = self.old_role
    new_role = self.new_role

    result = []
5777 if self.op.ndparams:
5778 node.ndparams = self.new_ndparams
5780 if self.op.powered is not None:
5781 node.powered = self.op.powered
5783 if self.op.hv_state:
5784 node.hv_state_static = self.new_hv_state
5786 if self.op.disk_state:
5787 node.disk_state_static = self.new_disk_state
5789 for attr in ["master_capable", "vm_capable"]:
      val = getattr(self.op, attr)
      if val is not None:
        setattr(node, attr, val)
        result.append((attr, str(val)))
5795 if new_role != old_role:
5796 # Tell the node to demote itself, if no longer MC and not offline
5797 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s", msg)
5802 new_flags = self._R2F[new_role]
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
        if of != nf:
          result.append((desc, str(nf)))
5806 (node.master_candidate, node.drained, node.offline) = new_flags
      # we locked all nodes, we adjust the CP before updating this node
      if self.lock_all:
        _AdjustCandidatePool(self, [node.name])
5812 if self.op.secondary_ip:
5813 node.secondary_ip = self.op.secondary_ip
5814 result.append(("secondary_ip", self.op.secondary_ip))
5816 # this will trigger configuration file update, if needed
5817 self.cfg.Update(node, feedback_fn)
    # this will trigger job queue propagation or cleanup if the mc
    # flag changed
5821 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
      self.context.ReaddNode(node)

    return result
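  # Illustrative note (values hypothetical): Exec returns a list of
  # (parameter, new_value) pairs that is reported back to the caller, e.g.
  # demoting a node could yield [("master_candidate", "False")].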
5827 class LUNodePowercycle(NoHooksLU):
  """Powercycles a node.

  """
  REQ_BGL = False
5833 def CheckArguments(self):
5834 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5835 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)
5840 def ExpandNames(self):
5841 """Locking for PowercycleNode.
5843 This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}
  def Exec(self, feedback_fn):
    """Reboots a node.

    """
5853 result = self.rpc.call_node_powercycle(self.op.node_name,
5854 self.cfg.GetHypervisorType())
5855 result.Raise("Failed to schedule the reboot")
5856 return result.payload
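# Illustrative usage sketch (not executed here): powercycling is normally
# requested through the opcode layer, e.g. assuming the standard opcode:
#
#   op = opcodes.OpNodePowercycle(node_name="node1.example.com", force=False)
#
# The payload returned by Exec above is the node daemon's status message.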
5859 class LUClusterQuery(NoHooksLU):
  """Query cluster configuration.

  """
  REQ_BGL = False
5865 def ExpandNames(self):
5866 self.needed_locks = {}
5868 def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    os_hvp = {}
5875 # Filter just for enabled hypervisors
5876 for os_name, hv_dict in cluster.os_hvp.items():
5877 os_hvp[os_name] = {}
5878 for hv_name, hv_params in hv_dict.items():
5879 if hv_name in cluster.enabled_hypervisors:
5880 os_hvp[os_name][hv_name] = hv_params
5882 # Convert ip_family to ip_version
5883 primary_ip_version = constants.IP4_VERSION
5884 if cluster.primary_ip_family == netutils.IP6Address.family:
5885 primary_ip_version = constants.IP6_VERSION
5888 "software_version": constants.RELEASE_VERSION,
5889 "protocol_version": constants.PROTOCOL_VERSION,
5890 "config_version": constants.CONFIG_VERSION,
5891 "os_api_version": max(constants.OS_API_VERSIONS),
5892 "export_version": constants.EXPORT_VERSION,
5893 "architecture": (platform.architecture()[0], platform.machine()),
5894 "name": cluster.cluster_name,
5895 "master": cluster.master_node,
5896 "default_hypervisor": cluster.primary_hypervisor,
5897 "enabled_hypervisors": cluster.enabled_hypervisors,
5898 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
5902 "osparams": cluster.osparams,
5903 "ipolicy": cluster.ipolicy,
5904 "nicparams": cluster.nicparams,
5905 "ndparams": cluster.ndparams,
5906 "candidate_pool_size": cluster.candidate_pool_size,
5907 "master_netdev": cluster.master_netdev,
5908 "master_netmask": cluster.master_netmask,
5909 "use_external_mip_script": cluster.use_external_mip_script,
5910 "volume_group_name": cluster.volume_group_name,
5911 "drbd_usermode_helper": cluster.drbd_usermode_helper,
5912 "file_storage_dir": cluster.file_storage_dir,
5913 "shared_file_storage_dir": cluster.shared_file_storage_dir,
5914 "maintain_node_health": cluster.maintain_node_health,
5915 "ctime": cluster.ctime,
5916 "mtime": cluster.mtime,
5917 "uuid": cluster.uuid,
5918 "tags": list(cluster.GetTags()),
5919 "uid_pool": cluster.uid_pool,
5920 "default_iallocator": cluster.default_iallocator,
5921 "reserved_lvs": cluster.reserved_lvs,
5922 "primary_ip_version": primary_ip_version,
5923 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
5924 "hidden_os": cluster.hidden_os,
      "blacklisted_os": cluster.blacklisted_os,
      }

    return result
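# Illustrative sketch (hypothetical consumer): the dict built above is the
# opcode result, so a client could do e.g.:
#
#   info = cl.QueryClusterInfo()   # luxi/RAPI client, assumed here
#   print("%s (master: %s)" % (info["name"], info["master"]))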
5931 class LUClusterConfigQuery(NoHooksLU):
  """Return configuration values.

  """
  REQ_BGL = False
5936 _FIELDS_DYNAMIC = utils.FieldSet()
5937 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
5938 "watcher_pause", "volume_group_name")
5940 def CheckArguments(self):
5941 _CheckOutputFields(static=self._FIELDS_STATIC,
5942 dynamic=self._FIELDS_DYNAMIC,
5943 selected=self.op.output_fields)
5945 def ExpandNames(self):
5946 self.needed_locks = {}
5948 def Exec(self, feedback_fn):
    """Dump a representation of the cluster config to the standard output.

    """
    values = []
5953 for field in self.op.output_fields:
5954 if field == "cluster_name":
5955 entry = self.cfg.GetClusterName()
5956 elif field == "master_node":
5957 entry = self.cfg.GetMasterNode()
5958 elif field == "drain_flag":
5959 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
5960 elif field == "watcher_pause":
5961 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
5962 elif field == "volume_group_name":
5963 entry = self.cfg.GetVGName()
      else:
        raise errors.ParameterError(field)
      values.append(entry)

    return values
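# Illustrative sketch (hypothetical values): values come back in the order of
# the requested fields, e.g. output_fields=["cluster_name", "drain_flag"]
# could return ["cluster.example.com", False].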
5970 class LUInstanceActivateDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  REQ_BGL = False
5976 def ExpandNames(self):
5977 self._ExpandAndLockInstance()
5978 self.needed_locks[locking.LEVEL_NODE] = []
5979 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5981 def DeclareLocks(self, level):
5982 if level == locking.LEVEL_NODE:
5983 self._LockInstancesNodes()
5985 def CheckPrereq(self):
5986 """Check prerequisites.
    This checks that the instance is in the cluster.

    """
5991 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5992 assert self.instance is not None, \
5993 "Cannot retrieve locked instance %s" % self.op.instance_name
5994 _CheckNodeOnline(self, self.instance.primary_node)
5996 def Exec(self, feedback_fn):
    """Activate the disks.

    """
6000 disks_ok, disks_info = \
6001 _AssembleInstanceDisks(self, self.instance,
                             ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.
6013 This sets up the block devices on all nodes.
6015 @type lu: L{LogicalUnit}
6016 @param lu: the logical unit on whose behalf we execute
6017 @type instance: L{objects.Instance}
6018 @param instance: the instance for whose disks we assemble
6019 @type disks: list of L{objects.Disk} or None
6020 @param disks: which disks to assemble (or all, if None)
6021 @type ignore_secondaries: boolean
6022 @param ignore_secondaries: if true, errors on secondary nodes
6023 won't result in an error return from the function
6024 @type ignore_size: boolean
6025 @param ignore_size: if true, the current known size of the disk
6026 will not be used during the disk activation, useful for cases
6027 when the size is wrong
6028 @return: False if the operation failed, otherwise a list of
6029 (host, instance_visible_name, node_visible_name)
      with the mapping from node devices to instance devices

  """
  device_info = []
  disks_ok = True
6035 iname = instance.name
6036 disks = _ExpandCheckDisks(instance, disks)
6038 # With the two passes mechanism we try to reduce the window of
6039 # opportunity for the race condition of switching DRBD to primary
6040 # before handshaking occured, but we do not eliminate it
6042 # The proper fix would be to wait (with some limits) until the
6043 # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)
6047 # 1st pass, assemble on all nodes in secondary mode
6048 for idx, inst_disk in enumerate(disks):
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
6053 lu.cfg.SetDiskID(node_disk, node)
6054 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False
6063 # FIXME: race condition on drbd migration to primary
6065 # 2nd pass, do only the primary node
  for idx, inst_disk in enumerate(disks):
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
6074 node_disk.UnsetSize()
6075 lu.cfg.SetDiskID(node_disk, node)
6076 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload
6086 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6088 # leave the disks configured for the primary node
6089 # this is a workaround that would be fixed better by
6090 # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)
6094 return disks_ok, device_info
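# Illustrative usage sketch (not executed here; names assumed from the
# surrounding code): callers check disks_ok and then use the
# (node, iv_name, dev_path) triples, e.g.:
#
#   disks_ok, device_info = _AssembleInstanceDisks(lu, instance)
#   if not disks_ok:
#     raise errors.OpExecError("Cannot activate block devices")
#   for node, iv_name, dev_path in device_info:
#     feedback_fn("%s on %s is visible as %s" % (iv_name, node, dev_path))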
6097 def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
6101 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                           ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
6105 if force is not None and not force:
    lu.proc.LogWarning("", hint="If the message above refers to a"
                       " secondary node,"
                       " you can retry the operation using '--force'.")
6109 raise errors.OpExecError("Disk consistency error")
6112 class LUInstanceDeactivateDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  REQ_BGL = False
6118 def ExpandNames(self):
6119 self._ExpandAndLockInstance()
6120 self.needed_locks[locking.LEVEL_NODE] = []
6121 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6123 def DeclareLocks(self, level):
6124 if level == locking.LEVEL_NODE:
6125 self._LockInstancesNodes()
6127 def CheckPrereq(self):
6128 """Check prerequisites.
    This checks that the instance is in the cluster.

    """
6133 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6134 assert self.instance is not None, \
6135 "Cannot retrieve locked instance %s" % self.op.instance_name
6137 def Exec(self, feedback_fn):
    """Deactivate the disks.

    """
    instance = self.instance
    if self.op.force:
      _ShutdownInstanceDisks(self, instance)
    else:
      _SafeShutdownInstanceDisks(self, instance)
6148 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6149 """Shutdown block devices of an instance.
6151 This function checks if an instance is running, before calling
  _ShutdownInstanceDisks.

  """
6155 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6156 _ShutdownInstanceDisks(lu, instance, disks=disks)
6159 def _ExpandCheckDisks(instance, disks):
6160 """Return the instance disks selected by the disks list
6162 @type disks: list of L{objects.Disk} or None
6163 @param disks: selected disks
6164 @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on

  """
  if disks is None:
    return instance.disks
  else:
    if not set(disks).issubset(instance.disks):
      raise errors.ProgrammerError("Can only act on disks belonging to the"
                                   " target instance")
    return disks
6177 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6178 """Shutdown block devices of an instance.
6180 This does the shutdown on all nodes of the instance.
  If the ignore_primary is false, errors on the primary node are
  ignored.

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
6190 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6191 lu.cfg.SetDiskID(top_disk, node)
6192 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if ((node == instance.primary_node and not ignore_primary) or
            (node != instance.primary_node and not result.offline)):
          all_result = False
  return all_result
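# Illustrative note (not executed here): with ignore_primary=True a failed
# shutdown on the primary node does not flip the overall result, e.g.:
#
#   if not _ShutdownInstanceDisks(lu, instance, ignore_primary=True):
#     lu.LogWarning("Some block devices failed to shut down cleanly")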
6203 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6204 """Checks if a node has enough free memory.
  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.
6211 @type lu: C{LogicalUnit}
6212 @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
6215 @type reason: C{str}
6216 @param reason: string to use in the error message
6217 @type requested: C{int}
6218 @param requested: the amount of memory in MiB to check for
6219 @type hypervisor_name: C{str}
6220 @param hypervisor_name: the hypervisor to ask for memory stats
6221 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6222 we cannot check the node
6225 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6226 nodeinfo[node].Raise("Can't get data from node %s" % node,
6227 prereq=True, ecode=errors.ECODE_ENVIRON)
6228 (_, _, (hv_info, )) = nodeinfo[node].payload
6230 free_mem = hv_info.get("memory_free", None)
6231 if not isinstance(free_mem, int):
6232 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6233 " was '%s'" % (node, free_mem),
6234 errors.ECODE_ENVIRON)
6235 if requested > free_mem:
6236 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6237 " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)
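# Illustrative sketch (layout assumed from the unpacking above): the
# call_node_info payload carries (bootid, vg_info list, hv_info list), and
# asking for a single hypervisor yields a one-element hv_info list:
#
#   (_, _, (hv_info, )) = nodeinfo["node1.example.com"].payload
#   free_mem = hv_info.get("memory_free", None)   # MiB, may be missing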
6242 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6243 """Checks if nodes have enough free disk space in the all VGs.
  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.
6250 @type lu: C{LogicalUnit}
6251 @param lu: a logical unit from which we get configuration data
6252 @type nodenames: C{list}
6253 @param nodenames: the list of node names to check
6254 @type req_sizes: C{dict}
  @param req_sizes: the hash of vg and corresponding amount of disk in
      MiB to check for
6257 @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
6261 for vg, req_size in req_sizes.items():
6262 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
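# Illustrative usage sketch (hypothetical values): req_sizes maps volume
# groups to required space in MiB, so checking 10 GiB on "xenvg" on two
# nodes would be:
#
#   _CheckNodesFreeDiskPerVG(lu, ["node1", "node2"], {"xenvg": 10240})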
6265 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6266 """Checks if nodes have enough free disk space in the specified VG.
  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.
6273 @type lu: C{LogicalUnit}
6274 @param lu: a logical unit from which we get configuration data
6275 @type nodenames: C{list}
6276 @param nodenames: the list of node names to check
  @type vg: C{str}
  @param vg: the volume group to check
6279 @type requested: C{int}
6280 @param requested: the amount of disk in MiB to check for
6281 @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
6285 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6286 for node in nodenames:
6287 info = nodeinfo[node]
6288 info.Raise("Cannot get current information from node %s" % node,
6289 prereq=True, ecode=errors.ECODE_ENVIRON)
6290 (_, (vg_info, ), _) = info.payload
6291 vg_free = vg_info.get("vg_free", None)
6292 if not isinstance(vg_free, int):
6293 raise errors.OpPrereqError("Can't compute free disk space on node"
6294 " %s for vg %s, result was '%s'" %
6295 (node, vg, vg_free), errors.ECODE_ENVIRON)
6296 if requested > vg_free:
6297 raise errors.OpPrereqError("Not enough disk space on target node %s"
6298 " vg %s: required %d MiB, available %d MiB" %
                                 (node, vg, requested, vg_free),
                                 errors.ECODE_NORES)
6303 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6304 """Checks if nodes have enough physical CPUs
6306 This function checks if all given nodes have the needed number of
6307 physical CPUs. In case any node has less CPUs or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.
6311 @type lu: C{LogicalUnit}
6312 @param lu: a logical unit from which we get configuration data
6313 @type nodenames: C{list}
6314 @param nodenames: the list of node names to check
6315 @type requested: C{int}
6316 @param requested: the minimum acceptable number of physical CPUs
6317 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
      or we cannot check the node

  """
6321 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6322 for node in nodenames:
6323 info = nodeinfo[node]
6324 info.Raise("Cannot get current information from node %s" % node,
6325 prereq=True, ecode=errors.ECODE_ENVIRON)
6326 (_, _, (hv_info, )) = info.payload
6327 num_cpus = hv_info.get("cpu_total", None)
6328 if not isinstance(num_cpus, int):
6329 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6330 " on node %s, result was '%s'" %
6331 (node, num_cpus), errors.ECODE_ENVIRON)
6332 if requested > num_cpus:
      raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
                                 "required" % (node, num_cpus, requested),
                                 errors.ECODE_NORES)
6338 class LUInstanceStartup(LogicalUnit):
  """Starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False
6346 def CheckArguments(self):
6348 if self.op.beparams:
6349 # fill the beparams dict
6350 objects.UpgradeBeParams(self.op.beparams)
6351 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6353 def ExpandNames(self):
6354 self._ExpandAndLockInstance()
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "FORCE": self.op.force,
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env
6370 def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)
6377 def CheckPrereq(self):
6378 """Check prerequisites.
    This checks that the instance is in the cluster.

    """
6383 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6384 assert self.instance is not None, \
6385 "Cannot retrieve locked instance %s" % self.op.instance_name
6388 if self.op.hvparams:
6389 # check hypervisor parameter syntax (locally)
6390 cluster = self.cfg.GetClusterInfo()
6391 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6392 filled_hvp = cluster.FillHV(instance)
6393 filled_hvp.update(self.op.hvparams)
6394 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6395 hv_type.CheckParameterSyntax(filled_hvp)
6396 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6398 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6400 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")

      if self.op.hvparams or self.op.beparams:
        self.proc.LogWarning("Overridden parameters are ignored")
    else:
      _CheckNodeOnline(self, instance.primary_node)
6410 bep = self.cfg.GetClusterInfo().FillBE(instance)
6412 # check bridges existence
6413 _CheckInstanceBridgesExist(self, instance)
    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
6418 remote_info.Raise("Error checking node %s" % instance.primary_node,
6419 prereq=True, ecode=errors.ECODE_ENVIRON)
6420 if not remote_info.payload: # not running already
6421 _CheckNodeFreeMemory(self, instance.primary_node,
6422 "starting instance %s" % instance.name,
6423 bep[constants.BE_MAXMEM], instance.hypervisor)
6425 def Exec(self, feedback_fn):
    """Start the instance.

    """
6429 instance = self.instance
6430 force = self.op.force
6432 if not self.op.no_remember:
6433 self.cfg.MarkInstanceUp(instance.name)
6435 if self.primary_offline:
6436 assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as started")
    else:
      node_current = instance.primary_node
6441 _StartInstanceDisks(self, instance, force)
      result = \
        self.rpc.call_instance_start(node_current,
                                     (instance, self.op.hvparams,
                                      self.op.beparams),
                                     self.op.startup_paused)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance: %s" % msg)
6454 class LUInstanceReboot(LogicalUnit):
  """Reboot an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False
6462 def ExpandNames(self):
6463 self._ExpandAndLockInstance()
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env
6481 def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)
6488 def CheckPrereq(self):
6489 """Check prerequisites.
    This checks that the instance is in the cluster.

    """
6494 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6495 assert self.instance is not None, \
6496 "Cannot retrieve locked instance %s" % self.op.instance_name
6497 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6498 _CheckNodeOnline(self, instance.primary_node)
6500 # check bridges existence
6501 _CheckInstanceBridgesExist(self, instance)
6503 def Exec(self, feedback_fn):
    """Reboot the instance.

    """
6507 instance = self.instance
6508 ignore_secondaries = self.op.ignore_secondaries
6509 reboot_type = self.op.reboot_type
    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
6514 remote_info.Raise("Error checking node %s" % instance.primary_node)
6515 instance_running = bool(remote_info.payload)
6517 node_current = instance.primary_node
6519 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6520 constants.INSTANCE_REBOOT_HARD]:
6521 for disk in instance.disks:
6522 self.cfg.SetDiskID(disk, node_current)
      result = self.rpc.call_instance_reboot(node_current, instance,
                                             reboot_type,
                                             self.op.shutdown_timeout)
6526 result.Raise("Could not reboot instance")
    else:
      if instance_running:
        result = self.rpc.call_instance_shutdown(node_current, instance,
                                                 self.op.shutdown_timeout)
        result.Raise("Could not shutdown instance for full reboot")
        _ShutdownInstanceDisks(self, instance)
      else:
        self.LogInfo("Instance %s was already stopped, starting now",
                     instance.name)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(node_current,
                                            (instance, None, None), False)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)
6545 self.cfg.MarkInstanceUp(instance.name)
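  # Illustrative note (derived from Exec above): INSTANCE_REBOOT_SOFT and
  # INSTANCE_REBOOT_HARD on a running instance go through the single
  # call_instance_reboot RPC, while any other (full) reboot shuts the
  # instance down, deactivates its disks and starts it again from scratch.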
6548 class LUInstanceShutdown(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False
6556 def ExpandNames(self):
6557 self._ExpandAndLockInstance()
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.op.timeout
    return env
6569 def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)
6576 def CheckPrereq(self):
6577 """Check prerequisites.
    This checks that the instance is in the cluster.

    """
6582 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6583 assert self.instance is not None, \
6584 "Cannot retrieve locked instance %s" % self.op.instance_name
6586 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6588 self.primary_offline = \
6589 self.cfg.GetNodeInfo(self.instance.primary_node).offline
    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")
    else:
      _CheckNodeOnline(self, self.instance.primary_node)
6596 def Exec(self, feedback_fn):
    """Shutdown the instance.

    """
6600 instance = self.instance
6601 node_current = instance.primary_node
6602 timeout = self.op.timeout
6604 if not self.op.no_remember:
6605 self.cfg.MarkInstanceDown(instance.name)
6607 if self.primary_offline:
6608 assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
      msg = result.fail_msg
      if msg:
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)

      _ShutdownInstanceDisks(self, instance)
6619 class LUInstanceReinstall(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False
6627 def ExpandNames(self):
6628 self._ExpandAndLockInstance()
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)
6638 def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)
6645 def CheckPrereq(self):
6646 """Check prerequisites.
    This checks that the instance is in the cluster and is not running.

    """
6651 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6652 assert instance is not None, \
6653 "Cannot retrieve locked instance %s" % self.op.instance_name
6654 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6655 " offline, cannot reinstall")
6656 for node in instance.secondary_nodes:
6657 _CheckNodeOnline(self, node, "Instance secondary node offline,"
6658 " cannot reinstall")
6660 if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
6666 if self.op.os_type is not None:
6668 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6669 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
      instance_os = self.op.os_type
    else:
      instance_os = instance.os
6674 nodelist = list(instance.all_nodes)
6676 if self.op.osparams:
6677 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6678 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = {}

    self.instance = instance
6685 def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
6689 inst = self.instance
6691 if self.op.os_type is not None:
6692 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6693 inst.os = self.op.os_type
6694 # Write to configuration
6695 self.cfg.Update(inst, feedback_fn)
    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      result = self.rpc.call_instance_os_add(inst.primary_node,
                                             (inst, self.os_inst), True,
                                             self.op.debug_level)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)
6710 class LUInstanceRecreateDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False
6718 def CheckArguments(self):
6719 # normalise the disk list
6720 self.op.disks = sorted(frozenset(self.op.disks))
6722 def ExpandNames(self):
6723 self._ExpandAndLockInstance()
6724 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    if self.op.nodes:
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = []
6731 def DeclareLocks(self, level):
6732 if level == locking.LEVEL_NODE:
6733 # if we replace the nodes, we only need to lock the old primary,
6734 # otherwise we need to lock all nodes for disk re-creation
6735 primary_only = bool(self.op.nodes)
6736 self._LockInstancesNodes(primary_only=primary_only)
6737 elif level == locking.LEVEL_NODE_RES:
6739 self.needed_locks[locking.LEVEL_NODE_RES] = \
6740 self.needed_locks[locking.LEVEL_NODE][:]
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)
6750 def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)
6757 def CheckPrereq(self):
6758 """Check prerequisites.
    This checks that the instance is in the cluster and is not running.

    """
6763 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6764 assert instance is not None, \
6765 "Cannot retrieve locked instance %s" % self.op.instance_name
    if self.op.nodes:
      if len(self.op.nodes) != len(instance.all_nodes):
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
                                   " %d replacement nodes were specified" %
                                   (instance.name, len(instance.all_nodes),
                                    len(self.op.nodes)),
                                   errors.ECODE_INVAL)
      assert instance.disk_template != constants.DT_DRBD8 or \
          len(self.op.nodes) == 2
      assert instance.disk_template != constants.DT_PLAIN or \
          len(self.op.nodes) == 1
      primary_node = self.op.nodes[0]
    else:
      primary_node = instance.primary_node
6780 _CheckNodeOnline(self, primary_node)
6782 if instance.disk_template == constants.DT_DISKLESS:
6783 raise errors.OpPrereqError("Instance '%s' has no disks" %
6784 self.op.instance_name, errors.ECODE_INVAL)
    # if we replace nodes *and* the old primary is offline, we don't
    # check the instance state
6787 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
6788 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
6789 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6790 if not (self.op.nodes and old_pnode.offline):
6791 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
6792 msg="cannot recreate disks")
6794 if not self.op.disks:
      self.op.disks = range(len(instance.disks))
    else:
      for idx in self.op.disks:
        if idx >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
                                     errors.ECODE_INVAL)
      if self.op.disks != range(len(instance.disks)) and self.op.nodes:
        raise errors.OpPrereqError("Can't recreate disks partially and"
                                   " change the nodes at the same time",
                                   errors.ECODE_INVAL)
    self.instance = instance
6807 def Exec(self, feedback_fn):
    """Recreate the disks.

    """
6811 instance = self.instance
6813 assert (self.owned_locks(locking.LEVEL_NODE) ==
6814 self.owned_locks(locking.LEVEL_NODE_RES))
    to_skip = []
    mods = [] # keeps track of needed logical_id changes
6819 for idx, disk in enumerate(instance.disks):
      if idx not in self.op.disks: # disk idx has not been passed in
        to_skip.append(idx)
        continue
      # update secondaries for disks, if needed
      if self.op.nodes:
        if disk.dev_type == constants.LD_DRBD8:
6826 # need to update the nodes and minors
6827 assert len(self.op.nodes) == 2
6828 assert len(disk.logical_id) == 6 # otherwise disk internals
6830 (_, _, old_port, _, _, old_secret) = disk.logical_id
6831 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6832 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6833 new_minors[0], new_minors[1], old_secret)
6834 assert len(disk.logical_id) == len(new_id)
6835 mods.append((idx, new_id))
6837 # now that we have passed all asserts above, we can apply the mods
6838 # in a single run (to avoid partial changes)
6839 for idx, new_id in mods:
6840 instance.disks[idx].logical_id = new_id
    # change primary node, if needed
    if self.op.nodes:
      instance.primary_node = self.op.nodes[0]
6845 self.LogWarning("Changing the instance's nodes, you will have to"
6846 " remove any disks left on the older nodes manually")
6849 self.cfg.Update(instance, feedback_fn)
6851 _CreateDisks(self, instance, to_skip=to_skip)
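# Illustrative sketch (hypothetical values): a DRBD8 logical_id is the
# 6-tuple (nodeA, nodeB, port, minorA, minorB, secret); recreating on new
# nodes keeps port and secret but swaps nodes and minors, e.g.:
#
#   ("node1", "node2", 11000, 0, 1, "s3cr3t")
#     -> ("node3", "node4", 11000, 2, 3, "s3cr3t")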
6854 class LUInstanceRename(LogicalUnit):
  """Rename an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE
  def CheckArguments(self):
    """Check arguments.

    """
6865 if self.op.ip_check and not self.op.name_check:
6866 # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("IP address check requires a name check",
                                 errors.ECODE_INVAL)
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    return env
6880 def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)
6887 def CheckPrereq(self):
6888 """Check prerequisites.
    This checks that the instance is in the cluster and is not running.

    """
6893 self.op.instance_name = _ExpandInstanceName(self.cfg,
6894 self.op.instance_name)
6895 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6896 assert instance is not None
6897 _CheckNodeOnline(self, instance.primary_node)
6898 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
6899 msg="cannot rename")
6900 self.instance = instance
6902 new_name = self.op.new_name
6903 if self.op.name_check:
6904 hostname = netutils.GetHostname(name=new_name)
6905 if hostname.name != new_name:
        self.LogInfo("Resolved given name '%s' to '%s'", new_name,
                     hostname.name)
6908 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6909 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6910 " same as given hostname '%s'") %
                                   (hostname.name, self.op.new_name),
                                   errors.ECODE_INVAL)
6913 new_name = self.op.new_name = hostname.name
6914 if (self.op.ip_check and
6915 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6916 raise errors.OpPrereqError("IP %s of instance %s already in use" %
6917 (hostname.ip, new_name),
6918 errors.ECODE_NOTUNIQUE)
6920 instance_list = self.cfg.GetInstanceList()
6921 if new_name in instance_list and new_name != instance.name:
6922 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6923 new_name, errors.ECODE_EXISTS)
6925 def Exec(self, feedback_fn):
    """Rename the instance.

    """
6929 inst = self.instance
6930 old_name = inst.name
6932 rename_file_storage = False
6933 if (inst.disk_template in constants.DTS_FILEBASED and
6934 self.op.new_name != inst.name):
6935 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6936 rename_file_storage = True
6938 self.cfg.RenameInstance(inst.name, self.op.new_name)
6939 # Change the instance lock. This is definitely safe while we hold the BGL.
6940 # Otherwise the new lock would have to be added in acquired mode.
6942 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6943 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6945 # re-read the instance from the configuration after rename
6946 inst = self.cfg.GetInstanceInfo(self.op.new_name)
6948 if rename_file_storage:
6949 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6950 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6951 old_file_storage_dir,
6952 new_file_storage_dir)
6953 result.Raise("Could not rename on node %s directory '%s' to '%s'"
6954 " (but the instance has been renamed in Ganeti)" %
6955 (inst.primary_node, old_file_storage_dir,
6956 new_file_storage_dir))
    _StartInstanceDisks(self, inst, None)
    try:
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                 old_name, self.op.debug_level)
      msg = result.fail_msg
      if msg:
        msg = ("Could not run OS rename script for instance %s on node %s"
               " (but the instance has been renamed in Ganeti): %s" %
               (inst.name, inst.primary_node, msg))
        self.proc.LogWarning(msg)
    finally:
      _ShutdownInstanceDisks(self, inst)

    return inst.name
6974 class LUInstanceRemove(LogicalUnit):
  """Remove an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False
6982 def ExpandNames(self):
6983 self._ExpandAndLockInstance()
6984 self.needed_locks[locking.LEVEL_NODE] = []
6985 self.needed_locks[locking.LEVEL_NODE_RES] = []
6986 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6988 def DeclareLocks(self, level):
6989 if level == locking.LEVEL_NODE:
6990 self._LockInstancesNodes()
6991 elif level == locking.LEVEL_NODE_RES:
6993 self.needed_locks[locking.LEVEL_NODE_RES] = \
6994 self.needed_locks[locking.LEVEL_NODE][:]
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
    return env
7006 def BuildHooksNodes(self):
    """Build hooks nodes.

    """
7010 nl = [self.cfg.GetMasterNode()]
7011 nl_post = list(self.instance.all_nodes) + nl
7012 return (nl, nl_post)
7014 def CheckPrereq(self):
7015 """Check prerequisites.
    This checks that the instance is in the cluster.

    """
7020 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7021 assert self.instance is not None, \
7022 "Cannot retrieve locked instance %s" % self.op.instance_name
7024 def Exec(self, feedback_fn):
    """Remove the instance.

    """
7028 instance = self.instance
7029 logging.info("Shutting down instance %s on node %s",
7030 instance.name, instance.primary_node)
7032 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7033 self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_failures:
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, instance.primary_node, msg))
7043 assert (self.owned_locks(locking.LEVEL_NODE) ==
7044 self.owned_locks(locking.LEVEL_NODE_RES))
7045 assert not (set(instance.all_nodes) -
7046 self.owned_locks(locking.LEVEL_NODE)), \
7047 "Not owning correct locks"
7049 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7052 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
  """Utility function to remove an instance.

  """
7056 logging.info("Removing block devices for instance %s", instance.name)
7058 if not _RemoveDisks(lu, instance):
7059 if not ignore_failures:
7060 raise errors.OpExecError("Can't remove instance's disks")
7061 feedback_fn("Warning: can't remove instance's disks")
7063 logging.info("Removing instance %s out of cluster config", instance.name)
7065 lu.cfg.RemoveInstance(instance.name)
7067 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7068 "Instance lock removal conflict"
7070 # Remove lock for the instance
7071 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7074 class LUInstanceQuery(NoHooksLU):
  """Logical unit for querying instances.

  """
  # pylint: disable=W0142
  REQ_BGL = False
7081 def CheckArguments(self):
7082 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7083 self.op.output_fields, self.op.use_locking)
7085 def ExpandNames(self):
7086 self.iq.ExpandNames(self)
7088 def DeclareLocks(self, level):
7089 self.iq.DeclareLocks(self, level)
7091 def Exec(self, feedback_fn):
7092 return self.iq.OldStyleQuery(self)
7095 class LUInstanceFailover(LogicalUnit):
  """Failover an instance.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False
7103 def CheckArguments(self):
    """Check the arguments.

    """
7107 self.iallocator = getattr(self.op, "iallocator", None)
7108 self.target_node = getattr(self.op, "target_node", None)
7110 def ExpandNames(self):
7111 self._ExpandAndLockInstance()
7113 if self.op.target_node is not None:
7114 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7116 self.needed_locks[locking.LEVEL_NODE] = []
7117 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7119 ignore_consistency = self.op.ignore_consistency
7120 shutdown_timeout = self.op.shutdown_timeout
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       cleanup=False,
                                       failover=True,
                                       ignore_consistency=ignore_consistency,
7125 shutdown_timeout=shutdown_timeout)
7126 self.tasklets = [self._migrater]
7128 def DeclareLocks(self, level):
7129 if level == locking.LEVEL_NODE:
7130 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7131 if instance.disk_template in constants.DTS_EXT_MIRROR:
7132 if self.op.target_node is None:
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        else:
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                                   self.op.target_node]
        del self.recalculate_locks[locking.LEVEL_NODE]
      else:
        self._LockInstancesNodes()
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = self.op.target_node
    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      }

    if instance.disk_template in constants.DTS_INT_MIRROR:
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
      env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""

    env.update(_BuildInstanceHookEnvByObject(self, instance))

    return env
7167 def BuildHooksNodes(self):
    """Build hooks nodes.

    """
7171 instance = self._migrater.instance
7172 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7173 return (nl, nl + [instance.primary_node])
7176 class LUInstanceMigrate(LogicalUnit):
7177 """Migrate an instance.
7179 This is migration without shutting down, compared to the failover,
  which is done with shutdown.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False
7187 def ExpandNames(self):
7188 self._ExpandAndLockInstance()
7190 if self.op.target_node is not None:
7191 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7193 self.needed_locks[locking.LEVEL_NODE] = []
7194 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7196 self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       cleanup=self.op.cleanup,
                                       failover=False,
                                       fallback=self.op.allow_failover)
7200 self.tasklets = [self._migrater]
7202 def DeclareLocks(self, level):
7203 if level == locking.LEVEL_NODE:
7204 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7205 if instance.disk_template in constants.DTS_EXT_MIRROR:
7206 if self.op.target_node is None:
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        else:
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                                   self.op.target_node]
        del self.recalculate_locks[locking.LEVEL_NODE]
      else:
        self._LockInstancesNodes()
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = self.op.target_node
    env = _BuildInstanceHookEnvByObject(self, instance)
    env.update({
      "MIGRATE_LIVE": self._migrater.live,
      "MIGRATE_CLEANUP": self.op.cleanup,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      })

    if instance.disk_template in constants.DTS_INT_MIRROR:
      env["OLD_SECONDARY"] = target_node
      env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None

    return env
7240 def BuildHooksNodes(self):
    """Build hooks nodes.

    """
7244 instance = self._migrater.instance
7245 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7246 return (nl, nl + [instance.primary_node])
7249 class LUInstanceMove(LogicalUnit):
  """Move an instance by data-copying.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False
7257 def ExpandNames(self):
7258 self._ExpandAndLockInstance()
7259 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7260 self.op.target_node = target_node
7261 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7262 self.needed_locks[locking.LEVEL_NODE_RES] = []
7263 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7265 def DeclareLocks(self, level):
7266 if level == locking.LEVEL_NODE:
7267 self._LockInstancesNodes(primary_only=True)
7268 elif level == locking.LEVEL_NODE_RES:
7270 self.needed_locks[locking.LEVEL_NODE_RES] = \
7271 self.needed_locks[locking.LEVEL_NODE][:]
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env
7286 def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [
      self.cfg.GetMasterNode(),
      self.instance.primary_node,
      self.op.target_node,
      ]
    return (nl, nl)
7297 def CheckPrereq(self):
7298 """Check prerequisites.
    This checks that the instance is in the cluster.

    """
7303 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7304 assert self.instance is not None, \
7305 "Cannot retrieve locked instance %s" % self.op.instance_name
7307 node = self.cfg.GetNodeInfo(self.op.target_node)
7308 assert node is not None, \
7309 "Cannot retrieve locked node %s" % self.op.target_node
7311 self.target_node = target_node = node.name
7313 if target_node == instance.primary_node:
7314 raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),
                                 errors.ECODE_STATE)
7318 bep = self.cfg.GetClusterInfo().FillBE(instance)
7320 for idx, dsk in enumerate(instance.disks):
7321 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7322 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7323 " cannot copy" % idx, errors.ECODE_STATE)
7325 _CheckNodeOnline(self, target_node)
7326 _CheckNodeNotDrained(self, target_node)
7327 _CheckNodeVmCapable(self, target_node)
7329 if instance.admin_state == constants.ADMINST_UP:
7330 # check memory requirements on the secondary node
7331 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7332 instance.name, bep[constants.BE_MAXMEM],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
7336 " instance will not be started")
    # check bridge existence
7339 _CheckInstanceBridgesExist(self, instance, node=target_node)
7341 def Exec(self, feedback_fn):
7342 """Move an instance.
7344 The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    """
7348 instance = self.instance
7350 source_node = instance.primary_node
7351 target_node = self.target_node
7353 self.LogInfo("Shutting down instance %s on source node %s",
7354 instance.name, source_node)
7356 assert (self.owned_locks(locking.LEVEL_NODE) ==
7357 self.owned_locks(locking.LEVEL_NODE_RES))
7359 result = self.rpc.call_instance_shutdown(source_node, instance,
7360 self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))
    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise
    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
7387 # activate, get path, copy the data over
7388 for idx, disk in enumerate(instance.disks):
7389 self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, disk,
                                               instance.name, True, idx)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, disk,
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break
    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))
7416 instance.primary_node = target_node
7417 self.cfg.Update(instance, feedback_fn)
7419 self.LogInfo("Removing the disks on the original node")
7420 _RemoveDisks(self, instance, target_node=source_node)
7422 # Only start the instance if it's marked as up
7423 if instance.admin_state == constants.ADMINST_UP:
7424 self.LogInfo("Starting instance %s on node %s",
7425 instance.name, target_node)
7427 disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")
7433 result = self.rpc.call_instance_start(target_node,
7434 (instance, None, None), False)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))
7442 class LUNodeMigrate(LogicalUnit):
  """Migrate all instances from a node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False
  def CheckArguments(self):
    pass
7453 def ExpandNames(self):
7454 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7456 self.share_locks = _ShareAll()
7457 self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    return {
      "NODE_NAME": self.op.node_name,
      }
7471 def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()]
    return (nl, nl)
  def CheckPrereq(self):
    pass
7481 def Exec(self, feedback_fn):
    # Prepare jobs for migration instances
    jobs = [
      [opcodes.OpInstanceMigrate(instance_name=inst.name,
                                 mode=self.op.mode,
                                 live=self.op.live,
                                 iallocator=self.op.iallocator,
                                 target_node=self.op.target_node)]
      for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
      ]
7492 # TODO: Run iallocator in this opcode and pass correct placement options to
7493 # OpInstanceMigrate. Since other jobs can modify the cluster between
7494 # running the iallocator and the actual migration, a good consistency model
7495 # will have to be found.
7497 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7498 frozenset([self.op.node_name]))
7500 return ResultWithJobs(jobs)
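# Illustrative sketch (hypothetical names): jobs is a list of single-opcode
# jobs, so migrating two primary instances submits two independent jobs:
#
#   jobs = [[opcodes.OpInstanceMigrate(instance_name="inst1")],
#           [opcodes.OpInstanceMigrate(instance_name="inst2")]]
#   return ResultWithJobs(jobs)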
7503 class TLMigrateInstance(Tasklet):
  """Tasklet class for instance migration.

  @type live: boolean
  @ivar live: whether the migration will be done live or non-live;
    this variable is initialized only after CheckPrereq has run
  @type cleanup: boolean
  @ivar cleanup: Whether we cleanup from a failed migration
7511 @type iallocator: string
7512 @ivar iallocator: The iallocator used to determine target_node
7513 @type target_node: string
7514 @ivar target_node: If given, the target_node to reallocate the instance to
7515 @type failover: boolean
7516 @ivar failover: Whether operation results in failover or migration
7517 @type fallback: boolean
  @ivar fallback: Whether fallback to failover is allowed if migration not
    possible
  @type ignore_consistency: boolean
  @ivar ignore_consistency: Whether we should ignore consistency between source
    and target node
  @type shutdown_timeout: int
  @ivar shutdown_timeout: In case of failover timeout of the shutdown

  """
7529 _MIGRATION_POLL_INTERVAL = 1 # seconds
7530 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7532 def __init__(self, lu, instance_name, cleanup=False,
7533 failover=False, fallback=False,
7534 ignore_consistency=False,
7535 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
    """Initializes this class.

    """
7539 Tasklet.__init__(self, lu)
7542 self.instance_name = instance_name
7543 self.cleanup = cleanup
7544 self.live = False # will be overridden later
7545 self.failover = failover
7546 self.fallback = fallback
7547 self.ignore_consistency = ignore_consistency
7548 self.shutdown_timeout = shutdown_timeout
7550 def CheckPrereq(self):
7551 """Check prerequisites.
    This checks that the instance is in the cluster.

    """
7556 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7557 instance = self.cfg.GetInstanceInfo(instance_name)
7558 assert instance is not None
7559 self.instance = instance
7561 if (not self.cleanup and
7562 instance.admin_state != constants.ADMINST_UP and
7563 not self.failover and self.fallback):
7564 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7565 " switching to failover")
7566 self.failover = True
7568 if instance.disk_template not in constants.DTS_MIRRORED:
7573 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7574 " %s" % (instance.disk_template, text),
7577 if instance.disk_template in constants.DTS_EXT_MIRROR:
7578 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7580 if self.lu.op.iallocator:
7581 self._RunAllocator()
7583 # We set self.target_node as it is required by
7585 self.target_node = self.lu.op.target_node
7587 # self.target_node is already populated, either directly or by the
7589 target_node = self.target_node
7590 if self.target_node == instance.primary_node:
7591 raise errors.OpPrereqError("Cannot migrate instance %s"
7592 " to its primary (%s)" %
7593 (instance.name, instance.primary_node))
7595 if len(self.lu.tasklets) == 1:
7596 # It is safe to release locks only when we're the only tasklet
7598 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7599 keep=[instance.primary_node, self.target_node])
7602 secondary_nodes = instance.secondary_nodes
7603 if not secondary_nodes:
7604 raise errors.ConfigurationError("No secondary node but using"
7605 " %s disk template" %
7606 instance.disk_template)
7607 target_node = secondary_nodes[0]
7608 if self.lu.op.iallocator or (self.lu.op.target_node and
7609 self.lu.op.target_node != target_node):
7611 text = "failed over"
7614 raise errors.OpPrereqError("Instances with disk template %s cannot"
7615 " be %s to arbitrary nodes"
7616 " (neither an iallocator nor a target"
7617 " node can be passed)" %
7618 (instance.disk_template, text),
7621 i_be = self.cfg.GetClusterInfo().FillBE(instance)
7623 # check memory requirements on the secondary node
7624 if not self.failover or instance.admin_state == constants.ADMINST_UP:
7625 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7626 instance.name, i_be[constants.BE_MAXMEM],
7627 instance.hypervisor)
7629 self.lu.LogInfo("Not checking memory on the secondary node as"
7630 " instance will not be started")
7632 # check if failover must be forced instead of migration
7633 if (not self.cleanup and not self.failover and
7634 i_be[constants.BE_ALWAYS_FAILOVER]):
7636 self.lu.LogInfo("Instance configured to always failover; fallback"
7638 self.failover = True
7640 raise errors.OpPrereqError("This instance has been configured to"
7641 " always failover, please allow failover",
7644 # check bridge existence
7645 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7647 if not self.cleanup:
7648 _CheckNodeNotDrained(self.lu, target_node)
7649 if not self.failover:
7650 result = self.rpc.call_instance_migratable(instance.primary_node,
7652 if result.fail_msg and self.fallback:
7653 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7655 self.failover = True
7657 result.Raise("Can't migrate, please use failover",
7658 prereq=True, ecode=errors.ECODE_STATE)
7660 assert not (self.failover and self.cleanup)
7662 if not self.failover:
7663 if self.lu.op.live is not None and self.lu.op.mode is not None:
7664 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7665 " parameters are accepted",
7667 if self.lu.op.live is not None:
7669 self.lu.op.mode = constants.HT_MIGRATION_LIVE
7671 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7672 # reset the 'live' parameter to None so that repeated
7673 # invocations of CheckPrereq do not raise an exception
7674 self.lu.op.live = None
7675 elif self.lu.op.mode is None:
7676 # read the default value from the hypervisor
7677 i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7679 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7681 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7683 # Failover is never live
7686 def _RunAllocator(self):
7687 """Run the allocator based on input opcode.
7690 ial = IAllocator(self.cfg, self.rpc,
7691 mode=constants.IALLOCATOR_MODE_RELOC,
7692 name=self.instance_name,
7693 # TODO See why hail breaks with a single node below
7694 relocate_from=[self.instance.primary_node,
7695 self.instance.primary_node],
7698 ial.Run(self.lu.op.iallocator)
7701 raise errors.OpPrereqError("Can't compute nodes using"
7702 " iallocator '%s': %s" %
7703 (self.lu.op.iallocator, ial.info),
7705 if len(ial.result) != ial.required_nodes:
7706 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7707 " of nodes (%s), required %s" %
7708 (self.lu.op.iallocator, len(ial.result),
7709 ial.required_nodes), errors.ECODE_FAULT)
7710 self.target_node = ial.result[0]
7711 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7712 self.instance_name, self.lu.op.iallocator,
7713 utils.CommaJoin(ial.result))
7715 def _WaitUntilSync(self):
7716 """Poll with custom rpc for disk sync.
7718 This uses our own step-based rpc call.
7721 self.feedback_fn("* wait until resync is done")
7725 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7727 self.instance.disks)
7729 for node, nres in result.items():
7730 nres.Raise("Cannot resync disks on node %s" % node)
7731 node_done, node_percent = nres.payload
7732 all_done = all_done and node_done
7733 if node_percent is not None:
7734 min_percent = min(min_percent, node_percent)
7736 if min_percent < 100:
7737 self.feedback_fn(" - progress: %.1f%%" % min_percent)
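# A minimal sketch of the per-node aggregation in the loop above, with
# hypothetical (done, percent) payloads from two nodes:
#
#   payloads = [(True, 100.0), (False, 82.5)]
#   all_done = all(done for (done, _) in payloads)                # False
#   min_percent = min(pct for (_, pct) in payloads
#                     if pct is not None)                         # 82.5
#
# so the feedback always reports the slowest disk, and the wait only ends
# once every node reports completion.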
7740 def _EnsureSecondary(self, node):
7741 """Demote a node to secondary.
7744 self.feedback_fn("* switching node %s to secondary mode" % node)
7746 for dev in self.instance.disks:
7747 self.cfg.SetDiskID(dev, node)
7749 result = self.rpc.call_blockdev_close(node, self.instance.name,
7750 self.instance.disks)
7751 result.Raise("Cannot change disk to secondary on node %s" % node)
7753 def _GoStandalone(self):
7754 """Disconnect from the network.
7757 self.feedback_fn("* changing into standalone mode")
7758 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7759 self.instance.disks)
7760 for node, nres in result.items():
7761 nres.Raise("Cannot disconnect disks on node %s" % node)
7763 def _GoReconnect(self, multimaster):
7764 """Reconnect to the network.
7770 msg = "single-master"
7771 self.feedback_fn("* changing disks into %s mode" % msg)
7772 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7773 self.instance.disks,
7774 self.instance.name, multimaster)
7775 for node, nres in result.items():
7776 nres.Raise("Cannot change disks config on node %s" % node)
7778 def _ExecCleanup(self):
7779 """Try to cleanup after a failed migration.
7781 The cleanup is done by:
7782 - check that the instance is running only on one node
7783 (and update the config if needed)
7784 - change disks on its secondary node to secondary
7785 - wait until disks are fully synchronized
7786 - disconnect from the network
7787 - change disks into single-master mode
7788 - wait again until disks are fully synchronized
7791 instance = self.instance
7792 target_node = self.target_node
7793 source_node = self.source_node
7795 # check running on only one node
7796 self.feedback_fn("* checking where the instance actually runs"
7797 " (if this hangs, the hypervisor might be in"
7799 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7800 for node, result in ins_l.items():
7801 result.Raise("Can't contact node %s" % node)
7803 runningon_source = instance.name in ins_l[source_node].payload
7804 runningon_target = instance.name in ins_l[target_node].payload
7806 if runningon_source and runningon_target:
7807 raise errors.OpExecError("Instance seems to be running on two nodes,"
7808 " or the hypervisor is confused; you will have"
7809 " to ensure manually that it runs only on one"
7810 " and restart this operation")
7812 if not (runningon_source or runningon_target):
7813 raise errors.OpExecError("Instance does not seem to be running at all;"
7814 " in this case it's safer to repair by"
7815 " running 'gnt-instance stop' to ensure disk"
7816 " shutdown, and then restarting it")
7818 if runningon_target:
7819 # the migration has actually succeeded, we need to update the config
7820 self.feedback_fn("* instance running on secondary node (%s),"
7821 " updating config" % target_node)
7822 instance.primary_node = target_node
7823 self.cfg.Update(instance, self.feedback_fn)
7824 demoted_node = source_node
7826 self.feedback_fn("* instance confirmed to be running on its"
7827 " primary node (%s)" % source_node)
7828 demoted_node = target_node
7830 if instance.disk_template in constants.DTS_INT_MIRROR:
7831 self._EnsureSecondary(demoted_node)
7833 self._WaitUntilSync()
7834 except errors.OpExecError:
7835 # we ignore errors here, since if the device is standalone, it
7836 # won't be able to sync
7838 self._GoStandalone()
7839 self._GoReconnect(False)
7840 self._WaitUntilSync()
7842 self.feedback_fn("* done")
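# The checks above amount to the following decision table (a sketch, not
# exhaustive of the exact error texts):
#
#   runs on source | runs on target | action
#   ---------------+----------------+------------------------------------
#   yes            | yes            | abort; manual intervention required
#   no             | no             | abort; stop, then restart instance
#   no             | yes            | update config, demote source node
#   yes            | no             | config unchanged, demote target node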
7844 def _RevertDiskStatus(self):
7845 """Try to revert the disk status after a failed migration.
7848 target_node = self.target_node
7849 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7853 self._EnsureSecondary(target_node)
7854 self._GoStandalone()
7855 self._GoReconnect(False)
7856 self._WaitUntilSync()
7857 except errors.OpExecError, err:
7858 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7859 " please try to recover the instance manually;"
7860 " error '%s'" % str(err))
7862 def _AbortMigration(self):
7863 """Call the hypervisor code to abort a started migration.
7866 instance = self.instance
7867 target_node = self.target_node
7868 source_node = self.source_node
7869 migration_info = self.migration_info
7871 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
7875 abort_msg = abort_result.fail_msg
7877 logging.error("Aborting migration failed on target node %s: %s",
7878 target_node, abort_msg)
7879 # Don't raise an exception here, as we still have to try to revert the
7880 # disk status, even if this step failed.
7882 abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
7883 instance, False, self.live)
7884 abort_msg = abort_result.fail_msg
7886 logging.error("Aborting migration failed on source node %s: %s",
7887 source_node, abort_msg)
7889 def _ExecMigration(self):
7890 """Migrate an instance.
7892 The migrate is done by:
7893 - change the disks into dual-master mode
7894 - wait until disks are fully synchronized again
7895 - migrate the instance
7896 - change disks on the new secondary node (the old primary) to secondary
7897 - wait until disks are fully synchronized
7898 - change disks into single-master mode
7901 instance = self.instance
7902 target_node = self.target_node
7903 source_node = self.source_node
7905 # Check for hypervisor version mismatch and warn the user.
7906 nodeinfo = self.rpc.call_node_info([source_node, target_node],
7907 None, [self.instance.hypervisor])
7908 for ninfo in nodeinfo.values():
7909 ninfo.Raise("Unable to retrieve node information from node '%s'" %
7911 (_, _, (src_info, )) = nodeinfo[source_node].payload
7912 (_, _, (dst_info, )) = nodeinfo[target_node].payload
7914 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
7915 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
7916 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
7917 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
7918 if src_version != dst_version:
7919 self.feedback_fn("* warning: hypervisor version mismatch between"
7920 " source (%s) and target (%s) node" %
7921 (src_version, dst_version))
7923 self.feedback_fn("* checking disk consistency between source and target")
7924 for dev in instance.disks:
7925 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7926 raise errors.OpExecError("Disk %s is degraded or not fully"
7927 " synchronized on target node,"
7928 " aborting migration" % dev.iv_name)
7930 # First get the migration information from the remote node
7931 result = self.rpc.call_migration_info(source_node, instance)
7932 msg = result.fail_msg
7934 log_err = ("Failed fetching source migration information from %s: %s" %
7936 logging.error(log_err)
7937 raise errors.OpExecError(log_err)
7939 self.migration_info = migration_info = result.payload
7941 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7942 # Then switch the disks to master/master mode
7943 self._EnsureSecondary(target_node)
7944 self._GoStandalone()
7945 self._GoReconnect(True)
7946 self._WaitUntilSync()
7948 self.feedback_fn("* preparing %s to accept the instance" % target_node)
7949 result = self.rpc.call_accept_instance(target_node,
7952 self.nodes_ip[target_node])
7954 msg = result.fail_msg
7956 logging.error("Instance pre-migration failed, trying to revert"
7957 " disk status: %s", msg)
7958 self.feedback_fn("Pre-migration failed, aborting")
7959 self._AbortMigration()
7960 self._RevertDiskStatus()
7961 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7962 (instance.name, msg))
7964 self.feedback_fn("* migrating instance to %s" % target_node)
7965 result = self.rpc.call_instance_migrate(source_node, instance,
7966 self.nodes_ip[target_node],
7968 msg = result.fail_msg
7970 logging.error("Instance migration failed, trying to revert"
7971 " disk status: %s", msg)
7972 self.feedback_fn("Migration failed, aborting")
7973 self._AbortMigration()
7974 self._RevertDiskStatus()
7975 raise errors.OpExecError("Could not migrate instance %s: %s" %
7976 (instance.name, msg))
7978 self.feedback_fn("* starting memory transfer")
7979 last_feedback = time.time()
7981 result = self.rpc.call_instance_get_migration_status(source_node,
7983 msg = result.fail_msg
7984 ms = result.payload # MigrationStatus instance
7985 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
7986 logging.error("Instance migration failed, trying to revert"
7987 " disk status: %s", msg)
7988 self.feedback_fn("Migration failed, aborting")
7989 self._AbortMigration()
7990 self._RevertDiskStatus()
7991 raise errors.OpExecError("Could not migrate instance %s: %s" %
7992 (instance.name, msg))
7994 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
7995 self.feedback_fn("* memory transfer complete")
7998 if (utils.TimeoutExpired(last_feedback,
7999 self._MIGRATION_FEEDBACK_INTERVAL) and
8000 ms.transferred_ram is not None):
8001 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8002 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8003 last_feedback = time.time()
8005 time.sleep(self._MIGRATION_POLL_INTERVAL)
8007 result = self.rpc.call_instance_finalize_migration_src(source_node,
8011 msg = result.fail_msg
8013 logging.error("Instance migration succeeded, but finalization failed"
8014 " on the source node: %s", msg)
8015 raise errors.OpExecError("Could not finalize instance migration: %s" %
8018 instance.primary_node = target_node
8020 # distribute new instance config to the other nodes
8021 self.cfg.Update(instance, self.feedback_fn)
8023 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8027 msg = result.fail_msg
8029 logging.error("Instance migration succeeded, but finalization failed"
8030 " on the target node: %s", msg)
8031 raise errors.OpExecError("Could not finalize instance migration: %s" %
8034 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8035 self._EnsureSecondary(source_node)
8036 self._WaitUntilSync()
8037 self._GoStandalone()
8038 self._GoReconnect(False)
8039 self._WaitUntilSync()
8041 self.feedback_fn("* done")
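# A minimal sketch of the progress figure emitted during the memory
# transfer loop above, with hypothetical MigrationStatus numbers:
#
#   transferred_ram, total_ram = 768, 1024
#   mem_progress = 100 * float(transferred_ram) / float(total_ram)  # 75.0
#
# The loop polls every _MIGRATION_POLL_INTERVAL seconds but throttles the
# feedback to one message per _MIGRATION_FEEDBACK_INTERVAL seconds.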
8043 def _ExecFailover(self):
8044 """Failover an instance.
8046 The failover is done by shutting it down on its present node and
8047 starting it on the secondary.
8050 instance = self.instance
8051 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8053 source_node = instance.primary_node
8054 target_node = self.target_node
8056 if instance.admin_state == constants.ADMINST_UP:
8057 self.feedback_fn("* checking disk consistency between source and target")
8058 for dev in instance.disks:
8059 # for drbd, these are drbd over lvm
8060 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
8061 if primary_node.offline:
8062 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on target node %s" %
8064 (primary_node.name, dev.iv_name, target_node))
8065 elif not self.ignore_consistency:
8066 raise errors.OpExecError("Disk %s is degraded on target node,"
8067 " aborting failover" % dev.iv_name)
8069 self.feedback_fn("* not checking disk consistency as instance is not running")
8072 self.feedback_fn("* shutting down instance on source node")
8073 logging.info("Shutting down instance %s on node %s",
8074 instance.name, source_node)
8076 result = self.rpc.call_instance_shutdown(source_node, instance,
8077 self.shutdown_timeout)
8078 msg = result.fail_msg
8080 if self.ignore_consistency or primary_node.offline:
8081 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8082 " proceeding anyway; please make sure node"
8083 " %s is down; error details: %s",
8084 instance.name, source_node, source_node, msg)
8086 raise errors.OpExecError("Could not shutdown instance %s on node %s: %s" %
8088 (instance.name, source_node, msg))
8090 self.feedback_fn("* deactivating the instance's disks on source node")
8091 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8092 raise errors.OpExecError("Can't shut down the instance's disks")
8094 instance.primary_node = target_node
8095 # distribute new instance config to the other nodes
8096 self.cfg.Update(instance, self.feedback_fn)
8098 # Only start the instance if it's marked as up
8099 if instance.admin_state == constants.ADMINST_UP:
8100 self.feedback_fn("* activating the instance's disks on target node %s" %
8102 logging.info("Starting instance %s on node %s",
8103 instance.name, target_node)
8105 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8106 ignore_secondaries=True)
8108 _ShutdownInstanceDisks(self.lu, instance)
8109 raise errors.OpExecError("Can't activate the instance's disks")
8111 self.feedback_fn("* starting the instance on the target node %s" %
8113 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8115 msg = result.fail_msg
8117 _ShutdownInstanceDisks(self.lu, instance)
8118 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8119 (instance.name, target_node, msg))
8121 def Exec(self, feedback_fn):
8122 """Perform the migration.
8125 self.feedback_fn = feedback_fn
8126 self.source_node = self.instance.primary_node
8128 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8129 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8130 self.target_node = self.instance.secondary_nodes[0]
8131 # Otherwise self.target_node has been populated either
8132 # directly, or through an iallocator.
8134 self.all_nodes = [self.source_node, self.target_node]
8135 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8136 in self.cfg.GetMultiNodeInfo(self.all_nodes))
8139 feedback_fn("Failover instance %s" % self.instance.name)
8140 self._ExecFailover()
8142 feedback_fn("Migrating instance %s" % self.instance.name)
8145 return self._ExecCleanup()
8147 return self._ExecMigration()
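# A minimal sketch of the dispatch in Exec above, as a pure function over
# the two mode flags (which are asserted never to be set together); the
# helper name is hypothetical and only mirrors the control flow:
def _ExampleMigrateDispatch(failover, cleanup):
  """Hypothetical helper mirroring TLMigrateInstance.Exec's dispatch."""
  if failover:
    return "_ExecFailover"
  elif cleanup:
    return "_ExecCleanup"
  else:
    return "_ExecMigration"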
8150 def _CreateBlockDev(lu, node, instance, device, force_create,
8152 """Create a tree of block devices on a given node.
8154 If this device type has to be created on secondaries, create it and all its children.
8157 If not, just recurse to children keeping the same 'force' value.
8159 @param lu: the lu on whose behalf we execute
8160 @param node: the node on which to create the device
8161 @type instance: L{objects.Instance}
8162 @param instance: the instance which owns the device
8163 @type device: L{objects.Disk}
8164 @param device: the device to create
8165 @type force_create: boolean
8166 @param force_create: whether to force creation of this device; this
8167 will be changed to True whenever we find a device which has
8168 CreateOnSecondary() attribute
8169 @param info: the extra 'metadata' we should attach to the device
8170 (this will be represented as an LVM tag)
8171 @type force_open: boolean
8172 @param force_open: this parameter will be passed to the
8173 L{backend.BlockdevCreate} function where it specifies
8174 whether we run on primary or not, and it affects both
8175 the child assembly and the device's own Open() execution
8178 if device.CreateOnSecondary():
8182 for child in device.children:
8183 _CreateBlockDev(lu, node, instance, child, force_create,
8186 if not force_create:
8189 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8192 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8193 """Create a single block device on a given node.
8195 This will not recurse over children of the device, so they must be created in advance.
8198 @param lu: the lu on whose behalf we execute
8199 @param node: the node on which to create the device
8200 @type instance: L{objects.Instance}
8201 @param instance: the instance which owns the device
8202 @type device: L{objects.Disk}
8203 @param device: the device to create
8204 @param info: the extra 'metadata' we should attach to the device
8205 (this will be represented as an LVM tag)
8206 @type force_open: boolean
8207 @param force_open: this parameter will be passed to the
8208 L{backend.BlockdevCreate} function where it specifies
8209 whether we run on primary or not, and it affects both
8210 the child assembly and the device's own Open() execution
8213 lu.cfg.SetDiskID(device, node)
8214 result = lu.rpc.call_blockdev_create(node, device, device.size,
8215 instance.name, force_open, info)
8216 result.Raise("Can't create block device %s on"
8217 " node %s for instance %s" % (device, node, instance.name))
8218 if device.physical_id is None:
8219 device.physical_id = result.payload
8222 def _GenerateUniqueNames(lu, exts):
8223 """Generate a suitable LV name.
8225 This will generate a logical volume name for the given instance.
8230 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8231 results.append("%s%s" % (new_id, val))
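# A minimal sketch of the names produced above: with a hypothetical unique
# ID, _GenerateUniqueNames(lu, [".disk0", ".disk1"]) returns something like
#
#   ["<unique-id>.disk0", "<unique-id>.disk1"]
#
# i.e. one "%s%s" % (new_id, ext) string per requested extension.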
8235 def _ComputeLDParams(disk_template, disk_params):
8236 """Computes Logical Disk parameters from Disk Template parameters.
8238 @type disk_template: string
8239 @param disk_template: disk template, one of L{constants.DISK_TEMPLATES}
8240 @type disk_params: dict
8241 @param disk_params: disk template parameters; dict(template_name -> parameters)
8243 @return: a list of dicts, one for each node of the disk hierarchy. Each dict
8244 contains the LD parameters of the node. The tree is flattened in-order.
8247 if disk_template not in constants.DISK_TEMPLATES:
8248 raise errors.ProgrammerError("Unknown disk template %s" % disk_template)
8251 dt_params = disk_params[disk_template]
8252 if disk_template == constants.DT_DRBD8:
8254 constants.LDP_RESYNC_RATE: dt_params[constants.DRBD_RESYNC_RATE],
8255 constants.LDP_BARRIERS: dt_params[constants.DRBD_DISK_BARRIERS],
8256 constants.LDP_NO_META_FLUSH: dt_params[constants.DRBD_META_BARRIERS],
8257 constants.LDP_DEFAULT_METAVG: dt_params[constants.DRBD_DEFAULT_METAVG],
8258 constants.LDP_DISK_CUSTOM: dt_params[constants.DRBD_DISK_CUSTOM],
8259 constants.LDP_NET_CUSTOM: dt_params[constants.DRBD_NET_CUSTOM],
8260 constants.LDP_DYNAMIC_RESYNC: dt_params[constants.DRBD_DYNAMIC_RESYNC],
8261 constants.LDP_PLAN_AHEAD: dt_params[constants.DRBD_PLAN_AHEAD],
8262 constants.LDP_FILL_TARGET: dt_params[constants.DRBD_FILL_TARGET],
8263 constants.LDP_DELAY_TARGET: dt_params[constants.DRBD_DELAY_TARGET],
8264 constants.LDP_MAX_RATE: dt_params[constants.DRBD_MAX_RATE],
8265 constants.LDP_MIN_RATE: dt_params[constants.DRBD_MIN_RATE],
8269 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_DRBD8],
8272 result.append(drbd_params)
8276 constants.LDP_STRIPES: dt_params[constants.DRBD_DATA_STRIPES],
8279 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8281 result.append(data_params)
8285 constants.LDP_STRIPES: dt_params[constants.DRBD_META_STRIPES],
8288 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8290 result.append(meta_params)
8292 elif (disk_template == constants.DT_FILE or
8293 disk_template == constants.DT_SHARED_FILE):
8294 result.append(constants.DISK_LD_DEFAULTS[constants.LD_FILE])
8296 elif disk_template == constants.DT_PLAIN:
8298 constants.LDP_STRIPES: dt_params[constants.LV_STRIPES],
8301 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8303 result.append(params)
8305 elif disk_template == constants.DT_BLOCK:
8306 result.append(constants.DISK_LD_DEFAULTS[constants.LD_BLOCKDEV])
8311 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8312 iv_name, p_minor, s_minor, drbd_params, data_params,
8314 """Generate a drbd8 device complete with its children.
8317 assert len(vgnames) == len(names) == 2
8318 port = lu.cfg.AllocatePort()
8319 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8321 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8322 logical_id=(vgnames[0], names[0]),
8324 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8325 logical_id=(vgnames[1], names[1]),
8327 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8328 logical_id=(primary, secondary, port,
8331 children=[dev_data, dev_meta],
8332 iv_name=iv_name, params=drbd_params)
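# A sketch of the device tree assembled above for a hypothetical disk of
# size 1024 MiB:
#
#   LD_DRBD8, size=1024, logical_id=(primary, secondary, port, ...)
#     +- LD_LV data volume, size=1024,           on (vgnames[0], names[0])
#     +- LD_LV meta volume, size=DRBD_META_SIZE, on (vgnames[1], names[1])
#
# so each DRBD8 branch always allocates exactly two LVs per node.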
8336 def _GenerateDiskTemplate(lu, template_name,
8337 instance_name, primary_node,
8338 secondary_nodes, disk_info,
8339 file_storage_dir, file_driver,
8340 base_index, feedback_fn, disk_params):
8341 """Generate the entire disk layout for a given template type.
8344 # TODO: compute space requirements
8346 vgname = lu.cfg.GetVGName()
8347 disk_count = len(disk_info)
8349 ld_params = _ComputeLDParams(template_name, disk_params)
8350 if template_name == constants.DT_DISKLESS:
8352 elif template_name == constants.DT_PLAIN:
8353 if len(secondary_nodes) != 0:
8354 raise errors.ProgrammerError("Wrong template configuration")
8356 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8357 for i in range(disk_count)])
8358 for idx, disk in enumerate(disk_info):
8359 disk_index = idx + base_index
8360 vg = disk.get(constants.IDISK_VG, vgname)
8361 feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
8362 disk_dev = objects.Disk(dev_type=constants.LD_LV,
8363 size=disk[constants.IDISK_SIZE],
8364 logical_id=(vg, names[idx]),
8365 iv_name="disk/%d" % disk_index,
8366 mode=disk[constants.IDISK_MODE],
8367 params=ld_params[0])
8368 disks.append(disk_dev)
8369 elif template_name == constants.DT_DRBD8:
8370 drbd_params, data_params, meta_params = ld_params
8371 if len(secondary_nodes) != 1:
8372 raise errors.ProgrammerError("Wrong template configuration")
8373 remote_node = secondary_nodes[0]
8374 minors = lu.cfg.AllocateDRBDMinor(
8375 [primary_node, remote_node] * len(disk_info), instance_name)
8378 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8379 for i in range(disk_count)]):
8380 names.append(lv_prefix + "_data")
8381 names.append(lv_prefix + "_meta")
8382 for idx, disk in enumerate(disk_info):
8383 disk_index = idx + base_index
8384 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8385 data_vg = disk.get(constants.IDISK_VG, vgname)
8386 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8387 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8388 disk[constants.IDISK_SIZE],
8390 names[idx * 2:idx * 2 + 2],
8391 "disk/%d" % disk_index,
8392 minors[idx * 2], minors[idx * 2 + 1],
8393 drbd_params, data_params, meta_params)
8394 disk_dev.mode = disk[constants.IDISK_MODE]
8395 disks.append(disk_dev)
8396 elif template_name == constants.DT_FILE:
8397 if len(secondary_nodes) != 0:
8398 raise errors.ProgrammerError("Wrong template configuration")
8400 opcodes.RequireFileStorage()
8402 for idx, disk in enumerate(disk_info):
8403 disk_index = idx + base_index
8404 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8405 size=disk[constants.IDISK_SIZE],
8406 iv_name="disk/%d" % disk_index,
8407 logical_id=(file_driver,
8408 "%s/disk%d" % (file_storage_dir,
8410 mode=disk[constants.IDISK_MODE],
8411 params=ld_params[0])
8412 disks.append(disk_dev)
8413 elif template_name == constants.DT_SHARED_FILE:
8414 if len(secondary_nodes) != 0:
8415 raise errors.ProgrammerError("Wrong template configuration")
8417 opcodes.RequireSharedFileStorage()
8419 for idx, disk in enumerate(disk_info):
8420 disk_index = idx + base_index
8421 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8422 size=disk[constants.IDISK_SIZE],
8423 iv_name="disk/%d" % disk_index,
8424 logical_id=(file_driver,
8425 "%s/disk%d" % (file_storage_dir,
8427 mode=disk[constants.IDISK_MODE],
8428 params=ld_params[0])
8429 disks.append(disk_dev)
8430 elif template_name == constants.DT_BLOCK:
8431 if len(secondary_nodes) != 0:
8432 raise errors.ProgrammerError("Wrong template configuration")
8434 for idx, disk in enumerate(disk_info):
8435 disk_index = idx + base_index
8436 disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
8437 size=disk[constants.IDISK_SIZE],
8438 logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
8439 disk[constants.IDISK_ADOPT]),
8440 iv_name="disk/%d" % disk_index,
8441 mode=disk[constants.IDISK_MODE],
8442 params=ld_params[0])
8443 disks.append(disk_dev)
8446 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
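# A minimal example of the indexing convention used by every branch above,
# with hypothetical values: when growing an instance that already has two
# disks (base_index=2) by two more, the new devices get iv_names "disk/2"
# and "disk/3" via the "disk/%d" % disk_index pattern.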
8450 def _GetInstanceInfoText(instance):
8451 """Compute that text that should be added to the disk's metadata.
8454 return "originstname+%s" % instance.name
8457 def _CalcEta(time_taken, written, total_size):
8458 """Calculates the ETA based on size written and total size.
8460 @param time_taken: The time taken so far
8461 @param written: amount written so far
8462 @param total_size: The total size of data to be written
8463 @return: The remaining time in seconds
8466 avg_time = time_taken / float(written)
8467 return (total_size - written) * avg_time
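# A worked example with hypothetical numbers: after writing 512 MiB of a
# 2048 MiB total in 60 seconds, the average is 60 / 512.0 seconds per MiB,
# hence
#
#   _CalcEta(60, 512, 2048) == (2048 - 512) * (60 / 512.0) == 180.0
#
# i.e. three more minutes. Note that written must be non-zero.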
8470 def _WipeDisks(lu, instance):
8471 """Wipes instance disks.
8473 @type lu: L{LogicalUnit}
8474 @param lu: the logical unit on whose behalf we execute
8475 @type instance: L{objects.Instance}
8476 @param instance: the instance whose disks we should wipe
8477 @return: the success of the wipe
8480 node = instance.primary_node
8482 for device in instance.disks:
8483 lu.cfg.SetDiskID(device, node)
8485 logging.info("Pause sync of instance %s disks", instance.name)
8486 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
8488 for idx, success in enumerate(result.payload):
8490 logging.warn("pause-sync of instance %s for disk %d failed", instance.name, idx)
8494 for idx, device in enumerate(instance.disks):
8495 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk size,
8496 # but at most MAX_WIPE_CHUNK
8497 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8498 constants.MIN_WIPE_CHUNK_PERCENT)
8499 # we _must_ make this an int, otherwise rounding errors will occur
8501 wipe_chunk_size = int(wipe_chunk_size)
8503 lu.LogInfo("* Wiping disk %d", idx)
8504 logging.info("Wiping disk %d for instance %s, node %s using"
8505 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8510 start_time = time.time()
8512 while offset < size:
8513 wipe_size = min(wipe_chunk_size, size - offset)
8514 logging.debug("Wiping disk %d, offset %s, chunk %s",
8515 idx, offset, wipe_size)
8516 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
8517 result.Raise("Could not wipe disk %d at offset %d for size %d" %
8518 (idx, offset, wipe_size))
8521 if now - last_output >= 60:
8522 eta = _CalcEta(now - start_time, offset, size)
8523 lu.LogInfo(" - done: %.1f%% ETA: %s" %
8524 (offset / float(size) * 100, utils.FormatSeconds(eta)))
8527 logging.info("Resume sync of instance %s disks", instance.name)
8529 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
8531 for idx, success in enumerate(result.payload):
8533 lu.LogWarning("Resume sync of disk %d failed, please have a"
8534 " look at the status and troubleshoot the issue", idx)
8535 logging.warn("resume-sync of instance %s for disk %d failed", instance.name, idx)
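# A minimal sketch of the chunk size computation used in _WipeDisks above,
# as a pure function; the helper name and default values are assumptions
# for illustration, the real bounds come from constants.MAX_WIPE_CHUNK and
# constants.MIN_WIPE_CHUNK_PERCENT.
def _ExampleWipeChunkSize(disk_size, max_chunk=1024, min_percent=10):
  """Hypothetical helper mirroring the computation in _WipeDisks.

  E.g. a 2048 MiB disk yields int(min(1024, 2048 / 100.0 * 10)) == 204.

  """
  # truncate to int so that advancing the offset chunk by chunk cannot
  # accumulate floating point rounding errors
  return int(min(max_chunk, disk_size / 100.0 * min_percent))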
8539 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8540 """Create all disks for an instance.
8542 This abstracts away some work from AddInstance.
8544 @type lu: L{LogicalUnit}
8545 @param lu: the logical unit on whose behalf we execute
8546 @type instance: L{objects.Instance}
8547 @param instance: the instance whose disks we should create
8549 @param to_skip: list of indices to skip
8550 @type target_node: string
8551 @param target_node: if passed, overrides the target node for creation
8553 @return: the success of the creation
8556 info = _GetInstanceInfoText(instance)
8557 if target_node is None:
8558 pnode = instance.primary_node
8559 all_nodes = instance.all_nodes
8564 if instance.disk_template in constants.DTS_FILEBASED:
8565 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8566 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8568 result.Raise("Failed to create directory '%s' on"
8569 " node %s" % (file_storage_dir, pnode))
8571 # Note: this needs to be kept in sync with adding of disks in
8572 # LUInstanceSetParams
8573 for idx, device in enumerate(instance.disks):
8574 if to_skip and idx in to_skip:
8576 logging.info("Creating volume %s for instance %s",
8577 device.iv_name, instance.name)
8579 for node in all_nodes:
8580 f_create = node == pnode
8581 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
8584 def _RemoveDisks(lu, instance, target_node=None):
8585 """Remove all disks for an instance.
8587 This abstracts away some work from `AddInstance()` and
8588 `RemoveInstance()`. Note that in case some of the devices couldn't
8589 be removed, the removal will continue with the other ones (compare
8590 with `_CreateDisks()`).
8592 @type lu: L{LogicalUnit}
8593 @param lu: the logical unit on whose behalf we execute
8594 @type instance: L{objects.Instance}
8595 @param instance: the instance whose disks we should remove
8596 @type target_node: string
8597 @param target_node: used to override the node on which to remove the disks
8599 @return: the success of the removal
8602 logging.info("Removing block devices for instance %s", instance.name)
8605 for device in instance.disks:
8607 edata = [(target_node, device)]
8609 edata = device.ComputeNodeTree(instance.primary_node)
8610 for node, disk in edata:
8611 lu.cfg.SetDiskID(disk, node)
8612 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
8614 lu.LogWarning("Could not remove block device %s on node %s,"
8615 " continuing anyway: %s", device.iv_name, node, msg)
8618 # if this is a DRBD disk, return its port to the pool
8619 if device.dev_type in constants.LDS_DRBD:
8620 tcp_port = device.logical_id[2]
8621 lu.cfg.AddTcpUdpPort(tcp_port)
8623 if instance.disk_template == constants.DT_FILE:
8624 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8628 tgt = instance.primary_node
8629 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
8631 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
8632 file_storage_dir, instance.primary_node, result.fail_msg)
8638 def _ComputeDiskSizePerVG(disk_template, disks):
8639 """Compute disk size requirements in the volume group
8642 def _compute(disks, payload):
8643 """Universal algorithm.
8648 vgs[disk[constants.IDISK_VG]] = \
8649 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
8653 # Required free disk space as a function of disk and swap space
8655 constants.DT_DISKLESS: {},
8656 constants.DT_PLAIN: _compute(disks, 0),
8657 # 128 MB are added for drbd metadata for each disk
8658 constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
8659 constants.DT_FILE: {},
8660 constants.DT_SHARED_FILE: {},
8663 if disk_template not in req_size_dict:
8664 raise errors.ProgrammerError("Disk template '%s' size requirement"
8665 " is unknown" % disk_template)
8667 return req_size_dict[disk_template]
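# A worked example with hypothetical disks: two DRBD8 disks of 1024 MiB in
# VG "xenvg" and 512 MiB in VG "fastvg" yield
#
#   _ComputeDiskSizePerVG(constants.DT_DRBD8,
#                         [{constants.IDISK_VG: "xenvg",
#                           constants.IDISK_SIZE: 1024},
#                          {constants.IDISK_VG: "fastvg",
#                           constants.IDISK_SIZE: 512}])
#   => {"xenvg": 1024 + DRBD_META_SIZE, "fastvg": 512 + DRBD_META_SIZE}
#
# since the DRBD template adds the metadata volume's size per disk.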
8670 def _ComputeDiskSize(disk_template, disks):
8671 """Compute disk size requirements in the volume group
8674 # Required free disk space as a function of disk and swap space
8676 constants.DT_DISKLESS: None,
8677 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
8678 # 128 MB are added for drbd metadata for each disk
8680 sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
8681 constants.DT_FILE: None,
8682 constants.DT_SHARED_FILE: 0,
8683 constants.DT_BLOCK: 0,
8686 if disk_template not in req_size_dict:
8687 raise errors.ProgrammerError("Disk template '%s' size requirement"
8688 " is unknown" % disk_template)
8690 return req_size_dict[disk_template]
8693 def _FilterVmNodes(lu, nodenames):
8694 """Filters out non-vm_capable nodes from a list.
8696 @type lu: L{LogicalUnit}
8697 @param lu: the logical unit for which we check
8698 @type nodenames: list
8699 @param nodenames: the list of nodes on which we should check
8701 @return: the list of vm-capable nodes
8704 non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
8705 return [name for name in nodenames if name not in non_vm_nodes]
8708 def _CheckHVParams(lu, nodenames, hvname, hvparams):
8709 """Hypervisor parameter validation.
8711 This function abstracts the hypervisor parameter validation to be
8712 used in both instance create and instance modify.
8714 @type lu: L{LogicalUnit}
8715 @param lu: the logical unit for which we check
8716 @type nodenames: list
8717 @param nodenames: the list of nodes on which we should check
8718 @type hvname: string
8719 @param hvname: the name of the hypervisor we should use
8720 @type hvparams: dict
8721 @param hvparams: the parameters which we need to check
8722 @raise errors.OpPrereqError: if the parameters are not valid
8725 nodenames = _FilterVmNodes(lu, nodenames)
8727 cluster = lu.cfg.GetClusterInfo()
8728 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
8730 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
8731 for node in nodenames:
8735 info.Raise("Hypervisor parameter validation failed on node %s" % node)
8738 def _CheckOSParams(lu, required, nodenames, osname, osparams):
8739 """OS parameters validation.
8741 @type lu: L{LogicalUnit}
8742 @param lu: the logical unit for which we check
8743 @type required: boolean
8744 @param required: whether the validation should fail if the OS is not found
8746 @type nodenames: list
8747 @param nodenames: the list of nodes on which we should check
8748 @type osname: string
8749 @param osname: the name of the OS we should use
8750 @type osparams: dict
8751 @param osparams: the parameters which we need to check
8752 @raise errors.OpPrereqError: if the parameters are not valid
8755 nodenames = _FilterVmNodes(lu, nodenames)
8756 result = lu.rpc.call_os_validate(nodenames, required, osname,
8757 [constants.OS_VALIDATE_PARAMETERS],
8759 for node, nres in result.items():
8760 # we don't check for offline cases since this should be run only
8761 # against the master node and/or an instance's nodes
8762 nres.Raise("OS Parameters validation failed on node %s" % node)
8763 if not nres.payload:
8764 lu.LogInfo("OS %s not found on node %s, validation skipped",
8768 class LUInstanceCreate(LogicalUnit):
8769 """Create an instance.
8772 HPATH = "instance-add"
8773 HTYPE = constants.HTYPE_INSTANCE
8776 def CheckArguments(self):
8780 # do not require name_check to ease forward/backward compatibility
8782 if self.op.no_install and self.op.start:
8783 self.LogInfo("No-installation mode selected, disabling startup")
8784 self.op.start = False
8785 # validate/normalize the instance name
8786 self.op.instance_name = \
8787 netutils.Hostname.GetNormalizedName(self.op.instance_name)
8789 if self.op.ip_check and not self.op.name_check:
8790 # TODO: make the ip check more flexible and not depend on the name check
8791 raise errors.OpPrereqError("Cannot do IP address check without a name"
8792 " check", errors.ECODE_INVAL)
8794 # check nics' parameter names
8795 for nic in self.op.nics:
8796 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8798 # check disks: parameter names and consistent adopt/no-adopt strategy
8799 has_adopt = has_no_adopt = False
8800 for disk in self.op.disks:
8801 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8802 if constants.IDISK_ADOPT in disk:
8806 if has_adopt and has_no_adopt:
8807 raise errors.OpPrereqError("Either all disks are adopted or none is",
8810 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8811 raise errors.OpPrereqError("Disk adoption is not supported for the"
8812 " '%s' disk template" %
8813 self.op.disk_template,
8815 if self.op.iallocator is not None:
8816 raise errors.OpPrereqError("Disk adoption not allowed with an"
8817 " iallocator script", errors.ECODE_INVAL)
8818 if self.op.mode == constants.INSTANCE_IMPORT:
8819 raise errors.OpPrereqError("Disk adoption not allowed for"
8820 " instance import", errors.ECODE_INVAL)
8822 if self.op.disk_template in constants.DTS_MUST_ADOPT:
8823 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8824 " but no 'adopt' parameter given" %
8825 self.op.disk_template,
8828 self.adopt_disks = has_adopt
8830 # instance name verification
8831 if self.op.name_check:
8832 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8833 self.op.instance_name = self.hostname1.name
8834 # used in CheckPrereq for ip ping check
8835 self.check_ip = self.hostname1.ip
8837 self.check_ip = None
8839 # file storage checks
8840 if (self.op.file_driver and
8841 self.op.file_driver not in constants.FILE_DRIVER):
8842 raise errors.OpPrereqError("Invalid file driver name '%s'" %
8843 self.op.file_driver, errors.ECODE_INVAL)
8845 if self.op.disk_template == constants.DT_FILE:
8846 opcodes.RequireFileStorage()
8847 elif self.op.disk_template == constants.DT_SHARED_FILE:
8848 opcodes.RequireSharedFileStorage()
8850 ### Node/iallocator related checks
8851 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8853 if self.op.pnode is not None:
8854 if self.op.disk_template in constants.DTS_INT_MIRROR:
8855 if self.op.snode is None:
8856 raise errors.OpPrereqError("The networked disk templates need"
8857 " a mirror node", errors.ECODE_INVAL)
8859 self.LogWarning("Secondary node will be ignored on non-mirrored disk templates")
8861 self.op.snode = None
8863 self._cds = _GetClusterDomainSecret()
8865 if self.op.mode == constants.INSTANCE_IMPORT:
8866 # On import force_variant must be True, because if we forced it at
8867 # initial install, our only chance when importing it back is that it works
8869 self.op.force_variant = True
8871 if self.op.no_install:
8872 self.LogInfo("No-installation mode has no effect during import")
8874 elif self.op.mode == constants.INSTANCE_CREATE:
8875 if self.op.os_type is None:
8876 raise errors.OpPrereqError("No guest OS specified",
8878 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
8879 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
8880 " installation" % self.op.os_type,
8882 if self.op.disk_template is None:
8883 raise errors.OpPrereqError("No disk template specified",
8886 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8887 # Check handshake to ensure both clusters have the same domain secret
8888 src_handshake = self.op.source_handshake
8889 if not src_handshake:
8890 raise errors.OpPrereqError("Missing source handshake",
8893 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
8896 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
8899 # Load and check source CA
8900 self.source_x509_ca_pem = self.op.source_x509_ca
8901 if not self.source_x509_ca_pem:
8902 raise errors.OpPrereqError("Missing source X509 CA",
8906 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
8908 except OpenSSL.crypto.Error, err:
8909 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
8910 (err, ), errors.ECODE_INVAL)
8912 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
8913 if errcode is not None:
8914 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
8917 self.source_x509_ca = cert
8919 src_instance_name = self.op.source_instance_name
8920 if not src_instance_name:
8921 raise errors.OpPrereqError("Missing source instance name",
8924 self.source_instance_name = \
8925 netutils.GetHostname(name=src_instance_name).name
8928 raise errors.OpPrereqError("Invalid instance creation mode %r" %
8929 self.op.mode, errors.ECODE_INVAL)
8931 def ExpandNames(self):
8932 """ExpandNames for CreateInstance.
8934 Figure out the right locks for instance creation.
8937 self.needed_locks = {}
8939 instance_name = self.op.instance_name
8940 # this is just a preventive check, but someone might still add this
8941 # instance in the meantime, and creation will fail at lock-add time
8942 if instance_name in self.cfg.GetInstanceList():
8943 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8944 instance_name, errors.ECODE_EXISTS)
8946 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
8948 if self.op.iallocator:
8949 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
8950 # specifying a group on instance creation and then selecting nodes from that group
8952 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8953 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
8955 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8956 nodelist = [self.op.pnode]
8957 if self.op.snode is not None:
8958 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8959 nodelist.append(self.op.snode)
8960 self.needed_locks[locking.LEVEL_NODE] = nodelist
8961 # Lock resources of instance's primary and secondary nodes (copy to
8962 # prevent accidental modification)
8963 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
8965 # in case of import lock the source node too
8966 if self.op.mode == constants.INSTANCE_IMPORT:
8967 src_node = self.op.src_node
8968 src_path = self.op.src_path
8970 if src_path is None:
8971 self.op.src_path = src_path = self.op.instance_name
8973 if src_node is None:
8974 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8975 self.op.src_node = None
8976 if os.path.isabs(src_path):
8977 raise errors.OpPrereqError("Importing an instance from a path"
8978 " requires a source node option",
8981 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8982 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8983 self.needed_locks[locking.LEVEL_NODE].append(src_node)
8984 if not os.path.isabs(src_path):
8985 self.op.src_path = src_path = \
8986 utils.PathJoin(constants.EXPORT_DIR, src_path)
8988 def _RunAllocator(self):
8989 """Run the allocator based on input opcode.
8992 nics = [n.ToDict() for n in self.nics]
8993 ial = IAllocator(self.cfg, self.rpc,
8994 mode=constants.IALLOCATOR_MODE_ALLOC,
8995 name=self.op.instance_name,
8996 disk_template=self.op.disk_template,
8999 vcpus=self.be_full[constants.BE_VCPUS],
9000 memory=self.be_full[constants.BE_MAXMEM],
9003 hypervisor=self.op.hypervisor,
9006 ial.Run(self.op.iallocator)
9009 raise errors.OpPrereqError("Can't compute nodes using"
9010 " iallocator '%s': %s" %
9011 (self.op.iallocator, ial.info),
9013 if len(ial.result) != ial.required_nodes:
9014 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9015 " of nodes (%s), required %s" %
9016 (self.op.iallocator, len(ial.result),
9017 ial.required_nodes), errors.ECODE_FAULT)
9018 self.op.pnode = ial.result[0]
9019 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9020 self.op.instance_name, self.op.iallocator,
9021 utils.CommaJoin(ial.result))
9022 if ial.required_nodes == 2:
9023 self.op.snode = ial.result[1]
9025 def BuildHooksEnv(self):
9028 This runs on master, primary and secondary nodes of the instance.
9032 "ADD_MODE": self.op.mode,
9034 if self.op.mode == constants.INSTANCE_IMPORT:
9035 env["SRC_NODE"] = self.op.src_node
9036 env["SRC_PATH"] = self.op.src_path
9037 env["SRC_IMAGES"] = self.src_images
9039 env.update(_BuildInstanceHookEnv(
9040 name=self.op.instance_name,
9041 primary_node=self.op.pnode,
9042 secondary_nodes=self.secondaries,
9043 status=self.op.start,
9044 os_type=self.op.os_type,
9045 minmem=self.be_full[constants.BE_MINMEM],
9046 maxmem=self.be_full[constants.BE_MAXMEM],
9047 vcpus=self.be_full[constants.BE_VCPUS],
9048 nics=_NICListToTuple(self, self.nics),
9049 disk_template=self.op.disk_template,
9050 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9051 for d in self.disks],
9054 hypervisor_name=self.op.hypervisor,
9060 def BuildHooksNodes(self):
9061 """Build hooks nodes.
9064 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9067 def _ReadExportInfo(self):
9068 """Reads the export information from disk.
9070 It will override the opcode source node and path with the actual
9071 information, if these two were not specified before.
9073 @return: the export information
9076 assert self.op.mode == constants.INSTANCE_IMPORT
9078 src_node = self.op.src_node
9079 src_path = self.op.src_path
9081 if src_node is None:
9082 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9083 exp_list = self.rpc.call_export_list(locked_nodes)
9085 for node in exp_list:
9086 if exp_list[node].fail_msg:
9088 if src_path in exp_list[node].payload:
9090 self.op.src_node = src_node = node
9091 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
9095 raise errors.OpPrereqError("No export found for relative path %s" %
9096 src_path, errors.ECODE_INVAL)
9098 _CheckNodeOnline(self, src_node)
9099 result = self.rpc.call_export_info(src_node, src_path)
9100 result.Raise("No export or invalid export found in dir %s" % src_path)
9102 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9103 if not export_info.has_section(constants.INISECT_EXP):
9104 raise errors.ProgrammerError("Corrupted export config",
9105 errors.ECODE_ENVIRON)
9107 ei_version = export_info.get(constants.INISECT_EXP, "version")
9108 if int(ei_version) != constants.EXPORT_VERSION:
9109 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9110 (ei_version, constants.EXPORT_VERSION),
9111 errors.ECODE_ENVIRON)
9114 def _ReadExportParams(self, einfo):
9115 """Use export parameters as defaults.
9117 In case the opcode doesn't specify (as in override) some instance
9118 parameters, then try to use them from the export information, if the export declares them.
9122 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9124 if self.op.disk_template is None:
9125 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9126 self.op.disk_template = einfo.get(constants.INISECT_INS,
9128 if self.op.disk_template not in constants.DISK_TEMPLATES:
9129 raise errors.OpPrereqError("Disk template specified in configuration"
9130 " file is not one of the allowed values:"
9131 " %s" % " ".join(constants.DISK_TEMPLATES))
9133 raise errors.OpPrereqError("No disk template specified and the export"
9134 " is missing the disk_template information",
9137 if not self.op.disks:
9139 # TODO: import the disk iv_name too
9140 for idx in range(constants.MAX_DISKS):
9141 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9142 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9143 disks.append({constants.IDISK_SIZE: disk_sz})
9144 self.op.disks = disks
9145 if not disks and self.op.disk_template != constants.DT_DISKLESS:
9146 raise errors.OpPrereqError("No disk info specified and the export"
9147 " is missing the disk information",
9150 if not self.op.nics:
9152 for idx in range(constants.MAX_NICS):
9153 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9155 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9156 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9163 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9164 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9166 if (self.op.hypervisor is None and
9167 einfo.has_option(constants.INISECT_INS, "hypervisor")):
9168 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9170 if einfo.has_section(constants.INISECT_HYP):
9171 # use the export parameters but do not override the ones
9172 # specified by the user
9173 for name, value in einfo.items(constants.INISECT_HYP):
9174 if name not in self.op.hvparams:
9175 self.op.hvparams[name] = value
9177 if einfo.has_section(constants.INISECT_BEP):
9178 # use the parameters, without overriding
9179 for name, value in einfo.items(constants.INISECT_BEP):
9180 if name not in self.op.beparams:
9181 self.op.beparams[name] = value
9182 # Compatibility for the old "memory" be param
9183 if name == constants.BE_MEMORY:
9184 if constants.BE_MAXMEM not in self.op.beparams:
9185 self.op.beparams[constants.BE_MAXMEM] = value
9186 if constants.BE_MINMEM not in self.op.beparams:
9187 self.op.beparams[constants.BE_MINMEM] = value
9189 # try to read the parameters old style, from the main section
9190 for name in constants.BES_PARAMETERS:
9191 if (name not in self.op.beparams and
9192 einfo.has_option(constants.INISECT_INS, name)):
9193 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9195 if einfo.has_section(constants.INISECT_OSP):
9196 # use the parameters, without overriding
9197 for name, value in einfo.items(constants.INISECT_OSP):
9198 if name not in self.op.osparams:
9199 self.op.osparams[name] = value
9201 def _RevertToDefaults(self, cluster):
9202 """Revert the instance parameters to the default values.
9206 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9207 for name in self.op.hvparams.keys():
9208 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9209 del self.op.hvparams[name]
9211 be_defs = cluster.SimpleFillBE({})
9212 for name in self.op.beparams.keys():
9213 if name in be_defs and be_defs[name] == self.op.beparams[name]:
9214 del self.op.beparams[name]
9216 nic_defs = cluster.SimpleFillNIC({})
9217 for nic in self.op.nics:
9218 for name in constants.NICS_PARAMETERS:
9219 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9222 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9223 for name in self.op.osparams.keys():
9224 if name in os_defs and os_defs[name] == self.op.osparams[name]:
9225 del self.op.osparams[name]
9227 def _CalculateFileStorageDir(self):
9228 """Calculate final instance file storage dir.
9231 # file storage dir calculation/check
9232 self.instance_file_storage_dir = None
9233 if self.op.disk_template in constants.DTS_FILEBASED:
9234 # build the full file storage dir path
9237 if self.op.disk_template == constants.DT_SHARED_FILE:
9238 get_fsd_fn = self.cfg.GetSharedFileStorageDir
9240 get_fsd_fn = self.cfg.GetFileStorageDir
9242 cfg_storagedir = get_fsd_fn()
9243 if not cfg_storagedir:
9244 raise errors.OpPrereqError("Cluster file storage dir not defined")
9245 joinargs.append(cfg_storagedir)
9247 if self.op.file_storage_dir is not None:
9248 joinargs.append(self.op.file_storage_dir)
9250 joinargs.append(self.op.instance_name)
9252 # pylint: disable=W0142
9253 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
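# A minimal example with hypothetical values: a cluster storage dir of
# "/srv/ganeti/file-storage", an opcode file_storage_dir of "web" and an
# instance named "inst1.example.com" yield
#
#   utils.PathJoin("/srv/ganeti/file-storage", "web", "inst1.example.com")
#   => "/srv/ganeti/file-storage/web/inst1.example.com"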
9255 def CheckPrereq(self):
9256 """Check prerequisites.
9259 self._CalculateFileStorageDir()
9261 if self.op.mode == constants.INSTANCE_IMPORT:
9262 export_info = self._ReadExportInfo()
9263 self._ReadExportParams(export_info)
9265 if (not self.cfg.GetVGName() and
9266 self.op.disk_template not in constants.DTS_NOT_LVM):
9267 raise errors.OpPrereqError("Cluster does not support lvm-based"
9268 " instances", errors.ECODE_STATE)
9270 if (self.op.hypervisor is None or
9271 self.op.hypervisor == constants.VALUE_AUTO):
9272 self.op.hypervisor = self.cfg.GetHypervisorType()
9274 cluster = self.cfg.GetClusterInfo()
9275 enabled_hvs = cluster.enabled_hypervisors
9276 if self.op.hypervisor not in enabled_hvs:
9277 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9278 " cluster (%s)" % (self.op.hypervisor,
9279 ",".join(enabled_hvs)),
9282 # Check tag validity
9283 for tag in self.op.tags:
9284 objects.TaggableObject.ValidateTag(tag)
9286 # check hypervisor parameter syntax (locally)
9287 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9288 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9290 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9291 hv_type.CheckParameterSyntax(filled_hvp)
9292 self.hv_full = filled_hvp
9293 # check that we don't specify global parameters on an instance
9294 _CheckGlobalHvParams(self.op.hvparams)
9296 # fill and remember the beparams dict
9297 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9298 for param, value in self.op.beparams.iteritems():
9299 if value == constants.VALUE_AUTO:
9300 self.op.beparams[param] = default_beparams[param]
9301 objects.UpgradeBeParams(self.op.beparams)
9302 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9303 self.be_full = cluster.SimpleFillBE(self.op.beparams)
9305 # build os parameters
9306 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9308 # now that hvp/bep are in final format, let's reset to defaults,
9309 # if told to do so
9310 if self.op.identify_defaults:
9311 self._RevertToDefaults(cluster)
9313 # NIC buildup
9314 self.nics = []
9315 for idx, nic in enumerate(self.op.nics):
9316 nic_mode_req = nic.get(constants.INIC_MODE, None)
9317 nic_mode = nic_mode_req
9318 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9319 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9321 # in routed mode, for the first nic, the default ip is 'auto'
9322 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9323 default_ip_mode = constants.VALUE_AUTO
9324 else:
9325 default_ip_mode = constants.VALUE_NONE
9327 # ip validity checks
9328 ip = nic.get(constants.INIC_IP, default_ip_mode)
9329 if ip is None or ip.lower() == constants.VALUE_NONE:
9330 nic_ip = None
9331 elif ip.lower() == constants.VALUE_AUTO:
9332 if not self.op.name_check:
9333 raise errors.OpPrereqError("IP address set to auto but name checks"
9334 " have been skipped",
9335 errors.ECODE_INVAL)
9336 nic_ip = self.hostname1.ip
9337 else:
9338 if not netutils.IPAddress.IsValid(ip):
9339 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9340 errors.ECODE_INVAL)
9341 nic_ip = ip
9343 # TODO: check the ip address for uniqueness
9344 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9345 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9346 errors.ECODE_INVAL)
9348 # MAC address verification
9349 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9350 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9351 mac = utils.NormalizeAndValidateMac(mac)
9353 try:
9354 self.cfg.ReserveMAC(mac, self.proc.GetECId())
9355 except errors.ReservationError:
9356 raise errors.OpPrereqError("MAC address %s already in use"
9357 " in cluster" % mac,
9358 errors.ECODE_NOTUNIQUE)
9360 # Build nic parameters
9361 link = nic.get(constants.INIC_LINK, None)
9362 if link == constants.VALUE_AUTO:
9363 link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9364 nicparams = {}
9365 if nic_mode_req:
9366 nicparams[constants.NIC_MODE] = nic_mode
9367 if link:
9368 nicparams[constants.NIC_LINK] = link
9370 check_params = cluster.SimpleFillNIC(nicparams)
9371 objects.NIC.CheckParameterSyntax(check_params)
9372 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
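# At this point each entry in self.nics is a fully normalized objects.NIC:
# mac is either a validated address or still the auto/generate placeholder
# (resolved further down, just before the allocator runs), ip is a real
# address or None, and nicparams holds only the explicit overrides.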
9374 # disk checks/pre-build
9375 default_vg = self.cfg.GetVGName()
9376 self.disks = []
9377 for disk in self.op.disks:
9378 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9379 if mode not in constants.DISK_ACCESS_SET:
9380 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9381 mode, errors.ECODE_INVAL)
9382 size = disk.get(constants.IDISK_SIZE, None)
9383 if size is None:
9384 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9385 try:
9386 size = int(size)
9387 except (TypeError, ValueError):
9388 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9389 errors.ECODE_INVAL)
9391 data_vg = disk.get(constants.IDISK_VG, default_vg)
9392 new_disk = {
9393 constants.IDISK_SIZE: size,
9394 constants.IDISK_MODE: mode,
9395 constants.IDISK_VG: data_vg,
9396 }
9397 if constants.IDISK_METAVG in disk:
9398 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9399 if constants.IDISK_ADOPT in disk:
9400 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9401 self.disks.append(new_disk)
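# Sketch of one normalized disk entry (values illustrative only):
# {constants.IDISK_SIZE: 10240, constants.IDISK_MODE: "rw",
#  constants.IDISK_VG: "xenvg"}, plus the optional metavg/adopt keys
# copied above when the caller supplied them.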
9403 if self.op.mode == constants.INSTANCE_IMPORT:
9404 disk_images = []
9405 for idx in range(len(self.disks)):
9406 option = "disk%d_dump" % idx
9407 if export_info.has_option(constants.INISECT_INS, option):
9408 # FIXME: are the old os-es, disk sizes, etc. useful?
9409 export_name = export_info.get(constants.INISECT_INS, option)
9410 image = utils.PathJoin(self.op.src_path, export_name)
9411 disk_images.append(image)
9412 else:
9413 disk_images.append(False)
9415 self.src_images = disk_images
9417 old_name = export_info.get(constants.INISECT_INS, "name")
9418 if self.op.instance_name == old_name:
9419 for idx, nic in enumerate(self.nics):
9420 if nic.mac == constants.VALUE_AUTO:
9421 nic_mac_ini = "nic%d_mac" % idx
9422 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9424 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9426 # ip ping checks (we use the same ip that was resolved in ExpandNames)
9427 if self.op.ip_check:
9428 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9429 raise errors.OpPrereqError("IP %s of instance %s already in use" %
9430 (self.check_ip, self.op.instance_name),
9431 errors.ECODE_NOTUNIQUE)
9433 #### mac address generation
9434 # By generating here the mac address both the allocator and the hooks get
9435 # the real final mac address rather than the 'auto' or 'generate' value.
9436 # There is a race condition between the generation and the instance object
9437 # creation, which means that we know the mac is valid now, but we're not
9438 # sure it will be when we actually add the instance. If things go bad
9439 # adding the instance will abort because of a duplicate mac, and the
9440 # creation job will fail.
9441 for nic in self.nics:
9442 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9443 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9447 if self.op.iallocator is not None:
9448 self._RunAllocator()
9450 # Release all unneeded node locks
9451 _ReleaseLocks(self, locking.LEVEL_NODE,
9452 keep=filter(None, [self.op.pnode, self.op.snode,
9453 self.op.src_node]))
9455 #### node related checks
9457 # check primary node
9458 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9459 assert self.pnode is not None, \
9460 "Cannot retrieve locked node %s" % self.op.pnode
9461 if pnode.offline:
9462 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9463 pnode.name, errors.ECODE_STATE)
9464 if pnode.drained:
9465 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9466 pnode.name, errors.ECODE_STATE)
9467 if not pnode.vm_capable:
9468 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9469 " '%s'" % pnode.name, errors.ECODE_STATE)
9471 self.secondaries = []
9473 # mirror node verification
9474 if self.op.disk_template in constants.DTS_INT_MIRROR:
9475 if self.op.snode == pnode.name:
9476 raise errors.OpPrereqError("The secondary node cannot be the"
9477 " primary node", errors.ECODE_INVAL)
9478 _CheckNodeOnline(self, self.op.snode)
9479 _CheckNodeNotDrained(self, self.op.snode)
9480 _CheckNodeVmCapable(self, self.op.snode)
9481 self.secondaries.append(self.op.snode)
9483 snode = self.cfg.GetNodeInfo(self.op.snode)
9484 if pnode.group != snode.group:
9485 self.LogWarning("The primary and secondary nodes are in two"
9486 " different node groups; the disk parameters"
9487 " from the first disk's node group will be"
9488 " used")
9490 nodenames = [pnode.name] + self.secondaries
9492 # disk parameters (not customizable at instance or node level)
9493 # just use the primary node parameters, ignoring the secondary.
9494 self.diskparams = self.cfg.GetNodeGroup(pnode.group).diskparams
9496 if not self.adopt_disks:
9497 # Check lv size requirements, if not adopting
9498 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9499 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9501 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9502 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9503 disk[constants.IDISK_ADOPT])
9504 for disk in self.disks])
9505 if len(all_lvs) != len(self.disks):
9506 raise errors.OpPrereqError("Duplicate volume names given for adoption",
9507 errors.ECODE_INVAL)
9508 for lv_name in all_lvs:
9509 try:
9510 # FIXME: lv_name here is "vg/lv" need to ensure that other calls
9511 # to ReserveLV uses the same syntax
9512 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9513 except errors.ReservationError:
9514 raise errors.OpPrereqError("LV named %s used by another instance" %
9515 lv_name, errors.ECODE_NOTUNIQUE)
9517 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9518 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9520 node_lvs = self.rpc.call_lv_list([pnode.name],
9521 vg_names.payload.keys())[pnode.name]
9522 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9523 node_lvs = node_lvs.payload
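# The payload maps "vg/lv" names to LV attribute tuples; index 0 (the size)
# and index 2 (the online flag) are what the adoption checks below rely on.
# This assumes the backend's volume-list format, as used by this code.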
9525 delta = all_lvs.difference(node_lvs.keys())
9526 if delta:
9527 raise errors.OpPrereqError("Missing logical volume(s): %s" %
9528 utils.CommaJoin(delta),
9529 errors.ECODE_INVAL)
9530 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9531 if online_lvs:
9532 raise errors.OpPrereqError("Online logical volumes found, cannot"
9533 " adopt: %s" % utils.CommaJoin(online_lvs),
9534 errors.ECODE_STATE)
9535 # update the size of disk based on what is found
9536 for dsk in self.disks:
9537 dsk[constants.IDISK_SIZE] = \
9538 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9539 dsk[constants.IDISK_ADOPT])][0]))
9541 elif self.op.disk_template == constants.DT_BLOCK:
9542 # Normalize and de-duplicate device paths
9543 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9544 for disk in self.disks])
9545 if len(all_disks) != len(self.disks):
9546 raise errors.OpPrereqError("Duplicate disk names given for adoption",
9547 errors.ECODE_INVAL)
9548 baddisks = [d for d in all_disks
9549 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9550 if baddisks:
9551 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9552 " cannot be adopted" %
9553 (", ".join(baddisks),
9554 constants.ADOPTABLE_BLOCKDEV_ROOT),
9555 errors.ECODE_INVAL)
9557 node_disks = self.rpc.call_bdev_sizes([pnode.name],
9558 list(all_disks))[pnode.name]
9559 node_disks.Raise("Cannot get block device information from node %s" %
9560 pnode.name)
9561 node_disks = node_disks.payload
9562 delta = all_disks.difference(node_disks.keys())
9563 if delta:
9564 raise errors.OpPrereqError("Missing block device(s): %s" %
9565 utils.CommaJoin(delta),
9566 errors.ECODE_INVAL)
9567 for dsk in self.disks:
9568 dsk[constants.IDISK_SIZE] = \
9569 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
9571 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
9573 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
9574 # check OS parameters (remotely)
9575 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
9577 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
9579 # memory check on primary node
9580 #TODO(dynmem): use MINMEM for checking
9581 if self.op.start:
9582 _CheckNodeFreeMemory(self, self.pnode.name,
9583 "creating instance %s" % self.op.instance_name,
9584 self.be_full[constants.BE_MAXMEM],
9585 self.op.hypervisor)
9587 self.dry_run_result = list(nodenames)
9589 def Exec(self, feedback_fn):
9590 """Create and add the instance to the cluster.
9593 instance = self.op.instance_name
9594 pnode_name = self.pnode.name
9596 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
9597 self.owned_locks(locking.LEVEL_NODE)), \
9598 "Node locks differ from node resource locks"
9600 ht_kind = self.op.hypervisor
9601 if ht_kind in constants.HTS_REQ_PORT:
9602 network_port = self.cfg.AllocatePort()
9603 else:
9604 network_port = None
9606 disks = _GenerateDiskTemplate(self,
9607 self.op.disk_template,
9608 instance, pnode_name,
9609 self.secondaries,
9610 self.disks,
9611 self.instance_file_storage_dir,
9612 self.op.file_driver,
9613 0,
9614 feedback_fn,
9615 self.diskparams)
9617 iobj = objects.Instance(name=instance, os=self.op.os_type,
9618 primary_node=pnode_name,
9619 nics=self.nics, disks=disks,
9620 disk_template=self.op.disk_template,
9621 admin_state=constants.ADMINST_DOWN,
9622 network_port=network_port,
9623 beparams=self.op.beparams,
9624 hvparams=self.op.hvparams,
9625 hypervisor=self.op.hypervisor,
9626 osparams=self.op.osparams,
9627 )
9629 if self.op.tags:
9630 for tag in self.op.tags:
9631 iobj.AddTag(tag)
9633 if self.adopt_disks:
9634 if self.op.disk_template == constants.DT_PLAIN:
9635 # rename LVs to the newly-generated names; we need to construct
9636 # 'fake' LV disks with the old data, plus the new unique_id
9637 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
9638 rename_to = []
9639 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
9640 rename_to.append(t_dsk.logical_id)
9641 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
9642 self.cfg.SetDiskID(t_dsk, pnode_name)
9643 result = self.rpc.call_blockdev_rename(pnode_name,
9644 zip(tmp_disks, rename_to))
9645 result.Raise("Failed to rename adopted LVs")
9647 feedback_fn("* creating instance disks...")
9648 try:
9649 _CreateDisks(self, iobj)
9650 except errors.OpExecError:
9651 self.LogWarning("Device creation failed, reverting...")
9652 try:
9653 _RemoveDisks(self, iobj)
9654 finally:
9655 self.cfg.ReleaseDRBDMinors(instance)
9656 raise
9658 feedback_fn("adding instance %s to cluster config" % instance)
9660 self.cfg.AddInstance(iobj, self.proc.GetECId())
9662 # Declare that we don't want to remove the instance lock anymore, as we've
9663 # added the instance to the config
9664 del self.remove_locks[locking.LEVEL_INSTANCE]
9666 if self.op.mode == constants.INSTANCE_IMPORT:
9667 # Release unused nodes
9668 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
9669 else:
9670 # Release all nodes
9671 _ReleaseLocks(self, locking.LEVEL_NODE)
9673 disk_abort = False
9674 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
9675 feedback_fn("* wiping instance disks...")
9676 try:
9677 _WipeDisks(self, iobj)
9678 except errors.OpExecError, err:
9679 logging.exception("Wiping disks failed")
9680 self.LogWarning("Wiping instance disks failed (%s)", err)
9681 disk_abort = True
9683 if disk_abort:
9684 # Something is already wrong with the disks, don't do anything else
9685 pass
9686 elif self.op.wait_for_sync:
9687 disk_abort = not _WaitForSync(self, iobj)
9688 elif iobj.disk_template in constants.DTS_INT_MIRROR:
9689 # make sure the disks are not degraded (still sync-ing is ok)
9690 feedback_fn("* checking mirrors status")
9691 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
9692 else:
9693 disk_abort = False
9695 if disk_abort:
9696 _RemoveDisks(self, iobj)
9697 self.cfg.RemoveInstance(iobj.name)
9698 # Make sure the instance lock gets removed
9699 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
9700 raise errors.OpExecError("There are some degraded disks for"
9701 " this instance")
9703 # Release all node resource locks
9704 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
9706 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
9707 if self.op.mode == constants.INSTANCE_CREATE:
9708 if not self.op.no_install:
9709 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
9710 not self.op.wait_for_sync)
9711 if pause_sync:
9712 feedback_fn("* pausing disk sync to install instance OS")
9713 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9714 (iobj.disks, iobj), True)
9715 for idx, success in enumerate(result.payload):
9716 if not success:
9717 logging.warn("pause-sync of instance %s for disk %d failed",
9718 instance, idx)
9720 feedback_fn("* running the instance OS create scripts...")
9721 # FIXME: pass debug option from opcode to backend
9722 os_add_result = \
9723 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
9724 self.op.debug_level)
9726 feedback_fn("* resuming disk sync")
9727 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9728 (iobj.disks, iobj), False)
9729 for idx, success in enumerate(result.payload):
9730 if not success:
9731 logging.warn("resume-sync of instance %s for disk %d failed",
9732 instance, idx)
9734 os_add_result.Raise("Could not add os for instance %s"
9735 " on node %s" % (instance, pnode_name))
9737 elif self.op.mode == constants.INSTANCE_IMPORT:
9738 feedback_fn("* running the instance OS import scripts...")
9740 transfers = []
9742 for idx, image in enumerate(self.src_images):
9743 if not image:
9744 continue
9746 # FIXME: pass debug option from opcode to backend
9747 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
9748 constants.IEIO_FILE, (image, ),
9749 constants.IEIO_SCRIPT,
9750 (iobj.disks[idx], idx),
9751 None)
9752 transfers.append(dt)
9754 import_result = \
9755 masterd.instance.TransferInstanceData(self, feedback_fn,
9756 self.op.src_node, pnode_name,
9757 self.pnode.secondary_ip,
9758 iobj, transfers)
9759 if not compat.all(import_result):
9760 self.LogWarning("Some disks for instance %s on node %s were not"
9761 " imported successfully" % (instance, pnode_name))
9763 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9764 feedback_fn("* preparing remote import...")
9765 # The source cluster will stop the instance before attempting to make a
9766 # connection. In some cases stopping an instance can take a long time,
9767 # hence the shutdown timeout is added to the connection timeout.
9768 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
9769 self.op.source_shutdown_timeout)
9770 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9772 assert iobj.primary_node == self.pnode.name
9773 disk_results = \
9774 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
9775 self.source_x509_ca,
9776 self._cds, timeouts)
9777 if not compat.all(disk_results):
9778 # TODO: Should the instance still be started, even if some disks
9779 # failed to import (valid for local imports, too)?
9780 self.LogWarning("Some disks for instance %s on node %s were not"
9781 " imported successfully" % (instance, pnode_name))
9783 # Run rename script on newly imported instance
9784 assert iobj.name == instance
9785 feedback_fn("Running rename script for %s" % instance)
9786 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
9787 self.source_instance_name,
9788 self.op.debug_level)
9789 if result.fail_msg:
9790 self.LogWarning("Failed to run rename script for %s on node"
9791 " %s: %s" % (instance, pnode_name, result.fail_msg))
9793 else:
9794 # also checked in the prereq part
9795 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
9796 % self.op.mode)
9798 assert not self.owned_locks(locking.LEVEL_NODE_RES)
9800 if self.op.start:
9801 iobj.admin_state = constants.ADMINST_UP
9802 self.cfg.Update(iobj, feedback_fn)
9803 logging.info("Starting instance %s on node %s", instance, pnode_name)
9804 feedback_fn("* starting instance...")
9805 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
9806 False)
9807 result.Raise("Could not start instance")
9809 return list(iobj.all_nodes)
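# For reference, a successful creation returns the instance's node list,
# e.g. ["node1.example.com", "node2.example.com"] for a DRBD instance
# (hostnames illustrative); the master daemon hands this back as the
# opcode's result.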
9812 class LUInstanceConsole(NoHooksLU):
9813 """Connect to an instance's console.
9815 This is somewhat special in that it returns the command line that
9816 you need to run on the master node in order to connect to the
9817 console.
9822 def ExpandNames(self):
9823 self.share_locks = _ShareAll()
9824 self._ExpandAndLockInstance()
9826 def CheckPrereq(self):
9827 """Check prerequisites.
9829 This checks that the instance is in the cluster.
9832 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9833 assert self.instance is not None, \
9834 "Cannot retrieve locked instance %s" % self.op.instance_name
9835 _CheckNodeOnline(self, self.instance.primary_node)
9837 def Exec(self, feedback_fn):
9838 """Connect to the console of an instance
9841 instance = self.instance
9842 node = instance.primary_node
9844 node_insts = self.rpc.call_instance_list([node],
9845 [instance.hypervisor])[node]
9846 node_insts.Raise("Can't get node information from %s" % node)
9848 if instance.name not in node_insts.payload:
9849 if instance.admin_state == constants.ADMINST_UP:
9850 state = constants.INSTST_ERRORDOWN
9851 elif instance.admin_state == constants.ADMINST_DOWN:
9852 state = constants.INSTST_ADMINDOWN
9853 else:
9854 state = constants.INSTST_ADMINOFFLINE
9855 raise errors.OpExecError("Instance %s is not running (state %s)" %
9856 (instance.name, state))
9858 logging.debug("Connecting to console of %s on %s", instance.name, node)
9860 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
9863 def _GetInstanceConsole(cluster, instance):
9864 """Returns console information for an instance.
9866 @type cluster: L{objects.Cluster}
9867 @type instance: L{objects.Instance}
9871 hyper = hypervisor.GetHypervisor(instance.hypervisor)
9872 # beparams and hvparams are passed separately, to avoid editing the
9873 # instance and then saving the defaults in the instance itself.
9874 hvparams = cluster.FillHV(instance)
9875 beparams = cluster.FillBE(instance)
9876 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
9878 assert console.instance == instance.name
9879 assert console.Validate()
9881 return console.ToDict()
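# The returned dict is what clients use to build the actual console command.
# A rough sketch (fields come from objects.InstanceConsole and vary by
# hypervisor; values here are illustrative): {"instance":
# "inst1.example.com", "kind": ..., "host": ..., "command": [...]}.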
9884 class LUInstanceReplaceDisks(LogicalUnit):
9885 """Replace the disks of an instance.
9888 HPATH = "mirrors-replace"
9889 HTYPE = constants.HTYPE_INSTANCE
9891 REQ_BGL = False
9892 def CheckArguments(self):
9893 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
9894 self.op.iallocator)
9896 def ExpandNames(self):
9897 self._ExpandAndLockInstance()
9899 assert locking.LEVEL_NODE not in self.needed_locks
9900 assert locking.LEVEL_NODE_RES not in self.needed_locks
9901 assert locking.LEVEL_NODEGROUP not in self.needed_locks
9903 assert self.op.iallocator is None or self.op.remote_node is None, \
9904 "Conflicting options"
9906 if self.op.remote_node is not None:
9907 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9909 # Warning: do not remove the locking of the new secondary here
9910 # unless DRBD8.AddChildren is changed to work in parallel;
9911 # currently it doesn't since parallel invocations of
9912 # FindUnusedMinor will conflict
9913 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
9914 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
9915 else:
9916 self.needed_locks[locking.LEVEL_NODE] = []
9917 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9919 if self.op.iallocator is not None:
9920 # iallocator will select a new node in the same group
9921 self.needed_locks[locking.LEVEL_NODEGROUP] = []
9923 self.needed_locks[locking.LEVEL_NODE_RES] = []
9925 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
9926 self.op.iallocator, self.op.remote_node,
9927 self.op.disks, False, self.op.early_release)
9929 self.tasklets = [self.replacer]
9931 def DeclareLocks(self, level):
9932 if level == locking.LEVEL_NODEGROUP:
9933 assert self.op.remote_node is None
9934 assert self.op.iallocator is not None
9935 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
9937 self.share_locks[locking.LEVEL_NODEGROUP] = 1
9938 # Lock all groups used by instance optimistically; this requires going
9939 # via the node before it's locked, requiring verification later on
9940 self.needed_locks[locking.LEVEL_NODEGROUP] = \
9941 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9943 elif level == locking.LEVEL_NODE:
9944 if self.op.iallocator is not None:
9945 assert self.op.remote_node is None
9946 assert not self.needed_locks[locking.LEVEL_NODE]
9948 # Lock member nodes of all locked groups
9949 self.needed_locks[locking.LEVEL_NODE] = [node_name
9950 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
9951 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
9952 else:
9953 self._LockInstancesNodes()
9954 elif level == locking.LEVEL_NODE_RES:
9955 # Reuse node locks
9956 self.needed_locks[locking.LEVEL_NODE_RES] = \
9957 self.needed_locks[locking.LEVEL_NODE]
9959 def BuildHooksEnv(self):
9960 """Build hooks env.
9962 This runs on the master, the primary and all the secondaries.
9965 instance = self.replacer.instance
9966 env = {
9967 "MODE": self.op.mode,
9968 "NEW_SECONDARY": self.op.remote_node,
9969 "OLD_SECONDARY": instance.secondary_nodes[0],
9970 }
9971 env.update(_BuildInstanceHookEnvByObject(self, instance))
9972 return env
9974 def BuildHooksNodes(self):
9975 """Build hooks nodes.
9978 instance = self.replacer.instance
9979 nl = [
9980 self.cfg.GetMasterNode(),
9981 instance.primary_node,
9982 ]
9983 if self.op.remote_node is not None:
9984 nl.append(self.op.remote_node)
9986 return nl, nl
9987 def CheckPrereq(self):
9988 """Check prerequisites.
9991 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
9992 self.op.iallocator is None)
9994 # Verify if node group locks are still correct
9995 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
9996 if owned_groups:
9997 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
9999 return LogicalUnit.CheckPrereq(self)
10002 class TLReplaceDisks(Tasklet):
10003 """Replaces disks for an instance.
10005 Note: Locking is not within the scope of this class.
10008 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10009 disks, delay_iallocator, early_release):
10010 """Initializes this class.
10013 Tasklet.__init__(self, lu)
10015 # Parameters
10016 self.instance_name = instance_name
10017 self.mode = mode
10018 self.iallocator_name = iallocator_name
10019 self.remote_node = remote_node
10020 self.disks = disks
10021 self.delay_iallocator = delay_iallocator
10022 self.early_release = early_release
10024 # Runtime data
10025 self.instance = None
10026 self.new_node = None
10027 self.target_node = None
10028 self.other_node = None
10029 self.remote_node_info = None
10030 self.node_secondary_ip = None
10032 @staticmethod
10033 def CheckArguments(mode, remote_node, iallocator):
10034 """Helper function for users of this class.
10037 # check for valid parameter combination
10038 if mode == constants.REPLACE_DISK_CHG:
10039 if remote_node is None and iallocator is None:
10040 raise errors.OpPrereqError("When changing the secondary either an"
10041 " iallocator script must be used or the"
10042 " new node given", errors.ECODE_INVAL)
10044 if remote_node is not None and iallocator is not None:
10045 raise errors.OpPrereqError("Give either the iallocator or the new"
10046 " secondary, not both", errors.ECODE_INVAL)
10048 elif remote_node is not None or iallocator is not None:
10049 # Not replacing the secondary
10050 raise errors.OpPrereqError("The iallocator and new node options can"
10051 " only be used when changing the"
10052 " secondary node", errors.ECODE_INVAL)
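# Summary of the combinations accepted above (mode vs. options):
#   REPLACE_DISK_CHG  -> exactly one of iallocator / remote_node is required
#   any other mode    -> neither iallocator nor remote_node may be given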
10054 @staticmethod
10055 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10056 """Compute a new secondary node using an IAllocator.
10059 ial = IAllocator(lu.cfg, lu.rpc,
10060 mode=constants.IALLOCATOR_MODE_RELOC,
10061 name=instance_name,
10062 relocate_from=list(relocate_from))
10064 ial.Run(iallocator_name)
10066 if not ial.success:
10067 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10068 " %s" % (iallocator_name, ial.info),
10069 errors.ECODE_NORES)
10071 if len(ial.result) != ial.required_nodes:
10072 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10073 " of nodes (%s), required %s" %
10074 (iallocator_name,
10075 len(ial.result), ial.required_nodes),
10076 errors.ECODE_FAULT)
10078 remote_node_name = ial.result[0]
10080 lu.LogInfo("Selected new secondary for instance '%s': %s",
10081 instance_name, remote_node_name)
10083 return remote_node_name
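# A minimal sketch of the relocation request built above (names are
# illustrative): IAllocator(..., mode=IALLOCATOR_MODE_RELOC,
# name="inst1.example.com", relocate_from=["node2.example.com"]). The
# script must return exactly ial.required_nodes names; result[0] becomes
# the new secondary.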
10085 def _FindFaultyDisks(self, node_name):
10086 """Wrapper for L{_FindFaultyInstanceDisks}.
10089 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10090 node_name, True)
10092 def _CheckDisksActivated(self, instance):
10093 """Checks if the instance disks are activated.
10095 @param instance: The instance to check disks
10096 @return: True if they are activated, False otherwise
10099 nodes = instance.all_nodes
10101 for idx, dev in enumerate(instance.disks):
10102 for node in nodes:
10103 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10104 self.cfg.SetDiskID(dev, node)
10106 result = self.rpc.call_blockdev_find(node, dev)
10108 if result.offline:
10109 continue
10110 elif result.fail_msg or not result.payload:
10111 return False
10113 return True
10115 def CheckPrereq(self):
10116 """Check prerequisites.
10118 This checks that the instance is in the cluster.
10121 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10122 assert instance is not None, \
10123 "Cannot retrieve locked instance %s" % self.instance_name
10125 if instance.disk_template != constants.DT_DRBD8:
10126 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10127 " instances", errors.ECODE_INVAL)
10129 if len(instance.secondary_nodes) != 1:
10130 raise errors.OpPrereqError("The instance has a strange layout,"
10131 " expected one secondary but found %d" %
10132 len(instance.secondary_nodes),
10133 errors.ECODE_FAULT)
10135 if not self.delay_iallocator:
10136 self._CheckPrereq2()
10138 def _CheckPrereq2(self):
10139 """Check prerequisites, second part.
10141 This function should always be part of CheckPrereq. It was separated and is
10142 now called from Exec because during node evacuation the iallocator was only
10143 called with an unmodified cluster model, not taking planned changes into
10144 account.
10147 instance = self.instance
10148 secondary_node = instance.secondary_nodes[0]
10150 if self.iallocator_name is None:
10151 remote_node = self.remote_node
10153 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10154 instance.name, instance.secondary_nodes)
10156 if remote_node is None:
10157 self.remote_node_info = None
10158 else:
10159 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10160 "Remote node '%s' is not locked" % remote_node
10162 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10163 assert self.remote_node_info is not None, \
10164 "Cannot retrieve locked node %s" % remote_node
10166 if remote_node == self.instance.primary_node:
10167 raise errors.OpPrereqError("The specified node is the primary node of"
10168 " the instance", errors.ECODE_INVAL)
10170 if remote_node == secondary_node:
10171 raise errors.OpPrereqError("The specified node is already the"
10172 " secondary node of the instance",
10173 errors.ECODE_INVAL)
10175 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10176 constants.REPLACE_DISK_CHG):
10177 raise errors.OpPrereqError("Cannot specify disks to be replaced",
10178 errors.ECODE_INVAL)
10180 if self.mode == constants.REPLACE_DISK_AUTO:
10181 if not self._CheckDisksActivated(instance):
10182 raise errors.OpPrereqError("Please run activate-disks on instance %s"
10183 " first" % self.instance_name,
10184 errors.ECODE_STATE)
10185 faulty_primary = self._FindFaultyDisks(instance.primary_node)
10186 faulty_secondary = self._FindFaultyDisks(secondary_node)
10188 if faulty_primary and faulty_secondary:
10189 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10190 " one node and can not be repaired"
10191 " automatically" % self.instance_name,
10192 errors.ECODE_STATE)
10194 if faulty_primary:
10195 self.disks = faulty_primary
10196 self.target_node = instance.primary_node
10197 self.other_node = secondary_node
10198 check_nodes = [self.target_node, self.other_node]
10199 elif faulty_secondary:
10200 self.disks = faulty_secondary
10201 self.target_node = secondary_node
10202 self.other_node = instance.primary_node
10203 check_nodes = [self.target_node, self.other_node]
10204 else:
10205 self.disks = []
10206 check_nodes = []
10208 else:
10209 # Non-automatic modes
10210 if self.mode == constants.REPLACE_DISK_PRI:
10211 self.target_node = instance.primary_node
10212 self.other_node = secondary_node
10213 check_nodes = [self.target_node, self.other_node]
10215 elif self.mode == constants.REPLACE_DISK_SEC:
10216 self.target_node = secondary_node
10217 self.other_node = instance.primary_node
10218 check_nodes = [self.target_node, self.other_node]
10220 elif self.mode == constants.REPLACE_DISK_CHG:
10221 self.new_node = remote_node
10222 self.other_node = instance.primary_node
10223 self.target_node = secondary_node
10224 check_nodes = [self.new_node, self.other_node]
10226 _CheckNodeNotDrained(self.lu, remote_node)
10227 _CheckNodeVmCapable(self.lu, remote_node)
10229 old_node_info = self.cfg.GetNodeInfo(secondary_node)
10230 assert old_node_info is not None
10231 if old_node_info.offline and not self.early_release:
10232 # doesn't make sense to delay the release
10233 self.early_release = True
10234 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10235 " early-release mode", secondary_node)
10237 else:
10238 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10239 self.mode)
10241 # If not specified all disks should be replaced
10242 if not self.disks:
10243 self.disks = range(len(self.instance.disks))
10245 # TODO: compute disk parameters
10246 primary_node_info = self.cfg.GetNodeInfo(instance.primary_node)
10247 secondary_node_info = self.cfg.GetNodeInfo(secondary_node)
10248 if primary_node_info.group != secondary_node_info.group:
10249 self.lu.LogInfo("The instance primary and secondary nodes are in two"
10250 " different node groups; the disk parameters of the"
10251 " primary node's group will be applied.")
10253 self.diskparams = self.cfg.GetNodeGroup(primary_node_info.group).diskparams
10255 for node in check_nodes:
10256 _CheckNodeOnline(self.lu, node)
10258 touched_nodes = frozenset(node_name for node_name in [self.new_node,
10259 self.other_node,
10260 self.target_node]
10261 if node_name is not None)
10263 # Release unneeded node and node resource locks
10264 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10265 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10267 # Release any owned node group
10268 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10269 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10271 # Check whether disks are valid
10272 for disk_idx in self.disks:
10273 instance.FindDisk(disk_idx)
10275 # Get secondary node IP addresses
10276 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10277 in self.cfg.GetMultiNodeInfo(touched_nodes))
10279 def Exec(self, feedback_fn):
10280 """Execute disk replacement.
10282 This dispatches the disk replacement to the appropriate handler.
10285 if self.delay_iallocator:
10286 self._CheckPrereq2()
10289 # Verify owned locks before starting operation
10290 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10291 assert set(owned_nodes) == set(self.node_secondary_ip), \
10292 ("Incorrect node locks, owning %s, expected %s" %
10293 (owned_nodes, self.node_secondary_ip.keys()))
10294 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10295 self.lu.owned_locks(locking.LEVEL_NODE_RES))
10297 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10298 assert list(owned_instances) == [self.instance_name], \
10299 "Instance '%s' not locked" % self.instance_name
10301 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10302 "Should not own any node group lock at this point"
10304 if not self.disks:
10305 feedback_fn("No disks need replacement")
10306 return
10308 feedback_fn("Replacing disk(s) %s for %s" %
10309 (utils.CommaJoin(self.disks), self.instance.name))
10311 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10313 # Activate the instance disks if we're replacing them on a down instance
10314 if activate_disks:
10315 _StartInstanceDisks(self.lu, self.instance, True)
10317 try:
10318 # Should we replace the secondary node?
10319 if self.new_node is not None:
10320 fn = self._ExecDrbd8Secondary
10321 else:
10322 fn = self._ExecDrbd8DiskOnly
10324 result = fn(feedback_fn)
10325 finally:
10326 # Deactivate the instance disks if we're replacing them on a
10327 # down instance
10328 if activate_disks:
10329 _SafeShutdownInstanceDisks(self.lu, self.instance)
10331 assert not self.lu.owned_locks(locking.LEVEL_NODE)
10333 if __debug__:
10334 # Verify owned locks
10335 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10336 nodes = frozenset(self.node_secondary_ip)
10337 assert ((self.early_release and not owned_nodes) or
10338 (not self.early_release and not (set(owned_nodes) - nodes))), \
10339 ("Not owning the correct locks, early_release=%s, owned=%r,"
10340 " nodes=%r" % (self.early_release, owned_nodes, nodes))
10342 return result
10344 def _CheckVolumeGroup(self, nodes):
10345 self.lu.LogInfo("Checking volume groups")
10347 vgname = self.cfg.GetVGName()
10349 # Make sure volume group exists on all involved nodes
10350 results = self.rpc.call_vg_list(nodes)
10351 if not results:
10352 raise errors.OpExecError("Can't list volume groups on the nodes")
10354 for node in nodes:
10355 res = results[node]
10356 res.Raise("Error checking node %s" % node)
10357 if vgname not in res.payload:
10358 raise errors.OpExecError("Volume group '%s' not found on node %s" %
10359 (vgname, node))
10361 def _CheckDisksExistence(self, nodes):
10362 # Check disk existence
10363 for idx, dev in enumerate(self.instance.disks):
10364 if idx not in self.disks:
10365 continue
10367 for node in nodes:
10368 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10369 self.cfg.SetDiskID(dev, node)
10371 result = self.rpc.call_blockdev_find(node, dev)
10373 msg = result.fail_msg
10374 if msg or not result.payload:
10375 if not msg:
10376 msg = "disk not found"
10377 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10378 (idx, node, msg))
10380 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10381 for idx, dev in enumerate(self.instance.disks):
10382 if idx not in self.disks:
10383 continue
10385 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10386 (idx, node_name))
10388 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
10389 ldisk=ldisk):
10390 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10391 " replace disks for instance %s" %
10392 (node_name, self.instance.name))
10394 def _CreateNewStorage(self, node_name):
10395 """Create new storage on the primary or secondary node.
10397 This is only used for same-node replaces, not for changing the
10398 secondary node, hence we don't want to modify the existing disk.
10402 iv_names = {}
10403 for idx, dev in enumerate(self.instance.disks):
10404 if idx not in self.disks:
10405 continue
10407 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10409 self.cfg.SetDiskID(dev, node_name)
10411 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10412 names = _GenerateUniqueNames(self.lu, lv_names)
10414 _, data_p, meta_p = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
10416 vg_data = dev.children[0].logical_id[0]
10417 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10418 logical_id=(vg_data, names[0]), params=data_p)
10419 vg_meta = dev.children[1].logical_id[0]
10420 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10421 logical_id=(vg_meta, names[1]), params=meta_p)
10423 new_lvs = [lv_data, lv_meta]
10424 old_lvs = [child.Copy() for child in dev.children]
10425 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10427 # we pass force_create=True to force the LVM creation
10428 for new_lv in new_lvs:
10429 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
10430 _GetInstanceInfoText(self.instance), False)
10432 return iv_names
10434 def _CheckDevices(self, node_name, iv_names):
10435 for name, (dev, _, _) in iv_names.iteritems():
10436 self.cfg.SetDiskID(dev, node_name)
10438 result = self.rpc.call_blockdev_find(node_name, dev)
10440 msg = result.fail_msg
10441 if msg or not result.payload:
10442 if not msg:
10443 msg = "disk not found"
10444 raise errors.OpExecError("Can't find DRBD device %s: %s" %
10445 (name, msg))
10447 if result.payload.is_degraded:
10448 raise errors.OpExecError("DRBD device %s is degraded!" % name)
10450 def _RemoveOldStorage(self, node_name, iv_names):
10451 for name, (_, old_lvs, _) in iv_names.iteritems():
10452 self.lu.LogInfo("Remove logical volumes for %s" % name)
10454 for lv in old_lvs:
10455 self.cfg.SetDiskID(lv, node_name)
10457 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10458 if msg:
10459 self.lu.LogWarning("Can't remove old LV: %s" % msg,
10460 hint="remove unused LVs manually")
10462 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10463 """Replace a disk on the primary or secondary for DRBD 8.
10465 The algorithm for replace is quite complicated:
10467 1. for each disk to be replaced:
10469 1. create new LVs on the target node with unique names
10470 1. detach old LVs from the drbd device
10471 1. rename old LVs to name_replaced.<time_t>
10472 1. rename new LVs to old LVs
10473 1. attach the new LVs (with the old names now) to the drbd device
10475 1. wait for sync across all devices
10477 1. for each modified disk:
10479 1. remove old LVs (which have the name name_replaced.<time_t>)
10481 Failures are not very well handled.
10485 steps_total = 6
10486 # Step: check device activation
10487 self.lu.LogStep(1, steps_total, "Check device existence")
10488 self._CheckDisksExistence([self.other_node, self.target_node])
10489 self._CheckVolumeGroup([self.target_node, self.other_node])
10491 # Step: check other node consistency
10492 self.lu.LogStep(2, steps_total, "Check peer consistency")
10493 self._CheckDisksConsistency(self.other_node,
10494 self.other_node == self.instance.primary_node,
10495 False)
10497 # Step: create new storage
10498 self.lu.LogStep(3, steps_total, "Allocate new storage")
10499 iv_names = self._CreateNewStorage(self.target_node)
10501 # Step: for each lv, detach+rename*2+attach
10502 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10503 for dev, old_lvs, new_lvs in iv_names.itervalues():
10504 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10506 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10507 old_lvs)
10508 result.Raise("Can't detach drbd from local storage on node"
10509 " %s for device %s" % (self.target_node, dev.iv_name))
10511 #cfg.Update(instance)
10513 # ok, we created the new LVs, so now we know we have the needed
10514 # storage; as such, we proceed on the target node to rename
10515 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
10516 # using the assumption that logical_id == physical_id (which in
10517 # turn is the unique_id on that node)
10519 # FIXME(iustin): use a better name for the replaced LVs
10520 temp_suffix = int(time.time())
10521 ren_fn = lambda d, suff: (d.physical_id[0],
10522 d.physical_id[1] + "_replaced-%s" % suff)
10524 # Build the rename list based on what LVs exist on the node
10525 rename_old_to_new = []
10526 for to_ren in old_lvs:
10527 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
10528 if not result.fail_msg and result.payload:
10529 # device exists
10530 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
10532 self.lu.LogInfo("Renaming the old LVs on the target node")
10533 result = self.rpc.call_blockdev_rename(self.target_node,
10534 rename_old_to_new)
10535 result.Raise("Can't rename old LVs on node %s" % self.target_node)
10537 # Now we rename the new LVs to the old LVs
10538 self.lu.LogInfo("Renaming the new LVs on the target node")
10539 rename_new_to_old = [(new, old.physical_id)
10540 for old, new in zip(old_lvs, new_lvs)]
10541 result = self.rpc.call_blockdev_rename(self.target_node,
10542 rename_new_to_old)
10543 result.Raise("Can't rename new LVs on node %s" % self.target_node)
10545 # Intermediate steps of in memory modifications
10546 for old, new in zip(old_lvs, new_lvs):
10547 new.logical_id = old.logical_id
10548 self.cfg.SetDiskID(new, self.target_node)
10550 # We need to modify old_lvs so that removal later removes the
10551 # right LVs, not the newly added ones; note that old_lvs is a
10552 # copy here
10553 for disk in old_lvs:
10554 disk.logical_id = ren_fn(disk, temp_suffix)
10555 self.cfg.SetDiskID(disk, self.target_node)
10557 # Now that the new lvs have the old name, we can add them to the device
10558 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
10559 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
10560 new_lvs)
10561 msg = result.fail_msg
10562 if msg:
10563 for new_lv in new_lvs:
10564 msg2 = self.rpc.call_blockdev_remove(self.target_node,
10565 new_lv).fail_msg
10566 if msg2:
10567 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
10568 hint=("cleanup manually the unused logical"
10569 " volumes"))
10570 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
10572 cstep = itertools.count(5)
10574 if self.early_release:
10575 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10576 self._RemoveOldStorage(self.target_node, iv_names)
10577 # TODO: Check if releasing locks early still makes sense
10578 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10579 else:
10580 # Release all resource locks except those used by the instance
10581 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10582 keep=self.node_secondary_ip.keys())
10584 # Release all node locks while waiting for sync
10585 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10587 # TODO: Can the instance lock be downgraded here? Take the optional disk
10588 # shutdown in the caller into consideration.
10591 # This can fail as the old devices are degraded and _WaitForSync
10592 # does a combined result over all disks, so we don't check its return value
10593 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10594 _WaitForSync(self.lu, self.instance)
10596 # Check all devices manually
10597 self._CheckDevices(self.instance.primary_node, iv_names)
10599 # Step: remove old storage
10600 if not self.early_release:
10601 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10602 self._RemoveOldStorage(self.target_node, iv_names)
10604 def _ExecDrbd8Secondary(self, feedback_fn):
10605 """Replace the secondary node for DRBD 8.
10607 The algorithm for replace is quite complicated:
10608 - for all disks of the instance:
10609 - create new LVs on the new node with same names
10610 - shutdown the drbd device on the old secondary
10611 - disconnect the drbd network on the primary
10612 - create the drbd device on the new secondary
10613 - network attach the drbd on the primary, using an artifice:
10614 the drbd code for Attach() will connect to the network if it
10615 finds a device which is connected to the good local disks but
10616 not network enabled
10617 - wait for sync across all devices
10618 - remove all disks from the old secondary
10620 Failures are not very well handled.
10624 steps_total = 6
10625 pnode = self.instance.primary_node
10627 # Step: check device activation
10628 self.lu.LogStep(1, steps_total, "Check device existence")
10629 self._CheckDisksExistence([self.instance.primary_node])
10630 self._CheckVolumeGroup([self.instance.primary_node])
10632 # Step: check other node consistency
10633 self.lu.LogStep(2, steps_total, "Check peer consistency")
10634 self._CheckDisksConsistency(self.instance.primary_node, True, True)
10636 # Step: create new storage
10637 self.lu.LogStep(3, steps_total, "Allocate new storage")
10638 for idx, dev in enumerate(self.instance.disks):
10639 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
10640 (self.new_node, idx))
10641 # we pass force_create=True to force LVM creation
10642 for new_lv in dev.children:
10643 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
10644 _GetInstanceInfoText(self.instance), False)
10646 # Step 4: drbd minors and drbd setup changes
10647 # after this, we must manually remove the drbd minors on both the
10648 # error and the success paths
10649 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10650 minors = self.cfg.AllocateDRBDMinor([self.new_node
10651 for dev in self.instance.disks],
10652 self.instance.name)
10653 logging.debug("Allocated minors %r", minors)
10655 iv_names = {}
10656 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
10657 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
10658 (self.new_node, idx))
10659 # create new devices on new_node; note that we create two IDs:
10660 # one without port, so the drbd will be activated without
10661 # networking information on the new node at this stage, and one
10662 # with network, for the latter activation in step 4
10663 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
10664 if self.instance.primary_node == o_node1:
10665 p_minor = o_minor1
10666 else:
10667 assert self.instance.primary_node == o_node2, "Three-node instance?"
10668 p_minor = o_minor2
10670 new_alone_id = (self.instance.primary_node, self.new_node, None,
10671 p_minor, new_minor, o_secret)
10672 new_net_id = (self.instance.primary_node, self.new_node, o_port,
10673 p_minor, new_minor, o_secret)
10675 iv_names[idx] = (dev, dev.children, new_net_id)
10676 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
10677 new_net_id)
10678 drbd_params, _, _ = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
10679 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
10680 logical_id=new_alone_id,
10681 children=dev.children,
10682 size=dev.size,
10683 params=drbd_params)
10684 try:
10685 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
10686 _GetInstanceInfoText(self.instance), False)
10687 except errors.GenericError:
10688 self.cfg.ReleaseDRBDMinors(self.instance.name)
10689 raise
10691 # We have new devices, shutdown the drbd on the old secondary
10692 for idx, dev in enumerate(self.instance.disks):
10693 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
10694 self.cfg.SetDiskID(dev, self.target_node)
10695 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
10696 if msg:
10697 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
10698 " node: %s" % (idx, msg),
10699 hint=("Please cleanup this device manually as"
10700 " soon as possible"))
10702 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
10703 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
10704 self.instance.disks)[pnode]
10706 msg = result.fail_msg
10707 if msg:
10708 # detaches didn't succeed (unlikely)
10709 self.cfg.ReleaseDRBDMinors(self.instance.name)
10710 raise errors.OpExecError("Can't detach the disks from the network on"
10711 " old node: %s" % (msg,))
10713 # if we managed to detach at least one, we update all the disks of
10714 # the instance to point to the new secondary
10715 self.lu.LogInfo("Updating instance configuration")
10716 for dev, _, new_logical_id in iv_names.itervalues():
10717 dev.logical_id = new_logical_id
10718 self.cfg.SetDiskID(dev, self.instance.primary_node)
10720 self.cfg.Update(self.instance, feedback_fn)
10722 # Release all node locks (the configuration has been updated)
10723 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10725 # and now perform the drbd attach
10726 self.lu.LogInfo("Attaching primary drbds to new secondary"
10727 " (standalone => connected)")
10728 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
10729 self.new_node],
10730 self.node_secondary_ip,
10731 self.instance.disks,
10732 self.instance.name,
10733 False)
10734 for to_node, to_result in result.items():
10735 msg = to_result.fail_msg
10736 if msg:
10737 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
10738 to_node, msg,
10739 hint=("please do a gnt-instance info to see the"
10740 " status of disks"))
10742 cstep = itertools.count(5)
10744 if self.early_release:
10745 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10746 self._RemoveOldStorage(self.target_node, iv_names)
10747 # TODO: Check if releasing locks early still makes sense
10748 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10749 else:
10750 # Release all resource locks except those used by the instance
10751 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10752 keep=self.node_secondary_ip.keys())
10754 # TODO: Can the instance lock be downgraded here? Take the optional disk
10755 # shutdown in the caller into consideration.
10758 # This can fail as the old devices are degraded and _WaitForSync
10759 # does a combined result over all disks, so we don't check its return value
10760 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10761 _WaitForSync(self.lu, self.instance)
10763 # Check all devices manually
10764 self._CheckDevices(self.instance.primary_node, iv_names)
10766 # Step: remove old storage
10767 if not self.early_release:
10768 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10769 self._RemoveOldStorage(self.target_node, iv_names)
10772 class LURepairNodeStorage(NoHooksLU):
10773 """Repairs the volume group on a node.
10777 REQ_BGL = False
10778 def CheckArguments(self):
10779 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10781 storage_type = self.op.storage_type
10783 if (constants.SO_FIX_CONSISTENCY not in
10784 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
10785 raise errors.OpPrereqError("Storage units of type '%s' cannot be"
10786 " repaired" % storage_type,
10787 errors.ECODE_INVAL)
10789 def ExpandNames(self):
10790 self.needed_locks = {
10791 locking.LEVEL_NODE: [self.op.node_name],
10792 }
10794 def _CheckFaultyDisks(self, instance, node_name):
10795 """Ensure faulty disks abort the opcode or at least warn."""
10796 try:
10797 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
10798 node_name, True):
10799 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
10800 " node '%s'" % (instance.name, node_name),
10801 errors.ECODE_STATE)
10802 except errors.OpPrereqError, err:
10803 if self.op.ignore_consistency:
10804 self.proc.LogWarning(str(err.args[0]))
10805 else:
10806 raise
10808 def CheckPrereq(self):
10809 """Check prerequisites.
10812 # Check whether any instance on this node has faulty disks
10813 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
10814 if inst.admin_state != constants.ADMINST_UP:
10815 continue
10816 check_nodes = set(inst.all_nodes)
10817 check_nodes.discard(self.op.node_name)
10818 for inst_node_name in check_nodes:
10819 self._CheckFaultyDisks(inst, inst_node_name)
10821 def Exec(self, feedback_fn):
10822 feedback_fn("Repairing storage unit '%s' on %s ..." %
10823 (self.op.name, self.op.node_name))
10825 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
10826 result = self.rpc.call_storage_execute(self.op.node_name,
10827 self.op.storage_type, st_args,
10828 self.op.name,
10829 constants.SO_FIX_CONSISTENCY)
10830 result.Raise("Failed to repair storage unit '%s' on %s" %
10831 (self.op.name, self.op.node_name))
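# This LU is typically reached via "gnt-node repair-storage <node>
# <storage-type> <name>", e.g. to re-sync an LVM volume group; the exact
# CLI form is assumed here, the storage_execute RPC above is what matters.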
10834 class LUNodeEvacuate(NoHooksLU):
10835 """Evacuates instances off a list of nodes.
10839 REQ_BGL = False
10840 _MODE2IALLOCATOR = {
10841 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
10842 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
10843 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
10844 }
10845 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
10846 assert (frozenset(_MODE2IALLOCATOR.values()) ==
10847 constants.IALLOCATOR_NEVAC_MODES)
10849 def CheckArguments(self):
10850 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10852 def ExpandNames(self):
10853 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10855 if self.op.remote_node is not None:
10856 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10857 assert self.op.remote_node
10859 if self.op.remote_node == self.op.node_name:
10860 raise errors.OpPrereqError("Cannot use evacuated node as a new"
10861 " secondary node", errors.ECODE_INVAL)
10863 if self.op.mode != constants.NODE_EVAC_SEC:
10864 raise errors.OpPrereqError("Without the use of an iallocator only"
10865 " secondary instances can be evacuated",
10866 errors.ECODE_INVAL)
10869 self.share_locks = _ShareAll()
10870 self.needed_locks = {
10871 locking.LEVEL_INSTANCE: [],
10872 locking.LEVEL_NODEGROUP: [],
10873 locking.LEVEL_NODE: [],
10874 }
10876 # Determine nodes (via group) optimistically, needs verification once locks
10877 # have been acquired
10878 self.lock_nodes = self._DetermineNodes()
10880 def _DetermineNodes(self):
10881 """Gets the list of nodes to operate on.
10884 if self.op.remote_node is None:
10885 # Iallocator will choose any node(s) in the same group
10886 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
10887 else:
10888 group_nodes = frozenset([self.op.remote_node])
10890 # Determine nodes to be locked
10891 return set([self.op.node_name]) | group_nodes
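# Illustrative example (hypothetical cluster): evacuating "node1" whose
# group also contains "node2" and "node3" locks all three when the
# iallocator picks the target, but only {"node1", "node4"} when
# remote_node="node4" was given explicitly.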
10893 def _DetermineInstances(self):
10894 """Builds list of instances to operate on.
10897 assert self.op.mode in constants.NODE_EVAC_MODES
10899 if self.op.mode == constants.NODE_EVAC_PRI:
10900 # Primary instances only
10901 inst_fn = _GetNodePrimaryInstances
10902 assert self.op.remote_node is None, \
10903 "Evacuating primary instances requires iallocator"
10904 elif self.op.mode == constants.NODE_EVAC_SEC:
10905 # Secondary instances only
10906 inst_fn = _GetNodeSecondaryInstances
10909 assert self.op.mode == constants.NODE_EVAC_ALL
10910 inst_fn = _GetNodeInstances
10911 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
10912 # per instance
10913 raise errors.OpPrereqError("Due to an issue with the iallocator"
10914 " interface it is not possible to evacuate"
10915 " all instances at once; specify explicitly"
10916 " whether to evacuate primary or secondary"
10917 " instances",
10918 errors.ECODE_INVAL)
10920 return inst_fn(self.cfg, self.op.node_name)
10922 def DeclareLocks(self, level):
10923 if level == locking.LEVEL_INSTANCE:
10924 # Lock instances optimistically, needs verification once node and group
10925 # locks have been acquired
10926 self.needed_locks[locking.LEVEL_INSTANCE] = \
10927 set(i.name for i in self._DetermineInstances())
10929 elif level == locking.LEVEL_NODEGROUP:
10930 # Lock node groups for all potential target nodes optimistically, needs
10931 # verification once nodes have been acquired
10932 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10933 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
10935 elif level == locking.LEVEL_NODE:
10936 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
10938 def CheckPrereq(self):
10940 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
10941 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
10942 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10944 need_nodes = self._DetermineNodes()
10946 if not owned_nodes.issuperset(need_nodes):
10947 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
10948 " locks were acquired, current nodes are"
10949 " '%s', used to be '%s'; retry the"
10951 (self.op.node_name,
10952 utils.CommaJoin(need_nodes),
10953 utils.CommaJoin(owned_nodes)),
10954 errors.ECODE_STATE)
10956 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
10957 if owned_groups != wanted_groups:
10958 raise errors.OpExecError("Node groups changed since locks were acquired,"
10959 " current groups are '%s', used to be '%s';"
10960 " retry the operation" %
10961 (utils.CommaJoin(wanted_groups),
10962 utils.CommaJoin(owned_groups)))
10964 # Determine affected instances
10965 self.instances = self._DetermineInstances()
10966 self.instance_names = [i.name for i in self.instances]
10968 if set(self.instance_names) != owned_instances:
10969 raise errors.OpExecError("Instances on node '%s' changed since locks"
10970 " were acquired, current instances are '%s',"
10971 " used to be '%s'; retry the operation" %
10972 (self.op.node_name,
10973 utils.CommaJoin(self.instance_names),
10974 utils.CommaJoin(owned_instances)))
10976 if self.instance_names:
10977 self.LogInfo("Evacuating instances from node '%s': %s",
10978 self.op.node_name,
10979 utils.CommaJoin(utils.NiceSort(self.instance_names)))
10980 else:
10981 self.LogInfo("No instances to evacuate from node '%s'",
10982 self.op.node_name)
10984 if self.op.remote_node is not None:
10985 for i in self.instances:
10986 if i.primary_node == self.op.remote_node:
10987 raise errors.OpPrereqError("Node %s is the primary node of"
10988 " instance %s, cannot use it as"
10990 (self.op.remote_node, i.name),
10991 errors.ECODE_INVAL)
10993 def Exec(self, feedback_fn):
10994 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10996 if not self.instance_names:
10997 # No instances to evacuate
10998 jobs = []
11000 elif self.op.iallocator is not None:
11001 # TODO: Implement relocation to other group
11002 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
11003 evac_mode=self._MODE2IALLOCATOR[self.op.mode],
11004 instances=list(self.instance_names))
11006 ial.Run(self.op.iallocator)
11008 if not ial.success:
11009 raise errors.OpPrereqError("Can't compute node evacuation using"
11010 " iallocator '%s': %s" %
11011 (self.op.iallocator, ial.info),
11012 errors.ECODE_NORES)
11014 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
11016 elif self.op.remote_node is not None:
11017 assert self.op.mode == constants.NODE_EVAC_SEC
11018 jobs = [
11019 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
11020 remote_node=self.op.remote_node,
11021 disks=[],
11022 mode=constants.REPLACE_DISK_CHG,
11023 early_release=self.op.early_release)]
11024 for instance_name in self.instance_names]
11027 else:
11028 raise errors.ProgrammerError("No iallocator or remote node")
11030 return ResultWithJobs(jobs)
11033 def _SetOpEarlyRelease(early_release, op):
11034 """Sets C{early_release} flag on opcodes if available.
11037 try:
11038 op.early_release = early_release
11039 except AttributeError:
11040 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
11042 return op
11045 def _NodeEvacDest(use_nodes, group, nodes):
11046 """Returns group or nodes depending on caller's choice.
11049 if use_nodes:
11050 return utils.CommaJoin(nodes)
11051 else:
11052 return group
11055 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11056 """Unpacks the result of change-group and node-evacuate iallocator requests.
11058 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11059 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11061 @type lu: L{LogicalUnit}
11062 @param lu: Logical unit instance
11063 @type alloc_result: tuple/list
11064 @param alloc_result: Result from iallocator
11065 @type early_release: bool
11066 @param early_release: Whether to release locks early if possible
11067 @type use_nodes: bool
11068 @param use_nodes: Whether to display node names instead of groups
11071 (moved, failed, jobs) = alloc_result
11073 if failed:
11074 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11075 for (name, reason) in failed)
11076 lu.LogWarning("Unable to evacuate instances %s", failreason)
11077 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11079 if moved:
11080 lu.LogInfo("Instances to be moved: %s",
11081 utils.CommaJoin("%s (to %s)" %
11082 (name, _NodeEvacDest(use_nodes, group, nodes))
11083 for (name, group, nodes) in moved))
11085 return [map(compat.partial(_SetOpEarlyRelease, early_release),
11086 map(opcodes.OpCode.LoadOpCode, ops))
11087 for ops in jobs]
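# Sketch of the data flow above, with made-up values: the iallocator returns
# (moved, failed, jobs), e.g.
#   moved  = [("inst1", "group2", ["node3", "node4"])]
#   failed = []
#   jobs   = [[{"OP_ID": "OP_INSTANCE_REPLACE_DISKS", ...}]]
# Each serialized opcode is revived via opcodes.OpCode.LoadOpCode and, where
# the opcode supports it, receives the caller's early_release flag.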
11090 class LUInstanceGrowDisk(LogicalUnit):
11091 """Grow a disk of an instance.
11094 HPATH = "disk-grow"
11095 HTYPE = constants.HTYPE_INSTANCE
11096 REQ_BGL = False
11098 def ExpandNames(self):
11099 self._ExpandAndLockInstance()
11100 self.needed_locks[locking.LEVEL_NODE] = []
11101 self.needed_locks[locking.LEVEL_NODE_RES] = []
11102 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11104 def DeclareLocks(self, level):
11105 if level == locking.LEVEL_NODE:
11106 self._LockInstancesNodes()
11107 elif level == locking.LEVEL_NODE_RES:
11108 # Copy node locks
11109 self.needed_locks[locking.LEVEL_NODE_RES] = \
11110 self.needed_locks[locking.LEVEL_NODE][:]
11112 def BuildHooksEnv(self):
11113 """Build hooks env.
11115 This runs on the master, the primary and all the secondaries.
11119 "DISK": self.op.disk,
11120 "AMOUNT": self.op.amount,
11122 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11125 def BuildHooksNodes(self):
11126 """Build hooks nodes.
11129 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11130 return (nl, nl)
11132 def CheckPrereq(self):
11133 """Check prerequisites.
11135 This checks that the instance is in the cluster.
11138 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11139 assert instance is not None, \
11140 "Cannot retrieve locked instance %s" % self.op.instance_name
11141 nodenames = list(instance.all_nodes)
11142 for node in nodenames:
11143 _CheckNodeOnline(self, node)
11145 self.instance = instance
11147 if instance.disk_template not in constants.DTS_GROWABLE:
11148 raise errors.OpPrereqError("Instance's disk layout does not support"
11149 " growing", errors.ECODE_INVAL)
11151 self.disk = instance.FindDisk(self.op.disk)
11153 if instance.disk_template not in (constants.DT_FILE,
11154 constants.DT_SHARED_FILE):
11155 # TODO: check the free disk space for file, when that feature will be
11156 # supported
11157 _CheckNodesFreeDiskPerVG(self, nodenames,
11158 self.disk.ComputeGrowth(self.op.amount))
11160 def Exec(self, feedback_fn):
11161 """Execute disk grow.
11164 instance = self.instance
11165 disk = self.disk
11167 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11168 assert (self.owned_locks(locking.LEVEL_NODE) ==
11169 self.owned_locks(locking.LEVEL_NODE_RES))
11171 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
11172 if not disks_ok:
11173 raise errors.OpExecError("Cannot activate block device to grow")
11175 feedback_fn("Growing disk %s of instance '%s' by %s" %
11176 (self.op.disk, instance.name,
11177 utils.FormatUnit(self.op.amount, "h")))
11179 # First run all grow ops in dry-run mode
11180 for node in instance.all_nodes:
11181 self.cfg.SetDiskID(disk, node)
11182 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
11183 result.Raise("Grow request failed to node %s" % node)
11185 # We know that (as far as we can test) operations across different
11186 # nodes will succeed, time to run it for real
11187 for node in instance.all_nodes:
11188 self.cfg.SetDiskID(disk, node)
11189 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
11190 result.Raise("Grow request failed to node %s" % node)
11192 # TODO: Rewrite code to work properly
11193 # DRBD goes into sync mode for a short amount of time after executing the
11194 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
11195 # calling "resize" in sync mode fails. Sleeping for a short amount of
11196 # time is a work-around.
11197 time.sleep(5)
11199 disk.RecordGrow(self.op.amount)
11200 self.cfg.Update(instance, feedback_fn)
11202 # Changes have been recorded, release node lock
11203 _ReleaseLocks(self, locking.LEVEL_NODE)
11205 # Downgrade lock while waiting for sync
11206 self.glm.downgrade(locking.LEVEL_INSTANCE)
11208 if self.op.wait_for_sync:
11209 disk_abort = not _WaitForSync(self, instance, disks=[disk])
11210 if disk_abort:
11211 self.proc.LogWarning("Disk sync-ing has not returned a good"
11212 " status; please check the instance")
11213 if instance.admin_state != constants.ADMINST_UP:
11214 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
11215 elif instance.admin_state != constants.ADMINST_UP:
11216 self.proc.LogWarning("Not shutting down the disk even if the instance is"
11217 " not supposed to be running because no wait for"
11218 " sync mode was requested")
11220 assert self.owned_locks(locking.LEVEL_NODE_RES)
11221 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
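# Hedged usage sketch (field names as used by this LU, values invented):
# growing disk 0 of "inst1" by 1 GiB would be submitted roughly as
#   op = opcodes.OpInstanceGrowDisk(instance_name="inst1", disk=0,
#                                   amount=1024, wait_for_sync=True)
# The dry-run pass above ensures all nodes can grow the device before any
# node actually changes it.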
11224 class LUInstanceQueryData(NoHooksLU):
11225 """Query runtime instance data.
11230 def ExpandNames(self):
11231 self.needed_locks = {}
11233 # Use locking if requested or when non-static information is wanted
11234 if not (self.op.static or self.op.use_locking):
11235 self.LogWarning("Non-static data requested, locks need to be acquired")
11236 self.op.use_locking = True
11238 if self.op.instances or not self.op.use_locking:
11239 # Expand instance names right here
11240 self.wanted_names = _GetWantedInstances(self, self.op.instances)
11241 else:
11242 # Will use acquired locks
11243 self.wanted_names = None
11245 if self.op.use_locking:
11246 self.share_locks = _ShareAll()
11248 if self.wanted_names is None:
11249 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
11250 else:
11251 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
11253 self.needed_locks[locking.LEVEL_NODE] = []
11254 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11256 def DeclareLocks(self, level):
11257 if self.op.use_locking and level == locking.LEVEL_NODE:
11258 self._LockInstancesNodes()
11260 def CheckPrereq(self):
11261 """Check prerequisites.
11263 This only checks the optional instance list against the existing names.
11266 if self.wanted_names is None:
11267 assert self.op.use_locking, "Locking was not used"
11268 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
11270 self.wanted_instances = \
11271 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
11273 def _ComputeBlockdevStatus(self, node, instance_name, dev):
11274 """Returns the status of a block device
11277 if self.op.static or not node:
11278 return None
11280 self.cfg.SetDiskID(dev, node)
11282 result = self.rpc.call_blockdev_find(node, dev)
11283 if result.offline:
11284 return None
11286 result.Raise("Can't compute disk status for %s" % instance_name)
11288 status = result.payload
11289 if status is None:
11290 return None
11292 return (status.dev_path, status.major, status.minor,
11293 status.sync_percent, status.estimated_time,
11294 status.is_degraded, status.ldisk_status)
11296 def _ComputeDiskStatus(self, instance, snode, dev):
11297 """Compute block device status.
11300 if dev.dev_type in constants.LDS_DRBD:
11301 # we change the snode then (otherwise we use the one passed in)
11302 if dev.logical_id[0] == instance.primary_node:
11303 snode = dev.logical_id[1]
11304 else:
11305 snode = dev.logical_id[0]
11307 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
11308 instance.name, dev)
11309 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
11312 dev_children = map(compat.partial(self._ComputeDiskStatus,
11313 instance, snode),
11314 dev.children)
11316 return {
11319 "iv_name": dev.iv_name,
11320 "dev_type": dev.dev_type,
11321 "logical_id": dev.logical_id,
11322 "physical_id": dev.physical_id,
11323 "pstatus": dev_pstatus,
11324 "sstatus": dev_sstatus,
11325 "children": dev_children,
11330 def Exec(self, feedback_fn):
11331 """Gather and return data"""
11334 cluster = self.cfg.GetClusterInfo()
11336 pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
11337 for i in self.wanted_instances)
11338 for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
11339 if self.op.static or pnode.offline:
11340 remote_state = None
11341 if pnode.offline:
11342 self.LogWarning("Primary node %s is marked offline, returning static"
11343 " information only for instance %s" %
11344 (pnode.name, instance.name))
11345 else:
11346 remote_info = self.rpc.call_instance_info(instance.primary_node,
11347 instance.name,
11348 instance.hypervisor)
11349 remote_info.Raise("Error checking node %s" % instance.primary_node)
11350 remote_info = remote_info.payload
11351 if remote_info and "state" in remote_info:
11352 remote_state = "up"
11354 if instance.admin_state == constants.ADMINST_UP:
11355 remote_state = "down"
11357 remote_state = instance.admin_state
11359 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
11360 instance.disks)
11362 result[instance.name] = {
11363 "name": instance.name,
11364 "config_state": instance.admin_state,
11365 "run_state": remote_state,
11366 "pnode": instance.primary_node,
11367 "snodes": instance.secondary_nodes,
11369 # this happens to be the same format used for hooks
11370 "nics": _NICListToTuple(self, instance.nics),
11371 "disk_template": instance.disk_template,
11373 "hypervisor": instance.hypervisor,
11374 "network_port": instance.network_port,
11375 "hv_instance": instance.hvparams,
11376 "hv_actual": cluster.FillHV(instance, skip_globals=True),
11377 "be_instance": instance.beparams,
11378 "be_actual": cluster.FillBE(instance),
11379 "os_instance": instance.osparams,
11380 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
11381 "serial_no": instance.serial_no,
11382 "mtime": instance.mtime,
11383 "ctime": instance.ctime,
11384 "uuid": instance.uuid,
11390 class LUInstanceSetParams(LogicalUnit):
11391 """Modifies an instances's parameters.
11394 HPATH = "instance-modify"
11395 HTYPE = constants.HTYPE_INSTANCE
11396 REQ_BGL = False
11398 def CheckArguments(self):
11399 if not (self.op.nics or self.op.disks or self.op.disk_template or
11400 self.op.hvparams or self.op.beparams or self.op.os_name or
11401 self.op.online_inst or self.op.offline_inst):
11402 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
11404 if self.op.hvparams:
11405 _CheckGlobalHvParams(self.op.hvparams)
11408 disk_addremove = 0
11409 for disk_op, disk_dict in self.op.disks:
11410 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
11411 if disk_op == constants.DDM_REMOVE:
11412 disk_addremove += 1
11413 continue
11414 elif disk_op == constants.DDM_ADD:
11415 disk_addremove += 1
11416 else:
11417 if not isinstance(disk_op, int):
11418 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
11419 if not isinstance(disk_dict, dict):
11420 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
11421 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11423 if disk_op == constants.DDM_ADD:
11424 mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
11425 if mode not in constants.DISK_ACCESS_SET:
11426 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
11427 errors.ECODE_INVAL)
11428 size = disk_dict.get(constants.IDISK_SIZE, None)
11429 if size is None:
11430 raise errors.OpPrereqError("Required disk parameter size missing",
11431 errors.ECODE_INVAL)
11432 try:
11433 size = int(size)
11434 except (TypeError, ValueError), err:
11435 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
11436 str(err), errors.ECODE_INVAL)
11437 disk_dict[constants.IDISK_SIZE] = size
11438 else:
11439 # modification of disk
11440 if constants.IDISK_SIZE in disk_dict:
11441 raise errors.OpPrereqError("Disk size change not possible, use"
11442 " grow-disk", errors.ECODE_INVAL)
11444 if disk_addremove > 1:
11445 raise errors.OpPrereqError("Only one disk add or remove operation"
11446 " supported at a time", errors.ECODE_INVAL)
11448 if self.op.disks and self.op.disk_template is not None:
11449 raise errors.OpPrereqError("Disk template conversion and other disk"
11450 " changes not supported at the same time",
11451 errors.ECODE_INVAL)
11453 if (self.op.disk_template and
11454 self.op.disk_template in constants.DTS_INT_MIRROR and
11455 self.op.remote_node is None):
11456 raise errors.OpPrereqError("Changing the disk template to a mirrored"
11457 " one requires specifying a secondary node",
11458 errors.ECODE_INVAL)
11461 nic_addremove = 0
11462 for nic_op, nic_dict in self.op.nics:
11463 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
11464 if nic_op == constants.DDM_REMOVE:
11465 nic_addremove += 1
11466 continue
11467 elif nic_op == constants.DDM_ADD:
11468 nic_addremove += 1
11469 else:
11470 if not isinstance(nic_op, int):
11471 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
11472 if not isinstance(nic_dict, dict):
11473 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
11474 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11476 # nic_dict should be a dict
11477 nic_ip = nic_dict.get(constants.INIC_IP, None)
11478 if nic_ip is not None:
11479 if nic_ip.lower() == constants.VALUE_NONE:
11480 nic_dict[constants.INIC_IP] = None
11481 else:
11482 if not netutils.IPAddress.IsValid(nic_ip):
11483 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
11484 errors.ECODE_INVAL)
11486 nic_bridge = nic_dict.get("bridge", None)
11487 nic_link = nic_dict.get(constants.INIC_LINK, None)
11488 if nic_bridge and nic_link:
11489 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
11490 " at the same time", errors.ECODE_INVAL)
11491 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
11492 nic_dict["bridge"] = None
11493 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
11494 nic_dict[constants.INIC_LINK] = None
11496 if nic_op == constants.DDM_ADD:
11497 nic_mac = nic_dict.get(constants.INIC_MAC, None)
11498 if nic_mac is None:
11499 nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
11501 if constants.INIC_MAC in nic_dict:
11502 nic_mac = nic_dict[constants.INIC_MAC]
11503 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11504 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
11506 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
11507 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
11508 " modifying an existing nic",
11509 errors.ECODE_INVAL)
11511 if nic_addremove > 1:
11512 raise errors.OpPrereqError("Only one NIC add or remove operation"
11513 " supported at a time", errors.ECODE_INVAL)
11515 def ExpandNames(self):
11516 self._ExpandAndLockInstance()
11517 # Can't even acquire node locks in shared mode as upcoming changes in
11518 # Ganeti 2.6 will start to modify the node object on disk conversion
11519 self.needed_locks[locking.LEVEL_NODE] = []
11520 self.needed_locks[locking.LEVEL_NODE_RES] = []
11521 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11523 def DeclareLocks(self, level):
11524 if level == locking.LEVEL_NODE:
11525 self._LockInstancesNodes()
11526 if self.op.disk_template and self.op.remote_node:
11527 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11528 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
11529 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
11530 # Copy node locks
11531 self.needed_locks[locking.LEVEL_NODE_RES] = \
11532 self.needed_locks[locking.LEVEL_NODE][:]
11534 def BuildHooksEnv(self):
11535 """Build hooks env.
11537 This runs on the master, primary and secondaries.
11540 args = dict()
11541 if constants.BE_MINMEM in self.be_new:
11542 args["minmem"] = self.be_new[constants.BE_MINMEM]
11543 if constants.BE_MAXMEM in self.be_new:
11544 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
11545 if constants.BE_VCPUS in self.be_new:
11546 args["vcpus"] = self.be_new[constants.BE_VCPUS]
11547 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
11548 # information at all.
11550 args["nics"] = []
11551 nic_override = dict(self.op.nics)
11552 for idx, nic in enumerate(self.instance.nics):
11553 if idx in nic_override:
11554 this_nic_override = nic_override[idx]
11556 this_nic_override = {}
11557 if constants.INIC_IP in this_nic_override:
11558 ip = this_nic_override[constants.INIC_IP]
11559 else:
11560 ip = nic.ip
11561 if constants.INIC_MAC in this_nic_override:
11562 mac = this_nic_override[constants.INIC_MAC]
11563 else:
11564 mac = nic.mac
11565 if idx in self.nic_pnew:
11566 nicparams = self.nic_pnew[idx]
11567 else:
11568 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
11569 mode = nicparams[constants.NIC_MODE]
11570 link = nicparams[constants.NIC_LINK]
11571 args["nics"].append((ip, mac, mode, link))
11572 if constants.DDM_ADD in nic_override:
11573 ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
11574 mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
11575 nicparams = self.nic_pnew[constants.DDM_ADD]
11576 mode = nicparams[constants.NIC_MODE]
11577 link = nicparams[constants.NIC_LINK]
11578 args["nics"].append((ip, mac, mode, link))
11579 elif constants.DDM_REMOVE in nic_override:
11580 del args["nics"][-1]
11582 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
11583 if self.op.disk_template:
11584 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
11588 def BuildHooksNodes(self):
11589 """Build hooks nodes.
11592 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11593 return (nl, nl)
11595 def CheckPrereq(self):
11596 """Check prerequisites.
11598 This only checks the instance list against the existing names.
11601 # checking the new params on the primary/secondary nodes
11603 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11604 cluster = self.cluster = self.cfg.GetClusterInfo()
11605 assert self.instance is not None, \
11606 "Cannot retrieve locked instance %s" % self.op.instance_name
11607 pnode = instance.primary_node
11608 nodelist = list(instance.all_nodes)
11609 pnode_info = self.cfg.GetNodeInfo(pnode)
11610 self.diskparams = self.cfg.GetNodeGroup(pnode_info.group).diskparams
11613 if self.op.os_name and not self.op.force:
11614 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
11615 self.op.force_variant)
11616 instance_os = self.op.os_name
11617 else:
11618 instance_os = instance.os
11620 if self.op.disk_template:
11621 if instance.disk_template == self.op.disk_template:
11622 raise errors.OpPrereqError("Instance already has disk template %s" %
11623 instance.disk_template, errors.ECODE_INVAL)
11625 if (instance.disk_template,
11626 self.op.disk_template) not in self._DISK_CONVERSIONS:
11627 raise errors.OpPrereqError("Unsupported disk template conversion from"
11628 " %s to %s" % (instance.disk_template,
11629 self.op.disk_template),
11630 errors.ECODE_INVAL)
11631 _CheckInstanceState(self, instance, INSTANCE_DOWN,
11632 msg="cannot change disk template")
11633 if self.op.disk_template in constants.DTS_INT_MIRROR:
11634 if self.op.remote_node == pnode:
11635 raise errors.OpPrereqError("Given new secondary node %s is the same"
11636 " as the primary node of the instance" %
11637 self.op.remote_node, errors.ECODE_STATE)
11638 _CheckNodeOnline(self, self.op.remote_node)
11639 _CheckNodeNotDrained(self, self.op.remote_node)
11640 # FIXME: here we assume that the old instance type is DT_PLAIN
11641 assert instance.disk_template == constants.DT_PLAIN
11642 disks = [{constants.IDISK_SIZE: d.size,
11643 constants.IDISK_VG: d.logical_id[0]}
11644 for d in instance.disks]
11645 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
11646 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
11648 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
11649 if pnode_info.group != snode_info.group:
11650 self.LogWarning("The primary and secondary nodes are in two"
11651 " different node groups; the disk parameters"
11652 " from the first disk's node group will be"
11655 # hvparams processing
11656 if self.op.hvparams:
11657 hv_type = instance.hypervisor
11658 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
11659 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
11660 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
11662 # local check
11663 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
11664 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
11665 self.hv_proposed = self.hv_new = hv_new # the new actual values
11666 self.hv_inst = i_hvdict # the new dict (without defaults)
11667 else:
11668 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
11669 instance.hvparams)
11670 self.hv_new = self.hv_inst = {}
11672 # beparams processing
11673 if self.op.beparams:
11674 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
11675 use_none=True)
11676 objects.UpgradeBeParams(i_bedict)
11677 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
11678 be_new = cluster.SimpleFillBE(i_bedict)
11679 self.be_proposed = self.be_new = be_new # the new actual values
11680 self.be_inst = i_bedict # the new dict (without defaults)
11681 else:
11682 self.be_new = self.be_inst = {}
11683 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
11684 be_old = cluster.FillBE(instance)
11686 # CPU param validation -- checking every time a parameter is
11687 # changed to cover all cases where either CPU mask or vcpus have
11688 # been changed
11689 if (constants.BE_VCPUS in self.be_proposed and
11690 constants.HV_CPU_MASK in self.hv_proposed):
11691 cpu_list = \
11692 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
11693 # Verify mask is consistent with number of vCPUs. Can skip this
11694 # test if only 1 entry in the CPU mask, which means same mask
11695 # is applied to all vCPUs.
11696 if (len(cpu_list) > 1 and
11697 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
11698 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
11700 (self.be_proposed[constants.BE_VCPUS],
11701 self.hv_proposed[constants.HV_CPU_MASK]),
11702 errors.ECODE_INVAL)
11704 # Only perform this test if a new CPU mask is given
11705 if constants.HV_CPU_MASK in self.hv_new:
11706 # Calculate the largest CPU number requested
11707 max_requested_cpu = max(map(max, cpu_list))
11708 # Check that all of the instance's nodes have enough physical CPUs to
11709 # satisfy the requested CPU mask
11710 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
11711 max_requested_cpu + 1, instance.hypervisor)
11713 # osparams processing
11714 if self.op.osparams:
11715 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
11716 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
11717 self.os_inst = i_osdict # the new dict (without defaults)
11718 else:
11719 self.os_inst = {}
11721 self.warn = []
11723 #TODO(dynmem): do the appropriate check involving MINMEM
11724 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
11725 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
11726 mem_check_list = [pnode]
11727 if be_new[constants.BE_AUTO_BALANCE]:
11728 # either we changed auto_balance to yes or it was from before
11729 mem_check_list.extend(instance.secondary_nodes)
11730 instance_info = self.rpc.call_instance_info(pnode, instance.name,
11731 instance.hypervisor)
11732 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
11733 [instance.hypervisor])
11734 pninfo = nodeinfo[pnode]
11735 msg = pninfo.fail_msg
11736 if msg:
11737 # Assume the primary node is unreachable and go ahead
11738 self.warn.append("Can't get info from primary node %s: %s" %
11739 (pnode, msg))
11740 else:
11741 (_, _, (pnhvinfo, )) = pninfo.payload
11742 if not isinstance(pnhvinfo.get("memory_free", None), int):
11743 self.warn.append("Node data from primary node %s doesn't contain"
11744 " free memory information" % pnode)
11745 elif instance_info.fail_msg:
11746 self.warn.append("Can't get instance runtime information: %s" %
11747 instance_info.fail_msg)
11748 else:
11749 if instance_info.payload:
11750 current_mem = int(instance_info.payload["memory"])
11751 else:
11752 # Assume instance not running
11753 # (there is a slight race condition here, but it's not very
11754 # probable, and we have no other way to check)
11755 # TODO: Describe race condition
11756 current_mem = 0
11757 #TODO(dynmem): do the appropriate check involving MINMEM
11758 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
11759 pnhvinfo["memory_free"])
11760 if miss_mem > 0:
11761 raise errors.OpPrereqError("This change will prevent the instance"
11762 " from starting, due to %d MB of memory"
11763 " missing on its primary node" %
11765 errors.ECODE_NORES)
11767 if be_new[constants.BE_AUTO_BALANCE]:
11768 for node, nres in nodeinfo.items():
11769 if node not in instance.secondary_nodes:
11770 continue
11771 nres.Raise("Can't get info from secondary node %s" % node,
11772 prereq=True, ecode=errors.ECODE_STATE)
11773 (_, _, (nhvinfo, )) = nres.payload
11774 if not isinstance(nhvinfo.get("memory_free", None), int):
11775 raise errors.OpPrereqError("Secondary node %s didn't return free"
11776 " memory information" % node,
11777 errors.ECODE_STATE)
11778 #TODO(dynmem): do the appropriate check involving MINMEM
11779 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
11780 raise errors.OpPrereqError("This change will prevent the instance"
11781 " from failover to its secondary node"
11782 " %s, due to not enough memory" % node,
11783 errors.ECODE_STATE)
11785 # NIC processing
11786 self.nic_pnew = {}
11787 self.nic_pinst = {}
11788 for nic_op, nic_dict in self.op.nics:
11789 if nic_op == constants.DDM_REMOVE:
11790 if not instance.nics:
11791 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
11792 errors.ECODE_INVAL)
11793 continue
11794 if nic_op != constants.DDM_ADD:
11795 # an existing nic
11796 if not instance.nics:
11797 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
11798 " no NICs" % nic_op,
11799 errors.ECODE_INVAL)
11800 if nic_op < 0 or nic_op >= len(instance.nics):
11801 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
11803 (nic_op, len(instance.nics) - 1),
11804 errors.ECODE_INVAL)
11805 old_nic_params = instance.nics[nic_op].nicparams
11806 old_nic_ip = instance.nics[nic_op].ip
11807 else:
11808 old_nic_params = {}
11809 old_nic_ip = None
11811 update_params_dict = dict([(key, nic_dict[key])
11812 for key in constants.NICS_PARAMETERS
11813 if key in nic_dict])
11815 if "bridge" in nic_dict:
11816 update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
11818 new_nic_params = _GetUpdatedParams(old_nic_params,
11819 update_params_dict)
11820 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
11821 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
11822 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
11823 self.nic_pinst[nic_op] = new_nic_params
11824 self.nic_pnew[nic_op] = new_filled_nic_params
11825 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
11827 if new_nic_mode == constants.NIC_MODE_BRIDGED:
11828 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
11829 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
11830 if msg:
11831 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
11832 if self.op.force:
11833 self.warn.append(msg)
11834 else:
11835 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
11836 if new_nic_mode == constants.NIC_MODE_ROUTED:
11837 if constants.INIC_IP in nic_dict:
11838 nic_ip = nic_dict[constants.INIC_IP]
11839 else:
11840 nic_ip = old_nic_ip
11841 if nic_ip is None:
11842 raise errors.OpPrereqError("Cannot set the nic ip to None"
11843 " on a routed nic", errors.ECODE_INVAL)
11844 if constants.INIC_MAC in nic_dict:
11845 nic_mac = nic_dict[constants.INIC_MAC]
11846 if nic_mac is None:
11847 raise errors.OpPrereqError("Cannot set the nic mac to None",
11848 errors.ECODE_INVAL)
11849 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11850 # otherwise generate the mac
11851 nic_dict[constants.INIC_MAC] = \
11852 self.cfg.GenerateMAC(self.proc.GetECId())
11853 else:
11854 # or validate/reserve the current one
11855 try:
11856 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
11857 except errors.ReservationError:
11858 raise errors.OpPrereqError("MAC address %s already in use"
11859 " in cluster" % nic_mac,
11860 errors.ECODE_NOTUNIQUE)
11862 # DISK processing
11863 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
11864 raise errors.OpPrereqError("Disk operations not supported for"
11865 " diskless instances",
11866 errors.ECODE_INVAL)
11867 for disk_op, _ in self.op.disks:
11868 if disk_op == constants.DDM_REMOVE:
11869 if len(instance.disks) == 1:
11870 raise errors.OpPrereqError("Cannot remove the last disk of"
11871 " an instance", errors.ECODE_INVAL)
11872 _CheckInstanceState(self, instance, INSTANCE_DOWN,
11873 msg="cannot remove disks")
11875 if (disk_op == constants.DDM_ADD and
11876 len(instance.disks) >= constants.MAX_DISKS):
11877 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
11878 " add more" % constants.MAX_DISKS,
11879 errors.ECODE_STATE)
11880 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
11881 # an existing disk
11882 if disk_op < 0 or disk_op >= len(instance.disks):
11883 raise errors.OpPrereqError("Invalid disk index %s, valid values"
11885 (disk_op, len(instance.disks)),
11886 errors.ECODE_INVAL)
11888 # disabling the instance
11889 if self.op.offline_inst:
11890 _CheckInstanceState(self, instance, INSTANCE_DOWN,
11891 msg="cannot change instance state to offline")
11893 # enabling the instance
11894 if self.op.online_inst:
11895 _CheckInstanceState(self, instance, INSTANCE_OFFLINE,
11896 msg="cannot make instance go online")
11898 def _ConvertPlainToDrbd(self, feedback_fn):
11899 """Converts an instance from plain to drbd.
11902 feedback_fn("Converting template to drbd")
11903 instance = self.instance
11904 pnode = instance.primary_node
11905 snode = self.op.remote_node
11907 assert instance.disk_template == constants.DT_PLAIN
11909 # create a fake disk info for _GenerateDiskTemplate
11910 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
11911 constants.IDISK_VG: d.logical_id[0]}
11912 for d in instance.disks]
11913 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
11914 instance.name, pnode, [snode],
11915 disk_info, None, None, 0, feedback_fn,
11916 self.diskparams)
11917 info = _GetInstanceInfoText(instance)
11918 feedback_fn("Creating aditional volumes...")
11919 # first, create the missing data and meta devices
11920 for disk in new_disks:
11921 # unfortunately this is... not too nice
11922 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
11923 info, True)
11924 for child in disk.children:
11925 _CreateSingleBlockDev(self, snode, instance, child, info, True)
11926 # at this stage, all new LVs have been created, we can rename the
11927 # old ones
11928 feedback_fn("Renaming original volumes...")
11929 rename_list = [(o, n.children[0].logical_id)
11930 for (o, n) in zip(instance.disks, new_disks)]
11931 result = self.rpc.call_blockdev_rename(pnode, rename_list)
11932 result.Raise("Failed to rename original LVs")
11934 feedback_fn("Initializing DRBD devices...")
11935 # all child devices are in place, we can now create the DRBD devices
11936 for disk in new_disks:
11937 for node in [pnode, snode]:
11938 f_create = node == pnode
11939 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
11941 # at this point, the instance has been modified
11942 instance.disk_template = constants.DT_DRBD8
11943 instance.disks = new_disks
11944 self.cfg.Update(instance, feedback_fn)
11946 # Release node locks while waiting for sync
11947 _ReleaseLocks(self, locking.LEVEL_NODE)
11949 # disks are created, waiting for sync
11950 disk_abort = not _WaitForSync(self, instance,
11951 oneshot=not self.op.wait_for_sync)
11952 if disk_abort:
11953 raise errors.OpExecError("There are some degraded disks for"
11954 " this instance, please cleanup manually")
11956 # Node resource locks will be released by caller
11958 def _ConvertDrbdToPlain(self, feedback_fn):
11959 """Converts an instance from drbd to plain.
11962 instance = self.instance
11964 assert len(instance.secondary_nodes) == 1
11965 assert instance.disk_template == constants.DT_DRBD8
11967 pnode = instance.primary_node
11968 snode = instance.secondary_nodes[0]
11969 feedback_fn("Converting template to plain")
11971 old_disks = instance.disks
11972 new_disks = [d.children[0] for d in old_disks]
11974 # copy over size and mode
11975 for parent, child in zip(old_disks, new_disks):
11976 child.size = parent.size
11977 child.mode = parent.mode
11979 # update instance structure
11980 instance.disks = new_disks
11981 instance.disk_template = constants.DT_PLAIN
11982 self.cfg.Update(instance, feedback_fn)
11984 # Release locks in case removing disks takes a while
11985 _ReleaseLocks(self, locking.LEVEL_NODE)
11987 feedback_fn("Removing volumes on the secondary node...")
11988 for disk in old_disks:
11989 self.cfg.SetDiskID(disk, snode)
11990 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
11991 if msg:
11992 self.LogWarning("Could not remove block device %s on node %s,"
11993 " continuing anyway: %s", disk.iv_name, snode, msg)
11995 feedback_fn("Removing unneeded volumes on the primary node...")
11996 for idx, disk in enumerate(old_disks):
11997 meta = disk.children[1]
11998 self.cfg.SetDiskID(meta, pnode)
11999 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
12000 if msg:
12001 self.LogWarning("Could not remove metadata for disk %d on node %s,"
12002 " continuing anyway: %s", idx, pnode, msg)
12004 # this is a DRBD disk, return its port to the pool
12005 for disk in old_disks:
12006 tcp_port = disk.logical_id[2]
12007 self.cfg.AddTcpUdpPort(tcp_port)
12009 # Node resource locks will be released by caller
12011 def Exec(self, feedback_fn):
12012 """Modifies an instance.
12014 All parameters take effect only at the next restart of the instance.
12017 # Process here the warnings from CheckPrereq, as we don't have a
12018 # feedback_fn there.
12019 for warn in self.warn:
12020 feedback_fn("WARNING: %s" % warn)
12022 assert ((self.op.disk_template is None) ^
12023 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
12024 "Not owning any node resource locks"
12027 instance = self.instance
12029 for disk_op, disk_dict in self.op.disks:
12030 if disk_op == constants.DDM_REMOVE:
12031 # remove the last disk
12032 device = instance.disks.pop()
12033 device_idx = len(instance.disks)
12034 for node, disk in device.ComputeNodeTree(instance.primary_node):
12035 self.cfg.SetDiskID(disk, node)
12036 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
12037 if msg:
12038 self.LogWarning("Could not remove disk/%d on node %s: %s,"
12039 " continuing anyway", device_idx, node, msg)
12040 result.append(("disk/%d" % device_idx, "remove"))
12042 # if this is a DRBD disk, return its port to the pool
12043 if device.dev_type in constants.LDS_DRBD:
12044 tcp_port = device.logical_id[2]
12045 self.cfg.AddTcpUdpPort(tcp_port)
12046 elif disk_op == constants.DDM_ADD:
12047 # add a new disk
12048 if instance.disk_template in (constants.DT_FILE,
12049 constants.DT_SHARED_FILE):
12050 file_driver, file_path = instance.disks[0].logical_id
12051 file_path = os.path.dirname(file_path)
12052 else:
12053 file_driver = file_path = None
12054 disk_idx_base = len(instance.disks)
12055 new_disk = _GenerateDiskTemplate(self,
12056 instance.disk_template,
12057 instance.name, instance.primary_node,
12058 instance.secondary_nodes,
12059 [disk_dict],
12060 file_path,
12061 file_driver,
12062 disk_idx_base,
12063 feedback_fn,
12064 self.diskparams)[0]
12065 instance.disks.append(new_disk)
12066 info = _GetInstanceInfoText(instance)
12068 logging.info("Creating volume %s for instance %s",
12069 new_disk.iv_name, instance.name)
12070 # Note: this needs to be kept in sync with _CreateDisks
12072 for node in instance.all_nodes:
12073 f_create = node == instance.primary_node
12074 try:
12075 _CreateBlockDev(self, node, instance, new_disk,
12076 f_create, info, f_create)
12077 except errors.OpExecError, err:
12078 self.LogWarning("Failed to create volume %s (%s) on"
12080 new_disk.iv_name, new_disk, node, err)
12081 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
12082 (new_disk.size, new_disk.mode)))
12083 else:
12084 # change a given disk
12085 instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
12086 result.append(("disk.mode/%d" % disk_op,
12087 disk_dict[constants.IDISK_MODE]))
12089 if self.op.disk_template:
12090 if __debug__:
12091 check_nodes = set(instance.all_nodes)
12092 if self.op.remote_node:
12093 check_nodes.add(self.op.remote_node)
12094 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
12095 owned = self.owned_locks(level)
12096 assert not (check_nodes - owned), \
12097 ("Not owning the correct locks, owning %r, expected at least %r" %
12098 (owned, check_nodes))
12100 r_shut = _ShutdownInstanceDisks(self, instance)
12101 if not r_shut:
12102 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
12103 " proceed with disk template conversion")
12104 mode = (instance.disk_template, self.op.disk_template)
12105 try:
12106 self._DISK_CONVERSIONS[mode](self, feedback_fn)
12107 except:
12108 self.cfg.ReleaseDRBDMinors(instance.name)
12109 raise
12110 result.append(("disk_template", self.op.disk_template))
12112 assert instance.disk_template == self.op.disk_template, \
12113 ("Expected disk template '%s', found '%s'" %
12114 (self.op.disk_template, instance.disk_template))
12116 # Release node and resource locks if there are any (they might already have
12117 # been released during disk conversion)
12118 _ReleaseLocks(self, locking.LEVEL_NODE)
12119 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
12121 # NIC changes
12122 for nic_op, nic_dict in self.op.nics:
12123 if nic_op == constants.DDM_REMOVE:
12124 # remove the last nic
12125 del instance.nics[-1]
12126 result.append(("nic.%d" % len(instance.nics), "remove"))
12127 elif nic_op == constants.DDM_ADD:
12128 # mac and bridge should be set, by now
12129 mac = nic_dict[constants.INIC_MAC]
12130 ip = nic_dict.get(constants.INIC_IP, None)
12131 nicparams = self.nic_pinst[constants.DDM_ADD]
12132 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
12133 instance.nics.append(new_nic)
12134 result.append(("nic.%d" % (len(instance.nics) - 1),
12135 "add:mac=%s,ip=%s,mode=%s,link=%s" %
12136 (new_nic.mac, new_nic.ip,
12137 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
12138 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
12139 )))
12140 else:
12141 for key in (constants.INIC_MAC, constants.INIC_IP):
12142 if key in nic_dict:
12143 setattr(instance.nics[nic_op], key, nic_dict[key])
12144 if nic_op in self.nic_pinst:
12145 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
12146 for key, val in nic_dict.iteritems():
12147 result.append(("nic.%s/%d" % (key, nic_op), val))
12149 # hvparams changes
12150 if self.op.hvparams:
12151 instance.hvparams = self.hv_inst
12152 for key, val in self.op.hvparams.iteritems():
12153 result.append(("hv/%s" % key, val))
12155 # beparams changes
12156 if self.op.beparams:
12157 instance.beparams = self.be_inst
12158 for key, val in self.op.beparams.iteritems():
12159 result.append(("be/%s" % key, val))
12161 # OS change
12162 if self.op.os_name:
12163 instance.os = self.op.os_name
12165 # osparams changes
12166 if self.op.osparams:
12167 instance.osparams = self.os_inst
12168 for key, val in self.op.osparams.iteritems():
12169 result.append(("os/%s" % key, val))
12171 # online/offline instance
12172 if self.op.online_inst:
12173 self.cfg.MarkInstanceDown(instance.name)
12174 result.append(("admin_state", constants.ADMINST_DOWN))
12175 if self.op.offline_inst:
12176 self.cfg.MarkInstanceOffline(instance.name)
12177 result.append(("admin_state", constants.ADMINST_OFFLINE))
12179 self.cfg.Update(instance, feedback_fn)
12181 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
12182 self.owned_locks(locking.LEVEL_NODE)), \
12183 "All node locks should have been released by now"
12187 _DISK_CONVERSIONS = {
12188 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
12189 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
12190 }
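# The dispatch table above keys on (current, requested) template pairs; as a
# sketch (hypothetical instance), requesting DT_DRBD8 for a plain instance
# resolves to
#   self._DISK_CONVERSIONS[(constants.DT_PLAIN, constants.DT_DRBD8)]
# i.e. _ConvertPlainToDrbd; any pair not listed is rejected in CheckPrereq as
# an unsupported conversion.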
12193 class LUInstanceChangeGroup(LogicalUnit):
12194 HPATH = "instance-change-group"
12195 HTYPE = constants.HTYPE_INSTANCE
12196 REQ_BGL = False
12198 def ExpandNames(self):
12199 self.share_locks = _ShareAll()
12200 self.needed_locks = {
12201 locking.LEVEL_NODEGROUP: [],
12202 locking.LEVEL_NODE: [],
12203 }
12205 self._ExpandAndLockInstance()
12207 if self.op.target_groups:
12208 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12209 self.op.target_groups)
12210 else:
12211 self.req_target_uuids = None
12213 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12215 def DeclareLocks(self, level):
12216 if level == locking.LEVEL_NODEGROUP:
12217 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12219 if self.req_target_uuids:
12220 lock_groups = set(self.req_target_uuids)
12222 # Lock all groups used by instance optimistically; this requires going
12223 # via the node before it's locked, requiring verification later on
12224 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
12225 lock_groups.update(instance_groups)
12226 else:
12227 # No target groups, need to lock all of them
12228 lock_groups = locking.ALL_SET
12230 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12232 elif level == locking.LEVEL_NODE:
12233 if self.req_target_uuids:
12234 # Lock all nodes used by instances
12235 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12236 self._LockInstancesNodes()
12238 # Lock all nodes in all potential target groups
12239 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
12240 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
12241 member_nodes = [node_name
12242 for group in lock_groups
12243 for node_name in self.cfg.GetNodeGroup(group).members]
12244 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12245 else:
12246 # Lock all nodes as all groups are potential targets
12247 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12249 def CheckPrereq(self):
12250 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12251 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12252 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12254 assert (self.req_target_uuids is None or
12255 owned_groups.issuperset(self.req_target_uuids))
12256 assert owned_instances == set([self.op.instance_name])
12258 # Get instance information
12259 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12261 # Check if node groups for locked instance are still correct
12262 assert owned_nodes.issuperset(self.instance.all_nodes), \
12263 ("Instance %s's nodes changed while we kept the lock" %
12264 self.op.instance_name)
12266 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
12267 owned_groups)
12269 if self.req_target_uuids:
12270 # User requested specific target groups
12271 self.target_uuids = self.req_target_uuids
12273 # All groups except those used by the instance are potential targets
12274 self.target_uuids = owned_groups - inst_groups
12276 conflicting_groups = self.target_uuids & inst_groups
12277 if conflicting_groups:
12278 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
12279 " used by the instance '%s'" %
12280 (utils.CommaJoin(conflicting_groups),
12281 self.op.instance_name),
12282 errors.ECODE_INVAL)
12284 if not self.target_uuids:
12285 raise errors.OpPrereqError("There are no possible target groups",
12286 errors.ECODE_INVAL)
12288 def BuildHooksEnv(self):
12289 """Build hooks env.
12292 assert self.target_uuids
12295 "TARGET_GROUPS": " ".join(self.target_uuids),
12298 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12300 return env
12302 def BuildHooksNodes(self):
12303 """Build hooks nodes.
12306 mn = self.cfg.GetMasterNode()
12307 return ([mn], [mn])
12309 def Exec(self, feedback_fn):
12310 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12312 assert instances == [self.op.instance_name], "Instance not locked"
12314 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12315 instances=instances, target_groups=list(self.target_uuids))
12317 ial.Run(self.op.iallocator)
12319 if not ial.success:
12320 raise errors.OpPrereqError("Can't compute solution for changing group of"
12321 " instance '%s' using iallocator '%s': %s" %
12322 (self.op.instance_name, self.op.iallocator,
12323 ial.info),
12324 errors.ECODE_NORES)
12326 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12328 self.LogInfo("Iallocator returned %s job(s) for changing group of"
12329 " instance '%s'", len(jobs), self.op.instance_name)
12331 return ResultWithJobs(jobs)
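# Rough flow of the group change above (values invented): for
#   OpInstanceChangeGroup(instance_name="inst1", target_groups=["group2"])
# the CHG_GROUP iallocator request returns (moved, failed, jobs) in the same
# format as node evacuation, and _LoadNodeEvacResult turns it into jobs that
# the master submits via the ResultWithJobs container.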
12334 class LUBackupQuery(NoHooksLU):
12335 """Query the exports list
12340 def ExpandNames(self):
12341 self.needed_locks = {}
12342 self.share_locks[locking.LEVEL_NODE] = 1
12343 if not self.op.nodes:
12344 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12345 else:
12346 self.needed_locks[locking.LEVEL_NODE] = \
12347 _GetWantedNodes(self, self.op.nodes)
12349 def Exec(self, feedback_fn):
12350 """Compute the list of all the exported system images.
12353 @return: a dictionary with the structure node->(export-list)
12354 where export-list is a list of the instances exported on
12355 that node.
12358 self.nodes = self.owned_locks(locking.LEVEL_NODE)
12359 rpcresult = self.rpc.call_export_list(self.nodes)
12360 result = {}
12361 for node in rpcresult:
12362 if rpcresult[node].fail_msg:
12363 result[node] = False
12364 else:
12365 result[node] = rpcresult[node].payload
12367 return result
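# Illustrative return value (node and instance names invented):
#   {"node1": ["inst1", "inst2"], "node2": False}
# where False marks a node whose export list could not be queried.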
12370 class LUBackupPrepare(NoHooksLU):
12371 """Prepares an instance for an export and returns useful information.
12376 def ExpandNames(self):
12377 self._ExpandAndLockInstance()
12379 def CheckPrereq(self):
12380 """Check prerequisites.
12383 instance_name = self.op.instance_name
12385 self.instance = self.cfg.GetInstanceInfo(instance_name)
12386 assert self.instance is not None, \
12387 "Cannot retrieve locked instance %s" % self.op.instance_name
12388 _CheckNodeOnline(self, self.instance.primary_node)
12390 self._cds = _GetClusterDomainSecret()
12392 def Exec(self, feedback_fn):
12393 """Prepares an instance for an export.
12396 instance = self.instance
12398 if self.op.mode == constants.EXPORT_MODE_REMOTE:
12399 salt = utils.GenerateSecret(8)
12401 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
12402 result = self.rpc.call_x509_cert_create(instance.primary_node,
12403 constants.RIE_CERT_VALIDITY)
12404 result.Raise("Can't create X509 key and certificate on %s" % result.node)
12406 (name, cert_pem) = result.payload
12408 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
12409 cert_pem)
12411 return {
12412 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
12413 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
12415 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
12421 class LUBackupExport(LogicalUnit):
12422 """Export an instance to an image in the cluster.
12425 HPATH = "instance-export"
12426 HTYPE = constants.HTYPE_INSTANCE
12427 REQ_BGL = False
12429 def CheckArguments(self):
12430 """Check the arguments.
12433 self.x509_key_name = self.op.x509_key_name
12434 self.dest_x509_ca_pem = self.op.destination_x509_ca
12436 if self.op.mode == constants.EXPORT_MODE_REMOTE:
12437 if not self.x509_key_name:
12438 raise errors.OpPrereqError("Missing X509 key name for encryption",
12439 errors.ECODE_INVAL)
12441 if not self.dest_x509_ca_pem:
12442 raise errors.OpPrereqError("Missing destination X509 CA",
12443 errors.ECODE_INVAL)
12445 def ExpandNames(self):
12446 self._ExpandAndLockInstance()
12448 # Lock all nodes for local exports
12449 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12450 # FIXME: lock only instance primary and destination node
12452 # Sad but true, for now we have to lock all nodes, as we don't know where
12453 # the previous export might be, and in this LU we search for it and
12454 # remove it from its current node. In the future we could fix this by:
12455 # - making a tasklet to search (share-lock all), then create the
12456 # new one, then one to remove, after
12457 # - removing the removal operation altogether
12458 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12460 def DeclareLocks(self, level):
12461 """Last minute lock declaration."""
12462 # All nodes are locked anyway, so nothing to do here.
12464 def BuildHooksEnv(self):
12465 """Build hooks env.
12467 This will run on the master, primary node and target node.
12471 "EXPORT_MODE": self.op.mode,
12472 "EXPORT_NODE": self.op.target_node,
12473 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
12474 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
12475 # TODO: Generic function for boolean env variables
12476 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
12479 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12481 return env
12483 def BuildHooksNodes(self):
12484 """Build hooks nodes.
12487 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
12489 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12490 nl.append(self.op.target_node)
12492 return (nl, nl)
12494 def CheckPrereq(self):
12495 """Check prerequisites.
12497 This checks that the instance and node names are valid.
12500 instance_name = self.op.instance_name
12502 self.instance = self.cfg.GetInstanceInfo(instance_name)
12503 assert self.instance is not None, \
12504 "Cannot retrieve locked instance %s" % self.op.instance_name
12505 _CheckNodeOnline(self, self.instance.primary_node)
12507 if (self.op.remove_instance and
12508 self.instance.admin_state == constants.ADMINST_UP and
12509 not self.op.shutdown):
12510 raise errors.OpPrereqError("Can not remove instance without shutting it"
12513 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12514 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
12515 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
12516 assert self.dst_node is not None
12518 _CheckNodeOnline(self, self.dst_node.name)
12519 _CheckNodeNotDrained(self, self.dst_node.name)
12522 self.dest_disk_info = None
12523 self.dest_x509_ca = None
12525 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
12526 self.dst_node = None
12528 if len(self.op.target_node) != len(self.instance.disks):
12529 raise errors.OpPrereqError(("Received destination information for %s"
12530 " disks, but instance %s has %s disks") %
12531 (len(self.op.target_node), instance_name,
12532 len(self.instance.disks)),
12533 errors.ECODE_INVAL)
12535 cds = _GetClusterDomainSecret()
12537 # Check X509 key name
12538 try:
12539 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
12540 except (TypeError, ValueError), err:
12541 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
12543 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
12544 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
12545 errors.ECODE_INVAL)
12547 # Load and verify CA
12548 try:
12549 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
12550 except OpenSSL.crypto.Error, err:
12551 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
12552 (err, ), errors.ECODE_INVAL)
12554 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
12555 if errcode is not None:
12556 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
12557 (msg, ), errors.ECODE_INVAL)
12559 self.dest_x509_ca = cert
12561 # Verify target information
12562 disk_info = []
12563 for idx, disk_data in enumerate(self.op.target_node):
12564 try:
12565 (host, port, magic) = \
12566 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
12567 except errors.GenericError, err:
12568 raise errors.OpPrereqError("Target info for disk %s: %s" %
12569 (idx, err), errors.ECODE_INVAL)
12571 disk_info.append((host, port, magic))
12573 assert len(disk_info) == len(self.op.target_node)
12574 self.dest_disk_info = disk_info
12576 else:
12577 raise errors.ProgrammerError("Unhandled export mode %r" %
12578 self.op.mode)
12580 # instance disk type verification
12581 # TODO: Implement export support for file-based disks
12582 for disk in self.instance.disks:
12583 if disk.dev_type == constants.LD_FILE:
12584 raise errors.OpPrereqError("Export not supported for instances with"
12585 " file-based disks", errors.ECODE_INVAL)
12587 def _CleanupExports(self, feedback_fn):
12588 """Removes exports of current instance from all other nodes.
12590 If an instance in a cluster with nodes A..D was exported to node C, its
12591 exports will be removed from the nodes A, B and D.
12594 assert self.op.mode != constants.EXPORT_MODE_REMOTE
12596 nodelist = self.cfg.GetNodeList()
12597 nodelist.remove(self.dst_node.name)
12599 # on one-node clusters nodelist will be empty after the removal
12600 # if we proceed the backup would be removed because OpBackupQuery
12601 # substitutes an empty list with the full cluster node list.
12602 iname = self.instance.name
12603 if nodelist:
12604 feedback_fn("Removing old exports for instance %s" % iname)
12605 exportlist = self.rpc.call_export_list(nodelist)
12606 for node in exportlist:
12607 if exportlist[node].fail_msg:
12608 continue
12609 if iname in exportlist[node].payload:
12610 msg = self.rpc.call_export_remove(node, iname).fail_msg
12611 if msg:
12612 self.LogWarning("Could not remove older export for instance %s"
12613 " on node %s: %s", iname, node, msg)
12615 def Exec(self, feedback_fn):
12616 """Export an instance to an image in the cluster.
12619 assert self.op.mode in constants.EXPORT_MODES
12621 instance = self.instance
12622 src_node = instance.primary_node
12624 if self.op.shutdown:
12625 # shutdown the instance, but not the disks
12626 feedback_fn("Shutting down instance %s" % instance.name)
12627 result = self.rpc.call_instance_shutdown(src_node, instance,
12628 self.op.shutdown_timeout)
12629 # TODO: Maybe ignore failures if ignore_remove_failures is set
12630 result.Raise("Could not shutdown instance %s on"
12631 " node %s" % (instance.name, src_node))
12633 # set the disks ID correctly since call_instance_start needs the
12634 # correct drbd minor to create the symlinks
12635 for disk in instance.disks:
12636 self.cfg.SetDiskID(disk, src_node)
12638 activate_disks = (instance.admin_state != constants.ADMINST_UP)
12640 if activate_disks:
12641 # Activate the instance disks if we're exporting a stopped instance
12642 feedback_fn("Activating disks for %s" % instance.name)
12643 _StartInstanceDisks(self, instance, None)
12645 try:
12646 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
12647 instance)
12649 helper.CreateSnapshots()
12650 try:
12651 if (self.op.shutdown and
12652 instance.admin_state == constants.ADMINST_UP and
12653 not self.op.remove_instance):
12654 assert not activate_disks
12655 feedback_fn("Starting instance %s" % instance.name)
12656 result = self.rpc.call_instance_start(src_node,
12657 (instance, None, None), False)
12658 msg = result.fail_msg
12659 if msg:
12660 feedback_fn("Failed to start instance: %s" % msg)
12661 _ShutdownInstanceDisks(self, instance)
12662 raise errors.OpExecError("Could not start instance: %s" % msg)
12664 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12665 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
12666 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
12667 connect_timeout = constants.RIE_CONNECT_TIMEOUT
12668 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
12670 (key_name, _, _) = self.x509_key_name
12672 dest_ca_pem = \
12673 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
12674 self.dest_x509_ca)
12676 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
12677 key_name, dest_ca_pem,
12678 timeouts)
12680 finally:
12681 helper.Cleanup()
12682 # Check for backwards compatibility
12683 assert len(dresults) == len(instance.disks)
12684 assert compat.all(isinstance(i, bool) for i in dresults), \
12685 "Not all results are boolean: %r" % dresults
12689 feedback_fn("Deactivating disks for %s" % instance.name)
12690 _ShutdownInstanceDisks(self, instance)
12692 if not (compat.all(dresults) and fin_resu):
12693 failures = []
12694 if not fin_resu:
12695 failures.append("export finalization")
12696 if not compat.all(dresults):
12697 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
12698 if not dsk)
12699 failures.append("disk export: disk(s) %s" % fdsk)
12701 raise errors.OpExecError("Export failed, errors in %s" %
12702 utils.CommaJoin(failures))
12704 # At this point, the export was successful, we can cleanup/finish
12706 # Remove instance if requested
12707 if self.op.remove_instance:
12708 feedback_fn("Removing instance %s" % instance.name)
12709 _RemoveInstance(self, feedback_fn, instance,
12710 self.op.ignore_remove_failures)
12712 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12713 self._CleanupExports(feedback_fn)
12715 return fin_resu, dresults
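# Return value sketch (values illustrative): a successful local export of a
# two-disk instance yields (fin_resu, dresults) == (True, [True, True]);
# any False entry would have raised OpExecError above instead of returning.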
12718 class LUBackupRemove(NoHooksLU):
12719 """Remove exports related to the named instance.
12724 def ExpandNames(self):
12725 self.needed_locks = {}
12726 # We need all nodes to be locked in order for RemoveExport to work, but we
12727 # don't need to lock the instance itself, as nothing will happen to it (and
12728 # we can remove exports also for a removed instance)
12729 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12731 def Exec(self, feedback_fn):
12732 """Remove any export.
12735 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
12736 # If the instance was not found we'll try with the name that was passed in.
12737 # This will only work if it was an FQDN, though.
12739 if not instance_name:
12741 instance_name = self.op.instance_name
12743 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
12744 exportlist = self.rpc.call_export_list(locked_nodes)
12745 found = False
12746 for node in exportlist:
12747 msg = exportlist[node].fail_msg
12748 if msg:
12749 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
12750 continue
12751 if instance_name in exportlist[node].payload:
12752 found = True
12753 result = self.rpc.call_export_remove(node, instance_name)
12754 msg = result.fail_msg
12755 if msg:
12756 logging.error("Could not remove export for instance %s"
12757 " on node %s: %s", instance_name, node, msg)
12759 if fqdn_warn and not found:
12760 feedback_fn("Export not found. If trying to remove an export belonging"
12761 " to a deleted instance please use its Fully Qualified"
12765 class LUGroupAdd(LogicalUnit):
12766 """Logical unit for creating node groups.
12769 HPATH = "group-add"
12770 HTYPE = constants.HTYPE_GROUP
12773 def ExpandNames(self):
12774 # We need the new group's UUID here so that we can create and acquire the
12775 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
12776 # that it should not check whether the UUID exists in the configuration.
12777 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
12778 self.needed_locks = {}
12779 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12781 def CheckPrereq(self):
12782 """Check prerequisites.
12784 This checks that the given group name is not an existing node group
12789 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12790 except errors.OpPrereqError:
12793 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
12794 " node group (UUID: %s)" %
12795 (self.op.group_name, existing_uuid),
12796 errors.ECODE_EXISTS)
12798 if self.op.ndparams:
12799 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12801 if self.op.hv_state:
12802 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
12803 else:
12804 self.new_hv_state = None
12806 if self.op.disk_state:
12807 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
12808 else:
12809 self.new_disk_state = None
12811 if self.op.diskparams:
12812 for templ in constants.DISK_TEMPLATES:
12813 if templ not in self.op.diskparams:
12814 self.op.diskparams[templ] = {}
12815 utils.ForceDictType(self.op.diskparams[templ], constants.DISK_DT_TYPES)
12816 else:
12817 self.op.diskparams = self.cfg.GetClusterInfo().diskparams
12819 if self.op.ipolicy:
12820 cluster = self.cfg.GetClusterInfo()
12821 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
12822 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy)
12824 def BuildHooksEnv(self):
12825 """Build hooks env.
12829 "GROUP_NAME": self.op.group_name,
12832 def BuildHooksNodes(self):
12833 """Build hooks nodes.
12836 mn = self.cfg.GetMasterNode()
12837 return ([mn], [mn])
12839 def Exec(self, feedback_fn):
12840 """Add the node group to the cluster.
12843 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
12844 uuid=self.group_uuid,
12845 alloc_policy=self.op.alloc_policy,
12846 ndparams=self.op.ndparams,
12847 diskparams=self.op.diskparams,
12848 ipolicy=self.op.ipolicy,
12849 hv_state_static=self.new_hv_state,
12850 disk_state_static=self.new_disk_state)
12852 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
12853 del self.remove_locks[locking.LEVEL_NODEGROUP]
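# Usage sketch (hypothetical client-side code, not part of this module):
# this LU is reached by submitting the matching opcode, e.g.
#   op = opcodes.OpGroupAdd(group_name="rack1",
#                           alloc_policy=constants.ALLOC_POLICY_PREFERRED)
# which the master daemon dispatches to LUGroupAdd.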
12856 class LUGroupAssignNodes(NoHooksLU):
12857 """Logical unit for assigning nodes to groups.
12862 def ExpandNames(self):
12863 # These raise errors.OpPrereqError on their own:
12864 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12865 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
12867 # We want to lock all the affected nodes and groups. We have readily
12868 # available the list of nodes, and the *destination* group. To gather the
12869 # list of "source" groups, we need to fetch node information later on.
12870 self.needed_locks = {
12871 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
12872 locking.LEVEL_NODE: self.op.nodes,
12873 }
12875 def DeclareLocks(self, level):
12876 if level == locking.LEVEL_NODEGROUP:
12877 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
12879 # Try to get all affected nodes' groups without having the group or node
12880 # lock yet. Needs verification later in the code flow.
12881 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
12883 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
12885 def CheckPrereq(self):
12886 """Check prerequisites.
12889 assert self.needed_locks[locking.LEVEL_NODEGROUP]
12890 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
12891 frozenset(self.op.nodes))
12893 expected_locks = (set([self.group_uuid]) |
12894 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
12895 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
12896 if actual_locks != expected_locks:
12897 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
12898 " current groups are '%s', used to be '%s'" %
12899 (utils.CommaJoin(expected_locks),
12900 utils.CommaJoin(actual_locks)))
12902 self.node_data = self.cfg.GetAllNodesInfo()
12903 self.group = self.cfg.GetNodeGroup(self.group_uuid)
12904 instance_data = self.cfg.GetAllInstancesInfo()
12906 if self.group is None:
12907 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12908 (self.op.group_name, self.group_uuid))
12910 (new_splits, previous_splits) = \
12911 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
12912 for node in self.op.nodes],
12913 self.node_data, instance_data)
12915 if new_splits:
12916 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
12918 if not self.op.force:
12919 raise errors.OpExecError("The following instances get split by this"
12920 " change and --force was not given: %s" %
12921 fmt_new_splits)
12922 else:
12923 self.LogWarning("This operation will split the following instances: %s",
12924 fmt_new_splits)
12926 if previous_splits:
12927 self.LogWarning("In addition, these already-split instances continue"
12928 " to be split across groups: %s",
12929 utils.CommaJoin(utils.NiceSort(previous_splits)))
12931 def Exec(self, feedback_fn):
12932 """Assign nodes to a new group.
12935 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
12937 self.cfg.AssignGroupNodes(mods)
12939 @staticmethod
12940 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
12941 """Check for split instances after a node assignment.
12943 This method considers a series of node assignments as an atomic operation,
12944 and returns information about split instances after applying the set of
12945 changes.
12947 In particular, it returns information about newly split instances, and
12948 instances that were already split, and remain so after the change.
12950 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
12951 considered.
12953 @type changes: list of (node_name, new_group_uuid) pairs.
12954 @param changes: list of node assignments to consider.
12955 @param node_data: a dict with data for all nodes
12956 @param instance_data: a dict with all instances to consider
12957 @rtype: a two-tuple
12958 @return: a list of instances that were previously okay and result split as a
12959 consequence of this change, and a list of instances that were previously
12960 split and this change does not fix.
12962 """
12963 changed_nodes = dict((node, group) for node, group in changes
12964 if node_data[node].group != group)
12966 all_split_instances = set()
12967 previously_split_instances = set()
12969 def InstanceNodes(instance):
12970 return [instance.primary_node] + list(instance.secondary_nodes)
12972 for inst in instance_data.values():
12973 if inst.disk_template not in constants.DTS_INT_MIRROR:
12974 continue
12976 instance_nodes = InstanceNodes(inst)
12978 if len(set(node_data[node].group for node in instance_nodes)) > 1:
12979 previously_split_instances.add(inst.name)
12981 if len(set(changed_nodes.get(node, node_data[node].group)
12982 for node in instance_nodes)) > 1:
12983 all_split_instances.add(inst.name)
12985 return (list(all_split_instances - previously_split_instances),
12986 list(previously_split_instances & all_split_instances))
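# Worked example (hypothetical data): with nodes n1 and n2 in group g1, node
# n3 in group g2, a DRBD instance on (n1, n2) and changes = [("n2", "g2")]:
# both nodes were in g1, so the instance is not previously split; with
# changed_nodes applied its nodes map to {g1, g2}, so it appears in the first
# list returned (newly split instances).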
12989 class _GroupQuery(_QueryBase):
12990 FIELDS = query.GROUP_FIELDS
12992 def ExpandNames(self, lu):
12993 lu.needed_locks = {}
12995 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
12996 self._cluster = lu.cfg.GetClusterInfo()
12997 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
12999 if not self.names:
13000 self.wanted = [name_to_uuid[name]
13001 for name in utils.NiceSort(name_to_uuid.keys())]
13002 else:
13003 # Accept names to be either names or UUIDs.
13004 missing = []
13005 self.wanted = []
13006 all_uuid = frozenset(self._all_groups.keys())
13008 for name in self.names:
13009 if name in all_uuid:
13010 self.wanted.append(name)
13011 elif name in name_to_uuid:
13012 self.wanted.append(name_to_uuid[name])
13013 else:
13014 missing.append(name)
13016 if missing:
13017 raise errors.OpPrereqError("Some groups do not exist: %s" %
13018 utils.CommaJoin(missing),
13019 errors.ECODE_NOENT)
13021 def DeclareLocks(self, lu, level):
13022 pass
13024 def _GetQueryData(self, lu):
13025 """Computes the list of node groups and their attributes.
13028 do_nodes = query.GQ_NODE in self.requested_data
13029 do_instances = query.GQ_INST in self.requested_data
13031 group_to_nodes = None
13032 group_to_instances = None
13034 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
13035 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
13036 # latter GetAllInstancesInfo() is not enough, for we have to go through
13037 # instance->node. Hence, we will need to process nodes even if we only need
13038 # instance information.
13039 if do_nodes or do_instances:
13040 all_nodes = lu.cfg.GetAllNodesInfo()
13041 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
13042 node_to_group = {}
13044 for node in all_nodes.values():
13045 if node.group in group_to_nodes:
13046 group_to_nodes[node.group].append(node.name)
13047 node_to_group[node.name] = node.group
13049 if do_instances:
13050 all_instances = lu.cfg.GetAllInstancesInfo()
13051 group_to_instances = dict((uuid, []) for uuid in self.wanted)
13053 for instance in all_instances.values():
13054 node = instance.primary_node
13055 if node in node_to_group:
13056 group_to_instances[node_to_group[node]].append(instance.name)
13058 if not do_nodes:
13059 # Do not pass on node information if it was not requested.
13060 group_to_nodes = None
13062 return query.GroupQueryData(self._cluster,
13063 [self._all_groups[uuid]
13064 for uuid in self.wanted],
13065 group_to_nodes, group_to_instances)
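# Shape of the intermediate mappings (illustrative): for a group "uuid1"
# containing node "node1", which is primary for instance "inst1":
#   group_to_nodes = {"uuid1": ["node1"]}
#   group_to_instances = {"uuid1": ["inst1"]}
# Either mapping stays None when the corresponding data was not requested.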
13068 class LUGroupQuery(NoHooksLU):
13069 """Logical unit for querying node groups.
13074 def CheckArguments(self):
13075 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
13076 self.op.output_fields, False)
13078 def ExpandNames(self):
13079 self.gq.ExpandNames(self)
13081 def DeclareLocks(self, level):
13082 self.gq.DeclareLocks(self, level)
13084 def Exec(self, feedback_fn):
13085 return self.gq.OldStyleQuery(self)
13088 class LUGroupSetParams(LogicalUnit):
13089 """Modifies the parameters of a node group.
13092 HPATH = "group-modify"
13093 HTYPE = constants.HTYPE_GROUP
13096 def CheckArguments(self):
13099 self.op.diskparams,
13100 self.op.alloc_policy,
13102 self.op.disk_state,
13106 if all_changes.count(None) == len(all_changes):
13107 raise errors.OpPrereqError("Please pass at least one modification",
13108 errors.ECODE_INVAL)
13110 def ExpandNames(self):
13111 # This raises errors.OpPrereqError on its own:
13112 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13114 self.needed_locks = {
13115 locking.LEVEL_NODEGROUP: [self.group_uuid],
13116 }
13118 def CheckPrereq(self):
13119 """Check prerequisites.
13122 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13124 if self.group is None:
13125 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13126 (self.op.group_name, self.group_uuid))
13128 if self.op.ndparams:
13129 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
13130 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13131 self.new_ndparams = new_ndparams
13133 if self.op.diskparams:
13134 self.new_diskparams = dict()
13135 for templ in constants.DISK_TEMPLATES:
13136 if templ not in self.op.diskparams:
13137 self.op.diskparams[templ] = {}
13138 new_templ_params = _GetUpdatedParams(self.group.diskparams[templ],
13139 self.op.diskparams[templ])
13140 utils.ForceDictType(new_templ_params, constants.DISK_DT_TYPES)
13141 self.new_diskparams[templ] = new_templ_params
13143 if self.op.hv_state:
13144 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
13145 self.group.hv_state_static)
13147 if self.op.disk_state:
13148 self.new_disk_state = \
13149 _MergeAndVerifyDiskState(self.op.disk_state,
13150 self.group.disk_state_static)
13152 if self.op.ipolicy:
13153 g_ipolicy = {}
13154 for key, value in self.op.ipolicy.iteritems():
13155 g_ipolicy[key] = _GetUpdatedParams(self.group.ipolicy.get(key, {}),
13156 value,
13157 use_none=True)
13158 utils.ForceDictType(g_ipolicy[key], constants.ISPECS_PARAMETER_TYPES)
13159 self.new_ipolicy = g_ipolicy
13160 objects.InstancePolicy.CheckParameterSyntax(self.new_ipolicy)
13162 def BuildHooksEnv(self):
13163 """Build hooks env.
13167 "GROUP_NAME": self.op.group_name,
13168 "NEW_ALLOC_POLICY": self.op.alloc_policy,
13171 def BuildHooksNodes(self):
13172 """Build hooks nodes.
13175 mn = self.cfg.GetMasterNode()
13176 return ([mn], [mn])
13178 def Exec(self, feedback_fn):
13179 """Modifies the node group.
13184 if self.op.ndparams:
13185 self.group.ndparams = self.new_ndparams
13186 result.append(("ndparams", str(self.group.ndparams)))
13188 if self.op.diskparams:
13189 self.group.diskparams = self.new_diskparams
13190 result.append(("diskparams", str(self.group.diskparams)))
13192 if self.op.alloc_policy:
13193 self.group.alloc_policy = self.op.alloc_policy
13195 if self.op.hv_state:
13196 self.group.hv_state_static = self.new_hv_state
13198 if self.op.disk_state:
13199 self.group.disk_state_static = self.new_disk_state
13201 if self.op.ipolicy:
13202 self.group.ipolicy = self.new_ipolicy
13204 self.cfg.Update(self.group, feedback_fn)
13205 return result
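# The accumulated "result" is a list of (parameter name, new value) pairs,
# e.g. (illustrative) [("alloc_policy", "last_resort")], used by the caller
# to report what was changed.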
13208 class LUGroupRemove(LogicalUnit):
13209 HPATH = "group-remove"
13210 HTYPE = constants.HTYPE_GROUP
13211 REQ_BGL = False
13213 def ExpandNames(self):
13214 # This raises errors.OpPrereqError on its own:
13215 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13216 self.needed_locks = {
13217 locking.LEVEL_NODEGROUP: [self.group_uuid],
13218 }
13220 def CheckPrereq(self):
13221 """Check prerequisites.
13223 This checks that the given group name exists as a node group, that it is
13224 empty (i.e., contains no nodes), and that it is not the last group of the
13225 cluster.
13227 """
13228 # Verify that the group is empty.
13229 group_nodes = [node.name
13230 for node in self.cfg.GetAllNodesInfo().values()
13231 if node.group == self.group_uuid]
13234 raise errors.OpPrereqError("Group '%s' not empty, has the following"
13236 (self.op.group_name,
13237 utils.CommaJoin(utils.NiceSort(group_nodes))),
13238 errors.ECODE_STATE)
13240 # Verify the cluster would not be left group-less.
13241 if len(self.cfg.GetNodeGroupList()) == 1:
13242 raise errors.OpPrereqError("Group '%s' is the only group,"
13243 " cannot be removed" %
13244 self.op.group_name,
13245 errors.ECODE_STATE)
13247 def BuildHooksEnv(self):
13248 """Build hooks env.
13252 "GROUP_NAME": self.op.group_name,
13255 def BuildHooksNodes(self):
13256 """Build hooks nodes.
13259 mn = self.cfg.GetMasterNode()
13260 return ([mn], [mn])
13262 def Exec(self, feedback_fn):
13263 """Remove the node group.
13267 self.cfg.RemoveNodeGroup(self.group_uuid)
13268 except errors.ConfigurationError:
13269 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
13270 (self.op.group_name, self.group_uuid))
13272 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13275 class LUGroupRename(LogicalUnit):
13276 HPATH = "group-rename"
13277 HTYPE = constants.HTYPE_GROUP
13278 REQ_BGL = False
13280 def ExpandNames(self):
13281 # This raises errors.OpPrereqError on its own:
13282 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13284 self.needed_locks = {
13285 locking.LEVEL_NODEGROUP: [self.group_uuid],
13286 }
13288 def CheckPrereq(self):
13289 """Check prerequisites.
13291 Ensures the requested new name is not yet used.
13293 """
13294 try:
13295 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
13296 except errors.OpPrereqError:
13297 pass
13298 else:
13299 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
13300 " node group (UUID: %s)" %
13301 (self.op.new_name, new_name_uuid),
13302 errors.ECODE_EXISTS)
13304 def BuildHooksEnv(self):
13305 """Build hooks env.
13309 "OLD_NAME": self.op.group_name,
13310 "NEW_NAME": self.op.new_name,
13313 def BuildHooksNodes(self):
13314 """Build hooks nodes.
13317 mn = self.cfg.GetMasterNode()
13319 all_nodes = self.cfg.GetAllNodesInfo()
13320 all_nodes.pop(mn, None)
13322 run_nodes = [mn]
13323 run_nodes.extend(node.name for node in all_nodes.values()
13324 if node.group == self.group_uuid)
13326 return (run_nodes, run_nodes)
13328 def Exec(self, feedback_fn):
13329 """Rename the node group.
13332 group = self.cfg.GetNodeGroup(self.group_uuid)
13334 if group is None:
13335 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13336 (self.op.group_name, self.group_uuid))
13338 group.name = self.op.new_name
13339 self.cfg.Update(group, feedback_fn)
13341 return self.op.new_name
13344 class LUGroupEvacuate(LogicalUnit):
13345 HPATH = "group-evacuate"
13346 HTYPE = constants.HTYPE_GROUP
13347 REQ_BGL = False
13349 def ExpandNames(self):
13350 # This raises errors.OpPrereqError on its own:
13351 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13353 if self.op.target_groups:
13354 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
13355 self.op.target_groups)
13356 else:
13357 self.req_target_uuids = []
13359 if self.group_uuid in self.req_target_uuids:
13360 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
13361 " as a target group (targets are %s)" %
13363 utils.CommaJoin(self.req_target_uuids)),
13364 errors.ECODE_INVAL)
13366 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
13368 self.share_locks = _ShareAll()
13369 self.needed_locks = {
13370 locking.LEVEL_INSTANCE: [],
13371 locking.LEVEL_NODEGROUP: [],
13372 locking.LEVEL_NODE: [],
13373 }
13375 def DeclareLocks(self, level):
13376 if level == locking.LEVEL_INSTANCE:
13377 assert not self.needed_locks[locking.LEVEL_INSTANCE]
13379 # Lock instances optimistically, needs verification once node and group
13380 # locks have been acquired
13381 self.needed_locks[locking.LEVEL_INSTANCE] = \
13382 self.cfg.GetNodeGroupInstances(self.group_uuid)
13384 elif level == locking.LEVEL_NODEGROUP:
13385 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13387 if self.req_target_uuids:
13388 lock_groups = set([self.group_uuid] + self.req_target_uuids)
13390 # Lock all groups used by instances optimistically; this requires going
13391 # via the node before it's locked, requiring verification later on
13392 lock_groups.update(group_uuid
13393 for instance_name in
13394 self.owned_locks(locking.LEVEL_INSTANCE)
13395 for group_uuid in
13396 self.cfg.GetInstanceNodeGroups(instance_name))
13397 else:
13398 # No target groups, need to lock all of them
13399 lock_groups = locking.ALL_SET
13401 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13403 elif level == locking.LEVEL_NODE:
13404 # This will only lock the nodes in the group to be evacuated which
13405 # contain actual instances
13406 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13407 self._LockInstancesNodes()
13409 # Lock all nodes in group to be evacuated and target groups
13410 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13411 assert self.group_uuid in owned_groups
13412 member_nodes = [node_name
13413 for group in owned_groups
13414 for node_name in self.cfg.GetNodeGroup(group).members]
13415 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13417 def CheckPrereq(self):
13418 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13419 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13420 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13422 assert owned_groups.issuperset(self.req_target_uuids)
13423 assert self.group_uuid in owned_groups
13425 # Check if locked instances are still correct
13426 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
13428 # Get instance information
13429 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
13431 # Check if node groups for locked instances are still correct
13432 for instance_name in owned_instances:
13433 inst = self.instances[instance_name]
13434 assert owned_nodes.issuperset(inst.all_nodes), \
13435 "Instance %s's nodes changed while we kept the lock" % instance_name
13437 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
13438 owned_groups)
13440 assert self.group_uuid in inst_groups, \
13441 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
13443 if self.req_target_uuids:
13444 # User requested specific target groups
13445 self.target_uuids = self.req_target_uuids
13446 else:
13447 # All groups except the one to be evacuated are potential targets
13448 self.target_uuids = [group_uuid for group_uuid in owned_groups
13449 if group_uuid != self.group_uuid]
13451 if not self.target_uuids:
13452 raise errors.OpPrereqError("There are no possible target groups",
13453 errors.ECODE_INVAL)
13455 def BuildHooksEnv(self):
13456 """Build hooks env.
13460 "GROUP_NAME": self.op.group_name,
13461 "TARGET_GROUPS": " ".join(self.target_uuids),
13464 def BuildHooksNodes(self):
13465 """Build hooks nodes.
13468 mn = self.cfg.GetMasterNode()
13470 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
13472 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
13474 return (run_nodes, run_nodes)
13476 def Exec(self, feedback_fn):
13477 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13479 assert self.group_uuid not in self.target_uuids
13481 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
13482 instances=instances, target_groups=self.target_uuids)
13484 ial.Run(self.op.iallocator)
13486 if not ial.success:
13487 raise errors.OpPrereqError("Can't compute group evacuation using"
13488 " iallocator '%s': %s" %
13489 (self.op.iallocator, ial.info),
13490 errors.ECODE_NORES)
13492 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13494 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
13495 len(jobs), self.op.group_name)
13497 return ResultWithJobs(jobs)
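# "jobs" as built by _LoadNodeEvacResult() is a list of opcode lists, one
# inner list per job to submit, e.g. (shape only, illustrative):
#   [[opcodes.OpInstanceMigrate(instance_name="inst1")],
#    [opcodes.OpInstanceMigrate(instance_name="inst2")]]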
13500 class TagsLU(NoHooksLU): # pylint: disable=W0223
13501 """Generic tags LU.
13503 This is an abstract class which is the parent of all the other tags LUs.
13506 def ExpandNames(self):
13507 self.group_uuid = None
13508 self.needed_locks = {}
13509 if self.op.kind == constants.TAG_NODE:
13510 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
13511 self.needed_locks[locking.LEVEL_NODE] = self.op.name
13512 elif self.op.kind == constants.TAG_INSTANCE:
13513 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
13514 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
13515 elif self.op.kind == constants.TAG_NODEGROUP:
13516 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
13518 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
13519 # not possible to acquire the BGL based on opcode parameters)
13521 def CheckPrereq(self):
13522 """Check prerequisites.
13525 if self.op.kind == constants.TAG_CLUSTER:
13526 self.target = self.cfg.GetClusterInfo()
13527 elif self.op.kind == constants.TAG_NODE:
13528 self.target = self.cfg.GetNodeInfo(self.op.name)
13529 elif self.op.kind == constants.TAG_INSTANCE:
13530 self.target = self.cfg.GetInstanceInfo(self.op.name)
13531 elif self.op.kind == constants.TAG_NODEGROUP:
13532 self.target = self.cfg.GetNodeGroup(self.group_uuid)
13533 else:
13534 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
13535 str(self.op.kind), errors.ECODE_INVAL)
13538 class LUTagsGet(TagsLU):
13539 """Returns the tags of a given object.
13544 def ExpandNames(self):
13545 TagsLU.ExpandNames(self)
13547 # Share locks as this is only a read operation
13548 self.share_locks = _ShareAll()
13550 def Exec(self, feedback_fn):
13551 """Returns the tag list.
13554 return list(self.target.GetTags())
13557 class LUTagsSearch(NoHooksLU):
13558 """Searches the tags for a given pattern.
13563 def ExpandNames(self):
13564 self.needed_locks = {}
13566 def CheckPrereq(self):
13567 """Check prerequisites.
13569 This checks the pattern passed for validity by compiling it.
13571 """
13572 try:
13573 self.re = re.compile(self.op.pattern)
13574 except re.error, err:
13575 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
13576 (self.op.pattern, err), errors.ECODE_INVAL)
13578 def Exec(self, feedback_fn):
13579 """Returns the tag list.
13583 tgts = [("/cluster", cfg.GetClusterInfo())]
13584 ilist = cfg.GetAllInstancesInfo().values()
13585 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
13586 nlist = cfg.GetAllNodesInfo().values()
13587 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
13588 tgts.extend(("/nodegroup/%s" % n.name, n)
13589 for n in cfg.GetAllNodeGroupsInfo().values())
13590 results = []
13591 for path, target in tgts:
13592 for tag in target.GetTags():
13593 if self.re.search(tag):
13594 results.append((path, tag))
13595 return results
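# Illustrative run (hypothetical data): with pattern "^db-" and an instance
# "inst1" tagged "db-primary", Exec() returns
#   [("/instances/inst1", "db-primary")]
# Cluster, node and node group tags are matched in the same pass.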
13598 class LUTagsSet(TagsLU):
13599 """Sets a tag on a given object.
13604 def CheckPrereq(self):
13605 """Check prerequisites.
13607 This checks the type and length of the tag name and value.
13610 TagsLU.CheckPrereq(self)
13611 for tag in self.op.tags:
13612 objects.TaggableObject.ValidateTag(tag)
13614 def Exec(self, feedback_fn):
13615 """Sets the tag.
13617 """
13618 try:
13619 for tag in self.op.tags:
13620 self.target.AddTag(tag)
13621 except errors.TagError, err:
13622 raise errors.OpExecError("Error while setting tag: %s" % str(err))
13623 self.cfg.Update(self.target, feedback_fn)
13626 class LUTagsDel(TagsLU):
13627 """Delete a list of tags from a given object.
13632 def CheckPrereq(self):
13633 """Check prerequisites.
13635 This checks that we have the given tag.
13638 TagsLU.CheckPrereq(self)
13639 for tag in self.op.tags:
13640 objects.TaggableObject.ValidateTag(tag)
13641 del_tags = frozenset(self.op.tags)
13642 cur_tags = self.target.GetTags()
13644 diff_tags = del_tags - cur_tags
13645 if diff_tags:
13646 diff_names = ("'%s'" % i for i in sorted(diff_tags))
13647 raise errors.OpPrereqError("Tag(s) %s not found" %
13648 (utils.CommaJoin(diff_names), ),
13649 errors.ECODE_NOENT)
13651 def Exec(self, feedback_fn):
13652 """Remove the tag from the object.
13655 for tag in self.op.tags:
13656 self.target.RemoveTag(tag)
13657 self.cfg.Update(self.target, feedback_fn)
13660 class LUTestDelay(NoHooksLU):
13661 """Sleep for a specified amount of time.
13663 This LU sleeps on the master and/or nodes for a specified amount of
13664 time.
13666 """
13667 REQ_BGL = False
13669 def ExpandNames(self):
13670 """Expand names and set required locks.
13672 This expands the node list, if any.
13675 self.needed_locks = {}
13676 if self.op.on_nodes:
13677 # _GetWantedNodes can be used here, but is not always appropriate to use
13678 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
13679 # more information.
13680 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
13681 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
13683 def _TestDelay(self):
13684 """Do the actual sleep.
13687 if self.op.on_master:
13688 if not utils.TestDelay(self.op.duration):
13689 raise errors.OpExecError("Error during master delay test")
13690 if self.op.on_nodes:
13691 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
13692 for node, node_result in result.items():
13693 node_result.Raise("Failure during rpc call to node %s" % node)
13695 def Exec(self, feedback_fn):
13696 """Execute the test delay opcode, with the wanted repetitions.
13699 if self.op.repeat == 0:
13700 self._TestDelay()
13701 else:
13702 top_value = self.op.repeat - 1
13703 for i in range(self.op.repeat):
13704 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
13705 self._TestDelay()
13708 class LUTestJqueue(NoHooksLU):
13709 """Utility LU to test some aspects of the job queue.
13714 # Must be lower than default timeout for WaitForJobChange to see whether it
13715 # notices changed jobs
13716 _CLIENT_CONNECT_TIMEOUT = 20.0
13717 _CLIENT_CONFIRM_TIMEOUT = 60.0
13719 @classmethod
13720 def _NotifyUsingSocket(cls, cb, errcls):
13721 """Opens a Unix socket and waits for another program to connect.
13723 @type cb: callable
13724 @param cb: Callback to send socket name to client
13725 @type errcls: class
13726 @param errcls: Exception class to use for errors
13728 """
13729 # Using a temporary directory as there's no easy way to create temporary
13730 # sockets without writing a custom loop around tempfile.mktemp and
13731 # socket.bind
13732 tmpdir = tempfile.mkdtemp()
13733 try:
13734 tmpsock = utils.PathJoin(tmpdir, "sock")
13736 logging.debug("Creating temporary socket at %s", tmpsock)
13737 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
13738 try:
13739 sock.bind(tmpsock)
13740 sock.listen(1)
13742 # Send details to client
13743 cb(tmpsock)
13745 # Wait for client to connect before continuing
13746 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
13747 try:
13748 (conn, _) = sock.accept()
13749 except socket.error, err:
13750 raise errcls("Client didn't connect in time (%s)" % err)
13751 finally:
13752 sock.close()
13753 finally:
13754 # Remove as soon as client is connected
13755 shutil.rmtree(tmpdir)
13757 # Wait for client to close
13758 try:
13759 try:
13760 # pylint: disable=E1101
13761 # Instance of '_socketobject' has no ... member
13762 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
13763 conn.recv(1)
13764 except socket.error, err:
13765 raise errcls("Client failed to confirm notification (%s)" % err)
13766 finally:
13767 conn.close()
13769 def _SendNotification(self, test, arg, sockname):
13770 """Sends a notification to the client.
13773 @param test: Test name
13774 @param arg: Test argument (depends on test)
13775 @type sockname: string
13776 @param sockname: Socket path
13779 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
13781 def _Notify(self, prereq, test, arg):
13782 """Notifies the client of a test.
13784 @type prereq: bool
13785 @param prereq: Whether this is a prereq-phase test
13786 @type test: string
13787 @param test: Test name
13788 @param arg: Test argument (depends on test)
13790 """
13791 if prereq:
13792 errcls = errors.OpPrereqError
13793 else:
13794 errcls = errors.OpExecError
13796 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
13797 test, arg),
13798 errcls)
13800 def CheckArguments(self):
13801 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
13802 self.expandnames_calls = 0
13804 def ExpandNames(self):
13805 checkargs_calls = getattr(self, "checkargs_calls", 0)
13806 if checkargs_calls < 1:
13807 raise errors.ProgrammerError("CheckArguments was not called")
13809 self.expandnames_calls += 1
13811 if self.op.notify_waitlock:
13812 self._Notify(True, constants.JQT_EXPANDNAMES, None)
13814 self.LogInfo("Expanding names")
13816 # Get lock on master node (just to get a lock, not for a particular reason)
13817 self.needed_locks = {
13818 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
13819 }
13821 def Exec(self, feedback_fn):
13822 if self.expandnames_calls < 1:
13823 raise errors.ProgrammerError("ExpandNames was not called")
13825 if self.op.notify_exec:
13826 self._Notify(False, constants.JQT_EXEC, None)
13828 self.LogInfo("Executing")
13830 if self.op.log_messages:
13831 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
13832 for idx, msg in enumerate(self.op.log_messages):
13833 self.LogInfo("Sending log message %s", idx + 1)
13834 feedback_fn(constants.JQT_MSGPREFIX + msg)
13835 # Report how many test messages have been sent
13836 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
13838 if self.op.fail:
13839 raise errors.OpExecError("Opcode failure was requested")
13841 return True
13844 class IAllocator(object):
13845 """IAllocator framework.
13847 An IAllocator instance has three sets of attributes:
13848 - cfg that is needed to query the cluster
13849 - input data (all members of the _KEYS class attribute are required)
13850 - four buffer attributes (in|out_data|text), that represent the
13851 input (to the external script) in text and data structure format,
13852 and the output from it, again in two formats
13853 - the result variables from the script (success, info, nodes) for
13854 easy usage
13856 """
13857 # pylint: disable=R0902
13858 # lots of instance attributes
13860 def __init__(self, cfg, rpc_runner, mode, **kwargs):
13861 self.cfg = cfg
13862 self.rpc = rpc_runner
13863 # init buffer variables
13864 self.in_text = self.out_text = self.in_data = self.out_data = None
13865 # init all input fields so that pylint is happy
13866 self.mode = mode
13867 self.memory = self.disks = self.disk_template = None
13868 self.os = self.tags = self.nics = self.vcpus = None
13869 self.hypervisor = None
13870 self.relocate_from = None
13871 self.name = None
13872 self.instances = None
13873 self.evac_mode = None
13874 self.target_groups = []
13875 # computed fields
13876 self.required_nodes = None
13877 # init result fields
13878 self.success = self.info = self.result = None
13880 try:
13881 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
13882 except KeyError:
13883 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
13884 " IAllocator" % self.mode)
13886 keyset = [n for (n, _) in keydata]
13888 for key in kwargs:
13889 if key not in keyset:
13890 raise errors.ProgrammerError("Invalid input parameter '%s' to"
13891 " IAllocator" % key)
13892 setattr(self, key, kwargs[key])
13894 for key in keyset:
13895 if key not in kwargs:
13896 raise errors.ProgrammerError("Missing input parameter '%s' to"
13897 " IAllocator" % key)
13898 self._BuildInputData(compat.partial(fn, self), keydata)
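# Instantiation sketch (hypothetical values): every key listed in _MODE_DATA
# for the chosen mode must be passed as a keyword argument, e.g.:
#   ial = IAllocator(cfg, rpc_runner, constants.IALLOCATOR_MODE_ALLOC,
#                    name="inst1.example.com", memory=1024, disks=[],
#                    disk_template=constants.DT_DRBD8, os="debian-image",
#                    tags=[], nics=[], vcpus=1, hypervisor=None)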
13900 def _ComputeClusterData(self):
13901 """Compute the generic allocator input data.
13903 This is the data that is independent of the actual operation.
13905 """
13906 cfg = self.cfg
13907 cluster_info = cfg.GetClusterInfo()
13908 # cluster data
13909 data = {
13910 "version": constants.IALLOCATOR_VERSION,
13911 "cluster_name": cfg.GetClusterName(),
13912 "cluster_tags": list(cluster_info.GetTags()),
13913 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
13914 # we don't have job IDs
13915 }
13916 ninfo = cfg.GetAllNodesInfo()
13917 iinfo = cfg.GetAllInstancesInfo().values()
13918 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
13921 node_list = [n.name for n in ninfo.values() if n.vm_capable]
13923 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
13924 hypervisor_name = self.hypervisor
13925 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
13926 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
13927 else:
13928 hypervisor_name = cluster_info.primary_hypervisor
13930 node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
13931 [hypervisor_name])
13932 node_iinfo = \
13933 self.rpc.call_all_instances_info(node_list,
13934 cluster_info.enabled_hypervisors)
13936 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
13938 config_ndata = self._ComputeBasicNodeData(ninfo)
13939 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
13940 i_list, config_ndata)
13941 assert len(data["nodes"]) == len(ninfo), \
13942 "Incomplete node data computed"
13944 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
13946 self.in_data = data
13948 @staticmethod
13949 def _ComputeNodeGroupData(cfg):
13950 """Compute node groups data.
13952 """
13953 ng = dict((guuid, {
13954 "name": gdata.name,
13955 "alloc_policy": gdata.alloc_policy,
13956 })
13957 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
13959 return ng
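# Shape of the returned mapping (illustrative):
#   {"uuid1": {"name": "default", "alloc_policy": "preferred"}}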
13961 @staticmethod
13962 def _ComputeBasicNodeData(node_cfg):
13963 """Compute global node data.
13965 @rtype: dict
13966 @returns: a dict of name: (node dict, node config)
13968 """
13969 # fill in static (config-based) values
13970 node_results = dict((ninfo.name, {
13971 "tags": list(ninfo.GetTags()),
13972 "primary_ip": ninfo.primary_ip,
13973 "secondary_ip": ninfo.secondary_ip,
13974 "offline": ninfo.offline,
13975 "drained": ninfo.drained,
13976 "master_candidate": ninfo.master_candidate,
13977 "group": ninfo.group,
13978 "master_capable": ninfo.master_capable,
13979 "vm_capable": ninfo.vm_capable,
13981 for ninfo in node_cfg.values())
13983 return node_results
13985 @staticmethod
13986 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
13987 node_results):
13988 """Compute global node data.
13990 @param node_results: the basic node structures as filled from the config
13992 """
13993 #TODO(dynmem): compute the right data on MAX and MIN memory
13994 # make a copy of the current dict
13995 node_results = dict(node_results)
13996 for nname, nresult in node_data.items():
13997 assert nname in node_results, "Missing basic data for node %s" % nname
13998 ninfo = node_cfg[nname]
14000 if not (ninfo.offline or ninfo.drained):
14001 nresult.Raise("Can't get data for node %s" % nname)
14002 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
14003 nname)
14004 remote_info = _MakeLegacyNodeInfo(nresult.payload)
14006 for attr in ["memory_total", "memory_free", "memory_dom0",
14007 "vg_size", "vg_free", "cpu_total"]:
14008 if attr not in remote_info:
14009 raise errors.OpExecError("Node '%s' didn't return attribute"
14010 " '%s'" % (nname, attr))
14011 if not isinstance(remote_info[attr], int):
14012 raise errors.OpExecError("Node '%s' returned invalid value"
14014 (nname, attr, remote_info[attr]))
14015 # compute memory used by primary instances
14016 i_p_mem = i_p_up_mem = 0
14017 for iinfo, beinfo in i_list:
14018 if iinfo.primary_node == nname:
14019 i_p_mem += beinfo[constants.BE_MAXMEM]
14020 if iinfo.name not in node_iinfo[nname].payload:
14021 i_used_mem = 0
14022 else:
14023 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
14024 i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
14025 remote_info["memory_free"] -= max(0, i_mem_diff)
14027 if iinfo.admin_state == constants.ADMINST_UP:
14028 i_p_up_mem += beinfo[constants.BE_MAXMEM]
14030 # compute memory used by instances
14031 pnr_dyn = {
14032 "total_memory": remote_info["memory_total"],
14033 "reserved_memory": remote_info["memory_dom0"],
14034 "free_memory": remote_info["memory_free"],
14035 "total_disk": remote_info["vg_size"],
14036 "free_disk": remote_info["vg_free"],
14037 "total_cpus": remote_info["cpu_total"],
14038 "i_pri_memory": i_p_mem,
14039 "i_pri_up_memory": i_p_up_mem,
14041 pnr_dyn.update(node_results[nname])
14042 node_results[nname] = pnr_dyn
14044 return node_results
14046 @staticmethod
14047 def _ComputeInstanceData(cluster_info, i_list):
14048 """Compute global instance data.
14050 """
14051 instance_data = {}
14052 for iinfo, beinfo in i_list:
14053 nic_data = []
14054 for nic in iinfo.nics:
14055 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
14056 nic_dict = {
14057 "mac": nic.mac,
14058 "ip": nic.ip,
14059 "mode": filled_params[constants.NIC_MODE],
14060 "link": filled_params[constants.NIC_LINK],
14062 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
14063 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
14064 nic_data.append(nic_dict)
14065 pir = {
14066 "tags": list(iinfo.GetTags()),
14067 "admin_state": iinfo.admin_state,
14068 "vcpus": beinfo[constants.BE_VCPUS],
14069 "memory": beinfo[constants.BE_MAXMEM],
14070 "os": iinfo.os,
14071 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
14072 "nics": nic_data,
14073 "disks": [{constants.IDISK_SIZE: dsk.size,
14074 constants.IDISK_MODE: dsk.mode}
14075 for dsk in iinfo.disks],
14076 "disk_template": iinfo.disk_template,
14077 "hypervisor": iinfo.hypervisor,
14079 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
14081 instance_data[iinfo.name] = pir
14083 return instance_data
14085 def _AddNewInstance(self):
14086 """Add new instance data to allocator structure.
14088 This in combination with _AllocatorGetClusterData will create the
14089 correct structure needed as input for the allocator.
14091 The checks for the completeness of the opcode must have already been
14092 done.
14094 """
14095 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
14097 if self.disk_template in constants.DTS_INT_MIRROR:
14098 self.required_nodes = 2
14099 else:
14100 self.required_nodes = 1
14102 request = {
14103 "name": self.name,
14104 "disk_template": self.disk_template,
14105 "tags": self.tags,
14106 "os": self.os,
14107 "vcpus": self.vcpus,
14108 "memory": self.memory,
14109 "disks": self.disks,
14110 "disk_space_total": disk_space,
14111 "nics": self.nics,
14112 "required_nodes": self.required_nodes,
14113 "hypervisor": self.hypervisor,
14114 }
14116 return request
14118 def _AddRelocateInstance(self):
14119 """Add relocate instance data to allocator structure.
14121 This in combination with _IAllocatorGetClusterData will create the
14122 correct structure needed as input for the allocator.
14124 The checks for the completeness of the opcode must have already been
14125 done.
14127 """
14128 instance = self.cfg.GetInstanceInfo(self.name)
14129 if instance is None:
14130 raise errors.ProgrammerError("Unknown instance '%s' passed to"
14131 " IAllocator" % self.name)
14133 if instance.disk_template not in constants.DTS_MIRRORED:
14134 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
14135 errors.ECODE_INVAL)
14137 if instance.disk_template in constants.DTS_INT_MIRROR and \
14138 len(instance.secondary_nodes) != 1:
14139 raise errors.OpPrereqError("Instance has not exactly one secondary node",
14140 errors.ECODE_STATE)
14142 self.required_nodes = 1
14143 disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
14144 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
14148 "disk_space_total": disk_space,
14149 "required_nodes": self.required_nodes,
14150 "relocate_from": self.relocate_from,
14154 def _AddNodeEvacuate(self):
14155 """Get data for node-evacuate requests.
14159 "instances": self.instances,
14160 "evac_mode": self.evac_mode,
14163 def _AddChangeGroup(self):
14164 """Get data for node-evacuate requests.
14168 "instances": self.instances,
14169 "target_groups": self.target_groups,
14172 def _BuildInputData(self, fn, keydata):
14173 """Build input data structures.
14176 self._ComputeClusterData()
14179 request["type"] = self.mode
14180 for keyname, keytype in keydata:
14181 if keyname not in request:
14182 raise errors.ProgrammerError("Request parameter %s is missing" %
14184 val = request[keyname]
14185 if not keytype(val):
14186 raise errors.ProgrammerError("Request parameter %s doesn't pass"
14187 " validation, value %s, expected"
14188 " type %s" % (keyname, val, keytype))
14189 self.in_data["request"] = request
14191 self.in_text = serializer.Dump(self.in_data)
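# The serialized input handed to the iallocator script is a single JSON
# document combining cluster data and the request, e.g. (abridged,
# illustrative):
#   {"version": 2, "cluster_name": "cluster.example.com", "nodes": {...},
#    "instances": {...}, "nodegroups": {...},
#    "request": {"type": "allocate", "name": "inst1.example.com", ...}}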
14193 _STRING_LIST = ht.TListOf(ht.TString)
14194 _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
14195 # pylint: disable=E1101
14196 # Class '...' has no 'OP_ID' member
14197 "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
14198 opcodes.OpInstanceMigrate.OP_ID,
14199 opcodes.OpInstanceReplaceDisks.OP_ID])
14200 })))
14202 _NEVAC_MOVED = \
14203 ht.TListOf(ht.TAnd(ht.TIsLength(3),
14204 ht.TItems([ht.TNonEmptyString,
14205 ht.TNonEmptyString,
14206 ht.TListOf(ht.TNonEmptyString),
14207 ])))
14208 _NEVAC_FAILED = \
14209 ht.TListOf(ht.TAnd(ht.TIsLength(2),
14210 ht.TItems([ht.TNonEmptyString,
14211 ht.TMaybeString,
14212 ])))
14213 _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
14214 ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
14216 _MODE_DATA = {
14217 constants.IALLOCATOR_MODE_ALLOC:
14218 (_AddNewInstance,
14219 [
14220 ("name", ht.TString),
14221 ("memory", ht.TInt),
14222 ("disks", ht.TListOf(ht.TDict)),
14223 ("disk_template", ht.TString),
14224 ("os", ht.TString),
14225 ("tags", _STRING_LIST),
14226 ("nics", ht.TListOf(ht.TDict)),
14227 ("vcpus", ht.TInt),
14228 ("hypervisor", ht.TString),
14230 constants.IALLOCATOR_MODE_RELOC:
14231 (_AddRelocateInstance,
14232 [("name", ht.TString), ("relocate_from", _STRING_LIST)],
14234 constants.IALLOCATOR_MODE_NODE_EVAC:
14235 (_AddNodeEvacuate, [
14236 ("instances", _STRING_LIST),
14237 ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
14239 constants.IALLOCATOR_MODE_CHG_GROUP:
14240 (_AddChangeGroup, [
14241 ("instances", _STRING_LIST),
14242 ("target_groups", _STRING_LIST),
14246 def Run(self, name, validate=True, call_fn=None):
14247 """Run an instance allocator and return the results.
14250 if call_fn is None:
14251 call_fn = self.rpc.call_iallocator_runner
14253 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
14254 result.Raise("Failure while running the iallocator script")
14256 self.out_text = result.payload
14257 if validate:
14258 self._ValidateResult()
14260 def _ValidateResult(self):
14261 """Process the allocator results.
14263 This will process and if successful save the result in
14264 self.out_data and the other parameters.
14266 """
14267 try:
14268 rdict = serializer.Load(self.out_text)
14269 except Exception, err:
14270 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
14272 if not isinstance(rdict, dict):
14273 raise errors.OpExecError("Can't parse iallocator results: not a dict")
14275 # TODO: remove backwards compatibility in later versions
14276 if "nodes" in rdict and "result" not in rdict:
14277 rdict["result"] = rdict["nodes"]
14278 del rdict["nodes"]
14280 for key in "success", "info", "result":
14281 if key not in rdict:
14282 raise errors.OpExecError("Can't parse iallocator results:"
14283 " missing key '%s'" % key)
14284 setattr(self, key, rdict[key])
14286 if not self._result_check(self.result):
14287 raise errors.OpExecError("Iallocator returned invalid result,"
14288 " expected %s, got %s" %
14289 (self._result_check, self.result),
14290 errors.ECODE_INVAL)
14292 if self.mode == constants.IALLOCATOR_MODE_RELOC:
14293 assert self.relocate_from is not None
14294 assert self.required_nodes == 1
14296 node2group = dict((name, ndata["group"])
14297 for (name, ndata) in self.in_data["nodes"].items())
14299 fn = compat.partial(self._NodesToGroups, node2group,
14300 self.in_data["nodegroups"])
14302 instance = self.cfg.GetInstanceInfo(self.name)
14303 request_groups = fn(self.relocate_from + [instance.primary_node])
14304 result_groups = fn(rdict["result"] + [instance.primary_node])
14306 if self.success and not set(result_groups).issubset(request_groups):
14307 raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
14308 " differ from original groups (%s)" %
14309 (utils.CommaJoin(result_groups),
14310 utils.CommaJoin(request_groups)))
14312 elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
14313 assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
14315 self.out_data = rdict
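# A well-formed reply from the script therefore looks like (illustrative):
#   {"success": true, "info": "allocation successful",
#    "result": ["node2.example.com"]}
# and for relocations the result must keep the instance within its original
# node groups, as verified above.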
14317 @staticmethod
14318 def _NodesToGroups(node2group, groups, nodes):
14319 """Returns a list of unique group names for a list of nodes.
14321 @type node2group: dict
14322 @param node2group: Map from node name to group UUID
14323 @type groups: dict
14324 @param groups: Group information
14325 @type nodes: list
14326 @param nodes: Node names
14328 """
14329 result = set()
14331 for node in nodes:
14332 try:
14333 group_uuid = node2group[node]
14334 except KeyError:
14335 # Ignore unknown node
14336 pass
14337 else:
14338 try:
14339 group = groups[group_uuid]
14340 except KeyError:
14341 # Can't find group, let's use UUID
14342 group_name = group_uuid
14343 else:
14344 group_name = group["name"]
14346 result.add(group_name)
14348 return sorted(result)
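# Illustrative call (hypothetical data):
#   _NodesToGroups({"n1": "u1", "n2": "u2"}, {"u1": {"name": "default"}},
#                  ["n1", "n2", "n9"])
# returns ["default", "u2"]: "n9" is unknown and skipped, "u2" has no group
# entry so the UUID itself is used, and the set is returned sorted.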
14351 class LUTestAllocator(NoHooksLU):
14352 """Run allocator tests.
14354 This LU runs the allocator tests.
14356 """
14357 def CheckPrereq(self):
14358 """Check prerequisites.
14360 This checks the opcode parameters depending on the test direction and mode.
14362 """
14363 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
14364 for attr in ["memory", "disks", "disk_template",
14365 "os", "tags", "nics", "vcpus"]:
14366 if not hasattr(self.op, attr):
14367 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
14368 attr, errors.ECODE_INVAL)
14369 iname = self.cfg.ExpandInstanceName(self.op.name)
14370 if iname is not None:
14371 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
14372 iname, errors.ECODE_EXISTS)
14373 if not isinstance(self.op.nics, list):
14374 raise errors.OpPrereqError("Invalid parameter 'nics'",
14375 errors.ECODE_INVAL)
14376 if not isinstance(self.op.disks, list):
14377 raise errors.OpPrereqError("Invalid parameter 'disks'",
14378 errors.ECODE_INVAL)
14379 for row in self.op.disks:
14380 if (not isinstance(row, dict) or
14381 constants.IDISK_SIZE not in row or
14382 not isinstance(row[constants.IDISK_SIZE], int) or
14383 constants.IDISK_MODE not in row or
14384 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
14385 raise errors.OpPrereqError("Invalid contents of the 'disks'"
14386 " parameter", errors.ECODE_INVAL)
14387 if self.op.hypervisor is None:
14388 self.op.hypervisor = self.cfg.GetHypervisorType()
14389 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
14390 fname = _ExpandInstanceName(self.cfg, self.op.name)
14391 self.op.name = fname
14392 self.relocate_from = \
14393 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
14394 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
14395 constants.IALLOCATOR_MODE_NODE_EVAC):
14396 if not self.op.instances:
14397 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
14398 self.op.instances = _GetWantedInstances(self, self.op.instances)
14399 else:
14400 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
14401 self.op.mode, errors.ECODE_INVAL)
14403 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
14404 if self.op.allocator is None:
14405 raise errors.OpPrereqError("Missing allocator name",
14406 errors.ECODE_INVAL)
14407 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
14408 raise errors.OpPrereqError("Wrong allocator test '%s'" %
14409 self.op.direction, errors.ECODE_INVAL)
14411 def Exec(self, feedback_fn):
14412 """Run the allocator test.
14415 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
14416 ial = IAllocator(self.cfg, self.rpc,
14419 memory=self.op.memory,
14420 disks=self.op.disks,
14421 disk_template=self.op.disk_template,
14425 vcpus=self.op.vcpus,
14426 hypervisor=self.op.hypervisor,
14428 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
14429 ial = IAllocator(self.cfg, self.rpc,
14430 mode=self.op.mode,
14431 name=self.op.name,
14432 relocate_from=list(self.relocate_from),
14433 )
14434 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
14435 ial = IAllocator(self.cfg, self.rpc,
14436 mode=self.op.mode,
14437 instances=self.op.instances,
14438 target_groups=self.op.target_groups)
14439 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
14440 ial = IAllocator(self.cfg, self.rpc,
14441 mode=self.op.mode,
14442 instances=self.op.instances,
14443 evac_mode=self.op.evac_mode)
14444 else:
14445 raise errors.ProgrammerError("Unhandled mode %s in"
14446 " LUTestAllocator.Exec", self.op.mode)
14448 if self.op.direction == constants.IALLOCATOR_DIR_IN:
14449 result = ial.in_text
14450 else:
14451 ial.Run(self.op.allocator, validate=False)
14452 result = ial.out_text
14453 return result
14456 #: Query type implementations
14457 _QUERY_IMPL = {
14458 constants.QR_INSTANCE: _InstanceQuery,
14459 constants.QR_NODE: _NodeQuery,
14460 constants.QR_GROUP: _GroupQuery,
14461 constants.QR_OS: _OsQuery,
14462 }
14464 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
14467 def _GetQueryImplementation(name):
14468 """Returns the implemtnation for a query type.
14470 @param name: Query type, must be one of L{constants.QR_VIA_OP}
14474 return _QUERY_IMPL[name]
14476 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
14477 errors.ECODE_INVAL)