4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
45 from ganeti import ssh
46 from ganeti import utils
47 from ganeti import errors
48 from ganeti import hypervisor
49 from ganeti import locking
50 from ganeti import constants
51 from ganeti import objects
52 from ganeti import serializer
53 from ganeti import ssconf
54 from ganeti import uidpool
55 from ganeti import compat
56 from ganeti import masterd
57 from ganeti import netutils
58 from ganeti import query
59 from ganeti import qlang
60 from ganeti import opcodes
62 from ganeti import rpc
64 import ganeti.masterd.instance # pylint: disable=W0611
#: Size of DRBD meta block device

# Groupings of instance administrative states (constants.ADMINST_*). These
# lists are meant to be passed as the set of acceptable states to checks
# such as _CheckInstanceState (which compares instance.admin_state against
# the given list).
INSTANCE_UP = [constants.ADMINST_UP]
INSTANCE_DOWN = [constants.ADMINST_DOWN]
INSTANCE_OFFLINE = [constants.ADMINST_OFFLINE]
INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
79 """Data container for LU results with jobs.
81 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
82 by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
83 contained in the C{jobs} attribute and include the job IDs in the opcode
87 def __init__(self, jobs, **kwargs):
88 """Initializes this class.
90 Additional return values can be specified as keyword arguments.
92 @type jobs: list of lists of L{opcode.OpCode}
93 @param jobs: A list of lists of opcode objects
100 class LogicalUnit(object):
101 """Logical Unit base class.
103 Subclasses must follow these rules:
104 - implement ExpandNames
105 - implement CheckPrereq (except when tasklets are used)
106 - implement Exec (except when tasklets are used)
107 - implement BuildHooksEnv
108 - implement BuildHooksNodes
109 - redefine HPATH and HTYPE
110 - optionally redefine their run requirements:
111 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
113 Note that all commands require root permissions.
115 @ivar dry_run_result: the value (if any) that will be returned to the caller
116 in dry-run mode (signalled by opcode dry_run parameter)
123 def __init__(self, processor, op, context, rpc_runner):
124 """Constructor for LogicalUnit.
126 This needs to be overridden in derived classes in order to check op
130 self.proc = processor
132 self.cfg = context.cfg
133 self.glm = context.glm
135 self.owned_locks = context.glm.list_owned
136 self.context = context
137 self.rpc = rpc_runner
138 # Dicts used to declare locking needs to mcpu
139 self.needed_locks = None
140 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
142 self.remove_locks = {}
143 # Used to force good behavior when calling helper functions
144 self.recalculate_locks = {}
146 self.Log = processor.Log # pylint: disable=C0103
147 self.LogWarning = processor.LogWarning # pylint: disable=C0103
148 self.LogInfo = processor.LogInfo # pylint: disable=C0103
149 self.LogStep = processor.LogStep # pylint: disable=C0103
150 # support for dry-run
151 self.dry_run_result = None
152 # support for generic debug attribute
153 if (not hasattr(self.op, "debug_level") or
154 not isinstance(self.op.debug_level, int)):
155 self.op.debug_level = 0
160 # Validate opcode parameters and set defaults
161 self.op.Validate(True)
163 self.CheckArguments()
165 def CheckArguments(self):
166 """Check syntactic validity for the opcode arguments.
168 This method is for doing a simple syntactic check and ensure
169 validity of opcode parameters, without any cluster-related
170 checks. While the same can be accomplished in ExpandNames and/or
171 CheckPrereq, doing these separate is better because:
173 - ExpandNames is left as as purely a lock-related function
174 - CheckPrereq is run after we have acquired locks (and possible
177 The function is allowed to change the self.op attribute so that
178 later methods can no longer worry about missing parameters.
183 def ExpandNames(self):
184 """Expand names for this LU.
186 This method is called before starting to execute the opcode, and it should
187 update all the parameters of the opcode to their canonical form (e.g. a
188 short node name must be fully expanded after this method has successfully
189 completed). This way locking, hooks, logging, etc. can work correctly.
191 LUs which implement this method must also populate the self.needed_locks
192 member, as a dict with lock levels as keys, and a list of needed lock names
195 - use an empty dict if you don't need any lock
196 - if you don't need any lock at a particular level omit that level
197 - don't put anything for the BGL level
198 - if you want all locks at a level use locking.ALL_SET as a value
200 If you need to share locks (rather than acquire them exclusively) at one
201 level you can modify self.share_locks, setting a true value (usually 1) for
202 that level. By default locks are not shared.
204 This function can also define a list of tasklets, which then will be
205 executed in order instead of the usual LU-level CheckPrereq and Exec
206 functions, if those are not defined by the LU.
210 # Acquire all nodes and one instance
211 self.needed_locks = {
212 locking.LEVEL_NODE: locking.ALL_SET,
213 locking.LEVEL_INSTANCE: ['instance1.example.com'],
215 # Acquire just two nodes
216 self.needed_locks = {
217 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
220 self.needed_locks = {} # No, you can't leave it to the default value None
223 # The implementation of this method is mandatory only if the new LU is
224 # concurrent, so that old LUs don't need to be changed all at the same
227 self.needed_locks = {} # Exclusive LUs don't need locks.
229 raise NotImplementedError
231 def DeclareLocks(self, level):
232 """Declare LU locking needs for a level
234 While most LUs can just declare their locking needs at ExpandNames time,
235 sometimes there's the need to calculate some locks after having acquired
236 the ones before. This function is called just before acquiring locks at a
237 particular level, but after acquiring the ones at lower levels, and permits
238 such calculations. It can be used to modify self.needed_locks, and by
239 default it does nothing.
241 This function is only called if you have something already set in
242 self.needed_locks for the level.
244 @param level: Locking level which is going to be locked
245 @type level: member of ganeti.locking.LEVELS
249 def CheckPrereq(self):
250 """Check prerequisites for this LU.
252 This method should check that the prerequisites for the execution
253 of this LU are fulfilled. It can do internode communication, but
254 it should be idempotent - no cluster or system changes are
257 The method should raise errors.OpPrereqError in case something is
258 not fulfilled. Its return value is ignored.
260 This method should also update all the parameters of the opcode to
261 their canonical form if it hasn't been done by ExpandNames before.
264 if self.tasklets is not None:
265 for (idx, tl) in enumerate(self.tasklets):
266 logging.debug("Checking prerequisites for tasklet %s/%s",
267 idx + 1, len(self.tasklets))
272 def Exec(self, feedback_fn):
275 This method should implement the actual work. It should raise
276 errors.OpExecError for failures that are somewhat dealt with in
280 if self.tasklets is not None:
281 for (idx, tl) in enumerate(self.tasklets):
282 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
285 raise NotImplementedError
287 def BuildHooksEnv(self):
288 """Build hooks environment for this LU.
291 @return: Dictionary containing the environment that will be used for
292 running the hooks for this LU. The keys of the dict must not be prefixed
293 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
294 will extend the environment with additional variables. If no environment
295 should be defined, an empty dictionary should be returned (not C{None}).
296 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
300 raise NotImplementedError
302 def BuildHooksNodes(self):
303 """Build list of nodes to run LU's hooks.
305 @rtype: tuple; (list, list)
306 @return: Tuple containing a list of node names on which the hook
307 should run before the execution and a list of node names on which the
308 hook should run after the execution. No nodes should be returned as an
309 empty list (and not None).
310 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
314 raise NotImplementedError
316 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
317 """Notify the LU about the results of its hooks.
319 This method is called every time a hooks phase is executed, and notifies
320 the Logical Unit about the hooks' result. The LU can then use it to alter
321 its result based on the hooks. By default the method does nothing and the
322 previous result is passed back unchanged but any LU can define it if it
323 wants to use the local cluster hook-scripts somehow.
325 @param phase: one of L{constants.HOOKS_PHASE_POST} or
326 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
327 @param hook_results: the results of the multi-node hooks rpc call
328 @param feedback_fn: function used send feedback back to the caller
329 @param lu_result: the previous Exec result this LU had, or None
331 @return: the new Exec result, based on the previous result
335 # API must be kept, thus we ignore the unused argument and could
336 # be a function warnings
337 # pylint: disable=W0613,R0201
340 def _ExpandAndLockInstance(self):
341 """Helper function to expand and lock an instance.
343 Many LUs that work on an instance take its name in self.op.instance_name
344 and need to expand it and then declare the expanded name for locking. This
345 function does it, and then updates self.op.instance_name to the expanded
346 name. It also initializes needed_locks as a dict, if this hasn't been done
350 if self.needed_locks is None:
351 self.needed_locks = {}
353 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
354 "_ExpandAndLockInstance called with instance-level locks set"
355 self.op.instance_name = _ExpandInstanceName(self.cfg,
356 self.op.instance_name)
357 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
359 def _LockInstancesNodes(self, primary_only=False,
360 level=locking.LEVEL_NODE):
361 """Helper function to declare instances' nodes for locking.
363 This function should be called after locking one or more instances to lock
364 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
365 with all primary or secondary nodes for instances already locked and
366 present in self.needed_locks[locking.LEVEL_INSTANCE].
368 It should be called from DeclareLocks, and for safety only works if
369 self.recalculate_locks[locking.LEVEL_NODE] is set.
371 In the future it may grow parameters to just lock some instance's nodes, or
372 to just lock primaries or secondary nodes, if needed.
374 If should be called in DeclareLocks in a way similar to::
376 if level == locking.LEVEL_NODE:
377 self._LockInstancesNodes()
379 @type primary_only: boolean
380 @param primary_only: only lock primary nodes of locked instances
381 @param level: Which lock level to use for locking nodes
384 assert level in self.recalculate_locks, \
385 "_LockInstancesNodes helper function called with no nodes to recalculate"
387 # TODO: check if we're really been called with the instance locks held
389 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
390 # future we might want to have different behaviors depending on the value
391 # of self.recalculate_locks[locking.LEVEL_NODE]
393 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
394 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
395 wanted_nodes.append(instance.primary_node)
397 wanted_nodes.extend(instance.secondary_nodes)
399 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
400 self.needed_locks[level] = wanted_nodes
401 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
402 self.needed_locks[level].extend(wanted_nodes)
404 raise errors.ProgrammerError("Unknown recalculation mode")
406 del self.recalculate_locks[level]
409 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
410 """Simple LU which runs no hooks.
412 This LU is intended as a parent for other LogicalUnits which will
413 run no hooks, in order to reduce duplicate code.
419 def BuildHooksEnv(self):
420 """Empty BuildHooksEnv for NoHooksLu.
422 This just raises an error.
425 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
427 def BuildHooksNodes(self):
428 """Empty BuildHooksNodes for NoHooksLU.
431 raise AssertionError("BuildHooksNodes called for NoHooksLU")
435 """Tasklet base class.
437 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
438 they can mix legacy code with tasklets. Locking needs to be done in the LU,
439 tasklets know nothing about locks.
441 Subclasses must follow these rules:
442 - Implement CheckPrereq
446 def __init__(self, lu):
453 def CheckPrereq(self):
454 """Check prerequisites for this tasklets.
456 This method should check whether the prerequisites for the execution of
457 this tasklet are fulfilled. It can do internode communication, but it
458 should be idempotent - no cluster or system changes are allowed.
460 The method should raise errors.OpPrereqError in case something is not
461 fulfilled. Its return value is ignored.
463 This method should also update all parameters to their canonical form if it
464 hasn't been done before.
469 def Exec(self, feedback_fn):
470 """Execute the tasklet.
472 This method should implement the actual work. It should raise
473 errors.OpExecError for failures that are somewhat dealt with in code, or
477 raise NotImplementedError
481 """Base for query utility classes.
484 #: Attribute holding field definitions
487 def __init__(self, qfilter, fields, use_locking):
488 """Initializes this class.
491 self.use_locking = use_locking
493 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
495 self.requested_data = self.query.RequestedData()
496 self.names = self.query.RequestedNames()
498 # Sort only if no names were requested
499 self.sort_by_name = not self.names
501 self.do_locking = None
504 def _GetNames(self, lu, all_names, lock_level):
505 """Helper function to determine names asked for in the query.
509 names = lu.owned_locks(lock_level)
513 if self.wanted == locking.ALL_SET:
514 assert not self.names
515 # caller didn't specify names, so ordering is not important
516 return utils.NiceSort(names)
518 # caller specified names and we must keep the same order
520 assert not self.do_locking or lu.glm.is_owned(lock_level)
522 missing = set(self.wanted).difference(names)
524 raise errors.OpExecError("Some items were removed before retrieving"
525 " their data: %s" % missing)
527 # Return expanded names
530 def ExpandNames(self, lu):
531 """Expand names for this query.
533 See L{LogicalUnit.ExpandNames}.
536 raise NotImplementedError()
538 def DeclareLocks(self, lu, level):
539 """Declare locks for this query.
541 See L{LogicalUnit.DeclareLocks}.
544 raise NotImplementedError()
546 def _GetQueryData(self, lu):
547 """Collects all data for this query.
549 @return: Query data object
552 raise NotImplementedError()
554 def NewStyleQuery(self, lu):
555 """Collect data and execute query.
558 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
559 sort_by_name=self.sort_by_name)
561 def OldStyleQuery(self, lu):
562 """Collect data and execute query.
565 return self.query.OldStyleQuery(self._GetQueryData(lu),
566 sort_by_name=self.sort_by_name)
570 """Returns a dict declaring all lock levels shared.
573 return dict.fromkeys(locking.LEVELS, 1)
def _MakeLegacyNodeInfo(data):
  """Flattens the result of L{rpc.RpcRunner.call_node_info} into one dict.

  Only the first volume group and the first hypervisor are taken into
  account, which is sufficient for most callers; users needing data from
  several volume groups or hypervisors must use the raw result instead.

  """
  (bootid, (vg_info, ), (hv_info, )) = data

  combined = utils.JoinDisjointDicts(vg_info, hv_info)
  return utils.JoinDisjointDicts(combined, {"bootid": bootid})
def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
  """Checks if the owned node groups are still correct for an instance.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type instance_name: string
  @param instance_name: Instance name
  @type owned_groups: set or frozenset
  @param owned_groups: List of currently owned node groups
  @return: the node groups the instance currently spans
  @raise errors.OpPrereqError: if the instance's groups changed since the
    locks were acquired

  """
  inst_groups = cfg.GetInstanceNodeGroups(instance_name)

  if not owned_groups.issuperset(inst_groups):
    # Note: message previously read "current groups are are '%s'" (duplicated
    # word); fixed here
    raise errors.OpPrereqError("Instance %s's node groups changed since"
                               " locks were acquired, current groups are"
                               " '%s', owning groups '%s'; retry the"
                               " operation" %
                               (instance_name,
                                utils.CommaJoin(inst_groups),
                                utils.CommaJoin(owned_groups)),
                               errors.ECODE_STATE)

  return inst_groups
def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
  """Verifies that a node group still contains exactly the owned instances.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type group_uuid: string
  @param group_uuid: Node group UUID
  @type owned_instances: set or frozenset
  @param owned_instances: List of currently owned instances
  @return: the current members of the node group
  @raise errors.OpPrereqError: if group membership changed since locking

  """
  current_members = cfg.GetNodeGroupInstances(group_uuid)

  if current_members == owned_instances:
    return current_members

  raise errors.OpPrereqError("Instances in node group '%s' changed since"
                             " locks were acquired, wanted '%s', have '%s';"
                             " retry the operation" %
                             (group_uuid,
                              utils.CommaJoin(current_members),
                              utils.CommaJoin(owned_instances)),
                             errors.ECODE_STATE)
def _SupportsOob(cfg, node):
  """Returns the node's out-of-band management helper, if configured.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node to query
  @return: the OOB script for the node, or an empty string when the node
    has no OOB support configured

  """
  ndparams = cfg.GetNdParams(node)
  return ndparams[constants.ND_OOB_PROGRAM]
def _GetWantedNodes(lu, nodes):
  """Expands the given node names, or lists every node in the cluster.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names, or a false value meaning "all nodes"
  @rtype: list
  @return: the expanded names; sorted when all nodes are returned

  """
  if not nodes:
    return utils.NiceSort(lu.cfg.GetNodeList())

  return [_ExpandNodeName(lu.cfg, name) for name in nodes]
def _GetWantedInstances(lu, instances):
  """Expands the given instance names, or lists every instance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names, or a false value meaning
    "all instances"
  @rtype: list
  @return: the expanded names; sorted when all instances are returned
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if not instances:
    return utils.NiceSort(lu.cfg.GetInstanceList())

  return [_ExpandInstanceName(lu.cfg, name) for name in instances]
691 def _GetUpdatedParams(old_params, update_dict,
692 use_default=True, use_none=False):
693 """Return the new version of a parameter dictionary.
695 @type old_params: dict
696 @param old_params: old parameters
697 @type update_dict: dict
698 @param update_dict: dict containing new parameter values, or
699 constants.VALUE_DEFAULT to reset the parameter to its default
701 @param use_default: boolean
702 @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
703 values as 'to be deleted' values
704 @param use_none: boolean
705 @type use_none: whether to recognise C{None} values as 'to be
708 @return: the new parameter dictionary
711 params_copy = copy.deepcopy(old_params)
712 for key, val in update_dict.iteritems():
713 if ((use_default and val == constants.VALUE_DEFAULT) or
714 (use_none and val is None)):
720 params_copy[key] = val
def _UpdateAndVerifySubDict(base, updates, type_check):
  """Merges per-key sub-dictionaries and type-checks each merged value.

  @param base: The dict with the old data
  @param updates: The dict with the new data
  @param type_check: Dict suitable to ForceDictType to verify correct types
  @returns: A new dict with updated and verified values

  """
  def _MergeOne(old, new_values):
    # Apply one sub-dict update and validate the resulting value types
    merged = _GetUpdatedParams(old, new_values)
    utils.ForceDictType(merged, type_check)
    return merged

  result = copy.deepcopy(base)
  for key, value in updates.items():
    result[key] = _MergeOne(base.get(key, {}), value)
  return result
def _MergeAndVerifyHvState(op_input, obj_input):
  """Merges the hypervisor state from an opcode into an object's state.

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict, or C{None} when the opcode
    supplied no state at all

  """
  if not op_input:
    return None

  unknown_hvs = set(op_input) - constants.HYPER_TYPES
  if unknown_hvs:
    raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
                               " %s" % utils.CommaJoin(unknown_hvs),
                               errors.ECODE_INVAL)

  if obj_input is None:
    obj_input = {}

  return _UpdateAndVerifySubDict(obj_input, op_input,
                                 constants.HVSTS_PARAMETER_TYPES)
def _MergeAndVerifyDiskState(op_input, obj_input):
  """Merges the disk state from an opcode into an object's state.

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict, or C{None} when the opcode
    supplied no state at all

  """
  if not op_input:
    return None

  unknown_dst = set(op_input) - constants.DS_VALID_TYPES
  if unknown_dst:
    raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
                               utils.CommaJoin(unknown_dst),
                               errors.ECODE_INVAL)

  if obj_input is None:
    obj_input = {}

  type_check = constants.DSS_PARAMETER_TYPES
  return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
                                            type_check))
              for key, value in op_input.items())
def _ReleaseLocks(lu, level, names=None, keep=None):
  """Releases locks owned by an LU.

  @type lu: L{LogicalUnit}
  @param level: Lock level
  @type names: list or None
  @param names: Names of locks to release
  @type keep: list or None
  @param keep: Names of locks to retain

  """
  assert not (keep is not None and names is not None), \
    "Only one of the 'names' and the 'keep' parameters can be given"

  # Build a predicate deciding whether a given lock must be released;
  # None means "release everything at this level"
  if names is not None:
    should_release = names.__contains__
  elif keep:
    should_release = lambda name: name not in keep
  else:
    should_release = None

  owned = lu.owned_locks(level)
  if not owned:
    # Not owning any lock at this level, do nothing
    pass
  elif should_release:
    # Partition the owned locks into the ones to drop and the ones to keep
    release = [name for name in owned if should_release(name)]
    retain = [name for name in owned if not should_release(name)]

    assert len(lu.owned_locks(level)) == (len(retain) + len(release))

    # Release just some locks
    lu.glm.release(level, names=release)

    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
  else:
    # Release everything at this level
    lu.glm.release(level)

    assert not lu.glm.is_owned(level), "No locks should be owned"
839 def _MapInstanceDisksToNodes(instances):
840 """Creates a map from (node, volume) to instance name.
842 @type instances: list of L{objects.Instance}
843 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
846 return dict(((node, vol), inst.name)
847 for inst in instances
848 for (node, vols) in inst.MapLVsByNode().items()
def _RunPostHook(lu, node_name):
  """Runs the post-hook for an opcode on a single node.

  Hook failures are logged as warnings and never propagated: a failing hook
  must not abort the surrounding operation.

  @type lu: L{LogicalUnit}
  @param lu: the LU whose post-phase hooks should run
  @type node_name: string
  @param node_name: the node on which to run the hooks

  """
  hm = lu.proc.BuildHooksManager(lu)
  try:
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
  except Exception: # pylint: disable=W0703
    # Previously a bare "except:", which also swallowed SystemExit and
    # KeyboardInterrupt; catching Exception keeps the best-effort semantics
    # while letting those propagate.
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)
def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set
  @param selected: the requested output field names
  @raise errors.OpPrereqError: if any unknown field was selected

  """
  allowed = utils.FieldSet()
  allowed.Extend(static)
  allowed.Extend(dynamic)

  delta = allowed.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)
def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  Prevents instances from carrying customised versions of parameters that
  may only be set at cluster level.

  @param params: the hypervisor parameters to check
  @raise errors.OpPrereqError: if any global parameter is present

  """
  offenders = constants.HVC_GLOBALS.intersection(params)
  if not offenders:
    return

  msg = ("The following hypervisor parameters are global and cannot"
         " be customized at instance level, please modify them at"
         " cluster level: %s" % utils.CommaJoin(offenders))
  raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  node_info = lu.cfg.GetNodeInfo(node)
  if not node_info.offline:
    return

  if msg is None:
    msg = "Can't use offline node"
  raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  node_info = lu.cfg.GetNodeInfo(node)
  if node_info.drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)
def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  node_info = lu.cfg.GetNodeInfo(node)
  if not node_info.vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)
def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  os_result = lu.rpc.call_os_get(node, os_name)
  os_result.Raise("OS '%s' not in supported OS list for node %s" %
                  (os_name, node),
                  prereq=True, ecode=errors.ECODE_INVAL)

  if not force_variant:
    # Variant validation can be skipped explicitly by the caller
    _CheckOSVariant(os_result.payload, os_name)
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if result.payload:
    return

  msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
         " please fix and re-run this command" % secondary_ip)
  if prereq:
    raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
  raise errors.OpExecError(msg)
def _GetClusterDomainSecret():
  """Reads and returns the cluster domain secret.

  The secret is read strictly, i.e. a missing or malformed file raises.

  """
  secret_file = constants.CLUSTER_DOMAIN_SECRET_FILE
  return utils.ReadOneLineFile(secret_file, strict=True)
def _CheckInstanceState(lu, instance, req_states, msg=None):
  """Ensure that an instance is in one of the required states.

  @param lu: the LU on behalf of which we make the check
  @param instance: the instance to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the instance is not in the required state

  """
  if msg is None:
    msg = "can't use instance from outside %s states" % ", ".join(req_states)
  if instance.admin_state not in req_states:
    raise errors.OpPrereqError("Instance %s is marked to be %s, %s" %
                               (instance, instance.admin_state, msg),
                               errors.ECODE_STATE)

  if constants.ADMINST_UP not in req_states:
    # When "up" is not an acceptable state, additionally verify that the
    # instance is really not running on its primary node
    primary = instance.primary_node
    running = lu.rpc.call_instance_list([primary],
                                        [instance.hypervisor])[primary]
    running.Raise("Can't contact node %s for instance information" % primary,
                  prereq=True, ecode=errors.ECODE_ENVIRON)
    if instance.name in running.payload:
      raise errors.OpPrereqError("Instance %s is running, %s" %
                                 (instance.name, msg), errors.ECODE_STATE)
def _CheckMinMaxSpecs(name, ipolicy, value):
  """Checks if a value lies within the instance policy's min/max bounds.

  @param name: name of the parameter for which we perform the check
  @param ipolicy: dictionary containing min, max and std values
  @param value: actual value that we want to use
  @return: None when the value is acceptable, otherwise a string
    describing the violated bound

  """
  # "auto" and unset values are never range-checked
  if value is None or value == constants.VALUE_AUTO:
    return None

  # A bound missing from the policy defaults to the value itself, i.e. it
  # never triggers a violation
  upper = ipolicy[constants.ISPECS_MAX].get(name, value)
  lower = ipolicy[constants.ISPECS_MIN].get(name, value)
  if lower <= value <= upper:
    return None

  return ("%s value %s is not in range [%s, %s]" %
          (name, value, lower, upper))
1039 def _ExpandItemName(fn, name, kind):
1040 """Expand an item name.
1042 @param fn: the function to use for expansion
1043 @param name: requested item name
1044 @param kind: text description ('Node' or 'Instance')
1045 @return: the resolved (full) name
1046 @raise errors.OpPrereqError: if the item is not found
1049 full_name = fn(name)
1050 if full_name is None:
1051 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
def _ExpandNodeName(cfg, name):
  """Expands a short node name into its full form.

  Thin wrapper over L{_ExpandItemName} using the configuration's node
  expansion; raises errors.OpPrereqError for unknown nodes.

  """
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
def _ExpandInstanceName(cfg, name):
  """Expands a short instance name into its full form.

  Thin wrapper over L{_ExpandItemName} using the configuration's instance
  expansion; raises errors.OpPrereqError for unknown instances.

  """
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          minmem, maxmem, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name, tags):
  """Builds instance related env variables for hooks

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: string
  @param status: the desired status of the instance
  @type minmem: string
  @param minmem: the minimum memory size of the instance
  @type maxmem: string
  @param maxmem: the maximum memory size of the instance
  @param vcpus: the count of VCPUs the instance has
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @param disks: the list of (size, mode) pairs
  @param bep: the backend parameters for the instance
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @param tags: list of instance tags as strings
  @rtype: dict
  @return: the hook environment for this instance

  """
    # Static, per-instance values exported to every instance hook.
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": status,
    "INSTANCE_MINMEM": minmem,
    "INSTANCE_MAXMEM": maxmem,
    # TODO(2.7) remove deprecated "memory" value
    "INSTANCE_MEMORY": maxmem,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  # One INSTANCE_NIC<n>_* variable set per NIC, indexed from 0
  nic_count = len(nics)
  for idx, (ip, mac, mode, link) in enumerate(nics):
    env["INSTANCE_NIC%d_IP" % idx] = ip
    env["INSTANCE_NIC%d_MAC" % idx] = mac
    env["INSTANCE_NIC%d_MODE" % idx] = mode
    env["INSTANCE_NIC%d_LINK" % idx] = link
    if mode == constants.NIC_MODE_BRIDGED:
      # for bridged NICs the link is the bridge name; export it as such too
      env["INSTANCE_NIC%d_BRIDGE" % idx] = link

  env["INSTANCE_NIC_COUNT"] = nic_count

  # One INSTANCE_DISK<n>_* variable set per disk, indexed from 0
  disk_count = len(disks)
  for idx, (size, mode) in enumerate(disks):
    env["INSTANCE_DISK%d_SIZE" % idx] = size
    env["INSTANCE_DISK%d_MODE" % idx] = mode

  env["INSTANCE_DISK_COUNT"] = disk_count

  env["INSTANCE_TAGS"] = " ".join(tags)

  # Export every backend (BE) and hypervisor (HV) parameter verbatim
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value
def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  cluster = lu.cfg.GetClusterInfo()
    # Fill the per-NIC parameters with the cluster defaults before
    # reading out the effective mode and link
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  # backend and hypervisor parameters, filled with the cluster defaults
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
    "name": instance.name,
    "primary_node": instance.primary_node,
    "secondary_nodes": instance.secondary_nodes,
    "os_type": instance.os,
    "status": instance.admin_state,
    "maxmem": bep[constants.BE_MAXMEM],
    "minmem": bep[constants.BE_MINMEM],
    "vcpus": bep[constants.BE_VCPUS],
    "nics": _NICListToTuple(lu, instance.nics),
    "disk_template": instance.disk_template,
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
    "hypervisor_name": instance.hypervisor,
    "tags": instance.tags,
  # caller-provided values take precedence over the computed ones
  args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  @param lu: the LU on whose behalf we adjust (provides config access
      and logging)
  @param exceptions: passed through to the config's candidate-pool
      maintenance and statistics calls

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      # re-add promoted nodes so the context picks up their new role
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
    # informational only: the pool has more candidates than configured
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1241 def _DecideSelfPromotion(lu, exceptions=None):
1242 """Decide whether I should promote myself as a master candidate.
1245 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1246 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1247 # the new node will increase mc_max with one, so:
1248 mc_should = min(mc_should + 1, cp_size)
1249 return mc_now < mc_should
1252 def _CalculateGroupIPolicy(cfg, group):
1253 """Calculate instance policy for group.
1256 cluster = cfg.GetClusterInfo()
1257 return cluster.SimpleFillIPolicy(group.ipolicy)
def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  # compute the effective NIC parameters, then keep only the links of
  # bridged NICs (for those, the link is the bridge name)
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  If C{node} is not given, the instance's primary node is used.

  """
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)
def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  @raise errors.OpPrereqError: if the name/variant combination is invalid

  """
  variant = objects.OS.GetVariant(name)
  if not os_obj.supported_variants:
    raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                               " passed)" % (os_obj.name, variant),
    raise errors.OpPrereqError("OS name must include a variant",
  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1307 def _GetNodeInstancesInner(cfg, fn):
1308 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """
  def _OnNode(inst):
    # an instance is "on" the node if the node appears anywhere in
    # its node list (primary or secondary)
    return node_name in inst.all_nodes

  return _GetNodeInstancesInner(cfg, _OnNode)
def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  def _IsPrimary(inst):
    return inst.primary_node == node_name

  return _GetNodeInstancesInner(cfg, _IsPrimary)
def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  def _IsSecondary(inst):
    return node_name in inst.secondary_nodes

  return _GetNodeInstancesInner(cfg, _IsSecondary)
def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  @param cfg: the cluster configuration, used to look up storage paths
  @param storage_type: one of the C{constants.ST_*} storage types

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
  """Check an instance's disks for faultiness on a node.

  Queries the mirror status of the instance's disks on the given node
  and collects the indices of the ones reported as faulty.

  """
  # make sure the per-node disk IDs are set before querying the node
  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  # both slots are optional on the opcode, hence getattr with default
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      # fill the opcode slot in place with the cluster-wide default
      setattr(lu.op, iallocator_slot, default_iallocator)
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found;"
                                 " please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
def _GetDefaultIAllocator(cfg, iallocator):
  """Decides on which iallocator to use.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration object
  @type iallocator: string or None
  @param iallocator: Iallocator specified in opcode
  @rtype: string
  @return: Iallocator name

  """
    # Use default iallocator
    iallocator = cfg.GetDefaultIAllocator()

    raise errors.OpPrereqError("No iallocator was specified, neither in the"
                               " opcode nor as a cluster-wide default",
class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
      "OP_TARGET": self.cfg.GetClusterName(),

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # run the post-init hook only on the master node
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):
    """Execute the LU.

    """
class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
      "OP_TARGET": self.cfg.GetClusterName(),

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    # the master node itself is the only node allowed to remain
    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
    instancelist = self.cfg.GetInstanceList()
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master_params = self.cfg.GetMasterNetworkParameters()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master_params.name)

    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
    result.Raise("Could not disable the master role")

    return master_params.name
def _VerifyCertificate(filename):
  """Verifies a certificate for L{LUClusterVerifyConfig}.

  @type filename: string
  @param filename: Path to PEM file
  @return: a (error-type, message) tuple; the error type is None when
      the certificate verified cleanly

  """
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable=W0703
    # unreadable/unparseable certificate is reported as an error, not raised
    return (LUClusterVerifyConfig.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)
  fnamemsg = "While verifying %s: %s" % (filename, msg)
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
def _GetAllHypervisorParameters(cluster, instances):
  """Compute the set of all hypervisor parameters.

  @type cluster: L{objects.Cluster}
  @param cluster: the cluster object
  @type instances: list of L{objects.Instance}
  @param instances: additional instances from which to obtain parameters
  @rtype: list of (origin, hypervisor, parameters)
  @return: a list with all parameters found, indicating the hypervisor they
       apply to, and the origin (can be "cluster", "os X", or "instance Y")

  """
  # cluster-level defaults for each enabled hypervisor
  for hv_name in cluster.enabled_hypervisors:
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))

  # per-OS hypervisor parameter overrides
  for os_name, os_hvp in cluster.os_hvp.items():
    for hv_name, hv_params in os_hvp.items():
      full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
      hvp_data.append(("os %s" % os_name, hv_name, full_params))

  # TODO: collapse identical parameter values in a single one
  for instance in instances:
    if instance.hvparams:
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
                       cluster.FillHV(instance)))
class _VerifyErrors(object):
  """Mix-in for cluster/group verify LUs.

  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
  self.op and self._feedback_fn to be available.)

  """

  # key under which the error type is passed in kwargs
  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt, _ = ecode
    # first complete the msg
    # then format the whole message
    if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
      # machine-parseable colon-separated format
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
      # human-readable format
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101

  def _ErrorIf(self, cond, ecode, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
        or self.op.debug_simulate_errors) # pylint: disable=E1101

    # If the error code is in the list of ignored errors, demote the error to a
    # warning
    (_, etxt, _) = ecode
    if etxt in self.op.ignore_errors: # pylint: disable=E1101
      kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING

      self._Error(ecode, *args, **kwargs)

    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond
class LUClusterVerify(NoHooksLU):
  """Submits all jobs necessary to verify the cluster.

  """

  def ExpandNames(self):
    # verification only schedules other jobs, no locks needed here
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Schedule the jobs needed to verify the cluster.

    """
    if self.op.group_name:
      # verify only the explicitly requested group
      groups = [self.op.group_name]
      depends_fn = lambda: None
      groups = self.cfg.GetNodeGroupList()

      # Verify global configuration
      opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)

      # Always depend on global verification
      depends_fn = lambda: [(-len(jobs), [])]

    jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
                                              ignore_errors=self.op.ignore_errors,
                                              depends=depends_fn())]
                for group in groups)

    # Fix up all parameters
    for op in itertools.chain(*jobs): # pylint: disable=W0142
      op.debug_simulate_errors = self.op.debug_simulate_errors
      op.verbose = self.op.verbose
      op.error_codes = self.op.error_codes
        op.skip_checks = self.op.skip_checks
      except AttributeError:
        # not every opcode type has a skip_checks slot
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)

    return ResultWithJobs(jobs)
class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
  """Verifies the cluster config.

  """

  def _VerifyHVP(self, hvp_data):
    """Verifies locally the syntax of the hypervisor parameters.

    """
    for item, hv_name, hv_params in hvp_data:
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
        hv_class = hypervisor.GetHypervisor(hv_name)
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
        hv_class.CheckParameterSyntax(hv_params)
      except errors.GenericError, err:
        # syntax problems are reported as verification errors, not raised
        self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))

  def ExpandNames(self):
    # Information can be safely retrieved as the BGL is acquired in exclusive
    # mode
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various test on nodes.

    """
    self._feedback_fn = feedback_fn

    feedback_fn("* Verifying cluster config")

    for msg in self.cfg.VerifyConfig():
      self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)

    feedback_fn("* Verifying cluster certificate files")

    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)

    feedback_fn("* Verifying hypervisor parameters")

    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
                                                self.all_inst_info.values()))

    feedback_fn("* Verifying all nodes belong to an existing group")

    # We do this verification here because, should this bogus circumstance
    # occur, it would never be caught by VerifyGroup, which only acts on
    # nodes/instances reachable from existing node groups.

    dangling_nodes = set(node.name for node in self.all_node_info.values()
                         if node.group not in self.all_group_info)

    dangling_instances = {}
    no_node_instances = []

    for inst in self.all_inst_info.values():
      if inst.primary_node in dangling_nodes:
        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
      elif inst.primary_node not in self.all_node_info:
        no_node_instances.append(inst.name)

      utils.CommaJoin(dangling_instances.get(node.name,
      for node in dangling_nodes]

    self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
                  "the following nodes (and their instances) belong to a non"
                  " existing group: %s", utils.CommaJoin(pretty_dangling))

    self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
                  "the following instances have a non-existing primary-node:"
                  " %s", utils.CommaJoin(no_node_instances))
class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
  """Verifies the status of a node group.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  # used to indent per-node hook output when reporting it
  _HOOKS_INDENT_RE = re.compile("^", re.M)
  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @type name: string
    @ivar name: the node name to which this object refers
    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dictionary of {primary-node: list of instances} for all
        instances for which this node is secondary (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)
    @type os_fail: boolean
    @ivar os_fail: whether the RPC call didn't return valid OS data
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
    @type vm_capable: boolean
    @ivar vm_capable: whether the node can host instances

    """
    def __init__(self, offline=False, name=None, vm_capable=True):
      # all failure flags default to False; they are flipped as the
      # corresponding RPC results are validated
      self.offline = offline
      self.vm_capable = vm_capable
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.os_fail = False
  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    # Get instances in node group; this is unsafe and needs verification later
    inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)

    # node locks are declared later in DeclareLocks, hence the empty list
    self.needed_locks = {
      locking.LEVEL_INSTANCE: inst_names,
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      locking.LEVEL_NODE: [],
    self.share_locks = _ShareAll()
  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      # Get members of node group; this is unsafe and needs verification later
      nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)

      all_inst_info = self.cfg.GetAllInstancesInfo()

      # In Exec(), we warn about mirrored instances that have primary and
      # secondary living in separate node groups. To fully verify that
      # volumes for these instances are healthy, we will need to do an
      # extra call to their secondaries. We ensure here those nodes will
      # be locked.
      for inst in self.owned_locks(locking.LEVEL_INSTANCE):
        # Important: access only the instances whose lock is owned
        if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
          nodes.update(all_inst_info[inst].secondary_nodes)

      self.needed_locks[locking.LEVEL_NODE] = nodes
  def CheckPrereq(self):
    # verify that all the locks declared earlier were actually acquired
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
    self.group_info = self.cfg.GetNodeGroup(self.group_uuid)

    group_nodes = set(self.group_info.members)
    group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
      group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))

    unlocked_instances = \
      group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
      raise errors.OpPrereqError("Missing lock for nodes: %s" %
                                 utils.CommaJoin(unlocked_nodes))

    if unlocked_instances:
      raise errors.OpPrereqError("Missing lock for instances: %s" %
                                 utils.CommaJoin(unlocked_instances))

    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()

    # sorted names of the nodes/instances in this group
    self.my_node_names = utils.NiceSort(group_nodes)
    self.my_inst_names = utils.NiceSort(group_instances)

    self.my_node_info = dict((name, self.all_node_info[name])
                             for name in self.my_node_names)

    self.my_inst_info = dict((name, self.all_inst_info[name])
                             for name in self.my_inst_names)

    # We detect here the nodes that will need the extra RPC calls for verifying
    # split LV volumes; they should be locked.
    extra_lv_nodes = set()

    for inst in self.my_inst_info.values():
      if inst.disk_template in constants.DTS_INT_MIRROR:
        group = self.my_node_info[inst.primary_node].group
        for nname in inst.secondary_nodes:
          # secondaries in a different group need the extra LV check
          if self.all_node_info[nname].group != group:
            extra_lv_nodes.add(nname)

    unlocked_lv_nodes = \
      extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))

    if unlocked_lv_nodes:
      raise errors.OpPrereqError("these nodes could be locked: %s" %
                                 utils.CommaJoin(unlocked_lv_nodes))
    self.extra_lv_nodes = list(extra_lv_nodes)
  def _VerifyNode(self, ninfo, nresult):
    """Perform some basic validation on data returned from a node.

      - check the result data structure is well formed and has all the
        expected fields
      - check ganeti version

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @rtype: boolean
    @return: whether overall this call was successful (and we can expect
         reasonable values in the respose)

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, constants.CV_ENODERPC, node,
             "unable to verify node: no data returned")

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, constants.CV_ENODERPC, node,
             "connection to node returned invalid data")

    test = local_version != remote_version[0]
    _ErrorIf(test, constants.CV_ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  constants.CV_ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if ninfo.vm_capable and isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        # a non-None result is the hypervisor's failure message
        test = hv_result is not None
        _ErrorIf(test, constants.CV_ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
    if ninfo.vm_capable and isinstance(hvp_result, list):
      # each entry in the list is a parameter verification failure
      for item, hv_name, hv_result in hvp_result:
        _ErrorIf(True, constants.CV_ENODEHV, node,
                 "hypervisor %s parameter verify failure (source %s): %s",
                 hv_name, item, hv_result)

    test = nresult.get(constants.NV_NODESETUP,
                       ["Missing NODESETUP results"])
    _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    ntime = nresult.get(constants.NV_TIME, None)
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      # the node returned something that cannot be interpreted as a time
      _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")

    # the node time must fall within the RPC window, extended by the
    # allowed clock skew in both directions
    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)

    _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
             "Node time diverges by at least %s from master node time",
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)

    # check PV names
    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PV)
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, constants.CV_ENODELVM, node,
                 "Invalid character ':' in PV '%s' of VG '%s'",
  def _VerifyNodeBridges(self, ninfo, nresult, bridges):
    """Check the node bridges.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param bridges: the expected list of bridges

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # the node reports back the list of *missing* bridges
    missing = nresult.get(constants.NV_BRIDGES, None)
    test = not isinstance(missing, list)
    _ErrorIf(test, constants.CV_ENODENET, node,
             "did not return valid bridge information")
      _ErrorIf(bool(missing), constants.CV_ENODENET, node,
               "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
  def _VerifyNodeUserScripts(self, ninfo, nresult):
    """Check the results of user scripts presence and executability on the node

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    test = not constants.NV_USERSCRIPTS in nresult
    self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
                  "did not return user scripts information")

    # the node reports back the list of broken user scripts, if any
    broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
      self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
                    "user scripts not present or not executable: %s" %
                    utils.CommaJoin(sorted(broken_scripts)))
  def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the node network connectivity results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # ssh connectivity: the node returns a dict of {node: error message}
    # for the nodes it could not reach
    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, constants.CV_ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
      if nresult[constants.NV_NODELIST]:
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
          _ErrorIf(True, constants.CV_ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    # tcp connectivity: same structure, per failed node
    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, constants.CV_ENODENET, node,
             "node hasn't returned node tcp connectivity data")
      if nresult[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
          _ErrorIf(True, constants.CV_ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, nresult[constants.NV_NODENETTEST][anode])

    # master IP reachability from this node
    test = constants.NV_MASTERIP not in nresult
    _ErrorIf(test, constants.CV_ENODENET, node,
             "node hasn't returned node master IP reachability data")
      if not nresult[constants.NV_MASTERIP]:
        if node == self.master_node:
          msg = "the master node cannot reach the master IP (not configured?)"
          msg = "cannot reach the master IP"
        _ErrorIf(True, constants.CV_ENODENET, node, msg)
  def _VerifyInstancePolicy(self, instance):
    """Verify instance specs against instance policy set on node group level.

    """
    cluster = self.cfg.GetClusterInfo()
    full_beparams = cluster.FillBE(instance)
    # the group's (partial) policy filled with the cluster defaults
    ipolicy = cluster.SimpleFillIPolicy(self.group_info.ipolicy)

    mem_size = full_beparams.get(constants.BE_MAXMEM, None)
    cpu_count = full_beparams.get(constants.BE_VCPUS, None)
    disk_count = len(instance.disks)
    disk_sizes = [disk.size for disk in instance.disks]
    nic_count = len(instance.nics)

      (constants.ISPEC_MEM_SIZE, mem_size),
      (constants.ISPEC_CPU_COUNT, cpu_count),
      (constants.ISPEC_DISK_COUNT, disk_count),
      (constants.ISPEC_NIC_COUNT, nic_count),
      # each individual disk size is checked against the policy as well
      ] + map((lambda d: (constants.ISPEC_DISK_SIZE, d)), disk_sizes)

    for (name, value) in test_settings:
      test_result = _CheckMinMaxSpecs(name, ipolicy, value)
      self._ErrorIf(test_result is not None,
                    constants.CV_EINSTANCEPOLICY, instance.name,
  def _VerifyInstance(self, instance, instanceconfig, node_image,
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
    node_current = instanceconfig.primary_node

    # map of {node: list of LVs the instance should have there}
    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    # check the instance against the group-level instance policy
    self._VerifyInstancePolicy(instanceconfig)

    for node in node_vol_should:
      n_img = node_image[node]
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
      for volume in node_vol_should[node]:
        test = volume not in n_img.volumes
        _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_state == constants.ADMINST_UP:
      pri_img = node_image[node_current]
      test = instance not in pri_img.instances and not pri_img.offline
      _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",

    # flatten the per-node disk status into (node, success, status, idx)
    diskdata = [(nname, success, status, idx)
                for (nname, disks) in diskstatus.items()
                for idx, (success, status) in enumerate(disks)]

    for nname, success, bdev_status, idx in diskdata:
      # the 'ghost node' construction in Exec() ensures that we have a
      # node image even for unknown nodes
      snode = node_image[nname]
      bad_snode = snode.ghost or snode.offline
      _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
               not success and not bad_snode,
               constants.CV_EINSTANCEFAULTYDISK, instance,
               "couldn't retrieve status for disk/%s on %s: %s",
               idx, nname, bdev_status)
      _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
                success and bdev_status.ldisk_status == constants.LDS_FAULTY),
               constants.CV_EINSTANCEFAULTYDISK, instance,
               "disk/%s on %s is faulty", idx, nname)
  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    @param node_vol_should: mapping of node name to the LVs the
        configuration expects there (as built via MapLVsByNode)
    @param node_image: mapping of node name to L{NodeImage}
    @type reserved: L{ganeti.utils.FieldSet}
    @param reserved: a FieldSet of reserved volume names

    """
    for node, n_img in node_image.items():
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # skip non-healthy nodes
        # NOTE(review): 'continue' statement elided in this extract
      for volume in n_img.volumes:
        # a volume is orphan if the config expects nothing on this node, or
        # not this particular LV, and it is not in the reserved set
        test = ((node not in node_vol_should or
                 volume not in node_vol_should[node]) and
                not reserved.Matches(volume))
        self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
                      "volume %s is unknown", volume)
  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the
    instances it was primary for.

    @param node_image: mapping of node name to L{NodeImage}
    @param instance_cfg: mapping of instance name to L{objects.Instance}

    """
    cluster_info = self.cfg.GetClusterInfo()
    for node, n_img in node_image.items():
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all instances it is
      # supposed to should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well
      # as up ones, considering that even if they're down someone
      # might want to start them even in the event of a node failure.
      # we're skipping offline nodes from the N+1 warning, since
      # most likely we don't have good memory information from them;
      # we already list instances living on such nodes, and that's
      # enough.  NOTE(review): the offline-node skip statement itself is
      # elided in this extract.
      #TODO(dynmem): use MINMEM for checking
      #TODO(dynmem): also consider ballooning out other instances
      for prinode, instances in n_img.sbp.items():
        # NOTE(review): 'needed_mem = 0' initializer elided in this extract
        # (implied by the += below)
        for instance in instances:
          bep = cluster_info.FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MAXMEM]
        test = n_img.mfree < needed_mem
        self._ErrorIf(test, constants.CV_ENODEN1, node,
                      "not enough memory to accomodate instance failovers"
                      " should node %s fail (%dMiB needed, %dMiB available)",
                      prinode, needed_mem, n_img.mfree)
  # NOTE(review): upstream this is a classmethod; the decorator line is not
  # part of this extract.  The last parameter uses Python-2 tuple unpacking.
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
                   (files_all, files_opt, files_mc, files_vm)):
    """Verifies file checksums collected from all nodes.

    @param errorif: Callback for reporting errors
    @param nodeinfo: List of L{objects.Node} objects
    @param master_node: Name of master node
    @param all_nvinfo: RPC results
    @param files_all: files expected on every node
    @param files_opt: optional files (must exist on all or on no nodes)
    @param files_mc: files for master candidates (and the master) only
    @param files_vm: files for vm_capable nodes only

    """
    # Define functions determining which nodes to consider for a file
    # NOTE(review): the list opener "files2nodefn = [" and its
    # "(files_all, None)," entry are elided in this extract
      (files_mc, lambda node: (node.master_candidate or
                               node.name == master_node)),
      (files_vm, lambda node: node.vm_capable),
    # Build mapping from filename to list of nodes which should have the file
    # NOTE(review): 'nodefiles = {}' initializer elided (implied by .update)
    for (files, fn) in files2nodefn:
      # NOTE(review): the 'if fn is None:' / 'else:' structure around the
      # two assignments below is elided in this extract
        filenodes = nodeinfo
        filenodes = filter(fn, nodeinfo)
      nodefiles.update((filename,
                        frozenset(map(operator.attrgetter("name"), filenodes)))
                       for filename in files)
    assert set(nodefiles) == (files_all | files_mc | files_vm)
    # per filename: mapping of checksum to the set of nodes reporting it
    fileinfo = dict((filename, {}) for filename in nodefiles)
    ignore_nodes = set()
    for node in nodeinfo:
      # NOTE(review): the offline-node guard around this add is elided;
      # nodes in ignore_nodes are excluded from missing/unexpected checks
        ignore_nodes.add(node.name)
      nresult = all_nvinfo[node.name]
      if nresult.fail_msg or not nresult.payload:
        # NOTE(review): the handling (ignore + continue) is elided here
      node_files = nresult.payload.get(constants.NV_FILELIST, None)
      test = not (node_files and isinstance(node_files, dict))
      errorif(test, constants.CV_ENODEFILECHECK, node.name,
              "Node did not return file checksum data")
      # a node without checksum data must not produce false "missing file"
      # errors below, so it is ignored too
        ignore_nodes.add(node.name)
      # Build per-checksum mapping from filename to nodes having it
      for (filename, checksum) in node_files.items():
        assert filename in nodefiles
        fileinfo[filename].setdefault(checksum, set()).add(node.name)
    for (filename, checksums) in fileinfo.items():
      assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
      # Nodes having the file
      with_file = frozenset(node_name
                            for nodes in fileinfo[filename].values()
                            for node_name in nodes) - ignore_nodes
      expected_nodes = nodefiles[filename] - ignore_nodes
      # Nodes missing file
      missing_file = expected_nodes - with_file
      if filename in files_opt:
        # optional files: all-or-nothing across the cluster
        errorif(missing_file and missing_file != expected_nodes,
                constants.CV_ECLUSTERFILECHECK, None,
                "File %s is optional, but it must exist on all or no"
                " nodes (not found on %s)",
                filename, utils.CommaJoin(utils.NiceSort(missing_file)))
      # NOTE(review): 'else:' branch header elided in this extract
        errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
                "File %s is missing from node(s) %s", filename,
                utils.CommaJoin(utils.NiceSort(missing_file)))
        # Warn if a node has a file it shouldn't
        unexpected = with_file - expected_nodes
        # NOTE(review): the errorif(unexpected, ...) opening line is elided
                constants.CV_ECLUSTERFILECHECK, None,
                "File %s should not exist on node(s) %s",
                filename, utils.CommaJoin(utils.NiceSort(unexpected)))
      # See if there are multiple versions of the file
      test = len(checksums) > 1
      # NOTE(review): 'if test:' guard (and its 'else' initializing
      # 'variants') elided in this extract
        variants = ["variant %s on %s" %
                    (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
                    for (idx, (checksum, nodes)) in
                    enumerate(sorted(checksums.items()))]
      errorif(test, constants.CV_ECLUSTERFILECHECK, None,
              "File %s found with %s different checksums (%s)",
              filename, len(checksums), "; ".join(variants))
  # NOTE(review): the trailing 'drbd_map' parameter line of this signature
  # is elided in this extract.
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
    """Verifies the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_helper: the configured DRBD usermode helper
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
    # NOTE(review): 'node = ninfo.name' and the 'if drbd_helper:' guard are
    # elided in this extract
    helper_result = nresult.get(constants.NV_DRBDHELPER, None)
    test = (helper_result == None)
    _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
             "no drbd usermode helper returned")
    # NOTE(review): the guard restricting the checks below to valid
    # (non-None) helper results, and 'test = not status', are elided
      status, payload = helper_result
      _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
               "drbd usermode helper check unsuccessful: %s", payload)
      test = status and (payload != drbd_helper)
      _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
               "wrong drbd usermode helper: %s", payload)
    # compute the DRBD minors
    # NOTE(review): 'node_drbd = {}' initializer elided (implied below)
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
      # ghost instance should not be running, but otherwise we
      # don't give double warnings (both ghost instance and
      # unallocated minor in use)
      # NOTE(review): the 'if test:' / 'else:' structure around the two
      # node_drbd assignments is elided in this extract
        node_drbd[minor] = (instance, False)
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name,
                            instance.admin_state == constants.ADMINST_UP)
    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, constants.CV_ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
    # NOTE(review): the early bail-out guard is elided here:
    # we cannot check drbd status
    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, constants.CV_ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, constants.CV_ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)
  def _UpdateNodeOS(self, ninfo, nresult, nimg):
    """Builds the node OS structures.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
    # NOTE(review): 'node = ninfo.name' elided in this extract
    remote_os = nresult.get(constants.NV_OSLIST, None)
    # a valid answer is a list of 7-element lists, one per OS
    test = (not isinstance(remote_os, list) or
            not compat.all(isinstance(v, list) and len(v) == 7
                           for v in remote_os))
    _ErrorIf(test, constants.CV_ENODEOS, node,
             "node hasn't returned valid OS data")
    # NOTE(review): the early return on bad data, the os_fail reset and the
    # 'os_dict = {}' initializer are elided in this extract
    for (name, os_path, status, diagnose,
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
      if name not in os_dict:
        # NOTE(review): 'os_dict[name] = []' elided in this extract
      # parameters is a list of lists instead of list of tuples due to
      # JSON lacking a real tuple type, fix it:
      parameters = [tuple(v) for v in parameters]
      os_dict[name].append((os_path, status, diagnose,
                            set(variants), set(parameters), set(api_ver)))
    nimg.oslist = os_dict
  def _VerifyNodeOS(self, ninfo, nimg, base):
    """Verifies the node OS list.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nimg: the node image object
    @param base: the 'template' node we match against (e.g. from the master)

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
    # NOTE(review): 'node = ninfo.name' elided in this extract
    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
    # render a parameter list as "key: value" strings for error messages
    beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
    for os_name, os_data in nimg.oslist.items():
      assert os_data, "Empty OS status for OS %s?!" % os_name
      # only the first entry is authoritative; extras shadow it (see below)
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
      _ErrorIf(not f_status, constants.CV_ENODEOS, node,
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
      _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
               "OS '%s' has multiple entries (first one shadows the rest): %s",
               os_name, utils.CommaJoin([v[0] for v in os_data]))
      # comparisons with the 'base' image
      test = os_name not in base.oslist
      _ErrorIf(test, constants.CV_ENODEOS, node,
               "Extra OS %s not present on reference node (%s)",
               # NOTE(review): the final message argument and the
               # 'skip-if-extra' guard are elided in this extract
      assert base.oslist[os_name], "Base node has empty OS status?"
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
      # NOTE(review): the guard skipping invalid base entries is elided:
        # base OS is invalid, skipping
      for kind, a, b in [("API version", f_api, b_api),
                         ("variants list", f_var, b_var),
                         ("parameters", beautify_params(f_param),
                          beautify_params(b_param))]:
        _ErrorIf(a != b, constants.CV_ENODEOS, node,
                 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
                 kind, os_name, base.name,
                 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
    # check any missing OSes
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
    _ErrorIf(missing, constants.CV_ENODEOS, node,
             "OSes present on reference node %s but missing on this node: %s",
             base.name, utils.CommaJoin(missing))
  def _VerifyOob(self, ninfo, nresult):
    """Verifies out of band functionality of a node.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    # NOTE(review): 'node = ninfo.name' elided in this extract
    # We just have to verify the paths on master and/or master candidates
    # as the oob helper is invoked on the master
    if ((ninfo.master_candidate or ninfo.master_capable) and
        constants.NV_OOB_PATHS in nresult):
      for path_result in nresult[constants.NV_OOB_PATHS]:
        # a truthy path_result signals a problem and doubles as the message
        self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
    """Verifies and updates the node volume data.

    This function will update a L{NodeImage}'s internal structures
    with data from the remote call.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
    # NOTE(review): 'node = ninfo.name' elided in this extract
    # assume failure until the data proves valid below
    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    # NOTE(review): the leading 'if vg_name is None:' branch is elided
    elif isinstance(lvdata, basestring):
      # the node returned an error string instead of the LV mapping
      _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
               utils.SafeEncode(lvdata))
    elif not isinstance(lvdata, dict):
      _ErrorIf(True, constants.CV_ENODELVM, node,
               "rpc call to node failed (lvlist)")
    # NOTE(review): 'else:' elided — the data is a valid dict here
      nimg.volumes = lvdata
      nimg.lvm_fail = False
  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
    """Verifies and updates the node instance list.

    If the listing was successful, then updates this node's instance
    list. Otherwise, it marks the RPC call as failed for the instance
    list.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    idata = nresult.get(constants.NV_INSTANCELIST, None)
    test = not isinstance(idata, list)
    self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
                  "rpc call to node failed (instancelist): %s",
                  utils.SafeEncode(str(idata)))
    # NOTE(review): the 'if test:' / 'else:' structure around the two
    # assignments below is elided in this extract
      nimg.hyp_fail = True
      nimg.instances = idata
  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
    """Verifies and computes a node information map.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
    # NOTE(review): 'node = ninfo.name' elided in this extract
    # try to read free memory (from the hypervisor)
    hv_info = nresult.get(constants.NV_HVINFO, None)
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
    _ErrorIf(test, constants.CV_ENODEHV, node,
             "rpc call to node failed (hvinfo)")
    # NOTE(review): the 'if not test:' / 'try:' guard lines are elided
      nimg.mfree = int(hv_info["memory_free"])
    except (ValueError, TypeError):
      _ErrorIf(True, constants.CV_ENODERPC, node,
               "node returned invalid nodeinfo, check hypervisor")
    # FIXME: devise a free space model for file based instances as well
    if vg_name is not None:
      test = (constants.NV_VGLIST not in nresult or
              vg_name not in nresult[constants.NV_VGLIST])
      _ErrorIf(test, constants.CV_ENODELVM, node,
               "node didn't return data for the volume group '%s'"
               " - it is either missing or broken", vg_name)
      # NOTE(review): the 'if not test:' / 'try:' guard lines are elided
        nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
      except (ValueError, TypeError):
        _ErrorIf(True, constants.CV_ENODERPC, node,
                 "node returned invalid LVM info, check LVM status")
  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
    """Gets per-disk status information for all instances.

    @type nodelist: list of strings
    @param nodelist: Node names
    @type node_image: dict of (name, L{objects.Node})
    @param node_image: Node objects
    @type instanceinfo: dict of (name, L{objects.Instance})
    @param instanceinfo: Instance objects
    @rtype: {instance: {node: [(succes, payload)]}}
    @return: a dictionary of per-instance dictionaries with nodes as
        keys and disk information as values; the disk information is a
        list of tuples (success, payload)

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
    # NOTE(review): 'node_disks = {}' initializer elided in this extract
    node_disks_devonly = {}
    diskless_instances = set()
    diskless = constants.DT_DISKLESS
    for nname in nodelist:
      # all instances with a disk presence on this node (primary + secondary)
      node_instances = list(itertools.chain(node_image[nname].pinst,
                                            node_image[nname].sinst))
      diskless_instances.update(inst for inst in node_instances
                                if instanceinfo[inst].disk_template == diskless)
      disks = [(inst, disk)
               for inst in node_instances
               for disk in instanceinfo[inst].disks]
      # NOTE(review): the empty-disks guard and its 'continue' are elided
        # No need to collect data
      node_disks[nname] = disks
      # Creating copies as SetDiskID below will modify the objects and that can
      # lead to incorrect data returned from nodes
      devonly = [dev.Copy() for (_, dev) in disks]
      # NOTE(review): the 'for dev in devonly:' loop header is elided
        self.cfg.SetDiskID(dev, nname)
      node_disks_devonly[nname] = devonly
    assert len(node_disks) == len(node_disks_devonly)
    # Collect data from all nodes with disks
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
                                                          # NOTE(review):
                                                          # argument line
                                                          # elided
    assert len(result) == len(node_disks)
    # NOTE(review): 'instdisk = {}' initializer elided in this extract
    for (nname, nres) in result.items():
      disks = node_disks[nname]
      # NOTE(review): the offline/failure branch structure here is elided;
      # the visible lines are its interior
        # No data from this node
        data = len(disks) * [(False, "node offline")]
        _ErrorIf(msg, constants.CV_ENODERPC, nname,
                 "while getting disk information: %s", msg)
          # No data from this node
          data = len(disks) * [(False, msg)]
          for idx, i in enumerate(nres.payload):
            if isinstance(i, (tuple, list)) and len(i) == 2:
              # NOTE(review): the append of the valid entry and the 'else:'
              # are elided in this extract
              logging.warning("Invalid result from node %s, entry %d: %s",
                              # NOTE(review): argument line elided
              data.append((False, "Invalid result from the remote node"))
      for ((inst, _), status) in zip(disks, data):
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
    # Add empty entries for diskless instances.
    for inst in diskless_instances:
      assert inst not in instdisk
      # NOTE(review): the empty-dict assignment is elided in this extract
    # consistency check: one status tuple per configured disk, and only on
    # nodes the instance actually uses
    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
                      compat.all(isinstance(s, (tuple, list)) and
                                 len(s) == 2 for s in statuses)
                      for inst, nnames in instdisk.items()
                      for nname, statuses in nnames.items())
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
    # NOTE(review): 'return instdisk' elided in this extract
2742 def _SshNodeSelector(group_uuid, all_nodes):
2743 """Create endless iterators for all potential SSH check hosts.
2746 nodes = [node for node in all_nodes
2747 if (node.group != group_uuid and
2749 keyfunc = operator.attrgetter("group")
2751 return map(itertools.cycle,
2752 [sorted(map(operator.attrgetter("name"), names))
2753 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
  # NOTE(review): upstream this is a classmethod; the decorator line is not
  # part of this extract.
  def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
    """Choose which nodes should talk to which other nodes.

    We will make nodes contact all nodes in their group, and one node from
    every other node group (rotating through each group's nodes via the
    cycles produced by L{_SshNodeSelector}).

    @warning: This algorithm has a known issue if one node group is much
        smaller than others (e.g. just one node). In such a case all other
        nodes will talk to the single node.

    """
    online_nodes = sorted(node.name for node in group_nodes if not node.offline)
    sel = cls._SshNodeSelector(group_uuid, all_nodes)
    # for each online node: one (rotating) peer name from every other group
    return (online_nodes,
            dict((name, sorted([i.next() for i in sel]))
                 for name in online_nodes))
  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks just ran in the post phase and their failure makes
    the output be logged in the verify output and the verification to fail.

    """
    # NOTE(review): the 'env = {' dict opener and closing brace are elided
    # in this extract; CLUSTER_TAGS is its only surviving entry
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
    # one NODE_TAGS_<name> entry per node of this group
    env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
               for node in self.my_node_info.values())
    # NOTE(review): 'return env' elided in this extract
2791 def BuildHooksNodes(self):
2792 """Build hooks nodes.
2795 return ([], self.my_node_names)
  def Exec(self, feedback_fn):
    """Verify integrity of the node group, performing various test on nodes.

    @param feedback_fn: callback used to report progress/notice messages
        back to the caller

    NOTE(review): this extract has dropped many original lines of this
    method (initializers, if/else and try guards, dict/list delimiters,
    continuation arguments and the final return); NOTE(review) markers
    below flag the structurally significant gaps.  Visible tokens are
    kept as found.

    """
    # This method has too many local variables. pylint: disable=R0914
    feedback_fn("* Verifying group '%s'" % self.group_info.name)
    if not self.my_node_names:
      # empty node group: nothing to verify
      feedback_fn("* Empty node group, skipping verification")
      # NOTE(review): early return elided in this extract
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn
    vg_name = self.cfg.GetVGName()
    drbd_helper = self.cfg.GetDRBDHelper()
    cluster = self.cfg.GetClusterInfo()
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
    hypervisors = cluster.enabled_hypervisors
    node_data_list = [self.my_node_info[name] for name in self.my_node_names]
    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    i_offline = 0 # Count of offline instances
    n_offline = 0 # Count of offline nodes
    n_drained = 0 # Count of nodes being drained
    node_vol_should = {}
    # FIXME: verify OS list
    filemap = _ComputeAncillaryFiles(cluster, False)
    # do local checksums
    master_node = self.master_node = self.cfg.GetMasterNode()
    master_ip = self.cfg.GetMasterIP()
    feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
    # NOTE(review): 'user_scripts = []' initializer elided (implied below)
    if self.cfg.GetUseExternalMipScript():
      user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
    # parameters for the node_verify RPC: what each node should check
    node_verify_param = {
      constants.NV_FILELIST:
        utils.UniqueSequence(filename
                             for files in filemap
                             for filename in files),
      constants.NV_NODELIST:
        self._SelectSshCheckNodes(node_data_list, self.group_uuid,
                                  self.all_node_info.values()),
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_HVPARAMS:
        _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
      constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
                                 for node in node_data_list
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      constants.NV_NODESETUP: None,
      constants.NV_TIME: None,
      constants.NV_MASTERIP: (master_node, master_ip),
      constants.NV_OSLIST: None,
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
      constants.NV_USERSCRIPTS: user_scripts,
      # NOTE(review): closing brace of this dict elided in this extract
    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_PVLIST] = [vg_name]
      node_verify_param[constants.NV_DRBDLIST] = None
    # NOTE(review): 'if drbd_helper:' guard elided in this extract
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
    # FIXME: this needs to be changed per node-group, not cluster-wide
    # NOTE(review): 'bridges = set()' initializer elided (implied below)
    default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
    if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
      bridges.add(default_nicpp[constants.NIC_LINK])
    for instance in self.my_inst_info.values():
      for nic in instance.nics:
        full_nic = cluster.SimpleFillNIC(nic.nicparams)
        if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          bridges.add(full_nic[constants.NIC_LINK])
    # NOTE(review): non-empty-bridges guard elided in this extract
      node_verify_param[constants.NV_BRIDGES] = list(bridges)
    # Build our expected cluster state
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
                                                 # NOTE(review): keyword
                                                 # argument line elided
                                                 vm_capable=node.vm_capable))
                      for node in node_data_list)
    # NOTE(review): 'oob_paths = []' initializer elided (implied below)
    for node in self.all_node_info.values():
      path = _SupportsOob(self.cfg, node)
      if path and path not in oob_paths:
        oob_paths.append(path)
    # NOTE(review): non-empty-oob_paths guard elided in this extract
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths
    for instance in self.my_inst_names:
      inst_config = self.my_inst_info[instance]
      # NOTE(review): original line elided here
      for nname in inst_config.all_nodes:
        if nname not in node_image:
          # ghost node: referenced by an instance but not in our node list
          gnode = self.NodeImage(name=nname)
          gnode.ghost = (nname not in self.all_node_info)
          node_image[nname] = gnode
      inst_config.MapLVsByNode(node_vol_should)
      pnode = inst_config.primary_node
      node_image[pnode].pinst.append(instance)
      for snode in inst_config.secondary_nodes:
        nimg = node_image[snode]
        nimg.sinst.append(instance)
        if pnode not in nimg.sbp:
          nimg.sbp[pnode] = []
        nimg.sbp[pnode].append(instance)
    # At this point, we have the in-memory data structures complete,
    # except for the runtime information, which we'll gather next
    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
    nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
                                           # NOTE(review): argument elided
                                           self.cfg.GetClusterName())
    nvinfo_endtime = time.time()
    if self.extra_lv_nodes and vg_name is not None:
      # NOTE(review): assignment target line elided in this extract
          self.rpc.call_node_verify(self.extra_lv_nodes,
                                    {constants.NV_LVLIST: vg_name},
                                    self.cfg.GetClusterName())
    # NOTE(review): 'else:' elided in this extract
      extra_lv_nvinfo = {}
    all_drbd_map = self.cfg.ComputeDRBDMap()
    feedback_fn("* Gathering disk information (%s nodes)" %
                len(self.my_node_names))
    instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
                                     # NOTE(review): argument elided
    feedback_fn("* Verifying configuration file consistency")
    # If not all nodes are being checked, we need to make sure the master node
    # and a non-checked vm_capable node are in the list.
    absent_nodes = set(self.all_node_info).difference(self.my_node_info)
    # NOTE(review): non-empty-absent_nodes guard elided in this extract
      vf_nvinfo = all_nvinfo.copy()
      vf_node_info = list(self.my_node_info.values())
      additional_nodes = []
      if master_node not in self.my_node_info:
        additional_nodes.append(master_node)
        vf_node_info.append(self.all_node_info[master_node])
      # Add the first vm_capable node we find which is not included
      for node in absent_nodes:
        nodeinfo = self.all_node_info[node]
        if nodeinfo.vm_capable and not nodeinfo.offline:
          additional_nodes.append(node)
          vf_node_info.append(self.all_node_info[node])
          # NOTE(review): 'break' elided in this extract
      key = constants.NV_FILELIST
      vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
                                                 {key: node_verify_param[key]},
                                                 self.cfg.GetClusterName()))
    # NOTE(review): 'else:' elided in this extract
      vf_nvinfo = all_nvinfo
      vf_node_info = self.my_node_info.values()
    self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
    feedback_fn("* Verifying node status")
    # NOTE(review): 'refos_img = None' style initializer elided (reference
    # OS image, set on the first healthy vm_capable node below)
    for node_i in node_data_list:
      # NOTE(review): 'node = node_i.name' elided in this extract
      nimg = node_image[node]
      # NOTE(review): the offline-node skip structure is elided here
        feedback_fn("* Skipping offline node %s" % (node,))
      # classify the node for the progress message below
      if node == master_node:
        # NOTE(review): ntype assignment elided
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        # NOTE(review): ntype assignment / drained counter / 'else' elided
      feedback_fn("* Verifying node %s (%s)" % (node, ntype))
      msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
      # NOTE(review): the 'if msg:' guard and its 'continue' are elided;
      # unreachable nodes are only marked as rpc_fail
        nimg.rpc_fail = True
      nresult = all_nvinfo[node].payload
      nimg.call_ok = self._VerifyNode(node_i, nresult)
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
      self._VerifyNodeNetwork(node_i, nresult)
      self._VerifyNodeUserScripts(node_i, nresult)
      self._VerifyOob(node_i, nresult)
      # NOTE(review): the vm_capable guard around the block below is elided
        self._VerifyNodeLVM(node_i, nresult, vg_name)
        self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
                             # NOTE(review): drbd map argument elided
        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
        self._UpdateNodeInstances(node_i, nresult, nimg)
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
        self._UpdateNodeOS(node_i, nresult, nimg)
        if not nimg.os_fail:
          if refos_img is None:
            # NOTE(review): 'refos_img = nimg' elided in this extract
          self._VerifyNodeOS(node_i, nimg, refos_img)
        self._VerifyNodeBridges(node_i, nresult, bridges)
        # Check whether all running instancies are primary for the node. (This
        # can no longer be done from _VerifyInstance below, since some of the
        # wrong instances could be from other node groups.)
        non_primary_inst = set(nimg.instances).difference(nimg.pinst)
        for inst in non_primary_inst:
          # FIXME: investigate best way to handle offline insts
          if inst.admin_state == constants.ADMINST_OFFLINE:
            # NOTE(review): verbose guard / counter / 'continue' elided here
            feedback_fn("* Skipping offline instance %s" % inst.name)
          # known instance running on the wrong node vs. unknown instance
          test = inst in self.all_inst_info
          _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
                   "instance should not run on node %s", node_i.name)
          _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
                   "node is running unknown instance %s", inst)
    for node, result in extra_lv_nvinfo.items():
      self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
                              node_image[node], vg_name)
    feedback_fn("* Verifying instance status")
    for instance in self.my_inst_names:
      # NOTE(review): verbose guard elided in this extract
        feedback_fn("* Verifying instance %s" % instance)
      inst_config = self.my_inst_info[instance]
      self._VerifyInstance(instance, inst_config, node_image,
                           # NOTE(review): per-instance diskstatus arg elided
      inst_nodes_offline = []
      pnode = inst_config.primary_node
      pnode_img = node_image[pnode]
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
               constants.CV_ENODERPC, pnode, "instance %s, connection to"
               " primary node failed", instance)
      _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
               # NOTE(review): offline condition line elided
               constants.CV_EINSTANCEBADNODE, instance,
               "instance is marked as running and lives on offline node %s",
               inst_config.primary_node)
      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
      # FIXME: does not support file-backed instances
      if not inst_config.secondary_nodes:
        i_non_redundant.append(instance)
      _ErrorIf(len(inst_config.secondary_nodes) > 1,
               constants.CV_EINSTANCELAYOUT,
               instance, "instance has multiple secondary nodes: %s",
               utils.CommaJoin(inst_config.secondary_nodes),
               code=self.ETYPE_WARNING)
      if inst_config.disk_template in constants.DTS_INT_MIRROR:
        pnode = inst_config.primary_node
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
        instance_groups = {}
        for node in instance_nodes:
          instance_groups.setdefault(self.all_node_info[node].group,
                                     # NOTE(review): append-call line and
                                     # pretty_list opener elided here
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
          # Sort so that we always list the primary node first.
          for group, nodes in sorted(instance_groups.items(),
                                     key=lambda (_, nodes): pnode in nodes,
                                     # NOTE(review): closing lines elided
        self._ErrorIf(len(instance_groups) > 1,
                      constants.CV_EINSTANCESPLITGROUPS,
                      instance, "instance has primary and secondary nodes in"
                      " different groups: %s", utils.CommaJoin(pretty_list),
                      code=self.ETYPE_WARNING)
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)
      for snode in inst_config.secondary_nodes:
        s_img = node_image[snode]
        _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
                 snode, "instance %s, connection to secondary node failed",
                 # NOTE(review): trailing argument and offline guard elided
          inst_nodes_offline.append(snode)
      # warn that the instance lives on offline nodes
      _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
               "instance has offline secondary node(s) %s",
               utils.CommaJoin(inst_nodes_offline))
      # ... or ghost/non-vm_capable nodes
      for node in inst_config.all_nodes:
        _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
                 instance, "instance lives on ghost node %s", node)
        _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
                 instance, "instance lives on non-vm_capable node %s", node)
    feedback_fn("* Verifying orphan volumes")
    reserved = utils.FieldSet(*cluster.reserved_lvs)
    # We will get spurious "unknown volume" warnings if any node of this group
    # is secondary for an instance whose primary is in another group. To avoid
    # them, we find these instances and add their volumes to node_vol_should.
    for inst in self.all_inst_info.values():
      for secondary in inst.secondary_nodes:
        if (secondary in self.my_node_info
            and inst.name not in self.my_inst_info):
          inst.MapLVsByNode(node_vol_should)
          # NOTE(review): 'break' elided in this extract
    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
      feedback_fn("* Verifying N+1 Memory redundancy")
      self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
    feedback_fn("* Other Notes")
    # NOTE(review): non-empty guard elided in this extract
      feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
                  % len(i_non_redundant))
    if i_non_a_balanced:
      feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
                  % len(i_non_a_balanced))
    # NOTE(review): non-zero guard elided in this extract
      feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
    # NOTE(review): non-zero guard elided in this extract
      feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
    # NOTE(review): non-zero guard elided in this extract
      feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
    # NOTE(review): the method's final return statement is elided in this
    # extract
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result

    """
    # We only really run POST phase hooks, only for non-empty groups,
    # and are only interested in their results
    if not self.my_node_names:
    elif phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      feedback_fn("* Hooks Results")
      assert hooks_results, "invalid result from hooks"

      # Report per-node RPC-level failures first, then per-script failures.
      for node_name in hooks_results:
        res = hooks_results[node_name]
        # NOTE(review): 'msg' is read here without a visible assignment in
        # this copy — presumably 'msg = res.fail_msg' belongs just above;
        # verify against the upstream source.
        test = msg and not res.offline
        self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
                      "Communication failure in hooks execution: %s", msg)
        if res.offline or msg:
          # No need to investigate payload if node is offline or gave
        # res.payload is a list of (script, result-code, output) triples
        for script, hkr, output in res.payload:
          test = hkr == constants.HKR_FAIL
          self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
                        "Script %s failed, output:", script)
          # Re-indent the hook's own output so it nests under our message
          output = self._HOOKS_INDENT_RE.sub("      ", output)
          feedback_fn("%s" % output)
class LUClusterVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  Fans out the work: submits one L{opcodes.OpGroupVerifyDisks} job per
  node group; the per-group verification is done by L{LUGroupVerifyDisks}.

  """
  def ExpandNames(self):
    # Only shared locks are needed; we just enumerate the node groups.
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
  def Exec(self, feedback_fn):
    group_names = self.owned_locks(locking.LEVEL_NODEGROUP)

    # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
    return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
                           for group in group_names])
class LUGroupVerifyDisks(NoHooksLU):
  """Verifies the status of all disks in a node group.

  """
  def ExpandNames(self):
    # Raises errors.OpPrereqError on its own if group can't be found
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.share_locks = _ShareAll()
    # Lock sets are filled incrementally in DeclareLocks, level by level
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        set([self.group_uuid] +
            # Lock all groups used by instances optimistically; this requires
            # going via the node before it's locked, requiring verification
            for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
            for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be verified which contain
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be verified
      assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
      member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)

  def CheckPrereq(self):
    # Re-validate the optimistically-taken locks now that they are held
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))

    # Check if node groups for locked instances are still correct
    for (instance_name, inst) in self.instances.items():
      assert owned_nodes.issuperset(inst.all_nodes), \
        "Instance %s's nodes changed while we kept the lock" % instance_name

      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
      assert self.group_uuid in inst_groups, \
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
    res_instances = set()

    # Map every (node, lv_name) of running instances to its instance; any
    # entry left over after polling the nodes is a missing LV.
    nv_dict = _MapInstanceDisksToNodes([inst
            for inst in self.instances.values()
            if inst.admin_state == constants.ADMINST_UP])

    # Only query vm-capable nodes we actually hold locks for
    nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
                           set(self.cfg.GetVmCapableNodeList()))

    node_lvs = self.rpc.call_lv_list(nodes, [])

    for (node, node_res) in node_lvs.items():
      if node_res.offline:
      # NOTE(review): 'res_nodes' / 'res_missing' are used below without a
      # visible initialization in this copy — presumably empty dicts set up
      # at the top of the method; verify.
      msg = node_res.fail_msg
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        res_nodes[node] = msg

      # payload maps lv_name -> (size?, ?, online-flag); an LV that exists
      # but is offline means the owning instance needs activate-disks
      for lv_name, (_, _, lv_online) in node_res.payload.items():
        inst = nv_dict.pop((node, lv_name), None)
        if not (lv_online or inst is None):
          res_instances.add(inst)

    # any leftover items in nv_dict are missing LVs, let's arrange the data
    for key, inst in nv_dict.iteritems():
      res_missing.setdefault(inst, []).append(list(key))

    return (res_nodes, list(res_instances), res_missing)
class LUClusterRepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  Compares the disk sizes recorded in the configuration against the sizes
  reported by the nodes, and rewrites the configuration where they differ.

  """
  def ExpandNames(self):
    if self.op.instances:
      # Restrict to the explicitly requested instances
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
      self.needed_locks = {
        locking.LEVEL_NODE_RES: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
      self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
      # No instance list given: operate on the whole cluster
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE_RES: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
    # Node-resource locks shared, instance locks exclusive (we may update
    # the instance objects in the configuration)
    self.share_locks = {
      locking.LEVEL_NODE_RES: 1,
      locking.LEVEL_INSTANCE: 0,
  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
      # Only the primary nodes hold the disks we need to measure
      self._LockInstancesNodes(primary_only=True, level=level)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)

    self.wanted_instances = \
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object
    @rtype: boolean
    @return: whether any child size was changed

    """
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    # NOTE(review): 'per_node_disks' and 'changed' are used below without a
    # visible initialization in this copy — presumably '{}' and '[]' set up
    # here; verify.
    # Group the disks by the primary node holding them
    for instance in self.wanted_instances:
      pnode = instance.primary_node
      if pnode not in per_node_disks:
        per_node_disks[pnode] = []
      for idx, disk in enumerate(instance.disks):
        per_node_disks[pnode].append((instance, idx, disk))

    assert not (frozenset(per_node_disks.keys()) -
                self.owned_locks(locking.LEVEL_NODE_RES)), \
      "Not owning correct locks"
    assert not self.owned_locks(locking.LEVEL_NODE)

    for node, dskl in per_node_disks.items():
      # Work on copies so the config objects are not touched until we
      # actually decide to update them
      newl = [v[2].Copy() for v in dskl]
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsize(node, newl)
        self.LogWarning("Failure in blockdev_getsize call to node"
                        " %s, ignoring", node)
      if len(result.payload) != len(dskl):
        logging.warning("Invalid result from node %s: len(dksl)=%d,"
                        " result.payload=%s", node, len(dskl), result.payload)
        self.LogWarning("Invalid result from node %s, ignoring node results",
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))
class LUClusterRename(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    """
    hostname = netutils.GetHostname(name=self.op.name,
                                    family=self.cfg.GetPrimaryIPFamily())

    new_name = hostname.name
    self.ip = new_ip = hostname.ip
    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
    if new_ip != old_ip:
      # Make sure nobody else already answers on the new master IP
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                   " reachable on the network" %
                                   new_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    """
    clustername = self.op.name
    # NOTE(review): 'new_ip' is used below without a visible assignment in
    # this copy — presumably 'new_ip = self.ip' (set in CheckPrereq); verify.

    # shutdown the master IP
    master_params = self.cfg.GetMasterNetworkParameters()
    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
    result.Raise("Could not disable the master role")

      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = clustername
      cluster.master_ip = new_ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetOnlineNodeList()
        node_list.remove(master_params.name)
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
      # Re-enable the master IP whatever happened above (finally-style)
      master_params.ip = new_ip
      result = self.rpc.call_node_activate_master_ip(master_params.name,
      msg = result.fail_msg
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)
def _ValidateNetmask(cfg, netmask):
  """Checks if a netmask is valid.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @param netmask: the netmask to be verified
  @raise errors.OpPrereqError: if the validation fails

  """
  ip_family = cfg.GetPrimaryIPFamily()
    # May raise ProgrammerError for an unknown family; translate to a
    # user-facing prereq error
    ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
  except errors.ProgrammerError:
    raise errors.OpPrereqError("Invalid primary ip family: %s." %
  if not ipcls.ValidateNetmask(netmask):
    raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
class LUClusterSetParams(LogicalUnit):
  """Change the parameters of the cluster.

  """
  HPATH = "cluster-modify"
  HTYPE = constants.HTYPE_CLUSTER

  def CheckArguments(self):
    # Syntactic validation of the argument values; semantic checks (against
    # cluster state) happen later in CheckPrereq
    if self.op.uid_pool:
      uidpool.CheckUidPool(self.op.uid_pool)

    if self.op.add_uids:
      uidpool.CheckUidPool(self.op.add_uids)

    if self.op.remove_uids:
      uidpool.CheckUidPool(self.op.remove_uids)

    if self.op.master_netmask is not None:
      _ValidateNetmask(self.cfg, self.op.master_netmask)

    if self.op.diskparams:
      for dt_params in self.op.diskparams.values():
        utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)

  def ExpandNames(self):
    # FIXME: in the future maybe other cluster params won't require checking on
    # all nodes to be modified.
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    self.share_locks[locking.LEVEL_NODE] = 1

  def BuildHooksEnv(self):
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_VG_NAME": self.op.vg_name,
  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the given params don't conflict and
    if the given volume group is valid.

    """
    # Disabling LVM / the DRBD helper is only allowed when no disk of the
    # respective type exists anymore
    if self.op.vg_name is not None and not self.op.vg_name:
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
                                   " instances exist", errors.ECODE_INVAL)

    if self.op.drbd_helper is not None and not self.op.drbd_helper:
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
        raise errors.OpPrereqError("Cannot disable drbd helper while"
                                   " drbd-based instances exist",

    node_list = self.owned_locks(locking.LEVEL_NODE)

    # if vg_name not None, checks given volume group on all nodes
      vglist = self.rpc.call_vg_list(node_list)
      for node in node_list:
        msg = vglist[node].fail_msg
          # ignoring down node
          self.LogWarning("Error while gathering data on node %s"
                          " (ignoring node): %s", node, msg)
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
                                              constants.MIN_VG_SIZE)
          raise errors.OpPrereqError("Error on node '%s': %s" %
                                     (node, vgstatus), errors.ECODE_ENVIRON)

    if self.op.drbd_helper:
      # checks given drbd helper on all nodes
      helpers = self.rpc.call_drbd_helper(node_list)
      for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
          self.LogInfo("Not checking drbd helper on offline node %s", node)
        msg = helpers[node].fail_msg
          raise errors.OpPrereqError("Error checking drbd helper on node"
                                     " '%s': %s" % (node, msg),
                                     errors.ECODE_ENVIRON)
        node_helper = helpers[node].payload
        if node_helper != self.op.drbd_helper:
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
                                     (node, node_helper), errors.ECODE_ENVIRON)

    self.cluster = cluster = self.cfg.GetClusterInfo()
    # validate params changes
    if self.op.beparams:
      objects.UpgradeBeParams(self.op.beparams)
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)

      # TODO: we need a more general way to handle resetting
      # cluster-level parameters to default values
      if self.new_ndparams["oob_program"] == "":
        self.new_ndparams["oob_program"] = \
            constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]

    if self.op.hv_state:
      new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
                                            self.cluster.hv_state_static)
      self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
                               for hv, values in new_hv_state.items())

    if self.op.disk_state:
      new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
                                                self.cluster.disk_state_static)
      self.new_disk_state = \
        dict((storage, dict((name, cluster.SimpleFillDiskState(values))
                            for name, values in svalues.items()))
             for storage, svalues in new_disk_state.items())

      for key, value in self.op.ipolicy.items():
        utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
        ipolicy[key] = _GetUpdatedParams(cluster.ipolicy.get(key, {}),
      objects.InstancePolicy.CheckParameterSyntax(ipolicy)
      self.new_ipolicy = ipolicy

    if self.op.nicparams:
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
      objects.NIC.CheckParameterSyntax(self.new_nicparams)

      # check all instances for consistency
      for instance in self.cfg.GetAllInstancesInfo().values():
        for nic_idx, nic in enumerate(instance.nics):
          params_copy = copy.deepcopy(nic.nicparams)
          params_filled = objects.FillDict(self.new_nicparams, params_copy)

          # check parameter syntax
            objects.NIC.CheckParameterSyntax(params_filled)
          except errors.ConfigurationError, err:
            nic_errors.append("Instance %s, nic/%d: %s" %
                              (instance.name, nic_idx, err))

          # if we're moving instances to routed, check that they have an ip
          target_mode = params_filled[constants.NIC_MODE]
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
            nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
                              " address" % (instance.name, nic_idx))
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
                                   "\n".join(nic_errors))

    # hypervisor list/parameters
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
    if self.op.hvparams:
      for hv_name, hv_dict in self.op.hvparams.items():
        if hv_name not in self.new_hvparams:
          self.new_hvparams[hv_name] = hv_dict
          self.new_hvparams[hv_name].update(hv_dict)

    # disk template parameters
    self.new_diskparams = objects.FillDict(cluster.diskparams, {})
    if self.op.diskparams:
      for dt_name, dt_params in self.op.diskparams.items():
        # NOTE(review): dt_name is drawn from self.op.diskparams itself, so
        # this membership test is always true and the update() branch below
        # is dead code — by symmetry with the os_hvp loop below it should
        # probably test self.new_diskparams; verify against upstream.
        if dt_name not in self.op.diskparams:
          self.new_diskparams[dt_name] = dt_params
          self.new_diskparams[dt_name].update(dt_params)

    # os hypervisor parameters
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
      for os_name, hvs in self.op.os_hvp.items():
        if os_name not in self.new_os_hvp:
          self.new_os_hvp[os_name] = hvs
          for hv_name, hv_dict in hvs.items():
            if hv_name not in self.new_os_hvp[os_name]:
              self.new_os_hvp[os_name][hv_name] = hv_dict
              self.new_os_hvp[os_name][hv_name].update(hv_dict)

    self.new_osp = objects.FillDict(cluster.osparams, {})
    if self.op.osparams:
      for os_name, osp in self.op.osparams.items():
        if os_name not in self.new_osp:
          self.new_osp[os_name] = {}

        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,

        if not self.new_osp[os_name]:
          # we removed all parameters
          del self.new_osp[os_name]
          # check the parameter validity (remote check)
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
                         os_name, self.new_osp[os_name])

    # changes to the hypervisor list
    if self.op.enabled_hypervisors is not None:
      self.hv_list = self.op.enabled_hypervisors
      for hv in self.hv_list:
        # if the hypervisor doesn't already exist in the cluster
        # hvparams, we initialize it to empty, and then (in both
        # cases) we make sure to fill the defaults, as we might not
        # have a complete defaults list if the hypervisor wasn't
        if hv not in new_hvp:
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
      self.hv_list = cluster.enabled_hypervisors

    if self.op.hvparams or self.op.enabled_hypervisors is not None:
      # either the enabled list has changed, or the parameters have, validate
      for hv_name, hv_params in self.new_hvparams.items():
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
            (self.op.enabled_hypervisors and
             hv_name in self.op.enabled_hypervisors)):
          # either this is a new hypervisor, or its parameters have changed
          hv_class = hypervisor.GetHypervisor(hv_name)
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          hv_class.CheckParameterSyntax(hv_params)
          _CheckHVParams(self, node_list, hv_name, hv_params)

      # no need to check any newly-enabled hypervisors, since the
      # defaults have already been checked in the above code-block
      for os_name, os_hvp in self.new_os_hvp.items():
        for hv_name, hv_params in os_hvp.items():
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          # we need to fill in the new os_hvp on top of the actual hv_p
          cluster_defaults = self.new_hvparams.get(hv_name, {})
          new_osp = objects.FillDict(cluster_defaults, hv_params)
          hv_class = hypervisor.GetHypervisor(hv_name)
          hv_class.CheckParameterSyntax(new_osp)
          _CheckHVParams(self, node_list, hv_name, new_osp)

    if self.op.default_iallocator:
      alloc_script = utils.FindFile(self.op.default_iallocator,
                                    constants.IALLOCATOR_SEARCH_PATH,
      if alloc_script is None:
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
                                   " specified" % self.op.default_iallocator,

  def Exec(self, feedback_fn):
    """Change the parameters of the cluster.

    """
    if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
    if self.op.drbd_helper is not None:
      new_helper = self.op.drbd_helper
      if new_helper != self.cfg.GetDRBDHelper():
        self.cfg.SetDRBDHelper(new_helper)
        feedback_fn("Cluster DRBD helper already in desired state,"
    if self.op.hvparams:
      self.cluster.hvparams = self.new_hvparams
      self.cluster.os_hvp = self.new_os_hvp
    if self.op.enabled_hypervisors is not None:
      self.cluster.hvparams = self.new_hvparams
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
    if self.op.beparams:
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
    if self.op.nicparams:
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
      self.cluster.ipolicy = self.new_ipolicy
    if self.op.osparams:
      self.cluster.osparams = self.new_osp
    if self.op.ndparams:
      self.cluster.ndparams = self.new_ndparams
    if self.op.diskparams:
      self.cluster.diskparams = self.new_diskparams
    if self.op.hv_state:
      self.cluster.hv_state_static = self.new_hv_state
    if self.op.disk_state:
      self.cluster.disk_state_static = self.new_disk_state

    if self.op.candidate_pool_size is not None:
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
      # we need to update the pool size here, otherwise the save will fail
      _AdjustCandidatePool(self, [])

    if self.op.maintain_node_health is not None:
      if self.op.maintain_node_health and not constants.ENABLE_CONFD:
        feedback_fn("Note: CONFD was disabled at build time, node health"
                    " maintenance is not useful (still enabling it)")
      self.cluster.maintain_node_health = self.op.maintain_node_health

    if self.op.prealloc_wipe_disks is not None:
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks

    if self.op.add_uids is not None:
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)

    if self.op.remove_uids is not None:
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)

    if self.op.uid_pool is not None:
      self.cluster.uid_pool = self.op.uid_pool

    if self.op.default_iallocator is not None:
      self.cluster.default_iallocator = self.op.default_iallocator

    if self.op.reserved_lvs is not None:
      self.cluster.reserved_lvs = self.op.reserved_lvs

    if self.op.use_external_mip_script is not None:
      self.cluster.use_external_mip_script = self.op.use_external_mip_script

    def helper_os(aname, mods, desc):
      # Apply add/remove modifications to the named OS list attribute
      lst = getattr(self.cluster, aname)
      for key, val in mods:
        if key == constants.DDM_ADD:
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
        elif key == constants.DDM_REMOVE:
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
          raise errors.ProgrammerError("Invalid modification '%s'" % key)

    if self.op.hidden_os:
      helper_os("hidden_os", self.op.hidden_os, "hidden")

    if self.op.blacklisted_os:
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")

    if self.op.master_netdev:
      # Take the master IP down on the old netdev before switching over
      master_params = self.cfg.GetMasterNetworkParameters()
      ems = self.cfg.GetUseExternalMipScript()
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
                  self.cluster.master_netdev)
      result = self.rpc.call_node_deactivate_master_ip(master_params.name,
      result.Raise("Could not disable the master ip")
      feedback_fn("Changing master_netdev from %s to %s" %
                  (master_params.netdev, self.op.master_netdev))
      self.cluster.master_netdev = self.op.master_netdev

    if self.op.master_netmask:
      master_params = self.cfg.GetMasterNetworkParameters()
      feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
      result = self.rpc.call_node_change_master_netmask(master_params.name,
                                                        master_params.netmask,
                                                        self.op.master_netmask,
                                                        master_params.netdev)
        msg = "Could not change the master IP netmask: %s" % result.fail_msg
        self.cluster.master_netmask = self.op.master_netmask

    self.cfg.Update(self.cluster, feedback_fn)

    if self.op.master_netdev:
      # Bring the master IP back up on the new netdev
      master_params = self.cfg.GetMasterNetworkParameters()
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
                  self.op.master_netdev)
      ems = self.cfg.GetUseExternalMipScript()
      result = self.rpc.call_node_activate_master_ip(master_params.name,
        self.LogWarning("Could not re-enable the master ip on"
                        " the master, please restart manually: %s",
def _UploadHelper(lu, nodes, fname):
  """Helper for uploading a file and showing warnings.

  Skips silently if the file does not exist locally; per-node copy
  failures are logged as warnings, not raised.

  """
  if os.path.exists(fname):
    result = lu.rpc.call_upload_file(nodes, fname)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
        msg = ("Copy of file %s to node %s failed: %s" %
               (fname, to_node, msg))
        lu.proc.LogWarning(msg)
def _ComputeAncillaryFiles(cluster, redist):
  """Compute files external to Ganeti which need to be consistent.

  @type redist: boolean
  @param redist: Whether to include files which need to be redistributed

  """
  # Compute files for all nodes
    constants.SSH_KNOWN_HOSTS_FILE,
    constants.CONFD_HMAC_KEY,
    constants.CLUSTER_DOMAIN_SECRET_FILE,
    constants.SPICE_CERT_FILE,
    constants.SPICE_CACERT_FILE,
    constants.RAPI_USERS_FILE,
    files_all.update(constants.ALL_CERT_FILES)
    files_all.update(ssconf.SimpleStore().GetFileList())
    # we need to ship at least the RAPI certificate
    files_all.add(constants.RAPI_CERT_FILE)

  if cluster.modify_etc_hosts:
    files_all.add(constants.ETC_HOSTS)

  # Files which are optional, these must:
  # - be present in one other category as well
  # - either exist or not exist on all nodes of that category (mc, vm all)
    constants.RAPI_USERS_FILE,

  # Files which should only be on master candidates
    files_mc.add(constants.CLUSTER_CONF_FILE)

    # FIXME: this should also be replicated but Ganeti doesn't support files_mc
    files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)

  # Files which should only be on VM-capable nodes
  files_vm = set(filename
    for hv_name in cluster.enabled_hypervisors
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])

  files_opt |= set(filename
    for hv_name in cluster.enabled_hypervisors
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])

  # Filenames in each category must be unique
  all_files_set = files_all | files_mc | files_vm
  assert (len(all_files_set) ==
          sum(map(len, [files_all, files_mc, files_vm]))), \
    "Found file listed in more than one file list"

  # Optional files must be present in one other category
  assert all_files_set.issuperset(files_opt), \
    "Optional file not in a different required list"

  return (files_all, files_opt, files_mc, files_vm)
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to
  @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
  # Gather target nodes
  cluster = lu.cfg.GetClusterInfo()
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())

  online_nodes = lu.cfg.GetOnlineNodeList()
  vm_nodes = lu.cfg.GetVmCapableNodeList()

  if additional_nodes is not None:
    online_nodes.extend(additional_nodes)
      vm_nodes.extend(additional_nodes)

  # Never distribute to master node
  for nodelist in [online_nodes, vm_nodes]:
    if master_info.name in nodelist:
      nodelist.remove(master_info.name)

  (files_all, _, files_mc, files_vm) = \
    _ComputeAncillaryFiles(cluster, True)

  # Never re-distribute configuration file from here
  assert not (constants.CLUSTER_CONF_FILE in files_all or
              constants.CLUSTER_CONF_FILE in files_vm)
  assert not files_mc, "Master candidates not handled in this function"

    (online_nodes, files_all),
    (vm_nodes, files_vm),
  # Upload every file to its target node list
  for (node_list, files) in filemap:
      _UploadHelper(lu, node_list, fname)
class LUClusterRedistConf(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    # Updating the (unchanged) cluster object triggers config/ssconf
    # redistribution; ancillary files are pushed explicitly afterwards.
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)
class LUClusterActivateMasterIp(NoHooksLU):
  """Activate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Activate the master IP.

    """
    master_params = self.cfg.GetMasterNetworkParameters()
    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_activate_master_ip(master_params.name,
    result.Raise("Could not activate the master IP")
# Mirror of LUClusterActivateMasterIp: turns the master IP down on the
# master node via one node RPC; failures raised via result.Raise.
4180 class LUClusterDeactivateMasterIp(NoHooksLU):
4181 """Deactivate the master IP on the master node.
4184 def Exec(self, feedback_fn):
4185 """Deactivate the master IP.
# ems: external master-IP script flag; presumably passed in the elided
# continuation line of the RPC call below -- confirm in the full source.
4188 master_params = self.cfg.GetMasterNetworkParameters()
4189 ems = self.cfg.GetUseExternalMipScript()
4190 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4192 result.Raise("Could not deactivate the master IP")
# Poll the instance's primary node for disk mirror sync status until the
# disks are in sync, logging progress via lu.proc.LogInfo; with
# oneshot=True only a single status report is made.  Returns True when
# no disk is left degraded.
# NOTE(review): elided listing -- the polling-loop header, the "done"
# computation and the retry/sleep bookkeeping lines are not shown.
4195 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4196 """Sleep and poll for an instance's disk to sync.
# Nothing to wait for when the instance has no disks, or an explicitly
# empty disk list was requested.
4199 if not instance.disks or disks is not None and not disks:
4202 disks = _ExpandCheckDisks(instance, disks)
4205 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
# All mirror status queries go to the primary node only.
4207 node = instance.primary_node
4210 lu.cfg.SetDiskID(dev, node)
4212 # TODO: Convert to utils.Retry
# Extra polls allowed when the mirror claims done-but-degraded, to tell
# a transient state from a persistent problem (see comment at 4251).
4215 degr_retries = 10 # in seconds, as we sleep 1 second each time
4219 cumul_degraded = False
4220 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
4221 msg = rstats.fail_msg
4223 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
# Presumably guarded by an (elided) RPC-retry counter: persistent RPC
# failure aborts the wait with RemoteError.
4226 raise errors.RemoteError("Can't contact node %s for mirror data,"
4227 " aborting." % node)
4230 rstats = rstats.payload
4232 for i, mstat in enumerate(rstats):
4234 lu.LogWarning("Can't compute data for node %s/%s",
4235 node, disks[i].iv_name)
# A mirror that is degraded but reports no sync percentage counts as
# (still) degraded overall.
4238 cumul_degraded = (cumul_degraded or
4239 (mstat.is_degraded and mstat.sync_percent is None))
4240 if mstat.sync_percent is not None:
4242 if mstat.estimated_time is not None:
4243 rem_time = ("%s remaining (estimated)" %
4244 utils.FormatSeconds(mstat.estimated_time))
4245 max_time = mstat.estimated_time
4247 rem_time = "no time estimate"
4248 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4249 (disks[i].iv_name, mstat.sync_percent, rem_time))
4251 # if we're done but degraded, let's do a few small retries, to
4252 # make sure we see a stable and not transient situation; therefore
4253 # we force restart of the loop
4254 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4255 logging.info("Degraded disks found, %d retries left", degr_retries)
# Sleep is capped at 60s regardless of the estimated remaining time.
4263 time.sleep(min(60, max_time))
4266 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4267 return not cumul_degraded
# Return True when the given block device is consistent on the given
# node, recursing into its children.  With ldisk=True the local-disk
# status (LDS_OKAY) is tested instead of the overall is_degraded flag.
# NOTE(review): elided listing -- the initialisation of "result" and
# some branch headers are not shown.
4270 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
4271 """Check that mirrors are not degraded.
4273 The ldisk parameter, if True, will change the test from the
4274 is_degraded attribute (which represents overall non-ok status for
4275 the device(s)) to the ldisk (representing the local storage status).
4278 lu.cfg.SetDiskID(dev, node)
# Only query the node when the device is expected to be assembled there
# (always on the primary; on secondaries only if AssembleOnSecondary()).
4282 if on_primary or dev.AssembleOnSecondary():
4283 rstats = lu.rpc.call_blockdev_find(node, dev)
4284 msg = rstats.fail_msg
4286 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4288 elif not rstats.payload:
4289 lu.LogWarning("Can't find disk on node %s", node)
4293 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4295 result = result and not rstats.payload.is_degraded
# NOTE(review): the recursive call below does not forward ldisk, so
# children are always checked via is_degraded -- verify this is the
# intended upstream behaviour.
4298 for child in dev.children:
4299 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
# LU executing an out-of-band (OOB) command (power on/off/cycle/status,
# health) against a set of nodes; the node's OOB helper program is
# invoked via an RPC run on the master node.
# NOTE(review): elided, line-numbered listing -- several physical lines
# (else branches, list initialisations, some loop/branch headers) are
# not shown.
4304 class LUOobCommand(NoHooksLU):
4305 """Logical unit for OOB handling.
# Commands that must never be run against the master node itself.
4309 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4311 def ExpandNames(self):
4312 """Gather locks we need.
# Lock exactly the requested nodes, or all nodes when none were named.
4315 if self.op.node_names:
4316 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4317 lock_names = self.op.node_names
4319 lock_names = locking.ALL_SET
4321 self.needed_locks = {
4322 locking.LEVEL_NODE: lock_names,
4325 def CheckPrereq(self):
4326 """Check prerequisites.
4329 - the node exists in the configuration
4332 Any errors are signaled by raising errors.OpPrereqError.
4336 self.master_node = self.cfg.GetMasterNode()
4338 assert self.op.power_delay >= 0.0
4340 if self.op.node_names:
# Refuse power-off/cycle on the master; the error hints how to run the
# OOB helper manually when the master supports OOB at all.
4341 if (self.op.command in self._SKIP_MASTER and
4342 self.master_node in self.op.node_names):
4343 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4344 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4346 if master_oob_handler:
4347 additional_text = ("run '%s %s %s' if you want to operate on the"
4348 " master regardless") % (master_oob_handler,
4352 additional_text = "it does not support out-of-band operations"
4354 raise errors.OpPrereqError(("Operating on the master node %s is not"
4355 " allowed for %s; %s") %
4356 (self.master_node, self.op.command,
4357 additional_text), errors.ECODE_INVAL)
# No node names given: operate on every node, silently excluding the
# master for the commands in _SKIP_MASTER.
4359 self.op.node_names = self.cfg.GetNodeList()
4360 if self.op.command in self._SKIP_MASTER:
4361 self.op.node_names.remove(self.master_node)
4363 if self.op.command in self._SKIP_MASTER:
4364 assert self.master_node not in self.op.node_names
4366 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4368 raise errors.OpPrereqError("Node %s not found" % node_name,
4371 self.nodes.append(node)
# Powering off a still-online node is refused unless explicitly forced
# via ignore_status.
4373 if (not self.op.ignore_status and
4374 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4375 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4376 " not marked offline") % node_name,
4379 def Exec(self, feedback_fn):
4380 """Execute OOB and return result if we expect any.
4383 master_node = self.master_node
# Process nodes in a stable, human-friendly order; each node produces a
# list of (status, data) result entries.
4386 for idx, node in enumerate(utils.NiceSort(self.nodes,
4387 key=lambda node: node.name)):
4388 node_entry = [(constants.RS_NORMAL, node.name)]
4389 ret.append(node_entry)
4391 oob_program = _SupportsOob(self.cfg, node)
# Node has no OOB helper configured: report "unavailable" and move on.
4394 node_entry.append((constants.RS_UNAVAIL, None))
4397 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4398 self.op.command, oob_program, node.name)
4399 result = self.rpc.call_run_oob(master_node, oob_program,
4400 self.op.command, node.name,
4404 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4405 node.name, result.fail_msg)
4406 node_entry.append((constants.RS_NODATA, None))
4409 self._CheckPayload(result)
# Python 2 "except E, err" syntax -- this file predates Python 3.
4410 except errors.OpExecError, err:
4411 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4413 node_entry.append((constants.RS_NODATA, None))
4415 if self.op.command == constants.OOB_HEALTH:
4416 # For health we should log important events
4417 for item, status in result.payload:
4418 if status in [constants.OOB_STATUS_WARNING,
4419 constants.OOB_STATUS_CRITICAL]:
4420 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4421 item, node.name, status)
4423 if self.op.command == constants.OOB_POWER_ON:
4425 elif self.op.command == constants.OOB_POWER_OFF:
4426 node.powered = False
4427 elif self.op.command == constants.OOB_POWER_STATUS:
4428 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
# Only warn on a recorded/actual power-state mismatch; the config is
# not corrected here.
4429 if powered != node.powered:
4430 logging.warning(("Recorded power state (%s) of node '%s' does not"
4431 " match actual power state (%s)"), node.powered,
4434 # For configuration changing commands we should update the node
4435 if self.op.command in (constants.OOB_POWER_ON,
4436 constants.OOB_POWER_OFF):
4437 self.cfg.Update(node, feedback_fn)
4439 node_entry.append((constants.RS_NORMAL, result.payload))
# Stagger power-on commands (no delay after the last node), presumably
# to limit power inrush -- confirm rationale upstream.
4441 if (self.op.command == constants.OOB_POWER_ON and
4442 idx < len(self.nodes) - 1):
4443 time.sleep(self.op.power_delay)
# Validate the RPC payload's shape per command; collects problems in an
# (elided) "errs" list and raises OpExecError if any were found.
4447 def _CheckPayload(self, result):
4448 """Checks if the payload is valid.
4450 @param result: RPC result
4451 @raises errors.OpExecError: If payload is not valid
4455 if self.op.command == constants.OOB_HEALTH:
4456 if not isinstance(result.payload, list):
4457 errs.append("command 'health' is expected to return a list but got %s" %
4458 type(result.payload))
4460 for item, status in result.payload:
4461 if status not in constants.OOB_STATUSES:
4462 errs.append("health item '%s' has invalid status '%s'" %
4465 if self.op.command == constants.OOB_POWER_STATUS:
4466 if not isinstance(result.payload, dict):
4467 errs.append("power-status is expected to return a dict but got %s" %
4468 type(result.payload))
4470 if self.op.command in [
4471 constants.OOB_POWER_ON,
4472 constants.OOB_POWER_OFF,
4473 constants.OOB_POWER_CYCLE,
# Pure actions must not return any payload at all.
4475 if result.payload is not None:
4476 errs.append("%s is expected to not return payload but got '%s'" %
4477 (self.op.command, result.payload))
4480 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4481 utils.CommaJoin(errs))
# Query implementation for OS objects: runs the os_diagnose RPC on all
# online, vm-capable nodes and aggregates the per-node results into
# per-OS query.OsInfo records.
# NOTE(review): elided, line-numbered listing -- some branch headers and
# initialisations (e.g. "all_os = {}", "data = {}") are not shown.
4484 class _OsQuery(_QueryBase):
4485 FIELDS = query.OS_FIELDS
4487 def ExpandNames(self, lu):
4488 # Lock all nodes in shared mode
4489 # Temporary removal of locks, should be reverted later
4490 # TODO: reintroduce locks when they are lighter-weight
4491 lu.needed_locks = {}
4492 #self.share_locks[locking.LEVEL_NODE] = 1
4493 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4495 # The following variables interact with _QueryBase._GetNames
4497 self.wanted = self.names
4499 self.wanted = locking.ALL_SET
4501 self.do_locking = self.use_locking
# No per-level locks are declared (see ExpandNames above).
4503 def DeclareLocks(self, lu, level):
# Presumably a @staticmethod (no self); the decorator line is elided.
4507 def _DiagnoseByOS(rlist):
4508 """Remaps a per-node return list into an a per-os per-node dictionary
4510 @param rlist: a map with node names as keys and OS objects as values
4513 @return: a dictionary with osnames as keys and as value another
4514 map, with nodes as keys and tuples of (path, status, diagnose,
4515 variants, parameters, api_versions) as values, eg::
4517 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4518 (/srv/..., False, "invalid api")],
4519 "node2": [(/srv/..., True, "", [], [])]}
4524 # we build here the list of nodes that didn't fail the RPC (at RPC
4525 # level), so that nodes with a non-responding node daemon don't
4526 # make all OSes invalid
4527 good_nodes = [node_name for node_name in rlist
4528 if not rlist[node_name].fail_msg]
4529 for node_name, nr in rlist.items():
# Skip nodes whose RPC failed or returned nothing.
4530 if nr.fail_msg or not nr.payload:
4532 for (name, path, status, diagnose, variants,
4533 params, api_versions) in nr.payload:
4534 if name not in all_os:
4535 # build a list of nodes for this os containing empty lists
4536 # for each node in node_list
4538 for nname in good_nodes:
4539 all_os[name][nname] = []
4540 # convert params from [name, help] to (name, help)
4541 params = [tuple(v) for v in params]
4542 all_os[name][node_name].append((path, status, diagnose,
4543 variants, params, api_versions))
4546 def _GetQueryData(self, lu):
4547 """Computes the list of nodes and their attributes.
4550 # Locking is not used
4551 assert not (compat.any(lu.glm.is_owned(level)
4552 for level in locking.LEVELS
4553 if level != locking.LEVEL_CLUSTER) or
4554 self.do_locking or self.use_locking)
# Only online, vm-capable nodes can meaningfully report OSes.
4556 valid_nodes = [node.name
4557 for node in lu.cfg.GetAllNodesInfo().values()
4558 if not node.offline and node.vm_capable]
4559 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4560 cluster = lu.cfg.GetClusterInfo()
4564 for (os_name, os_data) in pol.items():
4565 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4566 hidden=(os_name in cluster.hidden_os),
4567 blacklisted=(os_name in cluster.blacklisted_os))
4571 api_versions = set()
# An OS is valid only if valid on every node; osl[0][1] is the per-node
# status flag (see the tuple layout documented in _DiagnoseByOS).
4573 for idx, osl in enumerate(os_data.values()):
4574 info.valid = bool(info.valid and osl and osl[0][1])
4578 (node_variants, node_params, node_api) = osl[0][3:6]
# First node (elided idx == 0 branch, presumably) seeds the sets ...
4581 variants.update(node_variants)
4582 parameters.update(node_params)
4583 api_versions.update(node_api)
4585 # Filter out inconsistent values
# ... later nodes intersect, keeping only cluster-wide consistent data.
4586 variants.intersection_update(node_variants)
4587 parameters.intersection_update(node_params)
4588 api_versions.intersection_update(node_api)
4590 info.variants = list(variants)
4591 info.parameters = list(parameters)
4592 info.api_versions = list(api_versions)
4594 data[os_name] = info
4596 # Prepare data in requested order
4597 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
# LU exposing the OS query to the old-style (list-of-lists) query API,
# delegating all real work to _OsQuery.
4601 class LUOsDiagnose(NoHooksLU):
4602 """Logical unit for OS diagnose/query.
# Presumably a @staticmethod (no self); the decorator line is elided in
# this listing.
4608 def _BuildFilter(fields, names):
4609 """Builds a filter for querying OSes.
4612 name_filter = qlang.MakeSimpleFilter("name", names)
4614 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4615 # respective field is not requested
4616 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4617 for fname in ["hidden", "blacklisted"]
4618 if fname not in fields]
4619 if "valid" not in fields:
4620 status_filter.append([qlang.OP_TRUE, "valid"])
# Non-empty clause list becomes one AND expression; otherwise no status
# filtering at all.
4623 status_filter.insert(0, qlang.OP_AND)
4625 status_filter = None
# Combine both filters when present; elided branches presumably return
# whichever single filter exists.
4627 if name_filter and status_filter:
4628 return [qlang.OP_AND, name_filter, status_filter]
4632 return status_filter
4634 def CheckArguments(self):
4635 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4636 self.op.output_fields, False)
4638 def ExpandNames(self):
4639 self.oq.ExpandNames(self)
4641 def Exec(self, feedback_fn):
4642 return self.oq.OldStyleQuery(self)
# LU removing a node from the cluster: checks it is neither the master
# nor hosting instances, adjusts the master-candidate pool, runs post
# hooks while the node is still reachable, tells the node to leave, and
# cleans up /etc/hosts plus ancillary files.
# NOTE(review): elided listing -- docstring terminators, the hooks-env
# return statement and some error-code arguments are not shown.
4645 class LUNodeRemove(LogicalUnit):
4646 """Logical unit for removing a node.
4649 HPATH = "node-remove"
4650 HTYPE = constants.HTYPE_NODE
4652 def BuildHooksEnv(self):
4655 This doesn't run on the target node in the pre phase as a failed
4656 node would then be impossible to remove.
4660 "OP_TARGET": self.op.node_name,
4661 "NODE_NAME": self.op.node_name,
4664 def BuildHooksNodes(self):
4665 """Build hooks nodes.
# Hooks run on all nodes except the one being removed; tolerate the
# node already being gone from the node list.
4668 all_nodes = self.cfg.GetNodeList()
4670 all_nodes.remove(self.op.node_name)
4672 logging.warning("Node '%s', which is about to be removed, was not found"
4673 " in the list of all nodes", self.op.node_name)
4674 return (all_nodes, all_nodes)
4676 def CheckPrereq(self):
4677 """Check prerequisites.
4680 - the node exists in the configuration
4681 - it does not have primary or secondary instances
4682 - it's not the master
4684 Any errors are signaled by raising errors.OpPrereqError.
4687 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4688 node = self.cfg.GetNodeInfo(self.op.node_name)
4689 assert node is not None
4691 masternode = self.cfg.GetMasterNode()
4692 if node.name == masternode:
4693 raise errors.OpPrereqError("Node is the master node, failover to another"
4694 " node is required", errors.ECODE_INVAL)
# Refuse removal while any instance uses the node as primary or
# secondary (all_nodes covers both).
4696 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4697 if node.name in instance.all_nodes:
4698 raise errors.OpPrereqError("Instance %s is still running on the node,"
4699 " please remove first" % instance_name,
4701 self.op.node_name = node.name
4704 def Exec(self, feedback_fn):
4705 """Removes the node from the cluster.
4709 logging.info("Stopping the node daemon and removing configs from node %s",
4712 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4714 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
4717 # Promote nodes to master candidate as needed
4718 _AdjustCandidatePool(self, exceptions=[node.name])
4719 self.context.RemoveNode(node.name)
4721 # Run post hooks on the node before it's removed
4722 _RunPostHook(self, node.name)
# Failures while the node leaves the cluster are non-fatal: the node is
# already gone from the configuration at this point.
4724 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4725 msg = result.fail_msg
4727 self.LogWarning("Errors encountered on the remote node while leaving"
4728 " the cluster: %s", msg)
4730 # Remove node from our /etc/hosts
4731 if self.cfg.GetClusterInfo().modify_etc_hosts:
4732 master_node = self.cfg.GetMasterNode()
4733 result = self.rpc.call_etc_hosts_modify(master_node,
4734 constants.ETC_HOSTS_REMOVE,
4736 result.Raise("Can't update hosts file with new host data")
4737 _RedistributeAncillaryFiles(self)
# Query implementation for nodes: collects static config data and,
# depending on the requested fields, live node info (NQ_LIVE), instance
# mappings (NQ_INST), OOB support (NQ_OOB) and group info (NQ_GROUP).
# NOTE(review): elided listing -- else-branches setting the "not
# requested" defaults (presumably None) for live_data/oob_support/groups
# are not shown.
4740 class _NodeQuery(_QueryBase):
4741 FIELDS = query.NODE_FIELDS
4743 def ExpandNames(self, lu):
4744 lu.needed_locks = {}
4745 lu.share_locks = _ShareAll()
4748 self.wanted = _GetWantedNodes(lu, self.names)
4750 self.wanted = locking.ALL_SET
# Node locks are only needed when live data is requested.
4752 self.do_locking = (self.use_locking and
4753 query.NQ_LIVE in self.requested_data)
4756 # If any non-static field is requested we need to lock the nodes
4757 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4759 def DeclareLocks(self, lu, level):
4762 def _GetQueryData(self, lu):
4763 """Computes the list of nodes and their attributes.
4766 all_info = lu.cfg.GetAllNodesInfo()
4768 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4770 # Gather data as requested
4771 if query.NQ_LIVE in self.requested_data:
4772 # filter out non-vm_capable nodes
4773 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4775 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
4776 [lu.cfg.GetHypervisorType()])
# Only successful, non-empty RPC answers make it into live_data.
4777 live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
4778 for (name, nresult) in node_data.items()
4779 if not nresult.fail_msg and nresult.payload)
4783 if query.NQ_INST in self.requested_data:
4784 node_to_primary = dict([(name, set()) for name in nodenames])
4785 node_to_secondary = dict([(name, set()) for name in nodenames])
4787 inst_data = lu.cfg.GetAllInstancesInfo()
4789 for inst in inst_data.values():
4790 if inst.primary_node in node_to_primary:
4791 node_to_primary[inst.primary_node].add(inst.name)
4792 for secnode in inst.secondary_nodes:
4793 if secnode in node_to_secondary:
4794 node_to_secondary[secnode].add(inst.name)
4796 node_to_primary = None
4797 node_to_secondary = None
4799 if query.NQ_OOB in self.requested_data:
# Python 2 dict.iteritems() -- this file predates Python 3.
4800 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4801 for name, node in all_info.iteritems())
4805 if query.NQ_GROUP in self.requested_data:
4806 groups = lu.cfg.GetAllNodeGroupsInfo()
4810 return query.NodeQueryData([all_info[name] for name in nodenames],
4811 live_data, lu.cfg.GetMasterNode(),
4812 node_to_primary, node_to_secondary, groups,
4813 oob_support, lu.cfg.GetClusterInfo())
# Thin LU adapter exposing _NodeQuery through the legacy (old-style)
# query interface; all work is delegated.
4816 class LUNodeQuery(NoHooksLU):
4817 """Logical unit for querying nodes.
4820 # pylint: disable=W0142
4823 def CheckArguments(self):
4824 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4825 self.op.output_fields, self.op.use_locking)
4827 def ExpandNames(self):
4828 self.nq.ExpandNames(self)
4830 def DeclareLocks(self, level):
4831 self.nq.DeclareLocks(self, level)
4833 def Exec(self, feedback_fn):
4834 return self.nq.OldStyleQuery(self)
# LU listing logical volumes on a set of nodes (node_volumes RPC) and
# mapping each volume back to the instance owning it.
# NOTE(review): elided listing -- the "output = []" initialisation, the
# "node"/"phys"/"vg"/"name" value assignments in the field dispatch and
# the final return are not shown.
4837 class LUNodeQueryvols(NoHooksLU):
4838 """Logical unit for getting volumes on node(s).
4842 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4843 _FIELDS_STATIC = utils.FieldSet("node")
4845 def CheckArguments(self):
4846 _CheckOutputFields(static=self._FIELDS_STATIC,
4847 dynamic=self._FIELDS_DYNAMIC,
4848 selected=self.op.output_fields)
4850 def ExpandNames(self):
4851 self.share_locks = _ShareAll()
4852 self.needed_locks = {}
# All nodes when none were named, otherwise only the requested ones.
4854 if not self.op.nodes:
4855 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4857 self.needed_locks[locking.LEVEL_NODE] = \
4858 _GetWantedNodes(self, self.op.nodes)
4860 def Exec(self, feedback_fn):
4861 """Computes the list of nodes and their attributes.
4864 nodenames = self.owned_locks(locking.LEVEL_NODE)
4865 volumes = self.rpc.call_node_volumes(nodenames)
# Map (node, "vg/lv") -> owning instance, for the "instance" column.
4867 ilist = self.cfg.GetAllInstancesInfo()
4868 vol2inst = _MapInstanceDisksToNodes(ilist.values())
4871 for node in nodenames:
4872 nresult = volumes[node]
# Per-node RPC failures only produce a warning; the node is skipped.
4875 msg = nresult.fail_msg
4877 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
4880 node_vols = sorted(nresult.payload,
4881 key=operator.itemgetter("dev"))
4883 for vol in node_vols:
4885 for field in self.op.output_fields:
4888 elif field == "phys":
4892 elif field == "name":
4894 elif field == "size":
4895 val = int(float(vol["size"]))
4896 elif field == "instance":
# "-" marks volumes not belonging to any known instance disk.
4897 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
4899 raise errors.ParameterError(field)
4900 node_output.append(str(val))
4902 output.append(node_output)
# LU listing storage units of a given type on a set of nodes via the
# storage_list RPC, reassembling the rows in the requested field order.
# NOTE(review): elided listing -- the "output" accumulation, the
# SF_NODE value assignment and the final return are not shown.
4907 class LUNodeQueryStorage(NoHooksLU):
4908 """Logical unit for getting information on storage units on node(s).
4911 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4914 def CheckArguments(self):
4915 _CheckOutputFields(static=self._FIELDS_STATIC,
4916 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4917 selected=self.op.output_fields)
4919 def ExpandNames(self):
4920 self.share_locks = _ShareAll()
4921 self.needed_locks = {}
# Requested nodes only, or (elided else-branch) all nodes.
4924 self.needed_locks[locking.LEVEL_NODE] = \
4925 _GetWantedNodes(self, self.op.nodes)
4927 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4929 def Exec(self, feedback_fn):
4930 """Computes the list of nodes and their attributes.
4933 self.nodes = self.owned_locks(locking.LEVEL_NODE)
4935 # Always get name to sort by
4936 if constants.SF_NAME in self.op.output_fields:
4937 fields = self.op.output_fields[:]
4939 fields = [constants.SF_NAME] + self.op.output_fields
4941 # Never ask for node or type as it's only known to the LU
4942 for extra in [constants.SF_NODE, constants.SF_TYPE]:
4943 while extra in fields:
4944 fields.remove(extra)
4946 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4947 name_idx = field_idx[constants.SF_NAME]
4949 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4950 data = self.rpc.call_storage_list(self.nodes,
4951 self.op.storage_type, st_args,
4952 self.op.name, fields)
4956 for node in utils.NiceSort(self.nodes):
4957 nresult = data[node]
# Per-node RPC failure only warns; the node is skipped.
4961 msg = nresult.fail_msg
4963 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
# Index the result rows by unit name so output is name-sorted per node.
4966 rows = dict([(row[name_idx], row) for row in nresult.payload])
4968 for name in utils.NiceSort(rows.keys()):
4973 for field in self.op.output_fields:
4974 if field == constants.SF_NODE:
4976 elif field == constants.SF_TYPE:
4977 val = self.op.storage_type
4978 elif field in field_idx:
4979 val = row[field_idx[field]]
4981 raise errors.ParameterError(field)
# Query implementation for instances: static config data plus, per the
# requested fields, live runtime data (IQ_LIVE), disk usage
# (IQ_DISKUSAGE), console info (IQ_CONSOLE) and node/group details
# (IQ_NODES).
# NOTE(review): elided listing -- several initialisations
# (offline_nodes/bad_nodes/live_data lists, consinfo dict), loop
# headers and "not requested" else-branches are not shown.
4990 class _InstanceQuery(_QueryBase):
4991 FIELDS = query.INSTANCE_FIELDS
4993 def ExpandNames(self, lu):
4994 lu.needed_locks = {}
4995 lu.share_locks = _ShareAll()
4998 self.wanted = _GetWantedInstances(lu, self.names)
5000 self.wanted = locking.ALL_SET
# Locks are only needed for live data; node locks are recalculated from
# the instance locks (LOCKS_REPLACE) in DeclareLocks.
5002 self.do_locking = (self.use_locking and
5003 query.IQ_LIVE in self.requested_data)
5005 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5006 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5007 lu.needed_locks[locking.LEVEL_NODE] = []
5008 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5010 self.do_grouplocks = (self.do_locking and
5011 query.IQ_NODES in self.requested_data)
5013 def DeclareLocks(self, lu, level):
5015 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5016 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5018 # Lock all groups used by instances optimistically; this requires going
5019 # via the node before it's locked, requiring verification later on
5020 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5022 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5023 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5024 elif level == locking.LEVEL_NODE:
5025 lu._LockInstancesNodes() # pylint: disable=W0212
# Verifies that the optimistically-taken group locks still match the
# instances' actual groups (presumably a @staticmethod; decorator
# elided).
5028 def _CheckGroupLocks(lu):
5029 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5030 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5032 # Check if node groups for locked instances are still correct
5033 for instance_name in owned_instances:
5034 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5036 def _GetQueryData(self, lu):
5037 """Computes the list of instances and their attributes.
5040 if self.do_grouplocks:
5041 self._CheckGroupLocks(lu)
5043 cluster = lu.cfg.GetClusterInfo()
5044 all_info = lu.cfg.GetAllInstancesInfo()
5046 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5048 instance_list = [all_info[name] for name in instance_names]
# All nodes touched by the selected instances, and their hypervisors.
5049 nodes = frozenset(itertools.chain(*(inst.all_nodes
5050 for inst in instance_list)))
5051 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5054 wrongnode_inst = set()
5056 # Gather data as requested
5057 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5059 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5061 result = node_data[name]
5063 # offline nodes will be in both lists
5064 assert result.fail_msg
5065 offline_nodes.append(name)
5067 bad_nodes.append(name)
5068 elif result.payload:
5069 for inst in result.payload:
5070 if inst in all_info:
# Instance reported by its recorded primary: take its live data;
# reported elsewhere: remember it as running on the wrong node.
5071 if all_info[inst].primary_node == name:
5072 live_data.update(result.payload)
5074 wrongnode_inst.add(inst)
5076 # orphan instance; we don't list it here as we don't
5077 # handle this case yet in the output of instance listing
5078 logging.warning("Orphan instance '%s' found on node %s",
5080 # else no instance is alive
5084 if query.IQ_DISKUSAGE in self.requested_data:
5085 disk_usage = dict((inst.name,
5086 _ComputeDiskSize(inst.disk_template,
5087 [{constants.IDISK_SIZE: disk.size}
5088 for disk in inst.disks]))
5089 for inst in instance_list)
5093 if query.IQ_CONSOLE in self.requested_data:
5095 for inst in instance_list:
5096 if inst.name in live_data:
5097 # Instance is running
5098 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5100 consinfo[inst.name] = None
5101 assert set(consinfo.keys()) == set(instance_names)
5105 if query.IQ_NODES in self.requested_data:
5106 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5108 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5109 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5110 for uuid in set(map(operator.attrgetter("group"),
5116 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5117 disk_usage, offline_nodes, bad_nodes,
5118 live_data, wrongnode_inst, consinfo,
# Generic new-style query LU: looks up the query implementation class
# for the requested resource kind and delegates everything to it.
5122 class LUQuery(NoHooksLU):
5123 """Query for resources/items of a certain kind.
5126 # pylint: disable=W0142
5129 def CheckArguments(self):
5130 qcls = _GetQueryImplementation(self.op.what)
5132 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5134 def ExpandNames(self):
5135 self.impl.ExpandNames(self)
5137 def DeclareLocks(self, level):
5138 self.impl.DeclareLocks(self, level)
5140 def Exec(self, feedback_fn):
5141 return self.impl.NewStyleQuery(self)
# LU returning the field definitions of a query kind; purely static
# metadata, hence no locks are needed at all.
5144 class LUQueryFields(NoHooksLU):
5145 """Query for resources/items of a certain kind.
5148 # pylint: disable=W0142
5151 def CheckArguments(self):
5152 self.qcls = _GetQueryImplementation(self.op.what)
5154 def ExpandNames(self):
5155 self.needed_locks = {}
5157 def Exec(self, feedback_fn):
5158 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
# LU modifying fields of a storage unit on one node via the
# storage_modify RPC, after validating that the storage type and the
# requested fields are modifiable at all.
# NOTE(review): elided listing -- the (presumable) try/except KeyError
# around the MODIFIABLE_STORAGE_FIELDS lookup, the "if diff:" guard,
# error codes and the dict's closing brace are not shown.
5161 class LUNodeModifyStorage(NoHooksLU):
5162 """Logical unit for modifying a storage volume on a node.
5167 def CheckArguments(self):
5168 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5170 storage_type = self.op.storage_type
5173 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5175 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5176 " modified" % storage_type,
# Reject any requested change outside the modifiable field set.
5179 diff = set(self.op.changes.keys()) - modifiable
5181 raise errors.OpPrereqError("The following fields can not be modified for"
5182 " storage units of type '%s': %r" %
5183 (storage_type, list(diff)),
5186 def ExpandNames(self):
5187 self.needed_locks = {
5188 locking.LEVEL_NODE: self.op.node_name,
5191 def Exec(self, feedback_fn):
5192 """Computes the list of nodes and their attributes.
5195 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5196 result = self.rpc.call_storage_modify(self.op.node_name,
5197 self.op.storage_type, st_args,
5198 self.op.name, self.op.changes)
5199 result.Raise("Failed to modify storage unit '%s' on %s" %
5200 (self.op.name, self.op.node_name))
5203 class LUNodeAdd(LogicalUnit):
5204 """Logical unit for adding node to the cluster.
5208 HTYPE = constants.HTYPE_NODE
5209 _NFLAGS = ["master_capable", "vm_capable"]
5211 def CheckArguments(self):
5212 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5213 # validate/normalize the node name
5214 self.hostname = netutils.GetHostname(name=self.op.node_name,
5215 family=self.primary_ip_family)
5216 self.op.node_name = self.hostname.name
5218 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5219 raise errors.OpPrereqError("Cannot readd the master node",
5222 if self.op.readd and self.op.group:
5223 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5224 " being readded", errors.ECODE_INVAL)
5226 def BuildHooksEnv(self):
5229 This will run on all nodes before, and on all nodes + the new node after.
5233 "OP_TARGET": self.op.node_name,
5234 "NODE_NAME": self.op.node_name,
5235 "NODE_PIP": self.op.primary_ip,
5236 "NODE_SIP": self.op.secondary_ip,
5237 "MASTER_CAPABLE": str(self.op.master_capable),
5238 "VM_CAPABLE": str(self.op.vm_capable),
5241 def BuildHooksNodes(self):
5242 """Build hooks nodes.
5245 # Exclude added node
5246 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5247 post_nodes = pre_nodes + [self.op.node_name, ]
5249 return (pre_nodes, post_nodes)
5251 def CheckPrereq(self):
5252 """Check prerequisites.
5255 - the new node is not already in the config
5257 - its parameters (single/dual homed) matches the cluster
5259 Any errors are signaled by raising errors.OpPrereqError.
5263 hostname = self.hostname
5264 node = hostname.name
5265 primary_ip = self.op.primary_ip = hostname.ip
5266 if self.op.secondary_ip is None:
5267 if self.primary_ip_family == netutils.IP6Address.family:
5268 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5269 " IPv4 address must be given as secondary",
5271 self.op.secondary_ip = primary_ip
5273 secondary_ip = self.op.secondary_ip
5274 if not netutils.IP4Address.IsValid(secondary_ip):
5275 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5276 " address" % secondary_ip, errors.ECODE_INVAL)
5278 node_list = cfg.GetNodeList()
5279 if not self.op.readd and node in node_list:
5280 raise errors.OpPrereqError("Node %s is already in the configuration" %
5281 node, errors.ECODE_EXISTS)
5282 elif self.op.readd and node not in node_list:
5283 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5286 self.changed_primary_ip = False
5288 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5289 if self.op.readd and node == existing_node_name:
5290 if existing_node.secondary_ip != secondary_ip:
5291 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5292 " address configuration as before",
5294 if existing_node.primary_ip != primary_ip:
5295 self.changed_primary_ip = True
5299 if (existing_node.primary_ip == primary_ip or
5300 existing_node.secondary_ip == primary_ip or
5301 existing_node.primary_ip == secondary_ip or
5302 existing_node.secondary_ip == secondary_ip):
5303 raise errors.OpPrereqError("New node ip address(es) conflict with"
5304 " existing node %s" % existing_node.name,
5305 errors.ECODE_NOTUNIQUE)
5307 # After this 'if' block, None is no longer a valid value for the
5308 # _capable op attributes
5310 old_node = self.cfg.GetNodeInfo(node)
5311 assert old_node is not None, "Can't retrieve locked node %s" % node
5312 for attr in self._NFLAGS:
5313 if getattr(self.op, attr) is None:
5314 setattr(self.op, attr, getattr(old_node, attr))
5316 for attr in self._NFLAGS:
5317 if getattr(self.op, attr) is None:
5318 setattr(self.op, attr, True)
5320 if self.op.readd and not self.op.vm_capable:
5321 pri, sec = cfg.GetNodeInstances(node)
5323 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5324 " flag set to false, but it already holds"
5325 " instances" % node,
5328 # check that the type of the node (single versus dual homed) is the
5329 # same as for the master
5330 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5331 master_singlehomed = myself.secondary_ip == myself.primary_ip
5332 newbie_singlehomed = secondary_ip == primary_ip
5333 if master_singlehomed != newbie_singlehomed:
5334 if master_singlehomed:
5335 raise errors.OpPrereqError("The master has no secondary ip but the"
5336 " new node has one",
5339 raise errors.OpPrereqError("The master has a secondary ip but the"
5340 " new node doesn't have one",
5343 # checks reachability
5344 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5345 raise errors.OpPrereqError("Node not reachable by ping",
5346 errors.ECODE_ENVIRON)
5348 if not newbie_singlehomed:
5349 # check reachability from my secondary ip to newbie's secondary ip
5350 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5351 source=myself.secondary_ip):
5352 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5353 " based ping to node daemon port",
5354 errors.ECODE_ENVIRON)
5361 if self.op.master_capable:
5362 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5364 self.master_candidate = False
5367 self.new_node = old_node
5369 node_group = cfg.LookupNodeGroup(self.op.group)
5370 self.new_node = objects.Node(name=node,
5371 primary_ip=primary_ip,
5372 secondary_ip=secondary_ip,
5373 master_candidate=self.master_candidate,
5374 offline=False, drained=False,
5377 if self.op.ndparams:
5378 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5380 def Exec(self, feedback_fn):
5381 """Adds the new node to the cluster.
5384 new_node = self.new_node
5385 node = new_node.name
5387 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5390 # We adding a new node so we assume it's powered
5391 new_node.powered = True
5393 # for re-adds, reset the offline/drained/master-candidate flags;
5394 # we need to reset here, otherwise offline would prevent RPC calls
5395 # later in the procedure; this also means that if the re-add
5396 # fails, we are left with a non-offlined, broken node
5398 new_node.drained = new_node.offline = False # pylint: disable=W0201
5399 self.LogInfo("Readding a node, the offline/drained flags were reset")
5400 # if we demote the node, we do cleanup later in the procedure
5401 new_node.master_candidate = self.master_candidate
5402 if self.changed_primary_ip:
5403 new_node.primary_ip = self.op.primary_ip
5405 # copy the master/vm_capable flags
5406 for attr in self._NFLAGS:
5407 setattr(new_node, attr, getattr(self.op, attr))
5409 # notify the user about any possible mc promotion
5410 if new_node.master_candidate:
5411 self.LogInfo("Node will be a master candidate")
5413 if self.op.ndparams:
5414 new_node.ndparams = self.op.ndparams
5416 new_node.ndparams = {}
5418 # check connectivity
5419 result = self.rpc.call_version([node])[node]
5420 result.Raise("Can't get version information from node %s" % node)
5421 if constants.PROTOCOL_VERSION == result.payload:
5422 logging.info("Communication to node %s fine, sw version %s match",
5423 node, result.payload)
5425 raise errors.OpExecError("Version mismatch master version %s,"
5426 " node version %s" %
5427 (constants.PROTOCOL_VERSION, result.payload))
5429 # Add node to our /etc/hosts, and add key to known_hosts
5430 if self.cfg.GetClusterInfo().modify_etc_hosts:
5431 master_node = self.cfg.GetMasterNode()
5432 result = self.rpc.call_etc_hosts_modify(master_node,
5433 constants.ETC_HOSTS_ADD,
5436 result.Raise("Can't update hosts file with new host data")
5438 if new_node.secondary_ip != new_node.primary_ip:
5439 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5442 node_verify_list = [self.cfg.GetMasterNode()]
5443 node_verify_param = {
5444 constants.NV_NODELIST: ([node], {}),
5445 # TODO: do a node-net-test as well?
5448 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5449 self.cfg.GetClusterName())
5450 for verifier in node_verify_list:
5451 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5452 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5454 for failed in nl_payload:
5455 feedback_fn("ssh/hostname verification failed"
5456 " (checking from %s): %s" %
5457 (verifier, nl_payload[failed]))
5458 raise errors.OpExecError("ssh/hostname verification failed")
5461 _RedistributeAncillaryFiles(self)
5462 self.context.ReaddNode(new_node)
5463 # make sure we redistribute the config
5464 self.cfg.Update(new_node, feedback_fn)
5465 # and make sure the new node will not have old files around
5466 if not new_node.master_candidate:
5467 result = self.rpc.call_node_demote_from_mc(new_node.name)
5468 msg = result.fail_msg
5470 self.LogWarning("Node failed to demote itself from master"
5471 " candidate status: %s" % msg)
5473 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5474 additional_vm=self.op.vm_capable)
5475 self.context.AddNode(new_node, self.proc.GetECId())
class LUNodeSetParams(LogicalUnit):
  """Modifies the parameters of a node.

  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
      to the node role (as _ROLE_*)
  @cvar _R2F: a dictionary from node role to tuples of flags
  @cvar _FLAGS: a list of attribute names corresponding to the flags

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False

  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
  _F2R = {
    (True, False, False): _ROLE_CANDIDATE,
    (False, True, False): _ROLE_DRAINED,
    (False, False, True): _ROLE_OFFLINE,
    (False, False, False): _ROLE_REGULAR,
    }
  _R2F = dict((v, k) for k, v in _F2R.items())
  _FLAGS = ["master_candidate", "drained", "offline"]

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
                self.op.master_capable, self.op.vm_capable,
                self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
                self.op.disk_state]
    if all_mods.count(None) == len(all_mods):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we might be demoting from MC
    self.might_demote = (self.op.master_candidate == False or
                         self.op.offline == True or
                         self.op.drained == True or
                         self.op.master_capable == False)

    if self.op.secondary_ip:
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                   " address" % self.op.secondary_ip,
                                   errors.ECODE_INVAL)

    self.lock_all = self.op.auto_promote and self.might_demote
    self.lock_instances = self.op.secondary_ip is not None

  def _InstanceFilter(self, instance):
    """Filter for getting affected instances.

    """
    return (instance.disk_template in constants.DTS_INT_MIRROR and
            self.op.node_name in instance.all_nodes)

  def ExpandNames(self):
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

    # Since modifying a node can have severe effects on currently running
    # operations the resource lock is at least acquired in shared mode
    self.needed_locks[locking.LEVEL_NODE_RES] = \
      self.needed_locks[locking.LEVEL_NODE]

    # Get node resource and instance locks in shared mode; they are not used
    # for anything but read-only access
    self.share_locks[locking.LEVEL_NODE_RES] = 1
    self.share_locks[locking.LEVEL_INSTANCE] = 1

    if self.lock_instances:
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if self.lock_instances:
      affected_instances = \
        self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)

      # Verify instance locks
      owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
      wanted_instances = frozenset(affected_instances.keys())
      if wanted_instances - owned_instances:
        raise errors.OpPrereqError("Instances affected by changing node %s's"
                                   " secondary IP address have changed since"
                                   " locks were acquired, wanted '%s', have"
                                   " '%s'; retry the operation" %
                                   (self.op.node_name,
                                    utils.CommaJoin(wanted_instances),
                                    utils.CommaJoin(owned_instances)),
                                   errors.ECODE_STATE)
    else:
      affected_instances = None

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via master-failover",
                                   errors.ECODE_INVAL)

    if self.op.master_candidate and not node.master_capable:
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
                                 " it a master candidate" % node.name,
                                 errors.ECODE_STATE)

    if self.op.vm_capable == False:
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
      if ipri or isec:
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
                                   " the vm_capable flag" % node.name,
                                   errors.ECODE_STATE)

    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto promote option to allow"
                                   " promotion", errors.ECODE_STATE)

    self.old_flags = old_flags = (node.master_candidate,
                                  node.drained, node.offline)
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
    self.old_role = old_role = self._F2R[old_flags]

    # Check for ineffective changes
    for attr in self._FLAGS:
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
        setattr(self.op, attr, None)

    # Past this point, any flag change to False means a transition
    # away from the respective state, as only real changes are kept

    # TODO: We might query the real power state if it supports OOB
    if _SupportsOob(self.cfg, node):
      if self.op.offline is False and not (node.powered or
                                           self.op.powered == True):
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
                                    " offline status can be reset") %
                                   self.op.node_name)
    elif self.op.powered is not None:
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
                                  " as it does not support out-of-band"
                                  " handling") % self.op.node_name)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.op.drained == False or self.op.offline == False or
        (self.op.master_capable and not node.master_capable)):
      if _DecideSelfPromotion(self):
        self.op.master_candidate = True
        self.LogInfo("Auto-promoting node to master candidate")

    # If we're no longer master capable, we'll demote ourselves from MC
    if self.op.master_capable == False and node.master_candidate:
      self.LogInfo("Demoting from master candidate")
      self.op.master_candidate = False

    # Compute new role
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
    if self.op.master_candidate:
      new_role = self._ROLE_CANDIDATE
    elif self.op.drained:
      new_role = self._ROLE_DRAINED
    elif self.op.offline:
      new_role = self._ROLE_OFFLINE
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
      # False is still in new flags, which means we're un-setting (the
      # only) True flag
      new_role = self._ROLE_REGULAR
    else: # no new flags, nothing, keep old role
      new_role = old_role

    self.new_role = new_role

    if old_role == self._ROLE_OFFLINE and new_role != old_role:
      # Trying to transition out of offline status
      # TODO: Use standard RPC runner, but make sure it works when the node is
      # still marked offline
      result = rpc.BootstrapRunner().call_version([node.name])[node.name]
      if result.fail_msg:
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
                                   " to report its version: %s" %
                                   (node.name, result.fail_msg),
                                   errors.ECODE_STATE)
      else:
        self.LogWarning("Transitioning node from offline to online state"
                        " without using re-add. Please make sure the node"
                        " is healthy!")

    if self.op.secondary_ip:
      # Ok even without locking, because this can't be changed by any LU
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
      master_singlehomed = master.secondary_ip == master.primary_ip
      if master_singlehomed and self.op.secondary_ip:
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
                                   " homed cluster", errors.ECODE_INVAL)

      assert not (frozenset(affected_instances) -
                  self.owned_locks(locking.LEVEL_INSTANCE))

      if node.offline:
        if affected_instances:
          raise errors.OpPrereqError("Cannot change secondary IP address:"
                                     " offline node has instances (%s)"
                                     " configured to use it" %
                                     utils.CommaJoin(affected_instances.keys()))
      else:
        # On online nodes, check that no instances are running, and that
        # the node has the new ip and we can reach it.
        for instance in affected_instances.values():
          _CheckInstanceState(self, instance, INSTANCE_DOWN,
                              msg="cannot change secondary ip")

        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
        if master.name != node.name:
          # check reachability from master secondary ip to new secondary ip
          if not netutils.TcpPing(self.op.secondary_ip,
                                  constants.DEFAULT_NODED_PORT,
                                  source=master.secondary_ip):
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                       " based ping to node daemon port",
                                       errors.ECODE_ENVIRON)

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

    if self.op.hv_state:
      self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
                                                 self.node.hv_state_static)

    if self.op.disk_state:
      self.new_disk_state = \
        _MergeAndVerifyDiskState(self.op.disk_state,
                                 self.node.disk_state_static)

  def Exec(self, feedback_fn):
    """Modifies a node.

    """
    node = self.node
    old_role = self.old_role
    new_role = self.new_role

    result = []

    if self.op.ndparams:
      node.ndparams = self.new_ndparams

    if self.op.powered is not None:
      node.powered = self.op.powered

    if self.op.hv_state:
      node.hv_state_static = self.new_hv_state

    if self.op.disk_state:
      node.disk_state_static = self.new_disk_state

    for attr in ["master_capable", "vm_capable"]:
      val = getattr(self.op, attr)
      if val is not None:
        setattr(node, attr, val)
        result.append((attr, str(val)))

    if new_role != old_role:
      # Tell the node to demote itself, if no longer MC and not offline
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s", msg)

      new_flags = self._R2F[new_role]
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
        if of != nf:
          result.append((desc, str(nf)))
      (node.master_candidate, node.drained, node.offline) = new_flags

      # we locked all nodes, we adjust the CP before updating this node
      if self.lock_all:
        _AdjustCandidatePool(self, [node.name])

    if self.op.secondary_ip:
      node.secondary_ip = self.op.secondary_ip
      result.append(("secondary_ip", self.op.secondary_ip))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup if the mc
    # flag changed
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
      self.context.ReaddNode(node)

    return result
class LUNodePowercycle(NoHooksLU):
  """Powercycles a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    # refuse to powercycle the master unless explicitly forced
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Reboots a node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload
class LUClusterQuery(NoHooksLU):
  """Query cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    os_hvp = {}

    # Filter just for enabled hypervisors
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = {}
      for hv_name, hv_params in hv_dict.items():
        if hv_name in cluster.enabled_hypervisors:
          os_hvp[os_name][hv_name] = hv_params

    # Convert ip_family to ip_version
    primary_ip_version = constants.IP4_VERSION
    if cluster.primary_ip_family == netutils.IP6Address.family:
      primary_ip_version = constants.IP6_VERSION

    result = {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.primary_hypervisor,
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "ipolicy": cluster.ipolicy,
      "nicparams": cluster.nicparams,
      "ndparams": cluster.ndparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "master_netmask": cluster.master_netmask,
      "use_external_mip_script": cluster.use_external_mip_script,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      "reserved_lvs": cluster.reserved_lvs,
      "primary_ip_version": primary_ip_version,
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
      "hidden_os": cluster.hidden_os,
      "blacklisted_os": cluster.blacklisted_os,
      }

    return result
class LUClusterConfigQuery(NoHooksLU):
  """Return configuration values.

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause", "volume_group_name")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Dump a representation of the cluster config to the standard output.

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      elif field == "watcher_pause":
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
      elif field == "volume_group_name":
        entry = self.cfg.GetVGName()
      else:
        raise errors.ParameterError(field)
      values.append(entry)
    return values
class LUInstanceActivateDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
      _AssembleInstanceDisks(self, self.instance,
                             ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: False if the operation failed, otherwise a list of
      (host, instance_visible_name, node_visible_name)
      with the mapping from node devices to instance devices

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)

  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occured, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for idx, inst_disk in enumerate(disks):
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for idx, inst_disk in enumerate(disks):
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  Assembles the instance's disks; on failure every disk is shut down
  again and an L{errors.OpExecError} is raised.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                       ignore_secondaries=force)
  if not disks_ok:
    # roll back whatever was assembled before reporting the failure
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")
class LUInstanceDeactivateDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks

    """
    instance = self.instance
    # with --force, skip the "instance must be down" safety check
    if self.op.force:
      _ShutdownInstanceDisks(self, instance)
    else:
      _SafeShutdownInstanceDisks(self, instance)
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance.

  This function checks if an instance is running, before calling
  _ShutdownInstanceDisks.

  """
  _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)
6147 def _ExpandCheckDisks(instance, disks):
6148 """Return the instance disks selected by the disks list
6150 @type disks: list of L{objects.Disk} or None
6151 @param disks: selected disks
6152 @rtype: list of L{objects.Disk}
6153 @return: selected instance disks to act on
6157 return instance.disks
6159 if not set(disks).issubset(instance.disks):
6160 raise errors.ProgrammerError("Can only act on disks belonging to the"
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  Errors on secondary nodes that are offline are always tolerated;
  errors on the primary node make the function return False unless
  ignore_primary is true.

  @rtype: boolean
  @return: True if all devices were shut down cleanly (subject to the
      tolerances above), False otherwise

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if ((node == instance.primary_node and not ignore_primary) or
            (node != instance.primary_node and not result.offline)):
          all_result = False
  return all_result
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function check if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raise an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  (_, _, (hv_info, )) = nodeinfo[node].payload

  free_mem = hv_info.get("memory_free", None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)
6230 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6231 """Checks if nodes have enough free disk space in the all VGs.
6233 This function check if all given nodes have the needed amount of
6234 free disk. In case any node has less disk or we cannot get the
6235 information from the node, this function raise an OpPrereqError
6238 @type lu: C{LogicalUnit}
6239 @param lu: a logical unit from which we get configuration data
6240 @type nodenames: C{list}
6241 @param nodenames: the list of node names to check
6242 @type req_sizes: C{dict}
6243 @param req_sizes: the hash of vg and corresponding amount of disk in
6245 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6246 or we cannot check the node
6249 for vg, req_size in req_sizes.items():
6250 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
  """Checks if nodes have enough free disk space in the specified VG.

  This function check if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raise an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type vg: C{str}
  @param vg: the volume group to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    (_, (vg_info, ), _) = info.payload
    vg_free = vg_info.get("vg_free", None)
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node"
                                 " %s for vg %s, result was '%s'" %
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
    if requested > vg_free:
      raise errors.OpPrereqError("Not enough disk space on target node %s"
                                 " vg %s: required %d MiB, available %d MiB" %
                                 (node, vg, requested, vg_free),
                                 errors.ECODE_NORES)
def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
  """Checks if nodes have enough physical CPUs

  This function checks if all given nodes have the needed number of
  physical CPUs. In case any node has less CPUs or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type requested: C{int}
  @param requested: the minimum acceptable number of physical CPUs
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for CPU stats
  @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    (_, _, (hv_info, )) = info.payload
    num_cpus = hv_info.get("cpu_total", None)
    if not isinstance(num_cpus, int):
      raise errors.OpPrereqError("Can't compute the number of physical CPUs"
                                 " on node %s, result was '%s'" %
                                 (node, num_cpus), errors.ECODE_ENVIRON)
    if requested > num_cpus:
      raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
                                 "required" % (node, num_cpus, requested),
                                 errors.ECODE_NORES)
6326 class LUInstanceStartup(LogicalUnit):
6327 """Starts an instance.
6330 HPATH = "instance-start"
6331 HTYPE = constants.HTYPE_INSTANCE
6334 def CheckArguments(self):
6336 if self.op.beparams:
6337 # fill the beparams dict
6338 objects.UpgradeBeParams(self.op.beparams)
6339 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6341 def ExpandNames(self):
6342 self._ExpandAndLockInstance()
6344 def BuildHooksEnv(self):
6347 This runs on master, primary and secondary nodes of the instance.
6351 "FORCE": self.op.force,
6354 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6358 def BuildHooksNodes(self):
6359 """Build hooks nodes.
6362 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6365 def CheckPrereq(self):
6366 """Check prerequisites.
6368 This checks that the instance is in the cluster.
6371 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6372 assert self.instance is not None, \
6373 "Cannot retrieve locked instance %s" % self.op.instance_name
6376 if self.op.hvparams:
6377 # check hypervisor parameter syntax (locally)
6378 cluster = self.cfg.GetClusterInfo()
6379 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6380 filled_hvp = cluster.FillHV(instance)
6381 filled_hvp.update(self.op.hvparams)
6382 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6383 hv_type.CheckParameterSyntax(filled_hvp)
6384 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6386 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6388 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6390 if self.primary_offline and self.op.ignore_offline_nodes:
6391 self.proc.LogWarning("Ignoring offline primary node")
6393 if self.op.hvparams or self.op.beparams:
6394 self.proc.LogWarning("Overridden parameters are ignored")
6396 _CheckNodeOnline(self, instance.primary_node)
6398 bep = self.cfg.GetClusterInfo().FillBE(instance)
6400 # check bridges existence
6401 _CheckInstanceBridgesExist(self, instance)
6403 remote_info = self.rpc.call_instance_info(instance.primary_node,
6405 instance.hypervisor)
6406 remote_info.Raise("Error checking node %s" % instance.primary_node,
6407 prereq=True, ecode=errors.ECODE_ENVIRON)
6408 if not remote_info.payload: # not running already
6409 _CheckNodeFreeMemory(self, instance.primary_node,
6410 "starting instance %s" % instance.name,
6411 bep[constants.BE_MAXMEM], instance.hypervisor)
6413 def Exec(self, feedback_fn):
6414 """Start the instance.
6417 instance = self.instance
6418 force = self.op.force
6420 if not self.op.no_remember:
6421 self.cfg.MarkInstanceUp(instance.name)
6423 if self.primary_offline:
6424 assert self.op.ignore_offline_nodes
6425 self.proc.LogInfo("Primary node offline, marked instance as started")
6427 node_current = instance.primary_node
6429 _StartInstanceDisks(self, instance, force)
6432 self.rpc.call_instance_start(node_current,
6433 (instance, self.op.hvparams,
6435 self.op.startup_paused)
6436 msg = result.fail_msg
6438 _ShutdownInstanceDisks(self, instance)
6439 raise errors.OpExecError("Could not start instance: %s" % msg)
6442 class LUInstanceReboot(LogicalUnit):
6443 """Reboot an instance.
6446 HPATH = "instance-reboot"
6447 HTYPE = constants.HTYPE_INSTANCE
6450 def ExpandNames(self):
6451 self._ExpandAndLockInstance()
6453 def BuildHooksEnv(self):
6456 This runs on master, primary and secondary nodes of the instance.
6460 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6461 "REBOOT_TYPE": self.op.reboot_type,
6462 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6465 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6469 def BuildHooksNodes(self):
6470 """Build hooks nodes.
6473 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6476 def CheckPrereq(self):
6477 """Check prerequisites.
6479 This checks that the instance is in the cluster.
6482 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6483 assert self.instance is not None, \
6484 "Cannot retrieve locked instance %s" % self.op.instance_name
6485 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6486 _CheckNodeOnline(self, instance.primary_node)
6488 # check bridges existence
6489 _CheckInstanceBridgesExist(self, instance)
6491 def Exec(self, feedback_fn):
6492 """Reboot the instance.
6495 instance = self.instance
6496 ignore_secondaries = self.op.ignore_secondaries
6497 reboot_type = self.op.reboot_type
6499 remote_info = self.rpc.call_instance_info(instance.primary_node,
6501 instance.hypervisor)
6502 remote_info.Raise("Error checking node %s" % instance.primary_node)
6503 instance_running = bool(remote_info.payload)
6505 node_current = instance.primary_node
6507 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6508 constants.INSTANCE_REBOOT_HARD]:
6509 for disk in instance.disks:
6510 self.cfg.SetDiskID(disk, node_current)
6511 result = self.rpc.call_instance_reboot(node_current, instance,
6513 self.op.shutdown_timeout)
6514 result.Raise("Could not reboot instance")
6516 if instance_running:
6517 result = self.rpc.call_instance_shutdown(node_current, instance,
6518 self.op.shutdown_timeout)
6519 result.Raise("Could not shutdown instance for full reboot")
6520 _ShutdownInstanceDisks(self, instance)
6522 self.LogInfo("Instance %s was already stopped, starting now",
6524 _StartInstanceDisks(self, instance, ignore_secondaries)
6525 result = self.rpc.call_instance_start(node_current,
6526 (instance, None, None), False)
6527 msg = result.fail_msg
6529 _ShutdownInstanceDisks(self, instance)
6530 raise errors.OpExecError("Could not start instance for"
6531 " full reboot: %s" % msg)
6533 self.cfg.MarkInstanceUp(instance.name)
6536 class LUInstanceShutdown(LogicalUnit):
6537 """Shutdown an instance.
6540 HPATH = "instance-stop"
6541 HTYPE = constants.HTYPE_INSTANCE
6544 def ExpandNames(self):
6545 self._ExpandAndLockInstance()
6547 def BuildHooksEnv(self):
6550 This runs on master, primary and secondary nodes of the instance.
6553 env = _BuildInstanceHookEnvByObject(self, self.instance)
6554 env["TIMEOUT"] = self.op.timeout
6557 def BuildHooksNodes(self):
6558 """Build hooks nodes.
6561 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6564 def CheckPrereq(self):
6565 """Check prerequisites.
6567 This checks that the instance is in the cluster.
6570 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6571 assert self.instance is not None, \
6572 "Cannot retrieve locked instance %s" % self.op.instance_name
6574 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6576 self.primary_offline = \
6577 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6579 if self.primary_offline and self.op.ignore_offline_nodes:
6580 self.proc.LogWarning("Ignoring offline primary node")
6582 _CheckNodeOnline(self, self.instance.primary_node)
6584 def Exec(self, feedback_fn):
6585 """Shutdown the instance.
6588 instance = self.instance
6589 node_current = instance.primary_node
6590 timeout = self.op.timeout
6592 if not self.op.no_remember:
6593 self.cfg.MarkInstanceDown(instance.name)
6595 if self.primary_offline:
6596 assert self.op.ignore_offline_nodes
6597 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6599 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6600 msg = result.fail_msg
6602 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6604 _ShutdownInstanceDisks(self, instance)
6607 class LUInstanceReinstall(LogicalUnit):
6608 """Reinstall an instance.
6611 HPATH = "instance-reinstall"
6612 HTYPE = constants.HTYPE_INSTANCE
6615 def ExpandNames(self):
6616 self._ExpandAndLockInstance()
6618 def BuildHooksEnv(self):
6621 This runs on master, primary and secondary nodes of the instance.
6624 return _BuildInstanceHookEnvByObject(self, self.instance)
6626 def BuildHooksNodes(self):
6627 """Build hooks nodes.
6630 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6633 def CheckPrereq(self):
6634 """Check prerequisites.
6636 This checks that the instance is in the cluster and is not running.
6639 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6640 assert instance is not None, \
6641 "Cannot retrieve locked instance %s" % self.op.instance_name
6642 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6643 " offline, cannot reinstall")
6644 for node in instance.secondary_nodes:
6645 _CheckNodeOnline(self, node, "Instance secondary node offline,"
6646 " cannot reinstall")
6648 if instance.disk_template == constants.DT_DISKLESS:
6649 raise errors.OpPrereqError("Instance '%s' has no disks" %
6650 self.op.instance_name,
6652 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
6654 if self.op.os_type is not None:
6656 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6657 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6658 instance_os = self.op.os_type
6660 instance_os = instance.os
6662 nodelist = list(instance.all_nodes)
6664 if self.op.osparams:
6665 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6666 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6667 self.os_inst = i_osdict # the new dict (without defaults)
6671 self.instance = instance
6673 def Exec(self, feedback_fn):
6674 """Reinstall the instance.
6677 inst = self.instance
6679 if self.op.os_type is not None:
6680 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6681 inst.os = self.op.os_type
6682 # Write to configuration
6683 self.cfg.Update(inst, feedback_fn)
6685 _StartInstanceDisks(self, inst, None)
6687 feedback_fn("Running the instance OS create scripts...")
6688 # FIXME: pass debug option from opcode to backend
6689 result = self.rpc.call_instance_os_add(inst.primary_node,
6690 (inst, self.os_inst), True,
6691 self.op.debug_level)
6692 result.Raise("Could not install OS for instance %s on node %s" %
6693 (inst.name, inst.primary_node))
6695 _ShutdownInstanceDisks(self, inst)
6698 class LUInstanceRecreateDisks(LogicalUnit):
6699 """Recreate an instance's missing disks.
6702 HPATH = "instance-recreate-disks"
6703 HTYPE = constants.HTYPE_INSTANCE
6706 def CheckArguments(self):
6707 # normalise the disk list
6708 self.op.disks = sorted(frozenset(self.op.disks))
6710 def ExpandNames(self):
6711 self._ExpandAndLockInstance()
6712 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6714 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6715 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6717 self.needed_locks[locking.LEVEL_NODE] = []
6719 def DeclareLocks(self, level):
6720 if level == locking.LEVEL_NODE:
6721 # if we replace the nodes, we only need to lock the old primary,
6722 # otherwise we need to lock all nodes for disk re-creation
6723 primary_only = bool(self.op.nodes)
6724 self._LockInstancesNodes(primary_only=primary_only)
6725 elif level == locking.LEVEL_NODE_RES:
6727 self.needed_locks[locking.LEVEL_NODE_RES] = \
6728 self.needed_locks[locking.LEVEL_NODE][:]
6730 def BuildHooksEnv(self):
6733 This runs on master, primary and secondary nodes of the instance.
6736 return _BuildInstanceHookEnvByObject(self, self.instance)
6738 def BuildHooksNodes(self):
6739 """Build hooks nodes.
6742 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6745 def CheckPrereq(self):
6746 """Check prerequisites.
6748 This checks that the instance is in the cluster and is not running.
6751 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6752 assert instance is not None, \
6753 "Cannot retrieve locked instance %s" % self.op.instance_name
6755 if len(self.op.nodes) != len(instance.all_nodes):
6756 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6757 " %d replacement nodes were specified" %
6758 (instance.name, len(instance.all_nodes),
6759 len(self.op.nodes)),
6761 assert instance.disk_template != constants.DT_DRBD8 or \
6762 len(self.op.nodes) == 2
6763 assert instance.disk_template != constants.DT_PLAIN or \
6764 len(self.op.nodes) == 1
6765 primary_node = self.op.nodes[0]
6767 primary_node = instance.primary_node
6768 _CheckNodeOnline(self, primary_node)
6770 if instance.disk_template == constants.DT_DISKLESS:
6771 raise errors.OpPrereqError("Instance '%s' has no disks" %
6772 self.op.instance_name, errors.ECODE_INVAL)
6773 # if we replace nodes *and* the old primary is offline, we don't
6775 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
6776 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
6777 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6778 if not (self.op.nodes and old_pnode.offline):
6779 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
6780 msg="cannot recreate disks")
6782 if not self.op.disks:
6783 self.op.disks = range(len(instance.disks))
6785 for idx in self.op.disks:
6786 if idx >= len(instance.disks):
6787 raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6789 if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6790 raise errors.OpPrereqError("Can't recreate disks partially and"
6791 " change the nodes at the same time",
6793 self.instance = instance
6795 def Exec(self, feedback_fn):
6796 """Recreate the disks.
6799 instance = self.instance
6801 assert (self.owned_locks(locking.LEVEL_NODE) ==
6802 self.owned_locks(locking.LEVEL_NODE_RES))
6805 mods = [] # keeps track of needed logical_id changes
6807 for idx, disk in enumerate(instance.disks):
6808 if idx not in self.op.disks: # disk idx has not been passed in
6811 # update secondaries for disks, if needed
6813 if disk.dev_type == constants.LD_DRBD8:
6814 # need to update the nodes and minors
6815 assert len(self.op.nodes) == 2
6816 assert len(disk.logical_id) == 6 # otherwise disk internals
6818 (_, _, old_port, _, _, old_secret) = disk.logical_id
6819 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6820 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6821 new_minors[0], new_minors[1], old_secret)
6822 assert len(disk.logical_id) == len(new_id)
6823 mods.append((idx, new_id))
6825 # now that we have passed all asserts above, we can apply the mods
6826 # in a single run (to avoid partial changes)
6827 for idx, new_id in mods:
6828 instance.disks[idx].logical_id = new_id
6830 # change primary node, if needed
6832 instance.primary_node = self.op.nodes[0]
6833 self.LogWarning("Changing the instance's nodes, you will have to"
6834 " remove any disks left on the older nodes manually")
6837 self.cfg.Update(instance, feedback_fn)
6839 _CreateDisks(self, instance, to_skip=to_skip)
6842 class LUInstanceRename(LogicalUnit):
6843 """Rename an instance.
6846 HPATH = "instance-rename"
6847 HTYPE = constants.HTYPE_INSTANCE
6849 def CheckArguments(self):
6853 if self.op.ip_check and not self.op.name_check:
6854 # TODO: make the ip check more flexible and not depend on the name check
6855 raise errors.OpPrereqError("IP address check requires a name check",
6858 def BuildHooksEnv(self):
6861 This runs on master, primary and secondary nodes of the instance.
6864 env = _BuildInstanceHookEnvByObject(self, self.instance)
6865 env["INSTANCE_NEW_NAME"] = self.op.new_name
6868 def BuildHooksNodes(self):
6869 """Build hooks nodes.
6872 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6875 def CheckPrereq(self):
6876 """Check prerequisites.
6878 This checks that the instance is in the cluster and is not running.
6881 self.op.instance_name = _ExpandInstanceName(self.cfg,
6882 self.op.instance_name)
6883 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6884 assert instance is not None
6885 _CheckNodeOnline(self, instance.primary_node)
6886 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
6887 msg="cannot rename")
6888 self.instance = instance
6890 new_name = self.op.new_name
6891 if self.op.name_check:
6892 hostname = netutils.GetHostname(name=new_name)
6893 if hostname.name != new_name:
6894 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6896 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6897 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6898 " same as given hostname '%s'") %
6899 (hostname.name, self.op.new_name),
6901 new_name = self.op.new_name = hostname.name
6902 if (self.op.ip_check and
6903 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6904 raise errors.OpPrereqError("IP %s of instance %s already in use" %
6905 (hostname.ip, new_name),
6906 errors.ECODE_NOTUNIQUE)
6908 instance_list = self.cfg.GetInstanceList()
6909 if new_name in instance_list and new_name != instance.name:
6910 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6911 new_name, errors.ECODE_EXISTS)
6913 def Exec(self, feedback_fn):
6914 """Rename the instance.
6917 inst = self.instance
6918 old_name = inst.name
6920 rename_file_storage = False
6921 if (inst.disk_template in constants.DTS_FILEBASED and
6922 self.op.new_name != inst.name):
6923 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6924 rename_file_storage = True
6926 self.cfg.RenameInstance(inst.name, self.op.new_name)
6927 # Change the instance lock. This is definitely safe while we hold the BGL.
6928 # Otherwise the new lock would have to be added in acquired mode.
6930 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6931 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6933 # re-read the instance from the configuration after rename
6934 inst = self.cfg.GetInstanceInfo(self.op.new_name)
6936 if rename_file_storage:
6937 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6938 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6939 old_file_storage_dir,
6940 new_file_storage_dir)
6941 result.Raise("Could not rename on node %s directory '%s' to '%s'"
6942 " (but the instance has been renamed in Ganeti)" %
6943 (inst.primary_node, old_file_storage_dir,
6944 new_file_storage_dir))
6946 _StartInstanceDisks(self, inst, None)
6948 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6949 old_name, self.op.debug_level)
6950 msg = result.fail_msg
6952 msg = ("Could not run OS rename script for instance %s on node %s"
6953 " (but the instance has been renamed in Ganeti): %s" %
6954 (inst.name, inst.primary_node, msg))
6955 self.proc.LogWarning(msg)
6957 _ShutdownInstanceDisks(self, inst)
6962 class LUInstanceRemove(LogicalUnit):
6963 """Remove an instance.
6966 HPATH = "instance-remove"
6967 HTYPE = constants.HTYPE_INSTANCE
6970 def ExpandNames(self):
6971 self._ExpandAndLockInstance()
6972 self.needed_locks[locking.LEVEL_NODE] = []
6973 self.needed_locks[locking.LEVEL_NODE_RES] = []
6974 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6976 def DeclareLocks(self, level):
6977 if level == locking.LEVEL_NODE:
6978 self._LockInstancesNodes()
6979 elif level == locking.LEVEL_NODE_RES:
6981 self.needed_locks[locking.LEVEL_NODE_RES] = \
6982 self.needed_locks[locking.LEVEL_NODE][:]
6984 def BuildHooksEnv(self):
6987 This runs on master, primary and secondary nodes of the instance.
6990 env = _BuildInstanceHookEnvByObject(self, self.instance)
6991 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6994 def BuildHooksNodes(self):
6995 """Build hooks nodes.
6998 nl = [self.cfg.GetMasterNode()]
6999 nl_post = list(self.instance.all_nodes) + nl
7000 return (nl, nl_post)
7002 def CheckPrereq(self):
7003 """Check prerequisites.
7005 This checks that the instance is in the cluster.
7008 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7009 assert self.instance is not None, \
7010 "Cannot retrieve locked instance %s" % self.op.instance_name
7012 def Exec(self, feedback_fn):
7013 """Remove the instance.
7016 instance = self.instance
7017 logging.info("Shutting down instance %s on node %s",
7018 instance.name, instance.primary_node)
7020 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7021 self.op.shutdown_timeout)
7022 msg = result.fail_msg
7024 if self.op.ignore_failures:
7025 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7027 raise errors.OpExecError("Could not shutdown instance %s on"
7029 (instance.name, instance.primary_node, msg))
7031 assert (self.owned_locks(locking.LEVEL_NODE) ==
7032 self.owned_locks(locking.LEVEL_NODE_RES))
7033 assert not (set(instance.all_nodes) -
7034 self.owned_locks(locking.LEVEL_NODE)), \
7035 "Not owning correct locks"
7037 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7040 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7041 """Utility function to remove an instance.
7044 logging.info("Removing block devices for instance %s", instance.name)
7046 if not _RemoveDisks(lu, instance):
7047 if not ignore_failures:
7048 raise errors.OpExecError("Can't remove instance's disks")
7049 feedback_fn("Warning: can't remove instance's disks")
7051 logging.info("Removing instance %s out of cluster config", instance.name)
7053 lu.cfg.RemoveInstance(instance.name)
7055 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7056 "Instance lock removal conflict"
7058 # Remove lock for the instance
7059 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7062 class LUInstanceQuery(NoHooksLU):
7063 """Logical unit for querying instances.
7066 # pylint: disable=W0142
7069 def CheckArguments(self):
7070 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7071 self.op.output_fields, self.op.use_locking)
7073 def ExpandNames(self):
7074 self.iq.ExpandNames(self)
7076 def DeclareLocks(self, level):
7077 self.iq.DeclareLocks(self, level)
7079 def Exec(self, feedback_fn):
7080 return self.iq.OldStyleQuery(self)
7083 class LUInstanceFailover(LogicalUnit):
7084 """Failover an instance.
7087 HPATH = "instance-failover"
7088 HTYPE = constants.HTYPE_INSTANCE
7091 def CheckArguments(self):
7092 """Check the arguments.
7095 self.iallocator = getattr(self.op, "iallocator", None)
7096 self.target_node = getattr(self.op, "target_node", None)
7098 def ExpandNames(self):
7099 self._ExpandAndLockInstance()
7101 if self.op.target_node is not None:
7102 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7104 self.needed_locks[locking.LEVEL_NODE] = []
7105 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7107 ignore_consistency = self.op.ignore_consistency
7108 shutdown_timeout = self.op.shutdown_timeout
7109 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7112 ignore_consistency=ignore_consistency,
7113 shutdown_timeout=shutdown_timeout)
7114 self.tasklets = [self._migrater]
7116 def DeclareLocks(self, level):
7117 if level == locking.LEVEL_NODE:
7118 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7119 if instance.disk_template in constants.DTS_EXT_MIRROR:
7120 if self.op.target_node is None:
7121 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7123 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7124 self.op.target_node]
7125 del self.recalculate_locks[locking.LEVEL_NODE]
7127 self._LockInstancesNodes()
7129 def BuildHooksEnv(self):
7132 This runs on master, primary and secondary nodes of the instance.
7135 instance = self._migrater.instance
7136 source_node = instance.primary_node
7137 target_node = self.op.target_node
7139 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7140 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7141 "OLD_PRIMARY": source_node,
7142 "NEW_PRIMARY": target_node,
7145 if instance.disk_template in constants.DTS_INT_MIRROR:
7146 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7147 env["NEW_SECONDARY"] = source_node
7149 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7151 env.update(_BuildInstanceHookEnvByObject(self, instance))
7155 def BuildHooksNodes(self):
7156 """Build hooks nodes.
7159 instance = self._migrater.instance
7160 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7161 return (nl, nl + [instance.primary_node])
7164 class LUInstanceMigrate(LogicalUnit):
7165 """Migrate an instance.
7167 This is migration without shutting down, compared to the failover,
7168 which is done with shutdown.
7171 HPATH = "instance-migrate"
7172 HTYPE = constants.HTYPE_INSTANCE
7175 def ExpandNames(self):
7176 self._ExpandAndLockInstance()
7178 if self.op.target_node is not None:
7179 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7181 self.needed_locks[locking.LEVEL_NODE] = []
7182 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7184 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7185 cleanup=self.op.cleanup,
7187 fallback=self.op.allow_failover)
7188 self.tasklets = [self._migrater]
7190 def DeclareLocks(self, level):
7191 if level == locking.LEVEL_NODE:
7192 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7193 if instance.disk_template in constants.DTS_EXT_MIRROR:
7194 if self.op.target_node is None:
7195 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7197 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7198 self.op.target_node]
7199 del self.recalculate_locks[locking.LEVEL_NODE]
7201 self._LockInstancesNodes()
7203 def BuildHooksEnv(self):
7206 This runs on master, primary and secondary nodes of the instance.
7209 instance = self._migrater.instance
7210 source_node = instance.primary_node
7211 target_node = self.op.target_node
7212 env = _BuildInstanceHookEnvByObject(self, instance)
7214 "MIGRATE_LIVE": self._migrater.live,
7215 "MIGRATE_CLEANUP": self.op.cleanup,
7216 "OLD_PRIMARY": source_node,
7217 "NEW_PRIMARY": target_node,
7220 if instance.disk_template in constants.DTS_INT_MIRROR:
7221 env["OLD_SECONDARY"] = target_node
7222 env["NEW_SECONDARY"] = source_node
7224 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7228 def BuildHooksNodes(self):
7229 """Build hooks nodes.
7232 instance = self._migrater.instance
7233 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7234 return (nl, nl + [instance.primary_node])
7237 class LUInstanceMove(LogicalUnit):
7238 """Move an instance by data-copying.
7241 HPATH = "instance-move"
7242 HTYPE = constants.HTYPE_INSTANCE
7245 def ExpandNames(self):
7246 self._ExpandAndLockInstance()
7247 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7248 self.op.target_node = target_node
7249 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7250 self.needed_locks[locking.LEVEL_NODE_RES] = []
7251 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7253 def DeclareLocks(self, level):
7254 if level == locking.LEVEL_NODE:
7255 self._LockInstancesNodes(primary_only=True)
7256 elif level == locking.LEVEL_NODE_RES:
7258 self.needed_locks[locking.LEVEL_NODE_RES] = \
7259 self.needed_locks[locking.LEVEL_NODE][:]
7261 def BuildHooksEnv(self):
7264 This runs on master, primary and secondary nodes of the instance.
7268 "TARGET_NODE": self.op.target_node,
7269 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7271 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7274 def BuildHooksNodes(self):
7275 """Build hooks nodes.
7279 self.cfg.GetMasterNode(),
7280 self.instance.primary_node,
7281 self.op.target_node,
7285 def CheckPrereq(self):
7286 """Check prerequisites.
7288 This checks that the instance is in the cluster.
7291 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7292 assert self.instance is not None, \
7293 "Cannot retrieve locked instance %s" % self.op.instance_name
7295 node = self.cfg.GetNodeInfo(self.op.target_node)
7296 assert node is not None, \
7297 "Cannot retrieve locked node %s" % self.op.target_node
7299 self.target_node = target_node = node.name
7301 if target_node == instance.primary_node:
7302 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7303 (instance.name, target_node),
7306 bep = self.cfg.GetClusterInfo().FillBE(instance)
7308 for idx, dsk in enumerate(instance.disks):
7309 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7310 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7311 " cannot copy" % idx, errors.ECODE_STATE)
7313 _CheckNodeOnline(self, target_node)
7314 _CheckNodeNotDrained(self, target_node)
7315 _CheckNodeVmCapable(self, target_node)
7317 if instance.admin_state == constants.ADMINST_UP:
7318 # check memory requirements on the secondary node
7319 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7320 instance.name, bep[constants.BE_MAXMEM],
7321 instance.hypervisor)
7323 self.LogInfo("Not checking memory on the secondary node as"
7324 " instance will not be started")
7326 # check bridge existance
7327 _CheckInstanceBridgesExist(self, instance, node=target_node)
7329 def Exec(self, feedback_fn):
7330 """Move an instance.
7332 The move is done by shutting it down on its present node, copying
7333 the data over (slow) and starting it on the new node.
7336 instance = self.instance
7338 source_node = instance.primary_node
7339 target_node = self.target_node
7341 self.LogInfo("Shutting down instance %s on source node %s",
7342 instance.name, source_node)
7344 assert (self.owned_locks(locking.LEVEL_NODE) ==
7345 self.owned_locks(locking.LEVEL_NODE_RES))
7347 result = self.rpc.call_instance_shutdown(source_node, instance,
7348 self.op.shutdown_timeout)
7349 msg = result.fail_msg
7351 if self.op.ignore_consistency:
7352 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7353 " Proceeding anyway. Please make sure node"
7354 " %s is down. Error details: %s",
7355 instance.name, source_node, source_node, msg)
7357 raise errors.OpExecError("Could not shutdown instance %s on"
7359 (instance.name, source_node, msg))
7361 # create the target disks
7363 _CreateDisks(self, instance, target_node=target_node)
7364 except errors.OpExecError:
7365 self.LogWarning("Device creation failed, reverting...")
7367 _RemoveDisks(self, instance, target_node=target_node)
7369 self.cfg.ReleaseDRBDMinors(instance.name)
7372 cluster_name = self.cfg.GetClusterInfo().cluster_name
7375 # activate, get path, copy the data over
7376 for idx, disk in enumerate(instance.disks):
7377 self.LogInfo("Copying data for disk %d", idx)
7378 result = self.rpc.call_blockdev_assemble(target_node, disk,
7379 instance.name, True, idx)
7381 self.LogWarning("Can't assemble newly created disk %d: %s",
7382 idx, result.fail_msg)
7383 errs.append(result.fail_msg)
7385 dev_path = result.payload
7386 result = self.rpc.call_blockdev_export(source_node, disk,
7387 target_node, dev_path,
7390 self.LogWarning("Can't copy data over for disk %d: %s",
7391 idx, result.fail_msg)
7392 errs.append(result.fail_msg)
7396 self.LogWarning("Some disks failed to copy, aborting")
7398 _RemoveDisks(self, instance, target_node=target_node)
7400 self.cfg.ReleaseDRBDMinors(instance.name)
7401 raise errors.OpExecError("Errors during disk copy: %s" %
7404 instance.primary_node = target_node
7405 self.cfg.Update(instance, feedback_fn)
7407 self.LogInfo("Removing the disks on the original node")
7408 _RemoveDisks(self, instance, target_node=source_node)
7410 # Only start the instance if it's marked as up
7411 if instance.admin_state == constants.ADMINST_UP:
7412 self.LogInfo("Starting instance %s on node %s",
7413 instance.name, target_node)
7415 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7416 ignore_secondaries=True)
7418 _ShutdownInstanceDisks(self, instance)
7419 raise errors.OpExecError("Can't activate the instance's disks")
7421 result = self.rpc.call_instance_start(target_node,
7422 (instance, None, None), False)
7423 msg = result.fail_msg
7425 _ShutdownInstanceDisks(self, instance)
7426 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7427 (instance.name, target_node, msg))
7430 class LUNodeMigrate(LogicalUnit):
7431 """Migrate all instances from a node.
# NOTE(review): this extraction has elided lines (gaps in the embedded
# numbering); code below is kept byte-identical, comments only added.
7434 HPATH = "node-migrate"
7435 HTYPE = constants.HTYPE_NODE
# No argument validation needed beyond what the opcode declares.
7438 def CheckArguments(self):
# Resolve the node name and lock only that node, in shared mode, since
# the real work is delegated to per-instance migration jobs.
7441 def ExpandNames(self):
7442 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7444 self.share_locks = _ShareAll()
7445 self.needed_locks = {
7446 locking.LEVEL_NODE: [self.op.node_name],
# Hook environment: only the node name is exported.
7449 def BuildHooksEnv(self):
7452 This runs on the master, the primary and all the secondaries.
7456 "NODE_NAME": self.op.node_name,
7459 def BuildHooksNodes(self):
7460 """Build hooks nodes.
7463 nl = [self.cfg.GetMasterNode()]
7466 def CheckPrereq(self):
# Exec does not migrate anything itself: it returns one OpInstanceMigrate
# job per primary instance on the node, via ResultWithJobs.
7469 def Exec(self, feedback_fn):
7470 # Prepare jobs for migration instances
7472 [opcodes.OpInstanceMigrate(instance_name=inst.name,
7475 iallocator=self.op.iallocator,
7476 target_node=self.op.target_node)]
7477 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7480 # TODO: Run iallocator in this opcode and pass correct placement options to
7481 # OpInstanceMigrate. Since other jobs can modify the cluster between
7482 # running the iallocator and the actual migration, a good consistency model
7483 # will have to be found.
# Sanity check: we should hold exactly the one node lock we asked for.
7485 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7486 frozenset([self.op.node_name]))
7488 return ResultWithJobs(jobs)
7491 class TLMigrateInstance(Tasklet):
7492 """Tasklet class for instance migration.
# NOTE(review): extraction has elided lines (gaps in embedded numbering);
# code kept byte-identical, only comments/docstring text touched.
7495 @ivar live: whether the migration will be done live or non-live;
7496 this variable is initialized only after CheckPrereq has run
7497 @type cleanup: boolean
7498 @ivar cleanup: Whether we cleanup from a failed migration
7499 @type iallocator: string
7500 @ivar iallocator: The iallocator used to determine target_node
7501 @type target_node: string
7502 @ivar target_node: If given, the target_node to reallocate the instance to
7503 @type failover: boolean
7504 @ivar failover: Whether operation results in failover or migration
7505 @type fallback: boolean
7506 @ivar fallback: Whether fallback to failover is allowed if migration not
7508 @type ignore_consistency: boolean
7509 @ivar ignore_consistency: Whether we should ignore consistency between source
7511 @type shutdown_timeout: int
7512 @ivar shutdown_timeout: In case of failover timeout of the shutdown
# Polling cadence for the migration-status loop in _ExecMigration.
7517 _MIGRATION_POLL_INTERVAL = 1 # seconds
7518 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7520 def __init__(self, lu, instance_name, cleanup=False,
7521 failover=False, fallback=False,
7522 ignore_consistency=False,
7523 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
7524 """Initializes this class.
7527 Tasklet.__init__(self, lu)
# Parameters are simply stored; self.live is decided in CheckPrereq.
7530 self.instance_name = instance_name
7531 self.cleanup = cleanup
7532 self.live = False # will be overridden later
7533 self.failover = failover
7534 self.fallback = fallback
7535 self.ignore_consistency = ignore_consistency
7536 self.shutdown_timeout = shutdown_timeout
7538 def CheckPrereq(self):
7539 """Check prerequisites.
7541 This checks that the instance is in the cluster.
7544 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7545 instance = self.cfg.GetInstanceInfo(instance_name)
7546 assert instance is not None
7547 self.instance = instance
# A down/offline instance cannot be live-migrated; silently switch to
# failover when the caller allowed fallback.
7549 if (not self.cleanup and
7550 not instance.admin_state == constants.ADMINST_UP and
7551 not self.failover and self.fallback):
7552 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7553 " switching to failover")
7554 self.failover = True
# Only mirrored disk templates can be migrated/failed over at all.
7556 if instance.disk_template not in constants.DTS_MIRRORED:
7561 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7562 " %s" % (instance.disk_template, text),
# Externally mirrored templates may go to an arbitrary node, chosen
# either explicitly or via the iallocator.
7565 if instance.disk_template in constants.DTS_EXT_MIRROR:
7566 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7568 if self.lu.op.iallocator:
7569 self._RunAllocator()
7571 # We set set self.target_node as it is required by
7573 self.target_node = self.lu.op.target_node
7575 # self.target_node is already populated, either directly or by the
7577 target_node = self.target_node
7578 if self.target_node == instance.primary_node:
7579 raise errors.OpPrereqError("Cannot migrate instance %s"
7580 " to its primary (%s)" %
7581 (instance.name, instance.primary_node))
7583 if len(self.lu.tasklets) == 1:
7584 # It is safe to release locks only when we're the only tasklet
7586 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7587 keep=[instance.primary_node, self.target_node])
# Internally mirrored (DRBD) templates can only move to the secondary.
7590 secondary_nodes = instance.secondary_nodes
7591 if not secondary_nodes:
7592 raise errors.ConfigurationError("No secondary node but using"
7593 " %s disk template" %
7594 instance.disk_template)
7595 target_node = secondary_nodes[0]
7596 if self.lu.op.iallocator or (self.lu.op.target_node and
7597 self.lu.op.target_node != target_node):
7599 text = "failed over"
7602 raise errors.OpPrereqError("Instances with disk template %s cannot"
7603 " be %s to arbitrary nodes"
7604 " (neither an iallocator nor a target"
7605 " node can be passed)" %
7606 (instance.disk_template, text),
7609 i_be = self.cfg.GetClusterInfo().FillBE(instance)
7611 # check memory requirements on the secondary node
7612 if not self.failover or instance.admin_state == constants.ADMINST_UP:
7613 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7614 instance.name, i_be[constants.BE_MAXMEM],
7615 instance.hypervisor)
7617 self.lu.LogInfo("Not checking memory on the secondary node as"
7618 " instance will not be started")
7620 # check if failover must be forced instead of migration
7621 if (not self.cleanup and not self.failover and
7622 i_be[constants.BE_ALWAYS_FAILOVER]):
7624 self.lu.LogInfo("Instance configured to always failover; fallback"
7626 self.failover = True
7628 raise errors.OpPrereqError("This instance has been configured to"
7629 " always failover, please allow failover",
7632 # check bridge existance
7633 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7635 if not self.cleanup:
7636 _CheckNodeNotDrained(self.lu, target_node)
7637 if not self.failover:
7638 result = self.rpc.call_instance_migratable(instance.primary_node,
# If the hypervisor refuses migration, degrade to failover when allowed.
7640 if result.fail_msg and self.fallback:
7641 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7643 self.failover = True
7645 result.Raise("Can't migrate, please use failover",
7646 prereq=True, ecode=errors.ECODE_STATE)
7648 assert not (self.failover and self.cleanup)
# Decide live vs. non-live: 'live' and 'mode' are mutually exclusive;
# 'live' is translated into a mode and reset to None for re-runs.
7650 if not self.failover:
7651 if self.lu.op.live is not None and self.lu.op.mode is not None:
7652 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7653 " parameters are accepted",
7655 if self.lu.op.live is not None:
7657 self.lu.op.mode = constants.HT_MIGRATION_LIVE
7659 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7660 # reset the 'live' parameter to None so that repeated
7661 # invocations of CheckPrereq do not raise an exception
7662 self.lu.op.live = None
7663 elif self.lu.op.mode is None:
7664 # read the default value from the hypervisor
7665 i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7667 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7669 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7671 # Failover is never live
7674 def _RunAllocator(self):
7675 """Run the allocator based on input opcode.
7678 ial = IAllocator(self.cfg, self.rpc,
7679 mode=constants.IALLOCATOR_MODE_RELOC,
7680 name=self.instance_name,
7681 # TODO See why hail breaks with a single node below
7682 relocate_from=[self.instance.primary_node,
7683 self.instance.primary_node],
7686 ial.Run(self.lu.op.iallocator)
7689 raise errors.OpPrereqError("Can't compute nodes using"
7690 " iallocator '%s': %s" %
7691 (self.lu.op.iallocator, ial.info),
7693 if len(ial.result) != ial.required_nodes:
7694 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7695 " of nodes (%s), required %s" %
7696 (self.lu.op.iallocator, len(ial.result),
7697 ial.required_nodes), errors.ECODE_FAULT)
7698 self.target_node = ial.result[0]
7699 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7700 self.instance_name, self.lu.op.iallocator,
7701 utils.CommaJoin(ial.result))
7703 def _WaitUntilSync(self):
7704 """Poll with custom rpc for disk sync.
7706 This uses our own step-based rpc call.
7709 self.feedback_fn("* wait until resync is done")
7713 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7715 self.instance.disks)
# Aggregate per-node status: done only if all nodes are done; report
# the minimum sync percentage across nodes.
7717 for node, nres in result.items():
7718 nres.Raise("Cannot resync disks on node %s" % node)
7719 node_done, node_percent = nres.payload
7720 all_done = all_done and node_done
7721 if node_percent is not None:
7722 min_percent = min(min_percent, node_percent)
7724 if min_percent < 100:
7725 self.feedback_fn(" - progress: %.1f%%" % min_percent)
7728 def _EnsureSecondary(self, node):
7729 """Demote a node to secondary.
7732 self.feedback_fn("* switching node %s to secondary mode" % node)
7734 for dev in self.instance.disks:
7735 self.cfg.SetDiskID(dev, node)
7737 result = self.rpc.call_blockdev_close(node, self.instance.name,
7738 self.instance.disks)
7739 result.Raise("Cannot change disk to secondary on node %s" % node)
7741 def _GoStandalone(self):
7742 """Disconnect from the network.
7745 self.feedback_fn("* changing into standalone mode")
7746 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7747 self.instance.disks)
7748 for node, nres in result.items():
7749 nres.Raise("Cannot disconnect disks node %s" % node)
7751 def _GoReconnect(self, multimaster):
7752 """Reconnect to the network.
# multimaster=True puts DRBD into dual-primary mode (needed during the
# live copy); False returns to normal single-master operation.
7758 msg = "single-master"
7759 self.feedback_fn("* changing disks into %s mode" % msg)
7760 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7761 self.instance.disks,
7762 self.instance.name, multimaster)
7763 for node, nres in result.items():
7764 nres.Raise("Cannot change disks config on node %s" % node)
7766 def _ExecCleanup(self):
7767 """Try to cleanup after a failed migration.
7769 The cleanup is done by:
7770 - check that the instance is running only on one node
7771 (and update the config if needed)
7772 - change disks on its secondary node to secondary
7773 - wait until disks are fully synchronized
7774 - disconnect from the network
7775 - change disks into single-master mode
7776 - wait again until disks are fully synchronized
7779 instance = self.instance
7780 target_node = self.target_node
7781 source_node = self.source_node
7783 # check running on only one node
7784 self.feedback_fn("* checking where the instance actually runs"
7785 " (if this hangs, the hypervisor might be in"
7787 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7788 for node, result in ins_l.items():
7789 result.Raise("Can't contact node %s" % node)
7791 runningon_source = instance.name in ins_l[source_node].payload
7792 runningon_target = instance.name in ins_l[target_node].payload
7794 if runningon_source and runningon_target:
7795 raise errors.OpExecError("Instance seems to be running on two nodes,"
7796 " or the hypervisor is confused; you will have"
7797 " to ensure manually that it runs only on one"
7798 " and restart this operation")
7800 if not (runningon_source or runningon_target):
7801 raise errors.OpExecError("Instance does not seem to be running at all;"
7802 " in this case it's safer to repair by"
7803 " running 'gnt-instance stop' to ensure disk"
7804 " shutdown, and then restarting it")
7806 if runningon_target:
7807 # the migration has actually succeeded, we need to update the config
7808 self.feedback_fn("* instance running on secondary node (%s),"
7809 " updating config" % target_node)
7810 instance.primary_node = target_node
7811 self.cfg.Update(instance, self.feedback_fn)
7812 demoted_node = source_node
7814 self.feedback_fn("* instance confirmed to be running on its"
7815 " primary node (%s)" % source_node)
7816 demoted_node = target_node
# DRBD-based templates must have the losing node demoted and the disks
# re-synced; errors while syncing a standalone device are expected.
7818 if instance.disk_template in constants.DTS_INT_MIRROR:
7819 self._EnsureSecondary(demoted_node)
7821 self._WaitUntilSync()
7822 except errors.OpExecError:
7823 # we ignore here errors, since if the device is standalone, it
7824 # won't be able to sync
7826 self._GoStandalone()
7827 self._GoReconnect(False)
7828 self._WaitUntilSync()
7830 self.feedback_fn("* done")
7832 def _RevertDiskStatus(self):
7833 """Try to revert the disk status after a failed migration.
7836 target_node = self.target_node
7837 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
# Best-effort: failure to reconnect is logged, not raised, so that the
# original migration error is not masked.
7841 self._EnsureSecondary(target_node)
7842 self._GoStandalone()
7843 self._GoReconnect(False)
7844 self._WaitUntilSync()
7845 except errors.OpExecError, err:
7846 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7847 " please try to recover the instance manually;"
7848 " error '%s'" % str(err))
7850 def _AbortMigration(self):
7851 """Call the hypervisor code to abort a started migration.
7854 instance = self.instance
7855 target_node = self.target_node
7856 source_node = self.source_node
7857 migration_info = self.migration_info
# Finalize (abort) on the destination first, then on the source; both
# failures are only logged so disk-status revert can still run.
7859 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
7863 abort_msg = abort_result.fail_msg
7865 logging.error("Aborting migration failed on target node %s: %s",
7866 target_node, abort_msg)
7867 # Don't raise an exception here, as we stil have to try to revert the
7868 # disk status, even if this step failed.
7870 abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
7871 instance, False, self.live)
7872 abort_msg = abort_result.fail_msg
7874 logging.error("Aborting migration failed on source node %s: %s",
7875 source_node, abort_msg)
7877 def _ExecMigration(self):
7878 """Migrate an instance.
7880 The migrate is done by:
7881 - change the disks into dual-master mode
7882 - wait until disks are fully synchronized again
7883 - migrate the instance
7884 - change disks on the new secondary node (the old primary) to secondary
7885 - wait until disks are fully synchronized
7886 - change disks into single-master mode
7889 instance = self.instance
7890 target_node = self.target_node
7891 source_node = self.source_node
7893 # Check for hypervisor version mismatch and warn the user.
7894 nodeinfo = self.rpc.call_node_info([source_node, target_node],
7895 None, [self.instance.hypervisor])
7896 for ninfo in nodeinfo.values():
7897 ninfo.Raise("Unable to retrieve node information from node '%s'" %
7899 (_, _, (src_info, )) = nodeinfo[source_node].payload
7900 (_, _, (dst_info, )) = nodeinfo[target_node].payload
7902 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
7903 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
7904 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
7905 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
7906 if src_version != dst_version:
7907 self.feedback_fn("* warning: hypervisor version mismatch between"
7908 " source (%s) and target (%s) node" %
7909 (src_version, dst_version))
7911 self.feedback_fn("* checking disk consistency between source and target")
7912 for dev in instance.disks:
7913 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7914 raise errors.OpExecError("Disk %s is degraded or not fully"
7915 " synchronized on target node,"
7916 " aborting migration" % dev.iv_name)
7918 # First get the migration information from the remote node
7919 result = self.rpc.call_migration_info(source_node, instance)
7920 msg = result.fail_msg
7922 log_err = ("Failed fetching source migration information from %s: %s" %
7924 logging.error(log_err)
7925 raise errors.OpExecError(log_err)
7927 self.migration_info = migration_info = result.payload
7929 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7930 # Then switch the disks to master/master mode
7931 self._EnsureSecondary(target_node)
7932 self._GoStandalone()
7933 self._GoReconnect(True)
7934 self._WaitUntilSync()
7936 self.feedback_fn("* preparing %s to accept the instance" % target_node)
7937 result = self.rpc.call_accept_instance(target_node,
7940 self.nodes_ip[target_node])
7942 msg = result.fail_msg
7944 logging.error("Instance pre-migration failed, trying to revert"
7945 " disk status: %s", msg)
7946 self.feedback_fn("Pre-migration failed, aborting")
7947 self._AbortMigration()
7948 self._RevertDiskStatus()
7949 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7950 (instance.name, msg))
7952 self.feedback_fn("* migrating instance to %s" % target_node)
7953 result = self.rpc.call_instance_migrate(source_node, instance,
7954 self.nodes_ip[target_node],
7956 msg = result.fail_msg
7958 logging.error("Instance migration failed, trying to revert"
7959 " disk status: %s", msg)
7960 self.feedback_fn("Migration failed, aborting")
7961 self._AbortMigration()
7962 self._RevertDiskStatus()
7963 raise errors.OpExecError("Could not migrate instance %s: %s" %
7964 (instance.name, msg))
# Poll the source hypervisor until the memory transfer completes,
# emitting progress feedback at most every _MIGRATION_FEEDBACK_INTERVAL.
7966 self.feedback_fn("* starting memory transfer")
7967 last_feedback = time.time()
7969 result = self.rpc.call_instance_get_migration_status(source_node,
7971 msg = result.fail_msg
7972 ms = result.payload # MigrationStatus instance
7973 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
7974 logging.error("Instance migration failed, trying to revert"
7975 " disk status: %s", msg)
7976 self.feedback_fn("Migration failed, aborting")
7977 self._AbortMigration()
7978 self._RevertDiskStatus()
7979 raise errors.OpExecError("Could not migrate instance %s: %s" %
7980 (instance.name, msg))
7982 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
7983 self.feedback_fn("* memory transfer complete")
7986 if (utils.TimeoutExpired(last_feedback,
7987 self._MIGRATION_FEEDBACK_INTERVAL) and
7988 ms.transferred_ram is not None):
7989 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
7990 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
7991 last_feedback = time.time()
7993 time.sleep(self._MIGRATION_POLL_INTERVAL)
7995 result = self.rpc.call_instance_finalize_migration_src(source_node,
7999 msg = result.fail_msg
8001 logging.error("Instance migration succeeded, but finalization failed"
8002 " on the source node: %s", msg)
8003 raise errors.OpExecError("Could not finalize instance migration: %s" %
8006 instance.primary_node = target_node
8008 # distribute new instance config to the other nodes
8009 self.cfg.Update(instance, self.feedback_fn)
8011 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8015 msg = result.fail_msg
8017 logging.error("Instance migration succeeded, but finalization failed"
8018 " on the target node: %s", msg)
8019 raise errors.OpExecError("Could not finalize instance migration: %s" %
# Return the DRBD pair to normal single-master operation with the old
# primary demoted to secondary.
8022 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8023 self._EnsureSecondary(source_node)
8024 self._WaitUntilSync()
8025 self._GoStandalone()
8026 self._GoReconnect(False)
8027 self._WaitUntilSync()
8029 self.feedback_fn("* done")
8031 def _ExecFailover(self):
8032 """Failover an instance.
8034 The failover is done by shutting it down on its present node and
8035 starting it on the secondary.
8038 instance = self.instance
8039 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8041 source_node = instance.primary_node
8042 target_node = self.target_node
8044 if instance.admin_state == constants.ADMINST_UP:
8045 self.feedback_fn("* checking disk consistency between source and target")
8046 for dev in instance.disks:
8047 # for drbd, these are drbd over lvm
8048 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
8049 if primary_node.offline:
8050 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8052 (primary_node.name, dev.iv_name, target_node))
8053 elif not self.ignore_consistency:
8054 raise errors.OpExecError("Disk %s is degraded on target node,"
8055 " aborting failover" % dev.iv_name)
8057 self.feedback_fn("* not checking disk consistency as instance is not"
8060 self.feedback_fn("* shutting down instance on source node")
8061 logging.info("Shutting down instance %s on node %s",
8062 instance.name, source_node)
8064 result = self.rpc.call_instance_shutdown(source_node, instance,
8065 self.shutdown_timeout)
8066 msg = result.fail_msg
# Shutdown failure is tolerated only when consistency is ignored or the
# primary is already marked offline; otherwise abort.
8068 if self.ignore_consistency or primary_node.offline:
8069 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8070 " proceeding anyway; please make sure node"
8071 " %s is down; error details: %s",
8072 instance.name, source_node, source_node, msg)
8074 raise errors.OpExecError("Could not shutdown instance %s on"
8076 (instance.name, source_node, msg))
8078 self.feedback_fn("* deactivating the instance's disks on source node")
8079 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8080 raise errors.OpExecError("Can't shut down the instance's disks")
8082 instance.primary_node = target_node
8083 # distribute new instance config to the other nodes
8084 self.cfg.Update(instance, self.feedback_fn)
8086 # Only start the instance if it's marked as up
8087 if instance.admin_state == constants.ADMINST_UP:
8088 self.feedback_fn("* activating the instance's disks on target node %s" %
8090 logging.info("Starting instance %s on node %s",
8091 instance.name, target_node)
8093 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8094 ignore_secondaries=True)
8096 _ShutdownInstanceDisks(self.lu, instance)
8097 raise errors.OpExecError("Can't activate the instance's disks")
8099 self.feedback_fn("* starting the instance on the target node %s" %
8101 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8103 msg = result.fail_msg
8105 _ShutdownInstanceDisks(self.lu, instance)
8106 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8107 (instance.name, target_node, msg))
8109 def Exec(self, feedback_fn):
8110 """Perform the migration.
# Entry point: dispatches to failover, cleanup, or migration, after
# computing source/target nodes and their secondary IPs.
8113 self.feedback_fn = feedback_fn
8114 self.source_node = self.instance.primary_node
8116 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8117 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8118 self.target_node = self.instance.secondary_nodes[0]
8119 # Otherwise self.target_node has been populated either
8120 # directly, or through an iallocator.
8122 self.all_nodes = [self.source_node, self.target_node]
8123 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8124 in self.cfg.GetMultiNodeInfo(self.all_nodes))
8127 feedback_fn("Failover instance %s" % self.instance.name)
8128 self._ExecFailover()
8130 feedback_fn("Migrating instance %s" % self.instance.name)
8133 return self._ExecCleanup()
8135 return self._ExecMigration()
8138 def _CreateBlockDev(lu, node, instance, device, force_create,
8140 """Create a tree of block devices on a given node.
8142 If this device type has to be created on secondaries, create it and
8145 If not, just recurse to children keeping the same 'force' value.
8147 @param lu: the lu on whose behalf we execute
8148 @param node: the node on which to create the device
8149 @type instance: L{objects.Instance}
8150 @param instance: the instance which owns the device
8151 @type device: L{objects.Disk}
8152 @param device: the device to create
8153 @type force_create: boolean
8154 @param force_create: whether to force creation of this device; this
8155 will be change to True whenever we find a device which has
8156 CreateOnSecondary() attribute
8157 @param info: the extra 'metadata' we should attach to the device
8158 (this will be represented as a LVM tag)
8159 @type force_open: boolean
8160 @param force_open: this parameter will be passes to the
8161 L{backend.BlockdevCreate} function where it specifies
8162 whether we run on primary or not, and it affects both
8163 the child assembly and the device own Open() execution
# Devices that must exist on secondaries force creation for the whole
# subtree from this point down.
8166 if device.CreateOnSecondary():
# Depth-first: create children before the device itself.
8170 for child in device.children:
8171 _CreateBlockDev(lu, node, instance, child, force_create,
# Without the force flag this level is skipped (children may still have
# been created above).
8174 if not force_create:
8177 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8180 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8181 """Create a single block device on a given node.
8183 This will not recurse over children of the device, so they must be
8186 @param lu: the lu on whose behalf we execute
8187 @param node: the node on which to create the device
8188 @type instance: L{objects.Instance}
8189 @param instance: the instance which owns the device
8190 @type device: L{objects.Disk}
8191 @param device: the device to create
8192 @param info: the extra 'metadata' we should attach to the device
8193 (this will be represented as a LVM tag)
8194 @type force_open: boolean
8195 @param force_open: this parameter will be passes to the
8196 L{backend.BlockdevCreate} function where it specifies
8197 whether we run on primary or not, and it affects both
8198 the child assembly and the device own Open() execution
# Point the config entry at this node before issuing the RPC.
8201 lu.cfg.SetDiskID(device, node)
8202 result = lu.rpc.call_blockdev_create(node, device, device.size,
8203 instance.name, force_open, info)
8204 result.Raise("Can't create block device %s on"
8205 " node %s for instance %s" % (device, node, instance.name))
# Record the physical id returned by the node on first creation only.
8206 if device.physical_id is None:
8207 device.physical_id = result.payload
8210 def _GenerateUniqueNames(lu, exts):
8211 """Generate a suitable LV name.
8213 This will generate a logical volume name for the given instance.
# One unique id per requested extension; the extension is appended to
# the generated id.
8218 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8219 results.append("%s%s" % (new_id, val))
8223 def _ComputeLDParams(disk_template, disk_params):
8224 """Computes Logical Disk parameters from Disk Template parameters.
8226 @type disk_template: string
8227 @param disk_template: disk template, one of L{constants.DISK_TEMPLATES}
8228 @type disk_params: dict
8229 @param disk_params: disk template parameters; dict(template_name -> parameters
8231 @return: a list of dicts, one for each node of the disk hierarchy. Each dict
8232 contains the LD parameters of the node. The tree is flattened in-order.
8235 if disk_template not in constants.DISK_TEMPLATES:
8236 raise errors.ProgrammerError("Unknown disk template %s" % disk_template)
8239 dt_params = disk_params[disk_template]
# DRBD8 yields three parameter dicts: the DRBD device itself, then the
# data LV and the metadata LV (in-order flattening of the disk tree).
8240 if disk_template == constants.DT_DRBD8:
8242 constants.LDP_RESYNC_RATE: dt_params[constants.DRBD_RESYNC_RATE],
8243 constants.LDP_BARRIERS: dt_params[constants.DRBD_DISK_BARRIERS],
8244 constants.LDP_NO_META_FLUSH: dt_params[constants.DRBD_META_BARRIERS],
8245 constants.LDP_DEFAULT_METAVG: dt_params[constants.DRBD_DEFAULT_METAVG],
8246 constants.LDP_DISK_CUSTOM: dt_params[constants.DRBD_DISK_CUSTOM],
8247 constants.LDP_NET_CUSTOM: dt_params[constants.DRBD_NET_CUSTOM],
8248 constants.LDP_DYNAMIC_RESYNC: dt_params[constants.DRBD_DYNAMIC_RESYNC],
8252 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_DRBD8],
8255 result.append(drbd_params)
# Data LV parameters.
8259 constants.LDP_STRIPES: dt_params[constants.DRBD_DATA_STRIPES],
8262 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8264 result.append(data_params)
# Metadata LV parameters.
8268 constants.LDP_STRIPES: dt_params[constants.DRBD_META_STRIPES],
8271 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8273 result.append(meta_params)
# File-based templates use the plain LD_FILE defaults.
8275 elif (disk_template == constants.DT_FILE or
8276 disk_template == constants.DT_SHARED_FILE):
8277 result.append(constants.DISK_LD_DEFAULTS[constants.LD_FILE])
8279 elif disk_template == constants.DT_PLAIN:
8281 constants.LDP_STRIPES: dt_params[constants.LV_STRIPES],
8284 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8286 result.append(params)
8288 elif disk_template == constants.DT_BLOCK:
8289 result.append(constants.DISK_LD_DEFAULTS[constants.LD_BLOCKDEV])
8294 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8295 iv_name, p_minor, s_minor, drbd_params, data_params,
8297 """Generate a drbd8 device complete with its children.
# Exactly one VG name and one LV name for each of data and metadata.
8300 assert len(vgnames) == len(names) == 2
# A fresh DRBD port and shared secret are allocated from the config.
8301 port = lu.cfg.AllocatePort()
8302 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
# Data LV sized like the disk; metadata LV has the fixed DRBD meta size.
8304 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8305 logical_id=(vgnames[0], names[0]),
8307 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8308 logical_id=(vgnames[1], names[1]),
8310 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8311 logical_id=(primary, secondary, port,
8314 children=[dev_data, dev_meta],
8315 iv_name=iv_name, params=drbd_params)
8319 def _GenerateDiskTemplate(lu, template_name,
8320 instance_name, primary_node,
8321 secondary_nodes, disk_info,
8322 file_storage_dir, file_driver,
8323 base_index, feedback_fn, disk_params):
8324 """Generate the entire disk layout for a given template type.
8327 #TODO: compute space requirements
8329 vgname = lu.cfg.GetVGName()
8330 disk_count = len(disk_info)
8332 ld_params = _ComputeLDParams(template_name, disk_params)
# Diskless: nothing to generate.
8333 if template_name == constants.DT_DISKLESS:
# Plain LVM: one LV per disk, no secondaries allowed.
8335 elif template_name == constants.DT_PLAIN:
8336 if len(secondary_nodes) != 0:
8337 raise errors.ProgrammerError("Wrong template configuration")
8339 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8340 for i in range(disk_count)])
8341 for idx, disk in enumerate(disk_info):
8342 disk_index = idx + base_index
8343 vg = disk.get(constants.IDISK_VG, vgname)
8344 feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
8345 disk_dev = objects.Disk(dev_type=constants.LD_LV,
8346 size=disk[constants.IDISK_SIZE],
8347 logical_id=(vg, names[idx]),
8348 iv_name="disk/%d" % disk_index,
8349 mode=disk[constants.IDISK_MODE],
8350 params=ld_params[0])
8351 disks.append(disk_dev)
# DRBD8: exactly one secondary; allocate a minor pair per disk and a
# _data/_meta LV name pair per disk.
8352 elif template_name == constants.DT_DRBD8:
8353 drbd_params, data_params, meta_params = ld_params
8354 if len(secondary_nodes) != 1:
8355 raise errors.ProgrammerError("Wrong template configuration")
8356 remote_node = secondary_nodes[0]
8357 minors = lu.cfg.AllocateDRBDMinor(
8358 [primary_node, remote_node] * len(disk_info), instance_name)
8361 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8362 for i in range(disk_count)]):
8363 names.append(lv_prefix + "_data")
8364 names.append(lv_prefix + "_meta")
8365 for idx, disk in enumerate(disk_info):
8366 disk_index = idx + base_index
8367 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8368 data_vg = disk.get(constants.IDISK_VG, vgname)
8369 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8370 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8371 disk[constants.IDISK_SIZE],
8373 names[idx * 2:idx * 2 + 2],
8374 "disk/%d" % disk_index,
8375 minors[idx * 2], minors[idx * 2 + 1],
8376 drbd_params, data_params, meta_params)
8377 disk_dev.mode = disk[constants.IDISK_MODE]
8378 disks.append(disk_dev)
# File storage: one file per disk under file_storage_dir.
8379 elif template_name == constants.DT_FILE:
8380 if len(secondary_nodes) != 0:
8381 raise errors.ProgrammerError("Wrong template configuration")
8383 opcodes.RequireFileStorage()
8385 for idx, disk in enumerate(disk_info):
8386 disk_index = idx + base_index
8387 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8388 size=disk[constants.IDISK_SIZE],
8389 iv_name="disk/%d" % disk_index,
8390 logical_id=(file_driver,
8391 "%s/disk%d" % (file_storage_dir,
8393 mode=disk[constants.IDISK_MODE],
8394 params=ld_params[0])
8395 disks.append(disk_dev)
# Shared file storage: same layout as DT_FILE, different feature gate.
8396 elif template_name == constants.DT_SHARED_FILE:
8397 if len(secondary_nodes) != 0:
8398 raise errors.ProgrammerError("Wrong template configuration")
8400 opcodes.RequireSharedFileStorage()
8402 for idx, disk in enumerate(disk_info):
8403 disk_index = idx + base_index
8404 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8405 size=disk[constants.IDISK_SIZE],
8406 iv_name="disk/%d" % disk_index,
8407 logical_id=(file_driver,
8408 "%s/disk%d" % (file_storage_dir,
8410 mode=disk[constants.IDISK_MODE],
8411 params=ld_params[0])
8412 disks.append(disk_dev)
# Block devices are adopted, not created: the logical id carries the
# caller-provided device path.
8413 elif template_name == constants.DT_BLOCK:
8414 if len(secondary_nodes) != 0:
8415 raise errors.ProgrammerError("Wrong template configuration")
8417 for idx, disk in enumerate(disk_info):
8418 disk_index = idx + base_index
8419 disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
8420 size=disk[constants.IDISK_SIZE],
8421 logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
8422 disk[constants.IDISK_ADOPT]),
8423 iv_name="disk/%d" % disk_index,
8424 mode=disk[constants.IDISK_MODE],
8425 params=ld_params[0])
8426 disks.append(disk_dev)
8429 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
8433 def _GetInstanceInfoText(instance):
8434 """Compute that text that should be added to the disk's metadata.
8437 return "originstname+%s" % instance.name
8440 def _CalcEta(time_taken, written, total_size):
8441 """Calculates the ETA based on size written and total size.
8443 @param time_taken: The time taken so far
8444 @param written: amount written so far
8445 @param total_size: The total size of data to be written
8446 @return: The remaining time in seconds
8449 avg_time = time_taken / float(written)
8450 return (total_size - written) * avg_time
def _WipeDisks(lu, instance):
  """Wipes instance disks.

  Overwrites the instance's disks chunk by chunk via the
  C{blockdev_wipe} RPC while DRBD sync is paused, logging an ETA
  roughly once per minute.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @return: the success of the wipe

  """
  # all wipe RPCs are directed at the primary node
  node = instance.primary_node

  for device in instance.disks:
    lu.cfg.SetDiskID(device, node)

  logging.info("Pause sync of instance %s disks", instance.name)
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)

  # NOTE(review): the per-disk success guard and the closing of this
  # warning call appear elided from this excerpt -- verify
  for idx, success in enumerate(result.payload):
      logging.warn("pause-sync of instance %s for disks %d failed",

  for idx, device in enumerate(instance.disks):
    # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
    # MAX_WIPE_CHUNK at max
    wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
                          constants.MIN_WIPE_CHUNK_PERCENT)
    # we _must_ make this an int, otherwise rounding errors will
    # accumulate across chunks
    wipe_chunk_size = int(wipe_chunk_size)

    lu.LogInfo("* Wiping disk %d", idx)
    logging.info("Wiping disk %d for instance %s, node %s using"
                 " chunk size %s", idx, instance.name, node, wipe_chunk_size)

    # NOTE(review): the initialisation of offset/size/last_output seems
    # to be missing from this excerpt -- verify against the full source
    start_time = time.time()

    while offset < size:
      wipe_size = min(wipe_chunk_size, size - offset)
      logging.debug("Wiping disk %d, offset %s, chunk %s",
                    idx, offset, wipe_size)
      result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
      result.Raise("Could not wipe disk %d at offset %d for size %d" %
                   (idx, offset, wipe_size))

      # emit a progress/ETA line at most once per minute
      if now - last_output >= 60:
        eta = _CalcEta(now - start_time, offset, size)
        lu.LogInfo(" - done: %.1f%% ETA: %s" %
                   (offset / float(size) * 100, utils.FormatSeconds(eta)))

  logging.info("Resume sync of instance %s disks", instance.name)

  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)

  # NOTE(review): the per-disk success guard around these warnings
  # appears elided from this excerpt -- verify
  for idx, success in enumerate(result.payload):
      lu.LogWarning("Resume sync of disk %d failed, please have a"
                    " look at the status and troubleshoot the issue", idx)
      logging.warn("resume-sync of instance %s for disks %d failed",
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation
  @return: the success of the creation

  """
  info = _GetInstanceInfoText(instance)
  if target_node is None:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes
  # NOTE(review): the else branch (creating only on target_node) appears
  # elided from this excerpt -- verify against the full source

  # file-based templates need the parent directory created up-front
  if instance.disk_template in constants.DTS_FILEBASED:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    result.Raise("Failed to create directory '%s' on"
                 " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUInstanceSetParams
  for idx, device in enumerate(instance.disks):
    # NOTE(review): the "continue" body of this skip check appears
    # elided from this excerpt
    if to_skip and idx in to_skip:
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    # only the primary node gets force-create semantics
    for node in all_nodes:
      f_create = node == pnode
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @return: the success of the removal

  NOTE(review): the "if target_node:"/"else:" selectors and the
  fail_msg guards appear elided from this excerpt -- verify against
  the full source.

  """
  logging.info("Removing block devices for instance %s", instance.name)

  for device in instance.disks:
      edata = [(target_node, device)]
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)

    # if this is a DRBD disk, return its port to the pool
    if device.dev_type in constants.LDS_DRBD:
      tcp_port = device.logical_id[2]
      lu.cfg.AddTcpUdpPort(tcp_port)

  # file-based instances also need their storage directory removed
  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
      tgt = instance.primary_node
  result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                  file_storage_dir, instance.primary_node, result.fail_msg)
def _ComputeDiskSizePerVG(disk_template, disks):
  """Compute disk size requirements in the volume group

  NOTE(review): the "req_size_dict = {" opening, its closing brace and
  the loop header of the inner helper appear elided from this excerpt.

  """
  def _compute(disks, payload):
    """Universal algorithm.

    """
      # NOTE(review): vgs.get(constants.IDISK_VG, 0) looks wrong -- it
      # should presumably be vgs.get(disk[constants.IDISK_VG], 0), else
      # the per-VG running total never accumulates; confirm upstream
      vgs[disk[constants.IDISK_VG]] = \
        vgs.get(constants.IDISK_VG, 0) + disk[constants.IDISK_SIZE] + payload

  # Required free disk space as a function of disk and swap space
    constants.DT_DISKLESS: {},
    constants.DT_PLAIN: _compute(disks, 0),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
    constants.DT_FILE: {},
    constants.DT_SHARED_FILE: {},

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  NOTE(review): the "req_size_dict = {" opening, its closing brace and
  the "constants.DT_DRBD8:" key line appear elided from this excerpt.

  """
  # Required free disk space as a function of disk and swap space
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
    # 128 MB are added for drbd metadata for each disk
      sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
    constants.DT_FILE: None,
    constants.DT_SHARED_FILE: 0,
    constants.DT_BLOCK: 0,

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
8676 def _FilterVmNodes(lu, nodenames):
8677 """Filters out non-vm_capable nodes from a list.
8679 @type lu: L{LogicalUnit}
8680 @param lu: the logical unit for which we check
8681 @type nodenames: list
8682 @param nodenames: the list of nodes on which we should check
8684 @return: the list of vm-capable nodes
8687 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
8688 return [name for name in nodenames if name not in vm_nodes]
def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)

  cluster = lu.cfg.GetClusterInfo()
  # overlay the requested parameters on the cluster-level defaults
  hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)

  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
  for node in nodenames:
    # NOTE(review): the per-node result extraction ("info = hvinfo[node]"
    # and any offline handling) appears elided from this excerpt
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """OS parameters validation.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the OS we should use
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)
  # NOTE(review): the final "osparams)" argument/continuation of this
  # RPC call appears elided from this excerpt
  result = lu.rpc.call_os_validate(nodenames, required, osname,
                                   [constants.OS_VALIDATE_PARAMETERS],
  for node, nres in result.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    nres.Raise("OS Parameters validation failed on node %s" % node)
    if not nres.payload:
      lu.LogInfo("OS %s not found on node %s, validation skipped",
class LUInstanceCreate(LogicalUnit):
  """Create an instance.

  """
  # hooks directory name and hooks object type for this LU
  HPATH = "instance-add"
  HTYPE = constants.HTYPE_INSTANCE
  def CheckArguments(self):
    """Check arguments.

    NOTE(review): several lines (error-code arguments, some "else:"
    branches, flag bookkeeping and call continuations) appear elided
    from this excerpt; comments below describe only the visible code.

    """
    # do not require name_check to ease forward/backward compatibility
    if self.op.no_install and self.op.start:
      self.LogInfo("No-installation mode selected, disabling startup")
      self.op.start = False
    # validate/normalize the instance name
    self.op.instance_name = \
      netutils.Hostname.GetNormalizedName(self.op.instance_name)
    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do IP address check without a name"
                                 " check", errors.ECODE_INVAL)
    # check nics' parameter names
    for nic in self.op.nics:
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
    # check disks. parameter names and consistent adopt/no-adopt strategy
    has_adopt = has_no_adopt = False
    for disk in self.op.disks:
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
      if constants.IDISK_ADOPT in disk:
    # adoption must be all-or-nothing across the disk list
    if has_adopt and has_no_adopt:
      raise errors.OpPrereqError("Either all disks are adopted or none is",
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
        raise errors.OpPrereqError("Disk adoption is not supported for the"
                                   " '%s' disk template" %
                                   self.op.disk_template,
      if self.op.iallocator is not None:
        raise errors.OpPrereqError("Disk adoption not allowed with an"
                                   " iallocator script", errors.ECODE_INVAL)
      if self.op.mode == constants.INSTANCE_IMPORT:
        raise errors.OpPrereqError("Disk adoption not allowed for"
                                   " instance import", errors.ECODE_INVAL)
    if self.op.disk_template in constants.DTS_MUST_ADOPT:
      raise errors.OpPrereqError("Disk template %s requires disk adoption,"
                                 " but no 'adopt' parameter given" %
                                 self.op.disk_template,
    self.adopt_disks = has_adopt
    # instance name verification
    if self.op.name_check:
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
      self.op.instance_name = self.hostname1.name
      # used in CheckPrereq for ip ping check
      self.check_ip = self.hostname1.ip
      self.check_ip = None
    # file storage checks
    if (self.op.file_driver and
        not self.op.file_driver in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver, errors.ECODE_INVAL)
    if self.op.disk_template == constants.DT_FILE:
      opcodes.RequireFileStorage()
    elif self.op.disk_template == constants.DT_SHARED_FILE:
      opcodes.RequireSharedFileStorage()
    ### Node/iallocator related checks
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")
    if self.op.pnode is not None:
      if self.op.disk_template in constants.DTS_INT_MIRROR:
        if self.op.snode is None:
          raise errors.OpPrereqError("The networked disk templates need"
                                     " a mirror node", errors.ECODE_INVAL)
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
        self.op.snode = None
    # cluster domain secret, used below by the remote-import handshake
    self._cds = _GetClusterDomainSecret()
    if self.op.mode == constants.INSTANCE_IMPORT:
      # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      # still matches
      self.op.force_variant = True
      if self.op.no_install:
        self.LogInfo("No-installation mode has no effect during import")
    elif self.op.mode == constants.INSTANCE_CREATE:
      if self.op.os_type is None:
        raise errors.OpPrereqError("No guest OS specified",
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
                                   " installation" % self.op.os_type,
      if self.op.disk_template is None:
        raise errors.OpPrereqError("No disk template specified",
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
      # Check handshake to ensure both clusters have the same domain secret
      src_handshake = self.op.source_handshake
      if not src_handshake:
        raise errors.OpPrereqError("Missing source handshake",
      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
      # Load and check source CA
      self.source_x509_ca_pem = self.op.source_x509_ca
      if not self.source_x509_ca_pem:
        raise errors.OpPrereqError("Missing source X509 CA",
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
      self.source_x509_ca = cert
      src_instance_name = self.op.source_instance_name
      if not src_instance_name:
        raise errors.OpPrereqError("Missing source instance name",
      self.source_instance_name = \
        netutils.GetHostname(name=src_instance_name).name
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
                                 self.op.mode, errors.ECODE_INVAL)
  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation.

    NOTE(review): some "else:" branches and error-code arguments appear
    elided from this excerpt.

    """
    self.needed_locks = {}

    instance_name = self.op.instance_name
    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name, errors.ECODE_EXISTS)

    self.add_locks[locking.LEVEL_INSTANCE] = instance_name

    if self.op.iallocator:
      # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
      # specifying a group on instance creation and then selecting nodes from
      # that group
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
      self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist
      # Lock resources of instance's primary and secondary nodes (copy to
      # prevent accidential modification)
      self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)

    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = self.op.src_node
      src_path = self.op.src_path

      if src_path is None:
        self.op.src_path = src_path = self.op.instance_name

      if src_node is None:
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from a path"
                                     " requires a source node option",
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        if not os.path.isabs(src_path):
          self.op.src_path = src_path = \
            utils.PathJoin(constants.EXPORT_DIR, src_path)
  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    NOTE(review): several keyword arguments of the IAllocator
    constructor, its closing paren, and the "if not ial.success:"
    guard appear elided from this excerpt.

    """
    nics = [n.ToDict() for n in self.nics]
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_ALLOC,
                     name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     vcpus=self.be_full[constants.BE_VCPUS],
                     memory=self.be_full[constants.BE_MAXMEM],
                     hypervisor=self.op.hypervisor,

    ial.Run(self.op.iallocator)

      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.result),
                                  ial.required_nodes), errors.ECODE_FAULT)
    self.op.pnode = ial.result[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(ial.result))
    # mirrored disk templates get a second (secondary) node as well
    if ial.required_nodes == 2:
      self.op.snode = ial.result[1]
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    NOTE(review): the "env = {" opening, the dict/call closings and the
    final return appear elided from this excerpt.

    """
      "ADD_MODE": self.op.mode,
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      minmem=self.be_full[constants.BE_MINMEM],
      maxmem=self.be_full[constants.BE_MAXMEM],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
             for d in self.disks],
      hypervisor_name=self.op.hypervisor,
  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # hooks run on the master, the primary node and any secondaries
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
    # NOTE(review): the final return (presumably "return (nl, nl)")
    # appears elided from this excerpt -- verify
  def _ReadExportInfo(self):
    """Reads the export information from disk.

    It will override the opcode source node and path with the actual
    information, if these two were not specified before.

    @return: the export information

    NOTE(review): some loop-control lines ("continue", found/break
    bookkeeping) and the final "return export_info" appear elided
    from this excerpt.

    """
    assert self.op.mode == constants.INSTANCE_IMPORT

    src_node = self.op.src_node
    src_path = self.op.src_path

    # without a source node, search all locked nodes for the export
    if src_node is None:
      locked_nodes = self.owned_locks(locking.LEVEL_NODE)
      exp_list = self.rpc.call_export_list(locked_nodes)
      for node in exp_list:
        if exp_list[node].fail_msg:
        if src_path in exp_list[node].payload:
          self.op.src_node = src_node = node
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
        raise errors.OpPrereqError("No export found for relative path %s" %
                                   src_path, errors.ECODE_INVAL)

    _CheckNodeOnline(self, src_node)
    result = self.rpc.call_export_info(src_node, src_path)
    result.Raise("No export or invalid export found in dir %s" % src_path)

    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
    if not export_info.has_section(constants.INISECT_EXP):
      raise errors.ProgrammerError("Corrupted export config",
                                   errors.ECODE_ENVIRON)

    ei_version = export_info.get(constants.INISECT_EXP, "version")
    if (int(ei_version) != constants.EXPORT_VERSION):
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                 (ei_version, constants.EXPORT_VERSION),
                                 errors.ECODE_ENVIRON)
  def _ReadExportParams(self, einfo):
    """Use export parameters as defaults.

    In case the opcode doesn't specify (as in override) some instance
    parameters, then try to use them from the export information, if
    that declares them.

    NOTE(review): several initialisations ("disks = []", "nics = []",
    "ndict = {}"), some "else:" branches and call continuations appear
    elided from this excerpt.

    """
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")

    if self.op.disk_template is None:
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
        self.op.disk_template = einfo.get(constants.INISECT_INS,
        if self.op.disk_template not in constants.DISK_TEMPLATES:
          raise errors.OpPrereqError("Disk template specified in configuration"
                                     " file is not one of the allowed values:"
                                     " %s" % " ".join(constants.DISK_TEMPLATES))
        raise errors.OpPrereqError("No disk template specified and the export"
                                   " is missing the disk_template information",

    if not self.op.disks:
      # TODO: import the disk iv_name too
      for idx in range(constants.MAX_DISKS):
        if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
          disks.append({constants.IDISK_SIZE: disk_sz})
      self.op.disks = disks
      if not disks and self.op.disk_template != constants.DT_DISKLESS:
        raise errors.OpPrereqError("No disk info specified and the export"
                                   " is missing the disk information",

    if not self.op.nics:
      for idx in range(constants.MAX_NICS):
        if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
          for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
            v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))

    if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
      self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()

    if (self.op.hypervisor is None and
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")

    if einfo.has_section(constants.INISECT_HYP):
      # use the export parameters but do not override the ones
      # specified by the user
      for name, value in einfo.items(constants.INISECT_HYP):
        if name not in self.op.hvparams:
          self.op.hvparams[name] = value

    if einfo.has_section(constants.INISECT_BEP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_BEP):
        if name not in self.op.beparams:
          self.op.beparams[name] = value
        # Compatibility for the old "memory" be param
        if name == constants.BE_MEMORY:
          if constants.BE_MAXMEM not in self.op.beparams:
            self.op.beparams[constants.BE_MAXMEM] = value
          if constants.BE_MINMEM not in self.op.beparams:
            self.op.beparams[constants.BE_MINMEM] = value
      # try to read the parameters old style, from the main section
      for name in constants.BES_PARAMETERS:
        if (name not in self.op.beparams and
            einfo.has_option(constants.INISECT_INS, name)):
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)

    if einfo.has_section(constants.INISECT_OSP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_OSP):
        if name not in self.op.osparams:
          self.op.osparams[name] = value
  def _RevertToDefaults(self, cluster):
    """Revert the instance parameters to the default values.

    Parameters whose values match the filled-in cluster defaults are
    dropped from the opcode, so only explicit overrides remain.

    """
    # hvparams
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
    for name in self.op.hvparams.keys():
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
        del self.op.hvparams[name]
    # beparams
    be_defs = cluster.SimpleFillBE({})
    for name in self.op.beparams.keys():
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
        del self.op.beparams[name]
    # nic params
    nic_defs = cluster.SimpleFillNIC({})
    for nic in self.op.nics:
      for name in constants.NICS_PARAMETERS:
        # NOTE(review): the "del nic[name]" body of this check appears
        # elided from this excerpt
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
    # osparams
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
    for name in self.op.osparams.keys():
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
        del self.op.osparams[name]
  def _CalculateFileStorageDir(self):
    """Calculate final instance file storage dir.

    Sets C{self.instance_file_storage_dir} (left as None for
    non-file-based disk templates).

    """
    # file storage dir calculation/check
    self.instance_file_storage_dir = None
    if self.op.disk_template in constants.DTS_FILEBASED:
      # build the full file storage dir path
      # NOTE(review): the "joinargs = []" initialisation and the "else:"
      # of the selector below appear elided from this excerpt
      if self.op.disk_template == constants.DT_SHARED_FILE:
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
        get_fsd_fn = self.cfg.GetFileStorageDir

      cfg_storagedir = get_fsd_fn()
      if not cfg_storagedir:
        raise errors.OpPrereqError("Cluster file storage dir not defined")
      joinargs.append(cfg_storagedir)

      if self.op.file_storage_dir is not None:
        joinargs.append(self.op.file_storage_dir)

      joinargs.append(self.op.instance_name)

      # pylint: disable=W0142
      self.instance_file_storage_dir = utils.PathJoin(*joinargs)
9238 def CheckPrereq(self):
9239 """Check prerequisites.
9242 self._CalculateFileStorageDir()
9244 if self.op.mode == constants.INSTANCE_IMPORT:
9245 export_info = self._ReadExportInfo()
9246 self._ReadExportParams(export_info)
9248 if (not self.cfg.GetVGName() and
9249 self.op.disk_template not in constants.DTS_NOT_LVM):
9250 raise errors.OpPrereqError("Cluster does not support lvm-based"
9251 " instances", errors.ECODE_STATE)
9253 if (self.op.hypervisor is None or
9254 self.op.hypervisor == constants.VALUE_AUTO):
9255 self.op.hypervisor = self.cfg.GetHypervisorType()
9257 cluster = self.cfg.GetClusterInfo()
9258 enabled_hvs = cluster.enabled_hypervisors
9259 if self.op.hypervisor not in enabled_hvs:
9260 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9261 " cluster (%s)" % (self.op.hypervisor,
9262 ",".join(enabled_hvs)),
9265 # Check tag validity
9266 for tag in self.op.tags:
9267 objects.TaggableObject.ValidateTag(tag)
9269 # check hypervisor parameter syntax (locally)
9270 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9271 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9273 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9274 hv_type.CheckParameterSyntax(filled_hvp)
9275 self.hv_full = filled_hvp
9276 # check that we don't specify global parameters on an instance
9277 _CheckGlobalHvParams(self.op.hvparams)
9279 # fill and remember the beparams dict
9280 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9281 for param, value in self.op.beparams.iteritems():
9282 if value == constants.VALUE_AUTO:
9283 self.op.beparams[param] = default_beparams[param]
9284 objects.UpgradeBeParams(self.op.beparams)
9285 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9286 self.be_full = cluster.SimpleFillBE(self.op.beparams)
9288 # build os parameters
9289 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9291 # now that hvp/bep are in final format, let's reset to defaults,
9293 if self.op.identify_defaults:
9294 self._RevertToDefaults(cluster)
9298 for idx, nic in enumerate(self.op.nics):
9299 nic_mode_req = nic.get(constants.INIC_MODE, None)
9300 nic_mode = nic_mode_req
9301 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9302 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9304 # in routed mode, for the first nic, the default ip is 'auto'
9305 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9306 default_ip_mode = constants.VALUE_AUTO
9308 default_ip_mode = constants.VALUE_NONE
9310 # ip validity checks
9311 ip = nic.get(constants.INIC_IP, default_ip_mode)
9312 if ip is None or ip.lower() == constants.VALUE_NONE:
9314 elif ip.lower() == constants.VALUE_AUTO:
9315 if not self.op.name_check:
9316 raise errors.OpPrereqError("IP address set to auto but name checks"
9317 " have been skipped",
9319 nic_ip = self.hostname1.ip
9321 if not netutils.IPAddress.IsValid(ip):
9322 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9326 # TODO: check the ip address for uniqueness
9327 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9328 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9331 # MAC address verification
9332 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9333 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9334 mac = utils.NormalizeAndValidateMac(mac)
9337 self.cfg.ReserveMAC(mac, self.proc.GetECId())
9338 except errors.ReservationError:
9339 raise errors.OpPrereqError("MAC address %s already in use"
9340 " in cluster" % mac,
9341 errors.ECODE_NOTUNIQUE)
9343 # Build nic parameters
9344 link = nic.get(constants.INIC_LINK, None)
9345 if link == constants.VALUE_AUTO:
9346 link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9349 nicparams[constants.NIC_MODE] = nic_mode
9351 nicparams[constants.NIC_LINK] = link
9353 check_params = cluster.SimpleFillNIC(nicparams)
9354 objects.NIC.CheckParameterSyntax(check_params)
9355 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9357 # disk checks/pre-build
9358 default_vg = self.cfg.GetVGName()
9360 for disk in self.op.disks:
9361 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9362 if mode not in constants.DISK_ACCESS_SET:
9363 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9364 mode, errors.ECODE_INVAL)
9365 size = disk.get(constants.IDISK_SIZE, None)
9367 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9370 except (TypeError, ValueError):
9371 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9374 data_vg = disk.get(constants.IDISK_VG, default_vg)
9376 constants.IDISK_SIZE: size,
9377 constants.IDISK_MODE: mode,
9378 constants.IDISK_VG: data_vg,
9380 if constants.IDISK_METAVG in disk:
9381 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9382 if constants.IDISK_ADOPT in disk:
9383 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9384 self.disks.append(new_disk)
9386 if self.op.mode == constants.INSTANCE_IMPORT:
9388 for idx in range(len(self.disks)):
9389 option = "disk%d_dump" % idx
9390 if export_info.has_option(constants.INISECT_INS, option):
9391 # FIXME: are the old os-es, disk sizes, etc. useful?
9392 export_name = export_info.get(constants.INISECT_INS, option)
9393 image = utils.PathJoin(self.op.src_path, export_name)
9394 disk_images.append(image)
9396 disk_images.append(False)
9398 self.src_images = disk_images
9400 old_name = export_info.get(constants.INISECT_INS, "name")
9401 if self.op.instance_name == old_name:
9402 for idx, nic in enumerate(self.nics):
9403 if nic.mac == constants.VALUE_AUTO:
9404 nic_mac_ini = "nic%d_mac" % idx
9405 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9407 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9409 # ip ping checks (we use the same ip that was resolved in ExpandNames)
9410 if self.op.ip_check:
9411 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9412 raise errors.OpPrereqError("IP %s of instance %s already in use" %
9413 (self.check_ip, self.op.instance_name),
9414 errors.ECODE_NOTUNIQUE)
9416 #### mac address generation
9417 # By generating here the mac address both the allocator and the hooks get
9418 # the real final mac address rather than the 'auto' or 'generate' value.
9419 # There is a race condition between the generation and the instance object
9420 # creation, which means that we know the mac is valid now, but we're not
9421 # sure it will be when we actually add the instance. If things go bad
9422 # adding the instance will abort because of a duplicate mac, and the
9423 # creation job will fail.
9424 for nic in self.nics:
9425 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9426 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9430 if self.op.iallocator is not None:
9431 self._RunAllocator()
9433 # Release all unneeded node locks
9434 _ReleaseLocks(self, locking.LEVEL_NODE,
9435 keep=filter(None, [self.op.pnode, self.op.snode,
9438 #### node related checks
9440 # check primary node
9441 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9442 assert self.pnode is not None, \
9443 "Cannot retrieve locked node %s" % self.op.pnode
9445 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9446 pnode.name, errors.ECODE_STATE)
9448 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9449 pnode.name, errors.ECODE_STATE)
9450 if not pnode.vm_capable:
9451 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9452 " '%s'" % pnode.name, errors.ECODE_STATE)
9454 self.secondaries = []
9456 # mirror node verification
9457 if self.op.disk_template in constants.DTS_INT_MIRROR:
9458 if self.op.snode == pnode.name:
9459 raise errors.OpPrereqError("The secondary node cannot be the"
9460 " primary node", errors.ECODE_INVAL)
9461 _CheckNodeOnline(self, self.op.snode)
9462 _CheckNodeNotDrained(self, self.op.snode)
9463 _CheckNodeVmCapable(self, self.op.snode)
9464 self.secondaries.append(self.op.snode)
9466 snode = self.cfg.GetNodeInfo(self.op.snode)
9467 if pnode.group != snode.group:
9468 self.LogWarning("The primary and secondary nodes are in two"
9469 " different node groups; the disk parameters"
9470 " from the first disk's node group will be"
9473 nodenames = [pnode.name] + self.secondaries
9475 # disk parameters (not customizable at instance or node level)
9476 # just use the primary node parameters, ignoring the secondary.
9477 self.diskparams = self.cfg.GetNodeGroup(pnode.group).diskparams
9479 if not self.adopt_disks:
9480 # Check lv size requirements, if not adopting
9481 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9482 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9484 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9485 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9486 disk[constants.IDISK_ADOPT])
9487 for disk in self.disks])
9488 if len(all_lvs) != len(self.disks):
9489 raise errors.OpPrereqError("Duplicate volume names given for adoption",
9491 for lv_name in all_lvs:
9493 # FIXME: lv_name here is "vg/lv" need to ensure that other calls
9494 # to ReserveLV uses the same syntax
9495 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9496 except errors.ReservationError:
9497 raise errors.OpPrereqError("LV named %s used by another instance" %
9498 lv_name, errors.ECODE_NOTUNIQUE)
9500 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9501 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9503 node_lvs = self.rpc.call_lv_list([pnode.name],
9504 vg_names.payload.keys())[pnode.name]
9505 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9506 node_lvs = node_lvs.payload
9508 delta = all_lvs.difference(node_lvs.keys())
9510 raise errors.OpPrereqError("Missing logical volume(s): %s" %
9511 utils.CommaJoin(delta),
9513 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9515 raise errors.OpPrereqError("Online logical volumes found, cannot"
9516 " adopt: %s" % utils.CommaJoin(online_lvs),
9518 # update the size of disk based on what is found
9519 for dsk in self.disks:
9520 dsk[constants.IDISK_SIZE] = \
9521 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9522 dsk[constants.IDISK_ADOPT])][0]))
9524 elif self.op.disk_template == constants.DT_BLOCK:
9525 # Normalize and de-duplicate device paths
9526 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9527 for disk in self.disks])
9528 if len(all_disks) != len(self.disks):
9529 raise errors.OpPrereqError("Duplicate disk names given for adoption",
9531 baddisks = [d for d in all_disks
9532 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9534 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9535 " cannot be adopted" %
9536 (", ".join(baddisks),
9537 constants.ADOPTABLE_BLOCKDEV_ROOT),
9540 node_disks = self.rpc.call_bdev_sizes([pnode.name],
9541 list(all_disks))[pnode.name]
9542 node_disks.Raise("Cannot get block device information from node %s" %
9544 node_disks = node_disks.payload
9545 delta = all_disks.difference(node_disks.keys())
9547 raise errors.OpPrereqError("Missing block device(s): %s" %
9548 utils.CommaJoin(delta),
9550 for dsk in self.disks:
9551 dsk[constants.IDISK_SIZE] = \
9552 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
9554 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
9556 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
9557 # check OS parameters (remotely)
9558 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
9560 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
9562 # memory check on primary node
9563 #TODO(dynmem): use MINMEM for checking
9565 _CheckNodeFreeMemory(self, self.pnode.name,
9566 "creating instance %s" % self.op.instance_name,
9567 self.be_full[constants.BE_MAXMEM],
9570 self.dry_run_result = list(nodenames)
# Create the instance: generate/rename disks, add it to the cluster config,
# run OS creation or import scripts, and (per visible tail) mark it
# ADMINST_UP and start it.  NOTE(review): this is an elided listing — the
# embedded numbering has gaps (e.g. 9586-9588, 9610-9612, 9629/9631, 9635,
# 9664-9668) where original statements (else-branches, try:, raise, etc.)
# were dropped; verify against the upstream file before relying on flow.
9572 def Exec(self, feedback_fn):
9573 """Create and add the instance to the cluster.
9576 instance = self.op.instance_name
9577 pnode_name = self.pnode.name
# Node-resource locks must be a subset of node locks at this point.
9579 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
9580 self.owned_locks(locking.LEVEL_NODE)), \
9581 "Node locks differ from node resource locks"
# Hypervisors in HTS_REQ_PORT need a cluster-allocated network port.
# NOTE(review): the `else: network_port = None` branch appears elided here.
9583 ht_kind = self.op.hypervisor
9584 if ht_kind in constants.HTS_REQ_PORT:
9585 network_port = self.cfg.AllocatePort()
9589 disks = _GenerateDiskTemplate(self,
9590 self.op.disk_template,
9591 instance, pnode_name,
9594 self.instance_file_storage_dir,
9595 self.op.file_driver,
# Build the config object for the new instance; it starts ADMINST_DOWN and
# is only flipped up at the very end (see 9784).
9600 iobj = objects.Instance(name=instance, os=self.op.os_type,
9601 primary_node=pnode_name,
9602 nics=self.nics, disks=disks,
9603 disk_template=self.op.disk_template,
9604 admin_state=constants.ADMINST_DOWN,
9605 network_port=network_port,
9606 beparams=self.op.beparams,
9607 hvparams=self.op.hvparams,
9608 hypervisor=self.op.hypervisor,
9609 osparams=self.op.osparams,
9613 for tag in self.op.tags:
# Disk adoption: reuse pre-existing LVs instead of creating new storage.
9616 if self.adopt_disks:
9617 if self.op.disk_template == constants.DT_PLAIN:
9618 # rename LVs to the newly-generated names; we need to construct
9619 # 'fake' LV disks with the old data, plus the new unique_id
9620 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
9622 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
9623 rename_to.append(t_dsk.logical_id)
9624 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
9625 self.cfg.SetDiskID(t_dsk, pnode_name)
9626 result = self.rpc.call_blockdev_rename(pnode_name,
9627 zip(tmp_disks, rename_to))
# NOTE(review): "adoped" is a typo in the original runtime string; left
# untouched since this edit changes only comments.
9628 result.Raise("Failed to rename adoped LVs")
9630 feedback_fn("* creating instance disks...")
# NOTE(review): the `try:` that pairs with the except below is elided.
9632 _CreateDisks(self, iobj)
9633 except errors.OpExecError:
9634 self.LogWarning("Device creation failed, reverting...")
# On failure, tear down any partially-created disks and free DRBD minors.
9636 _RemoveDisks(self, iobj)
9638 self.cfg.ReleaseDRBDMinors(instance)
9641 feedback_fn("adding instance %s to cluster config" % instance)
9643 self.cfg.AddInstance(iobj, self.proc.GetECId())
9645 # Declare that we don't want to remove the instance lock anymore, as we've
9646 # added the instance to the config
9647 del self.remove_locks[locking.LEVEL_INSTANCE]
9649 if self.op.mode == constants.INSTANCE_IMPORT:
9650 # Release unused nodes
9651 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
9654 _ReleaseLocks(self, locking.LEVEL_NODE)
# Optionally wipe the new disks (not for adopted disks).
9657 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
9658 feedback_fn("* wiping instance disks...")
9660 _WipeDisks(self, iobj)
# Python 2 except syntax; wiping failure is logged but treated via
# disk_abort handling below rather than re-raised here.
9661 except errors.OpExecError, err:
9662 logging.exception("Wiping disks failed")
9663 self.LogWarning("Wiping instance disks failed (%s)", err)
9667 # Something is already wrong with the disks, don't do anything else
9669 elif self.op.wait_for_sync:
9670 disk_abort = not _WaitForSync(self, iobj)
9671 elif iobj.disk_template in constants.DTS_INT_MIRROR:
9672 # make sure the disks are not degraded (still sync-ing is ok)
9673 feedback_fn("* checking mirrors status")
9674 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
# If the disks aborted/degraded, roll back completely: remove disks and the
# instance from the config, then fail the job.
9679 _RemoveDisks(self, iobj)
9680 self.cfg.RemoveInstance(iobj.name)
9681 # Make sure the instance lock gets removed
9682 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
9683 raise errors.OpExecError("There are some degraded disks for"
9686 # Release all node resource locks
9687 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
# OS installation / import phase (skipped for diskless and adopted disks).
9689 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
9690 if self.op.mode == constants.INSTANCE_CREATE:
9691 if not self.op.no_install:
# Pause DRBD sync during OS install to speed it up, then resume after.
9692 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
9693 not self.op.wait_for_sync)
9695 feedback_fn("* pausing disk sync to install instance OS")
9696 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9698 for idx, success in enumerate(result.payload):
9700 logging.warn("pause-sync of instance %s for disk %d failed",
9703 feedback_fn("* running the instance OS create scripts...")
9704 # FIXME: pass debug option from opcode to backend
9706 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
9707 self.op.debug_level)
9709 feedback_fn("* resuming disk sync")
9710 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9712 for idx, success in enumerate(result.payload):
9714 logging.warn("resume-sync of instance %s for disk %d failed",
9717 os_add_result.Raise("Could not add os for instance %s"
9718 " on node %s" % (instance, pnode_name))
# Local import: stream each exported disk image onto the new disks.
9720 elif self.op.mode == constants.INSTANCE_IMPORT:
9721 feedback_fn("* running the instance OS import scripts...")
9725 for idx, image in enumerate(self.src_images):
9729 # FIXME: pass debug option from opcode to backend
9730 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
9731 constants.IEIO_FILE, (image, ),
9732 constants.IEIO_SCRIPT,
9733 (iobj.disks[idx], idx),
9735 transfers.append(dt)
9738 masterd.instance.TransferInstanceData(self, feedback_fn,
9739 self.op.src_node, pnode_name,
9740 self.pnode.secondary_ip,
9742 if not compat.all(import_result):
9743 self.LogWarning("Some disks for instance %s on node %s were not"
9744 " imported successfully" % (instance, pnode_name))
# Remote import: pull disk data from another cluster over X509-secured
# import/export daemons.
9746 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9747 feedback_fn("* preparing remote import...")
9748 # The source cluster will stop the instance before attempting to make a
9749 # connection. In some cases stopping an instance can take a long time,
9750 # hence the shutdown timeout is added to the connection timeout.
9751 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
9752 self.op.source_shutdown_timeout)
9753 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9755 assert iobj.primary_node == self.pnode.name
9757 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
9758 self.source_x509_ca,
9759 self._cds, timeouts)
9760 if not compat.all(disk_results):
9761 # TODO: Should the instance still be started, even if some disks
9762 # failed to import (valid for local imports, too)?
9763 self.LogWarning("Some disks for instance %s on node %s were not"
9764 " imported successfully" % (instance, pnode_name))
9766 # Run rename script on newly imported instance
9767 assert iobj.name == instance
9768 feedback_fn("Running rename script for %s" % instance)
9769 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
9770 self.source_instance_name,
9771 self.op.debug_level)
9773 self.LogWarning("Failed to run rename script for %s on node"
9774 " %s: %s" % (instance, pnode_name, result.fail_msg))
9777 # also checked in the prereq part
9778 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
9781 assert not self.owned_locks(locking.LEVEL_NODE_RES)
# Final phase (presumably guarded by self.op.start — guard elided here):
# mark the instance up in the config and start it on the primary node.
9784 iobj.admin_state = constants.ADMINST_UP
9785 self.cfg.Update(iobj, feedback_fn)
9786 logging.info("Starting instance %s on node %s", instance, pnode_name)
9787 feedback_fn("* starting instance...")
9788 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
9790 result.Raise("Could not start instance")
# Return the list of all nodes the new instance lives on.
9792 return list(iobj.all_nodes)
# No-hooks LU: computes the console access information for a single running
# instance; Exec returns console data (via _GetInstanceConsole) rather than
# performing a cluster modification.
9795 class LUInstanceConsole(NoHooksLU):
9796 """Connect to an instance's console.
9798 This is somewhat special in that it returns the command line that
9799 you need to run on the master node in order to connect to the
# All locks are taken in shared mode — this LU only reads state.
9805 def ExpandNames(self):
9806 self.share_locks = _ShareAll()
9807 self._ExpandAndLockInstance()
9809 def CheckPrereq(self):
9810 """Check prerequisites.
9812 This checks that the instance is in the cluster.
9815 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9816 assert self.instance is not None, \
9817 "Cannot retrieve locked instance %s" % self.op.instance_name
# The primary node must be online to query/serve the console.
9818 _CheckNodeOnline(self, self.instance.primary_node)
9820 def Exec(self, feedback_fn):
9821 """Connect to the console of an instance
9824 instance = self.instance
9825 node = instance.primary_node
# Ask the primary node which instances of this hypervisor are running.
9827 node_insts = self.rpc.call_instance_list([node],
9828 [instance.hypervisor])[node]
9829 node_insts.Raise("Can't get node information from %s" % node)
# Not actually running: report a precise state (error-down vs. admin-down
# vs. admin-offline) in the raised error.
9831 if instance.name not in node_insts.payload:
9832 if instance.admin_state == constants.ADMINST_UP:
9833 state = constants.INSTST_ERRORDOWN
9834 elif instance.admin_state == constants.ADMINST_DOWN:
9835 state = constants.INSTST_ADMINDOWN
9837 state = constants.INSTST_ADMINOFFLINE
9838 raise errors.OpExecError("Instance %s is not running (state %s)" %
9839 (instance.name, state))
9841 logging.debug("Connecting to console of %s on %s", instance.name, node)
9843 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
# Build the hypervisor-specific console descriptor for an instance and
# return it as a plain dict (console.ToDict()), suitable for RPC/serializing.
9846 def _GetInstanceConsole(cluster, instance):
9847 """Returns console information for an instance.
9849 @type cluster: L{objects.Cluster}
9850 @type instance: L{objects.Instance}
9854 hyper = hypervisor.GetHypervisor(instance.hypervisor)
9855 # beparams and hvparams are passed separately, to avoid editing the
9856 # instance and then saving the defaults in the instance itself.
9857 hvparams = cluster.FillHV(instance)
9858 beparams = cluster.FillBE(instance)
9859 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
# Sanity checks: the console object must reference this instance and be
# internally consistent before being handed to the caller.
9861 assert console.instance == instance.name
9862 assert console.Validate()
9864 return console.ToDict()
# LU wrapper around the TLReplaceDisks tasklet: handles argument checking,
# lock declaration (node, node-resource, and optionally nodegroup levels)
# and hook environment; the actual disk replacement lives in the tasklet.
9867 class LUInstanceReplaceDisks(LogicalUnit):
9868 """Replace the disks of an instance.
9871 HPATH = "mirrors-replace"
9872 HTYPE = constants.HTYPE_INSTANCE
# Delegate mode/remote_node/iallocator validation to the tasklet's
# static checker so LU and tasklet stay consistent.
9875 def CheckArguments(self):
9876 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
9879 def ExpandNames(self):
9880 self._ExpandAndLockInstance()
9882 assert locking.LEVEL_NODE not in self.needed_locks
9883 assert locking.LEVEL_NODE_RES not in self.needed_locks
9884 assert locking.LEVEL_NODEGROUP not in self.needed_locks
# remote_node and iallocator are mutually exclusive (also enforced in
# TLReplaceDisks.CheckArguments).
9886 assert self.op.iallocator is None or self.op.remote_node is None, \
9887 "Conflicting options"
9889 if self.op.remote_node is not None:
9890 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9892 # Warning: do not remove the locking of the new secondary here
9893 # unless DRBD8.AddChildren is changed to work in parallel;
9894 # currently it doesn't since parallel invocations of
9895 # FindUnusedMinor will conflict
9896 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
9897 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
# NOTE(review): numbering gap (9898) — the `else:` pairing this branch with
# the explicit-remote-node case above appears elided.
9899 self.needed_locks[locking.LEVEL_NODE] = []
9900 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9902 if self.op.iallocator is not None:
9903 # iallocator will select a new node in the same group
9904 self.needed_locks[locking.LEVEL_NODEGROUP] = []
9906 self.needed_locks[locking.LEVEL_NODE_RES] = []
# Build the tasklet; delay_iallocator is hard-coded False here.
9908 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
9909 self.op.iallocator, self.op.remote_node,
9910 self.op.disks, False, self.op.early_release)
9912 self.tasklets = [self.replacer]
9914 def DeclareLocks(self, level):
9915 if level == locking.LEVEL_NODEGROUP:
9916 assert self.op.remote_node is None
9917 assert self.op.iallocator is not None
9918 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
9920 self.share_locks[locking.LEVEL_NODEGROUP] = 1
9921 # Lock all groups used by instance optimistically; this requires going
9922 # via the node before it's locked, requiring verification later on
9923 self.needed_locks[locking.LEVEL_NODEGROUP] = \
9924 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9926 elif level == locking.LEVEL_NODE:
9927 if self.op.iallocator is not None:
9928 assert self.op.remote_node is None
9929 assert not self.needed_locks[locking.LEVEL_NODE]
9931 # Lock member nodes of all locked groups
9932 self.needed_locks[locking.LEVEL_NODE] = [node_name
9933 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
9934 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
9936 self._LockInstancesNodes()
# Node-resource locks mirror the node locks exactly.
9937 elif level == locking.LEVEL_NODE_RES:
9939 self.needed_locks[locking.LEVEL_NODE_RES] = \
9940 self.needed_locks[locking.LEVEL_NODE]
9942 def BuildHooksEnv(self):
9945 This runs on the master, the primary and all the secondaries.
9948 instance = self.replacer.instance
9950 "MODE": self.op.mode,
9951 "NEW_SECONDARY": self.op.remote_node,
9952 "OLD_SECONDARY": instance.secondary_nodes[0],
9954 env.update(_BuildInstanceHookEnvByObject(self, instance))
9957 def BuildHooksNodes(self):
9958 """Build hooks nodes.
# Hooks run on the master, the primary, and the (possibly new) secondary.
9961 instance = self.replacer.instance
9963 self.cfg.GetMasterNode(),
9964 instance.primary_node,
9966 if self.op.remote_node is not None:
9967 nl.append(self.op.remote_node)
9970 def CheckPrereq(self):
9971 """Check prerequisites.
# With an iallocator, nodegroup locks were taken optimistically in
# DeclareLocks; re-verify them before delegating to the tasklet's check.
9974 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
9975 self.op.iallocator is None)
9977 # Verify if node group locks are still correct
9978 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
9980 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
9982 return LogicalUnit.CheckPrereq(self)
# Tasklet performing the actual DRBD8 disk replacement; instantiated by
# LUInstanceReplaceDisks (and, per the constructor signature, usable by
# other callers).  Locking is the responsibility of the owning LU.
9985 class TLReplaceDisks(Tasklet):
9986 """Replaces disks for an instance.
9988 Note: Locking is not within the scope of this class.
# Store the replacement parameters and pre-declare the working attributes
# that CheckPrereq/_CheckPrereq2 will fill in later.
9991 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9992 disks, delay_iallocator, early_release):
9993 """Initializes this class.
9996 Tasklet.__init__(self, lu)
# Parameters as passed in by the owning LU.
9999 self.instance_name = instance_name
10001 self.iallocator_name = iallocator_name
10002 self.remote_node = remote_node
10004 self.delay_iallocator = delay_iallocator
10005 self.early_release = early_release
# Runtime data, resolved during the prereq phase (None until then).
10008 self.instance = None
10009 self.new_node = None
10010 self.target_node = None
10011 self.other_node = None
10012 self.remote_node_info = None
10013 self.node_secondary_ip = None
# Static argument validator shared with LUInstanceReplaceDisks: when
# changing the secondary (REPLACE_DISK_CHG) exactly one of iallocator /
# remote_node must be given; in every other mode neither may be given.
10016 def CheckArguments(mode, remote_node, iallocator):
10017 """Helper function for users of this class.
10020 # check for valid parameter combination
10021 if mode == constants.REPLACE_DISK_CHG:
10022 if remote_node is None and iallocator is None:
10023 raise errors.OpPrereqError("When changing the secondary either an"
10024 " iallocator script must be used or the"
10025 " new node given", errors.ECODE_INVAL)
10027 if remote_node is not None and iallocator is not None:
10028 raise errors.OpPrereqError("Give either the iallocator or the new"
10029 " secondary, not both", errors.ECODE_INVAL)
10031 elif remote_node is not None or iallocator is not None:
10032 # Not replacing the secondary
10033 raise errors.OpPrereqError("The iallocator and new node options can"
10034 " only be used when changing the"
10035 " secondary node", errors.ECODE_INVAL)
# Ask the configured iallocator plugin (relocation mode) for a new
# secondary node; raises OpPrereqError on plugin failure or a result of
# the wrong size, otherwise returns the selected node name.
10038 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10039 """Compute a new secondary node using an IAllocator.
10042 ial = IAllocator(lu.cfg, lu.rpc,
10043 mode=constants.IALLOCATOR_MODE_RELOC,
10044 name=instance_name,
10045 relocate_from=list(relocate_from))
10047 ial.Run(iallocator_name)
10049 if not ial.success:
10050 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10051 " %s" % (iallocator_name, ial.info),
10052 errors.ECODE_NORES)
# The plugin must return exactly the number of nodes it was asked for.
10054 if len(ial.result) != ial.required_nodes:
10055 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10056 " of nodes (%s), required %s" %
10058 len(ial.result), ial.required_nodes),
10059 errors.ECODE_FAULT)
10061 remote_node_name = ial.result[0]
10063 lu.LogInfo("Selected new secondary for instance '%s': %s",
10064 instance_name, remote_node_name)
10066 return remote_node_name
# Thin wrapper binding this tasklet's cfg/rpc/instance to the module-level
# faulty-disk scan for a single node.
10068 def _FindFaultyDisks(self, node_name):
10069 """Wrapper for L{_FindFaultyInstanceDisks}.
10072 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
# Probe every disk of the instance on all of its nodes via blockdev_find;
# used by the automatic replace mode, which requires activated disks.
10075 def _CheckDisksActivated(self, instance):
10076 """Checks if the instance disks are activated.
10078 @param instance: The instance to check disks
10079 @return: True if they are activated, False otherwise
10082 nodes = instance.all_nodes
10084 for idx, dev in enumerate(instance.disks):
10086 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10087 self.cfg.SetDiskID(dev, node)
10089 result = self.rpc.call_blockdev_find(node, dev)
# RPC failure or missing payload means the disk is not activated there.
10093 elif result.fail_msg or not result.payload:
# First-stage prereq check: the instance must exist, be DRBD8-based, and
# have exactly one secondary; the heavier checks live in _CheckPrereq2,
# which may be deferred to Exec when delay_iallocator is set.
10098 def CheckPrereq(self):
10099 """Check prerequisites.
10101 This checks that the instance is in the cluster.
10104 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10105 assert instance is not None, \
10106 "Cannot retrieve locked instance %s" % self.instance_name
10108 if instance.disk_template != constants.DT_DRBD8:
10109 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10110 " instances", errors.ECODE_INVAL)
10112 if len(instance.secondary_nodes) != 1:
10113 raise errors.OpPrereqError("The instance has a strange layout,"
10114 " expected one secondary but found %d" %
10115 len(instance.secondary_nodes),
10116 errors.ECODE_FAULT)
10118 if not self.delay_iallocator:
10119 self._CheckPrereq2()
# Second-stage prereq check: resolve the target/other/new nodes for the
# selected replace mode, validate them, pick the disks to replace, record
# disk parameters, and trim node/nodegroup locks to only what is needed.
10121 def _CheckPrereq2(self):
10122 """Check prerequisites, second part.
10124 This function should always be part of CheckPrereq. It was separated and is
10125 now called from Exec because during node evacuation iallocator was only
10126 called with an unmodified cluster model, not taking planned changes into
10130 instance = self.instance
10131 secondary_node = instance.secondary_nodes[0]
# Either take the explicitly given remote node, or ask the iallocator.
10133 if self.iallocator_name is None:
10134 remote_node = self.remote_node
10136 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10137 instance.name, instance.secondary_nodes)
10139 if remote_node is None:
10140 self.remote_node_info = None
10142 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10143 "Remote node '%s' is not locked" % remote_node
10145 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10146 assert self.remote_node_info is not None, \
10147 "Cannot retrieve locked node %s" % remote_node
# The new secondary may be neither the primary nor the current secondary.
10149 if remote_node == self.instance.primary_node:
10150 raise errors.OpPrereqError("The specified node is the primary node of"
10151 " the instance", errors.ECODE_INVAL)
10153 if remote_node == secondary_node:
10154 raise errors.OpPrereqError("The specified node is already the"
10155 " secondary node of the instance",
10156 errors.ECODE_INVAL)
# An explicit disk list is only meaningful for same-node replaces.
10158 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10159 constants.REPLACE_DISK_CHG):
10160 raise errors.OpPrereqError("Cannot specify disks to be replaced",
10161 errors.ECODE_INVAL)
# Automatic mode: find which side has faulty disks; exactly one side may.
10163 if self.mode == constants.REPLACE_DISK_AUTO:
10164 if not self._CheckDisksActivated(instance):
10165 raise errors.OpPrereqError("Please run activate-disks on instance %s"
10166 " first" % self.instance_name,
10167 errors.ECODE_STATE)
10168 faulty_primary = self._FindFaultyDisks(instance.primary_node)
10169 faulty_secondary = self._FindFaultyDisks(secondary_node)
10171 if faulty_primary and faulty_secondary:
10172 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10173 " one node and can not be repaired"
10174 " automatically" % self.instance_name,
10175 errors.ECODE_STATE)
10178 self.disks = faulty_primary
10179 self.target_node = instance.primary_node
10180 self.other_node = secondary_node
10181 check_nodes = [self.target_node, self.other_node]
10182 elif faulty_secondary:
10183 self.disks = faulty_secondary
10184 self.target_node = secondary_node
10185 self.other_node = instance.primary_node
10186 check_nodes = [self.target_node, self.other_node]
10192 # Non-automatic modes
10193 if self.mode == constants.REPLACE_DISK_PRI:
10194 self.target_node = instance.primary_node
10195 self.other_node = secondary_node
10196 check_nodes = [self.target_node, self.other_node]
10198 elif self.mode == constants.REPLACE_DISK_SEC:
10199 self.target_node = secondary_node
10200 self.other_node = instance.primary_node
10201 check_nodes = [self.target_node, self.other_node]
# Changing the secondary: the new node must be usable (online is checked
# below for all check_nodes; drained/vm_capable checked here).
10203 elif self.mode == constants.REPLACE_DISK_CHG:
10204 self.new_node = remote_node
10205 self.other_node = instance.primary_node
10206 self.target_node = secondary_node
10207 check_nodes = [self.new_node, self.other_node]
10209 _CheckNodeNotDrained(self.lu, remote_node)
10210 _CheckNodeVmCapable(self.lu, remote_node)
10212 old_node_info = self.cfg.GetNodeInfo(secondary_node)
10213 assert old_node_info is not None
10214 if old_node_info.offline and not self.early_release:
10215 # doesn't make sense to delay the release
10216 self.early_release = True
10217 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10218 " early-release mode", secondary_node)
10221 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10224 # If not specified all disks should be replaced
10226 self.disks = range(len(self.instance.disks))
10228 # TODO: compute disk parameters
10229 primary_node_info = self.cfg.GetNodeInfo(instance.primary_node)
10230 secondary_node_info = self.cfg.GetNodeInfo(secondary_node)
10231 if primary_node_info.group != secondary_node_info.group:
10232 self.lu.LogInfo("The instance primary and secondary nodes are in two"
10233 " different node groups; the disk parameters of the"
10234 " primary node's group will be applied.")
10236 self.diskparams = self.cfg.GetNodeGroup(primary_node_info.group).diskparams
10238 for node in check_nodes:
10239 _CheckNodeOnline(self.lu, node)
# Keep only the locks on the nodes actually involved in the operation.
10241 touched_nodes = frozenset(node_name for node_name in [self.new_node,
10244 if node_name is not None)
10246 # Release unneeded node and node resource locks
10247 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10248 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10250 # Release any owned node group
10251 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10252 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10254 # Check whether disks are valid
10255 for disk_idx in self.disks:
10256 instance.FindDisk(disk_idx)
10258 # Get secondary node IP addresses
10259 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10260 in self.cfg.GetMultiNodeInfo(touched_nodes))
# Tasklet entry point: verify lock state, optionally activate the disks of
# a down instance, dispatch to the same-node or new-secondary DRBD8
# handler, then deactivate disks again and re-verify lock state.
10262 def Exec(self, feedback_fn):
10263 """Execute disk replacement.
10265 This dispatches the disk replacement to the appropriate handler.
# Deferred second-stage prereq check (see _CheckPrereq2 docstring).
10268 if self.delay_iallocator:
10269 self._CheckPrereq2()
10272 # Verify owned locks before starting operation
10273 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10274 assert set(owned_nodes) == set(self.node_secondary_ip), \
10275 ("Incorrect node locks, owning %s, expected %s" %
10276 (owned_nodes, self.node_secondary_ip.keys()))
10277 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10278 self.lu.owned_locks(locking.LEVEL_NODE_RES))
10280 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10281 assert list(owned_instances) == [self.instance_name], \
10282 "Instance '%s' not locked" % self.instance_name
10284 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10285 "Should not own any node group lock at this point"
# Nothing selected for replacement — report and fall through.
10288 feedback_fn("No disks need replacement")
10291 feedback_fn("Replacing disk(s) %s for %s" %
10292 (utils.CommaJoin(self.disks), self.instance.name))
10294 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10296 # Activate the instance disks if we're replacing them on a down instance
10298 _StartInstanceDisks(self.lu, self.instance, True)
# NOTE(review): numbering gaps here (10297, 10299-10300) suggest an elided
# `if activate_disks:` guard and a `try:` around the dispatch below.
10301 # Should we replace the secondary node?
10302 if self.new_node is not None:
10303 fn = self._ExecDrbd8Secondary
10305 fn = self._ExecDrbd8DiskOnly
10307 result = fn(feedback_fn)
10309 # Deactivate the instance disks if we're replacing them on a
10312 _SafeShutdownInstanceDisks(self.lu, self.instance)
10314 assert not self.lu.owned_locks(locking.LEVEL_NODE)
10317 # Verify owned locks
10318 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10319 nodes = frozenset(self.node_secondary_ip)
# With early_release all resource locks must already be gone; otherwise
# only the touched nodes may still be held.
10320 assert ((self.early_release and not owned_nodes) or
10321 (not self.early_release and not (set(owned_nodes) - nodes))), \
10322 ("Not owning the correct locks, early_release=%s, owned=%r,"
10323 " nodes=%r" % (self.early_release, owned_nodes, nodes))
# Ensure the cluster volume group exists on every involved node, raising
# OpExecError otherwise.
10327 def _CheckVolumeGroup(self, nodes):
10328 self.lu.LogInfo("Checking volume groups")
10330 vgname = self.cfg.GetVGName()
10332 # Make sure volume group exists on all involved nodes
10333 results = self.rpc.call_vg_list(nodes)
10335 raise errors.OpExecError("Can't list volume groups on the nodes")
10338 res = results[node]
10339 res.Raise("Error checking node %s" % node)
10340 if vgname not in res.payload:
10341 raise errors.OpExecError("Volume group '%s' not found on node %s" %
# Verify via blockdev_find that every disk selected for replacement is
# present on the given nodes; raises OpExecError for a missing disk.
10344 def _CheckDisksExistence(self, nodes):
10345 # Check disk existence
10346 for idx, dev in enumerate(self.instance.disks):
# Only the disks selected for replacement are checked.
10347 if idx not in self.disks:
10351 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10352 self.cfg.SetDiskID(dev, node)
10354 result = self.rpc.call_blockdev_find(node, dev)
10356 msg = result.fail_msg
10357 if msg or not result.payload:
10359 msg = "disk not found"
10360 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
# Abort the replacement if any selected disk is degraded on the given node
# (consistency is checked via _CheckDiskConsistency with the ldisk flag).
10363 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10364 for idx, dev in enumerate(self.instance.disks):
10365 if idx not in self.disks:
10368 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10371 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
10373 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10374 " replace disks for instance %s" %
10375 (node_name, self.instance.name))
# For each selected disk, create a fresh data/meta LV pair on node_name
# (same-node replace only) and record old/new LVs in iv_names keyed by the
# disk's iv_name.
10377 def _CreateNewStorage(self, node_name):
10378 """Create new storage on the primary or secondary node.
10380 This is only used for same-node replaces, not for changing the
10381 secondary node, hence we don't want to modify the existing disk.
10386 for idx, dev in enumerate(self.instance.disks):
10387 if idx not in self.disks:
10390 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10392 self.cfg.SetDiskID(dev, node_name)
# Unique LV names for the new data and meta volumes of this disk.
10394 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10395 names = _GenerateUniqueNames(self.lu, lv_names)
10397 _, data_p, meta_p = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
# Reuse the VGs of the existing children; the meta LV has the fixed
# DRBD_META_SIZE, the data LV matches the disk size.
10399 vg_data = dev.children[0].logical_id[0]
10400 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10401 logical_id=(vg_data, names[0]), params=data_p)
10402 vg_meta = dev.children[1].logical_id[0]
10403 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10404 logical_id=(vg_meta, names[1]), params=meta_p)
10406 new_lvs = [lv_data, lv_meta]
10407 old_lvs = [child.Copy() for child in dev.children]
10408 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10410 # we pass force_create=True to force the LVM creation
10411 for new_lv in new_lvs:
10412 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
10413 _GetInstanceInfoText(self.instance), False)
10417 def _CheckDevices(self, node_name, iv_names):
10418 for name, (dev, _, _) in iv_names.iteritems():
10419 self.cfg.SetDiskID(dev, node_name)
10421 result = self.rpc.call_blockdev_find(node_name, dev)
10423 msg = result.fail_msg
10424 if msg or not result.payload:
10426 msg = "disk not found"
10427 raise errors.OpExecError("Can't find DRBD device %s: %s" %
10430 if result.payload.is_degraded:
10431 raise errors.OpExecError("DRBD device %s is degraded!" % name)
10433 def _RemoveOldStorage(self, node_name, iv_names):
10434 for name, (_, old_lvs, _) in iv_names.iteritems():
10435 self.lu.LogInfo("Remove logical volumes for %s" % name)
10438 self.cfg.SetDiskID(lv, node_name)
10440 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10442 self.lu.LogWarning("Can't remove old LV: %s" % msg,
10443 hint="remove unused LVs manually")
10445 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10446 """Replace a disk on the primary or secondary for DRBD 8.
10448 The algorithm for replace is quite complicated:
10450 1. for each disk to be replaced:
10452 1. create new LVs on the target node with unique names
10453 1. detach old LVs from the drbd device
10454 1. rename old LVs to name_replaced.<time_t>
10455 1. rename new LVs to old LVs
10456 1. attach the new LVs (with the old names now) to the drbd device
10458 1. wait for sync across all devices
10460 1. for each modified disk:
10462 1. remove old LVs (which have the name name_replaces.<time_t>)
10464 Failures are not very well handled.
10469 # Step: check device activation
10470 self.lu.LogStep(1, steps_total, "Check device existence")
10471 self._CheckDisksExistence([self.other_node, self.target_node])
10472 self._CheckVolumeGroup([self.target_node, self.other_node])
10474 # Step: check other node consistency
10475 self.lu.LogStep(2, steps_total, "Check peer consistency")
10476 self._CheckDisksConsistency(self.other_node,
10477 self.other_node == self.instance.primary_node,
10480 # Step: create new storage
10481 self.lu.LogStep(3, steps_total, "Allocate new storage")
10482 iv_names = self._CreateNewStorage(self.target_node)
10484 # Step: for each lv, detach+rename*2+attach
10485 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10486 for dev, old_lvs, new_lvs in iv_names.itervalues():
10487 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10489 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10491 result.Raise("Can't detach drbd from local storage on node"
10492 " %s for device %s" % (self.target_node, dev.iv_name))
10494 #cfg.Update(instance)
10496 # ok, we created the new LVs, so now we know we have the needed
10497 # storage; as such, we proceed on the target node to rename
10498 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
10499 # using the assumption that logical_id == physical_id (which in
10500 # turn is the unique_id on that node)
10502 # FIXME(iustin): use a better name for the replaced LVs
10503 temp_suffix = int(time.time())
10504 ren_fn = lambda d, suff: (d.physical_id[0],
10505 d.physical_id[1] + "_replaced-%s" % suff)
10507 # Build the rename list based on what LVs exist on the node
10508 rename_old_to_new = []
10509 for to_ren in old_lvs:
10510 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
10511 if not result.fail_msg and result.payload:
10513 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
10515 self.lu.LogInfo("Renaming the old LVs on the target node")
10516 result = self.rpc.call_blockdev_rename(self.target_node,
10518 result.Raise("Can't rename old LVs on node %s" % self.target_node)
10520 # Now we rename the new LVs to the old LVs
10521 self.lu.LogInfo("Renaming the new LVs on the target node")
10522 rename_new_to_old = [(new, old.physical_id)
10523 for old, new in zip(old_lvs, new_lvs)]
10524 result = self.rpc.call_blockdev_rename(self.target_node,
10526 result.Raise("Can't rename new LVs on node %s" % self.target_node)
10528 # Intermediate steps of in memory modifications
10529 for old, new in zip(old_lvs, new_lvs):
10530 new.logical_id = old.logical_id
10531 self.cfg.SetDiskID(new, self.target_node)
10533 # We need to modify old_lvs so that removal later removes the
10534 # right LVs, not the newly added ones; note that old_lvs is a
10536 for disk in old_lvs:
10537 disk.logical_id = ren_fn(disk, temp_suffix)
10538 self.cfg.SetDiskID(disk, self.target_node)
10540 # Now that the new lvs have the old name, we can add them to the device
10541 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
10542 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
10544 msg = result.fail_msg
10546 for new_lv in new_lvs:
10547 msg2 = self.rpc.call_blockdev_remove(self.target_node,
10550 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
10551 hint=("cleanup manually the unused logical"
10553 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
10555 cstep = itertools.count(5)
10557 if self.early_release:
10558 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10559 self._RemoveOldStorage(self.target_node, iv_names)
10560 # TODO: Check if releasing locks early still makes sense
10561 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10563 # Release all resource locks except those used by the instance
10564 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10565 keep=self.node_secondary_ip.keys())
10567 # Release all node locks while waiting for sync
10568 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10570 # TODO: Can the instance lock be downgraded here? Take the optional disk
10571 # shutdown in the caller into consideration.
10574 # This can fail as the old devices are degraded and _WaitForSync
10575 # does a combined result over all disks, so we don't check its return value
10576 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10577 _WaitForSync(self.lu, self.instance)
10579 # Check all devices manually
10580 self._CheckDevices(self.instance.primary_node, iv_names)
10582 # Step: remove old storage
10583 if not self.early_release:
10584 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10585 self._RemoveOldStorage(self.target_node, iv_names)
10587 def _ExecDrbd8Secondary(self, feedback_fn):
10588 """Replace the secondary node for DRBD 8.
10590 The algorithm for replace is quite complicated:
10591 - for all disks of the instance:
10592 - create new LVs on the new node with same names
10593 - shutdown the drbd device on the old secondary
10594 - disconnect the drbd network on the primary
10595 - create the drbd device on the new secondary
10596 - network attach the drbd on the primary, using an artifice:
10597 the drbd code for Attach() will connect to the network if it
10598 finds a device which is connected to the good local disks but
10599 not network enabled
10600 - wait for sync across all devices
10601 - remove all disks from the old secondary
10603 Failures are not very well handled.
10608 pnode = self.instance.primary_node
10610 # Step: check device activation
10611 self.lu.LogStep(1, steps_total, "Check device existence")
10612 self._CheckDisksExistence([self.instance.primary_node])
10613 self._CheckVolumeGroup([self.instance.primary_node])
10615 # Step: check other node consistency
10616 self.lu.LogStep(2, steps_total, "Check peer consistency")
10617 self._CheckDisksConsistency(self.instance.primary_node, True, True)
10619 # Step: create new storage
10620 self.lu.LogStep(3, steps_total, "Allocate new storage")
10621 for idx, dev in enumerate(self.instance.disks):
10622 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
10623 (self.new_node, idx))
10624 # we pass force_create=True to force LVM creation
10625 for new_lv in dev.children:
10626 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
10627 _GetInstanceInfoText(self.instance), False)
10629 # Step 4: dbrd minors and drbd setups changes
10630 # after this, we must manually remove the drbd minors on both the
10631 # error and the success paths
10632 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10633 minors = self.cfg.AllocateDRBDMinor([self.new_node
10634 for dev in self.instance.disks],
10635 self.instance.name)
10636 logging.debug("Allocated minors %r", minors)
10639 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
10640 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
10641 (self.new_node, idx))
10642 # create new devices on new_node; note that we create two IDs:
10643 # one without port, so the drbd will be activated without
10644 # networking information on the new node at this stage, and one
10645 # with network, for the latter activation in step 4
10646 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
10647 if self.instance.primary_node == o_node1:
10650 assert self.instance.primary_node == o_node2, "Three-node instance?"
10653 new_alone_id = (self.instance.primary_node, self.new_node, None,
10654 p_minor, new_minor, o_secret)
10655 new_net_id = (self.instance.primary_node, self.new_node, o_port,
10656 p_minor, new_minor, o_secret)
10658 iv_names[idx] = (dev, dev.children, new_net_id)
10659 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
10661 drbd_params, _, _ = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
10662 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
10663 logical_id=new_alone_id,
10664 children=dev.children,
10666 params=drbd_params)
10668 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
10669 _GetInstanceInfoText(self.instance), False)
10670 except errors.GenericError:
10671 self.cfg.ReleaseDRBDMinors(self.instance.name)
10674 # We have new devices, shutdown the drbd on the old secondary
10675 for idx, dev in enumerate(self.instance.disks):
10676 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
10677 self.cfg.SetDiskID(dev, self.target_node)
10678 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
10680 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
10681 "node: %s" % (idx, msg),
10682 hint=("Please cleanup this device manually as"
10683 " soon as possible"))
10685 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
10686 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
10687 self.instance.disks)[pnode]
10689 msg = result.fail_msg
10691 # detaches didn't succeed (unlikely)
10692 self.cfg.ReleaseDRBDMinors(self.instance.name)
10693 raise errors.OpExecError("Can't detach the disks from the network on"
10694 " old node: %s" % (msg,))
10696 # if we managed to detach at least one, we update all the disks of
10697 # the instance to point to the new secondary
10698 self.lu.LogInfo("Updating instance configuration")
10699 for dev, _, new_logical_id in iv_names.itervalues():
10700 dev.logical_id = new_logical_id
10701 self.cfg.SetDiskID(dev, self.instance.primary_node)
10703 self.cfg.Update(self.instance, feedback_fn)
10705 # Release all node locks (the configuration has been updated)
10706 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10708 # and now perform the drbd attach
10709 self.lu.LogInfo("Attaching primary drbds to new secondary"
10710 " (standalone => connected)")
10711 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
10713 self.node_secondary_ip,
10714 self.instance.disks,
10715 self.instance.name,
10717 for to_node, to_result in result.items():
10718 msg = to_result.fail_msg
10720 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
10722 hint=("please do a gnt-instance info to see the"
10723 " status of disks"))
10725 cstep = itertools.count(5)
10727 if self.early_release:
10728 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10729 self._RemoveOldStorage(self.target_node, iv_names)
10730 # TODO: Check if releasing locks early still makes sense
10731 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10733 # Release all resource locks except those used by the instance
10734 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10735 keep=self.node_secondary_ip.keys())
10737 # TODO: Can the instance lock be downgraded here? Take the optional disk
10738 # shutdown in the caller into consideration.
10741 # This can fail as the old devices are degraded and _WaitForSync
10742 # does a combined result over all disks, so we don't check its return value
10743 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10744 _WaitForSync(self.lu, self.instance)
10746 # Check all devices manually
10747 self._CheckDevices(self.instance.primary_node, iv_names)
10749 # Step: remove old storage
10750 if not self.early_release:
10751 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10752 self._RemoveOldStorage(self.target_node, iv_names)
10755 class LURepairNodeStorage(NoHooksLU):
10756 """Repairs the volume group on a node.
10761 def CheckArguments(self):
10762 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10764 storage_type = self.op.storage_type
10766 if (constants.SO_FIX_CONSISTENCY not in
10767 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
10768 raise errors.OpPrereqError("Storage units of type '%s' can not be"
10769 " repaired" % storage_type,
10770 errors.ECODE_INVAL)
10772 def ExpandNames(self):
10773 self.needed_locks = {
10774 locking.LEVEL_NODE: [self.op.node_name],
10777 def _CheckFaultyDisks(self, instance, node_name):
10778 """Ensure faulty disks abort the opcode or at least warn."""
10780 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
10782 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
10783 " node '%s'" % (instance.name, node_name),
10784 errors.ECODE_STATE)
10785 except errors.OpPrereqError, err:
10786 if self.op.ignore_consistency:
10787 self.proc.LogWarning(str(err.args[0]))
10791 def CheckPrereq(self):
10792 """Check prerequisites.
10795 # Check whether any instance on this node has faulty disks
10796 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
10797 if inst.admin_state != constants.ADMINST_UP:
10799 check_nodes = set(inst.all_nodes)
10800 check_nodes.discard(self.op.node_name)
10801 for inst_node_name in check_nodes:
10802 self._CheckFaultyDisks(inst, inst_node_name)
10804 def Exec(self, feedback_fn):
10805 feedback_fn("Repairing storage unit '%s' on %s ..." %
10806 (self.op.name, self.op.node_name))
10808 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
10809 result = self.rpc.call_storage_execute(self.op.node_name,
10810 self.op.storage_type, st_args,
10812 constants.SO_FIX_CONSISTENCY)
10813 result.Raise("Failed to repair storage unit '%s' on %s" %
10814 (self.op.name, self.op.node_name))
class LUNodeEvacuate(NoHooksLU):
  """Evacuates instances off a list of nodes.

  """
  REQ_BGL = False

  # Map opcode evacuation modes to iallocator evacuation modes
  _MODE2IALLOCATOR = {
    constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
    constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
    constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
    }
  assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
  assert (frozenset(_MODE2IALLOCATOR.values()) ==
          constants.IALLOCATOR_NEVAC_MODES)

  def CheckArguments(self):
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      assert self.op.remote_node

      if self.op.remote_node == self.op.node_name:
        raise errors.OpPrereqError("Can not use evacuated node as a new"
                                   " secondary node", errors.ECODE_INVAL)

      if self.op.mode != constants.NODE_EVAC_SEC:
        raise errors.OpPrereqError("Without the use of an iallocator only"
                                   " secondary instances can be evacuated",
                                   errors.ECODE_INVAL)

    # Declare locks
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

    # Determine nodes (via group) optimistically, needs verification once locks
    # have been acquired
    self.lock_nodes = self._DetermineNodes()

  def _DetermineNodes(self):
    """Gets the list of nodes to operate on.

    """
    if self.op.remote_node is None:
      # Iallocator will choose any node(s) in the same group
      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
    else:
      group_nodes = frozenset([self.op.remote_node])

    # Determine nodes to be locked
    return set([self.op.node_name]) | group_nodes

  def _DetermineInstances(self):
    """Builds list of instances to operate on.

    """
    assert self.op.mode in constants.NODE_EVAC_MODES

    if self.op.mode == constants.NODE_EVAC_PRI:
      # Primary instances only
      inst_fn = _GetNodePrimaryInstances
      assert self.op.remote_node is None, \
        "Evacuating primary instances requires iallocator"
    elif self.op.mode == constants.NODE_EVAC_SEC:
      # Secondary instances only
      inst_fn = _GetNodeSecondaryInstances
    else:
      # All instances
      assert self.op.mode == constants.NODE_EVAC_ALL
      inst_fn = _GetNodeInstances
      # TODO: In 2.6, change the iallocator interface to take an evacuation mode
      # per instance
      raise errors.OpPrereqError("Due to an issue with the iallocator"
                                 " interface it is not possible to evacuate"
                                 " all instances at once; specify explicitly"
                                 " whether to evacuate primary or secondary"
                                 " instances",
                                 errors.ECODE_INVAL)

    return inst_fn(self.cfg, self.op.node_name)

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        set(i.name for i in self._DetermineInstances())

    elif level == locking.LEVEL_NODEGROUP:
      # Lock node groups for all potential target nodes optimistically, needs
      # verification once nodes have been acquired
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)

    elif level == locking.LEVEL_NODE:
      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes

  def CheckPrereq(self):
    # Verify locks
    owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
    owned_nodes = self.owned_locks(locking.LEVEL_NODE)
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)

    need_nodes = self._DetermineNodes()

    if not owned_nodes.issuperset(need_nodes):
      raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
                                 " locks were acquired, current nodes are"
                                 " are '%s', used to be '%s'; retry the"
                                 " operation" %
                                 (self.op.node_name,
                                  utils.CommaJoin(need_nodes),
                                  utils.CommaJoin(owned_nodes)),
                                 errors.ECODE_STATE)

    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
    if owned_groups != wanted_groups:
      raise errors.OpExecError("Node groups changed since locks were acquired,"
                               " current groups are '%s', used to be '%s';"
                               " retry the operation" %
                               (utils.CommaJoin(wanted_groups),
                                utils.CommaJoin(owned_groups)))

    # Determine affected instances
    self.instances = self._DetermineInstances()
    self.instance_names = [i.name for i in self.instances]

    if set(self.instance_names) != owned_instances:
      raise errors.OpExecError("Instances on node '%s' changed since locks"
                               " were acquired, current instances are '%s',"
                               " used to be '%s'; retry the operation" %
                               (self.op.node_name,
                                utils.CommaJoin(self.instance_names),
                                utils.CommaJoin(owned_instances)))

    if self.instance_names:
      self.LogInfo("Evacuating instances from node '%s': %s",
                   self.op.node_name,
                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
    else:
      self.LogInfo("No instances to evacuate from node '%s'",
                   self.op.node_name)

    if self.op.remote_node is not None:
      for i in self.instances:
        if i.primary_node == self.op.remote_node:
          raise errors.OpPrereqError("Node %s is the primary node of"
                                     " instance %s, cannot use it as"
                                     " secondary" %
                                     (self.op.remote_node, i.name),
                                     errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)

    if not self.instance_names:
      # No instances to evacuate
      jobs = []

    elif self.op.iallocator is not None:
      # TODO: Implement relocation to other group
      ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
                       evac_mode=self._MODE2IALLOCATOR[self.op.mode],
                       instances=list(self.instance_names))

      ial.Run(self.op.iallocator)

      if not ial.success:
        raise errors.OpPrereqError("Can't compute node evacuation using"
                                   " iallocator '%s': %s" %
                                   (self.op.iallocator, ial.info),
                                   errors.ECODE_NORES)

      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)

    elif self.op.remote_node is not None:
      assert self.op.mode == constants.NODE_EVAC_SEC
      # one single-opcode job per instance
      jobs = [
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
                                        remote_node=self.op.remote_node,
                                        disks=[],
                                        mode=constants.REPLACE_DISK_CHG,
                                        early_release=self.op.early_release)]
        for instance_name in self.instance_names
        ]

    else:
      raise errors.ProgrammerError("No iallocator or remote node")

    return ResultWithJobs(jobs)
11016 def _SetOpEarlyRelease(early_release, op):
11017 """Sets C{early_release} flag on opcodes if available.
11021 op.early_release = early_release
11022 except AttributeError:
11023 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
11028 def _NodeEvacDest(use_nodes, group, nodes):
11029 """Returns group or nodes depending on caller's choice.
11033 return utils.CommaJoin(nodes)
def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
  """Unpacks the result of change-group and node-evacuate iallocator requests.

  Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
  L{constants.IALLOCATOR_MODE_CHG_GROUP}.

  @type lu: L{LogicalUnit}
  @param lu: Logical unit instance
  @type alloc_result: tuple/list
  @param alloc_result: Result from iallocator
  @type early_release: bool
  @param early_release: Whether to release locks early if possible
  @type use_nodes: bool
  @param use_nodes: Whether to display node names instead of groups

  """
  (moved, failed, jobs) = alloc_result

  if failed:
    # any failure aborts the whole evacuation
    failreason = utils.CommaJoin("%s (%s)" % (name, reason)
                                 for (name, reason) in failed)
    lu.LogWarning("Unable to evacuate instances %s", failreason)
    raise errors.OpExecError("Unable to evacuate instances %s" % failreason)

  if moved:
    lu.LogInfo("Instances to be moved: %s",
               utils.CommaJoin("%s (to %s)" %
                               (name, _NodeEvacDest(use_nodes, group, nodes))
                               for (name, group, nodes) in moved))

  # deserialize the opcodes and propagate the early_release flag onto them
  return [map(compat.partial(_SetOpEarlyRelease, early_release),
              map(opcodes.OpCode.LoadOpCode, ops))
          for ops in jobs]
class LUInstanceGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE][:]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "DISK": self.op.disk,
      "AMOUNT": self.op.amount,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    nodenames = list(instance.all_nodes)
    for node in nodenames:
      _CheckNodeOnline(self, node)

    self.instance = instance

    if instance.disk_template not in constants.DTS_GROWABLE:
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing", errors.ECODE_INVAL)

    self.disk = instance.FindDisk(self.op.disk)

    if instance.disk_template not in (constants.DT_FILE,
                                      constants.DT_SHARED_FILE):
      # TODO: check the free disk space for file, when that feature will be
      # supported
      _CheckNodesFreeDiskPerVG(self, nodenames,
                               self.disk.ComputeGrowth(self.op.amount))

  def Exec(self, feedback_fn):
    """Execute disk grow.

    """
    instance = self.instance
    disk = self.disk

    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block device to grow")

    feedback_fn("Growing disk %s of instance '%s' by %s" %
                (self.op.disk, instance.name,
                 utils.FormatUnit(self.op.amount, "h")))

    # First run all grow ops in dry-run mode
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
      result.Raise("Grow request failed to node %s" % node)

    # We know that (as far as we can test) operations across different
    # nodes will succeed, time to run it for real
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
      result.Raise("Grow request failed to node %s" % node)

    # TODO: Rewrite code to work properly
    # DRBD goes into sync mode for a short amount of time after executing the
    # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
    # calling "resize" in sync mode fails. Sleeping for a short amount of
    # time is a work-around.
    time.sleep(5)

    disk.RecordGrow(self.op.amount)
    self.cfg.Update(instance, feedback_fn)

    # Changes have been recorded, release node lock
    _ReleaseLocks(self, locking.LEVEL_NODE)

    # Downgrade lock while waiting for sync
    self.glm.downgrade(locking.LEVEL_INSTANCE)

    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
      if disk_abort:
        self.proc.LogWarning("Disk sync-ing has not returned a good"
                             " status; please check the instance")
      if instance.admin_state != constants.ADMINST_UP:
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
    elif instance.admin_state != constants.ADMINST_UP:
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
                           " not supposed to be running because no wait for"
                           " sync mode was requested")

    assert self.owned_locks(locking.LEVEL_NODE_RES)
    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
class LUInstanceQueryData(NoHooksLU):
  """Query runtime instance data.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

    # Use locking if requested or when non-static information is wanted
    if not (self.op.static or self.op.use_locking):
      self.LogWarning("Non-static data requested, locks need to be acquired")
      self.op.use_locking = True

    if self.op.instances or not self.op.use_locking:
      # Expand instance names right here
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
    else:
      # Will use acquired locks
      self.wanted_names = None

    if self.op.use_locking:
      self.share_locks = _ShareAll()

      if self.wanted_names is None:
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
      else:
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names

      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if self.op.use_locking and level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      assert self.op.use_locking, "Locking was not used"
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)

    self.wanted_instances = \
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))

  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Returns the status of a block device

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_find(node, dev)
    if result.offline:
      return None

    result.Raise("Can't compute disk status for %s" % instance_name)

    status = result.payload
    if status is None:
      return None

    return (status.dev_path, status.major, status.minor,
            status.sync_percent, status.estimated_time,
            status.is_degraded, status.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = dev.logical_id[0]

    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
                                              instance.name, dev)
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)

    if dev.children:
      # recursively compute the status of all child devices
      dev_children = map(compat.partial(self._ComputeDiskStatus,
                                        instance, snode),
                         dev.children)
    else:
      dev_children = []

    return {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,
      "mode": dev.mode,
      "size": dev.size,
      }

  def Exec(self, feedback_fn):
    """Gather and return data"""
    result = {}

    cluster = self.cfg.GetClusterInfo()

    pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
                                          for i in self.wanted_instances)
    for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
      if self.op.static or pnode.offline:
        remote_state = None
        if pnode.offline:
          self.LogWarning("Primary node %s is marked offline, returning static"
                          " information only for instance %s" %
                          (pnode.name, instance.name))
      else:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        remote_info = remote_info.payload
        if remote_info and "state" in remote_info:
          remote_state = "up"
        else:
          if instance.admin_state == constants.ADMINST_UP:
            remote_state = "down"
          else:
            remote_state = instance.admin_state

      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
                  instance.disks)

      result[instance.name] = {
        "name": instance.name,
        "config_state": instance.admin_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "snodes": instance.secondary_nodes,
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disk_template": instance.disk_template,
        "disks": disks,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "os_instance": instance.osparams,
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

    return result
11373 class LUInstanceSetParams(LogicalUnit):
11374 """Modifies an instances's parameters.
# NOTE(review): the listing of this class is gapped (fused line numbers
# jump and several statements/else-branches are elided); comments below
# describe only what the visible lines establish.
11377 HPATH = "instance-modify"
11378 HTYPE = constants.HTYPE_INSTANCE
# Early, lock-free validation of the opcode arguments.
11381 def CheckArguments(self):
# At least one kind of change must have been requested.
11382 if not (self.op.nics or self.op.disks or self.op.disk_template or
11383 self.op.hvparams or self.op.beparams or self.op.os_name or
11384 self.op.online_inst or self.op.offline_inst):
11385 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
11387 if self.op.hvparams:
11388 _CheckGlobalHvParams(self.op.hvparams)
# Validate each (operation, parameters) pair for disks; the counter
# initialization for disk_addremove is elided in this listing.
11392 for disk_op, disk_dict in self.op.disks:
11393 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
11394 if disk_op == constants.DDM_REMOVE:
11395 disk_addremove += 1
11397 elif disk_op == constants.DDM_ADD:
11398 disk_addremove += 1
# Any other disk_op is an index into the existing disks.
11400 if not isinstance(disk_op, int):
11401 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
11402 if not isinstance(disk_dict, dict):
11403 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
11404 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11406 if disk_op == constants.DDM_ADD:
# Default new disks to read-write access.
11407 mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
11408 if mode not in constants.DISK_ACCESS_SET:
11409 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
11410 errors.ECODE_INVAL)
11411 size = disk_dict.get(constants.IDISK_SIZE, None)
# (the "if size is None:" guard at fused line 11412 is elided here)
11413 raise errors.OpPrereqError("Required disk parameter size missing",
11414 errors.ECODE_INVAL)
# Size normalization; the try/conversion line is elided in this listing.
11417 except (TypeError, ValueError), err:
11418 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
11419 str(err), errors.ECODE_INVAL)
11420 disk_dict[constants.IDISK_SIZE] = size
11422 # modification of disk
11423 if constants.IDISK_SIZE in disk_dict:
11424 raise errors.OpPrereqError("Disk size change not possible, use"
11425 " grow-disk", errors.ECODE_INVAL)
11427 if disk_addremove > 1:
11428 raise errors.OpPrereqError("Only one disk add or remove operation"
11429 " supported at a time", errors.ECODE_INVAL)
11431 if self.op.disks and self.op.disk_template is not None:
11432 raise errors.OpPrereqError("Disk template conversion and other disk"
11433 " changes not supported at the same time",
11434 errors.ECODE_INVAL)
# Internally-mirrored templates (e.g. DRBD) need an explicit secondary.
11436 if (self.op.disk_template and
11437 self.op.disk_template in constants.DTS_INT_MIRROR and
11438 self.op.remote_node is None):
11439 raise errors.OpPrereqError("Changing the disk template to a mirrored"
11440 " one requires specifying a secondary node",
11441 errors.ECODE_INVAL)
# Same validation pattern for NICs; nic_addremove initialization and the
# increment lines under REMOVE/ADD are elided in this listing.
11445 for nic_op, nic_dict in self.op.nics:
11446 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
11447 if nic_op == constants.DDM_REMOVE:
11450 elif nic_op == constants.DDM_ADD:
11453 if not isinstance(nic_op, int):
11454 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
11455 if not isinstance(nic_dict, dict):
11456 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
11457 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11459 # nic_dict should be a dict
11460 nic_ip = nic_dict.get(constants.INIC_IP, None)
11461 if nic_ip is not None:
# The literal string "none" means "clear the IP".
11462 if nic_ip.lower() == constants.VALUE_NONE:
11463 nic_dict[constants.INIC_IP] = None
11465 if not netutils.IPAddress.IsValid(nic_ip):
11466 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
11467 errors.ECODE_INVAL)
# "bridge" is the legacy spelling of "link"; they are mutually exclusive.
11469 nic_bridge = nic_dict.get("bridge", None)
11470 nic_link = nic_dict.get(constants.INIC_LINK, None)
11471 if nic_bridge and nic_link:
11472 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
11473 " at the same time", errors.ECODE_INVAL)
11474 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
11475 nic_dict["bridge"] = None
11476 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
11477 nic_dict[constants.INIC_LINK] = None
11479 if nic_op == constants.DDM_ADD:
11480 nic_mac = nic_dict.get(constants.INIC_MAC, None)
11481 if nic_mac is None:
# New NICs default to an auto-generated MAC.
11482 nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
11484 if constants.INIC_MAC in nic_dict:
11485 nic_mac = nic_dict[constants.INIC_MAC]
11486 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11487 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
11489 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
11490 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
11491 " modifying an existing nic",
11492 errors.ECODE_INVAL)
11494 if nic_addremove > 1:
11495 raise errors.OpPrereqError("Only one NIC add or remove operation"
11496 " supported at a time", errors.ECODE_INVAL)
11498 def ExpandNames(self):
11499 self._ExpandAndLockInstance()
11500 # Can't even acquire node locks in shared mode as upcoming changes in
11501 # Ganeti 2.6 will start to modify the node object on disk conversion
11502 self.needed_locks[locking.LEVEL_NODE] = []
11503 self.needed_locks[locking.LEVEL_NODE_RES] = []
11504 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11506 def DeclareLocks(self, level):
11507 if level == locking.LEVEL_NODE:
11508 self._LockInstancesNodes()
# When converting templates, the new secondary node must be locked too.
11509 if self.op.disk_template and self.op.remote_node:
11510 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11511 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
11512 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
# Copy the already-declared node locks for the resource level.
11514 self.needed_locks[locking.LEVEL_NODE_RES] = \
11515 self.needed_locks[locking.LEVEL_NODE][:]
11517 def BuildHooksEnv(self):
11518 """Build hooks env.
11520 This runs on the master, primary and secondaries.
# The initialization of ``args`` is elided in this listing; only the
# parameters actually being changed are exported below.
11524 if constants.BE_MINMEM in self.be_new:
11525 args["minmem"] = self.be_new[constants.BE_MINMEM]
11526 if constants.BE_MAXMEM in self.be_new:
11527 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
11528 if constants.BE_VCPUS in self.be_new:
11529 args["vcpus"] = self.be_new[constants.BE_VCPUS]
11530 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
11531 # information at all.
# Merge per-index NIC overrides from the opcode into the current NIC list.
11534 nic_override = dict(self.op.nics)
11535 for idx, nic in enumerate(self.instance.nics):
11536 if idx in nic_override:
11537 this_nic_override = nic_override[idx]
11539 this_nic_override = {}
11540 if constants.INIC_IP in this_nic_override:
11541 ip = this_nic_override[constants.INIC_IP]
11544 if constants.INIC_MAC in this_nic_override:
11545 mac = this_nic_override[constants.INIC_MAC]
11548 if idx in self.nic_pnew:
11549 nicparams = self.nic_pnew[idx]
11551 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
11552 mode = nicparams[constants.NIC_MODE]
11553 link = nicparams[constants.NIC_LINK]
11554 args["nics"].append((ip, mac, mode, link))
# An added NIC is appended after the existing ones; a removed NIC is
# (by convention here) always the last one.
11555 if constants.DDM_ADD in nic_override:
11556 ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
11557 mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
11558 nicparams = self.nic_pnew[constants.DDM_ADD]
11559 mode = nicparams[constants.NIC_MODE]
11560 link = nicparams[constants.NIC_LINK]
11561 args["nics"].append((ip, mac, mode, link))
11562 elif constants.DDM_REMOVE in nic_override:
11563 del args["nics"][-1]
11565 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
11566 if self.op.disk_template:
11567 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
11571 def BuildHooksNodes(self):
11572 """Build hooks nodes.
11575 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11578 def CheckPrereq(self):
11579 """Check prerequisites.
11581 This only checks the instance list against the existing names.
11584 # checking the new params on the primary/secondary nodes
11586 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11587 cluster = self.cluster = self.cfg.GetClusterInfo()
11588 assert self.instance is not None, \
11589 "Cannot retrieve locked instance %s" % self.op.instance_name
11590 pnode = instance.primary_node
11591 nodelist = list(instance.all_nodes)
11592 pnode_info = self.cfg.GetNodeInfo(pnode)
11593 self.diskparams = self.cfg.GetNodeGroup(pnode_info.group).diskparams
# OS change: verify the target OS exists on the primary unless forced.
11596 if self.op.os_name and not self.op.force:
11597 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
11598 self.op.force_variant)
11599 instance_os = self.op.os_name
11601 instance_os = instance.os
# Disk template conversion checks: must actually change, must be a
# supported conversion pair, and the instance must be down.
11603 if self.op.disk_template:
11604 if instance.disk_template == self.op.disk_template:
11605 raise errors.OpPrereqError("Instance already has disk template %s" %
11606 instance.disk_template, errors.ECODE_INVAL)
11608 if (instance.disk_template,
11609 self.op.disk_template) not in self._DISK_CONVERSIONS:
11610 raise errors.OpPrereqError("Unsupported disk template conversion from"
11611 " %s to %s" % (instance.disk_template,
11612 self.op.disk_template),
11613 errors.ECODE_INVAL)
11614 _CheckInstanceState(self, instance, INSTANCE_DOWN,
11615 msg="cannot change disk template")
11616 if self.op.disk_template in constants.DTS_INT_MIRROR:
11617 if self.op.remote_node == pnode:
11618 raise errors.OpPrereqError("Given new secondary node %s is the same"
11619 " as the primary node of the instance" %
11620 self.op.remote_node, errors.ECODE_STATE)
11621 _CheckNodeOnline(self, self.op.remote_node)
11622 _CheckNodeNotDrained(self, self.op.remote_node)
11623 # FIXME: here we assume that the old instance type is DT_PLAIN
11624 assert instance.disk_template == constants.DT_PLAIN
11625 disks = [{constants.IDISK_SIZE: d.size,
11626 constants.IDISK_VG: d.logical_id[0]}
11627 for d in instance.disks]
11628 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
11629 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
11631 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
11632 if pnode_info.group != snode_info.group:
11633 self.LogWarning("The primary and secondary nodes are in two"
11634 " different node groups; the disk parameters"
11635 " from the first disk's node group will be"
11638 # hvparams processing
11639 if self.op.hvparams:
11640 hv_type = instance.hypervisor
11641 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
11642 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
11643 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
# Syntax check locally, then verify on all of the instance's nodes.
11646 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
11647 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
11648 self.hv_proposed = self.hv_new = hv_new # the new actual values
11649 self.hv_inst = i_hvdict # the new dict (without defaults)
11651 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
11653 self.hv_new = self.hv_inst = {}
11655 # beparams processing
11656 if self.op.beparams:
11657 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
11659 objects.UpgradeBeParams(i_bedict)
11660 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
11661 be_new = cluster.SimpleFillBE(i_bedict)
11662 self.be_proposed = self.be_new = be_new # the new actual values
11663 self.be_inst = i_bedict # the new dict (without defaults)
11665 self.be_new = self.be_inst = {}
11666 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
11667 be_old = cluster.FillBE(instance)
11669 # CPU param validation -- checking every time a paramtere is
11670 # changed to cover all cases where either CPU mask or vcpus have
11672 if (constants.BE_VCPUS in self.be_proposed and
11673 constants.HV_CPU_MASK in self.hv_proposed):
# (assignment of cpu_list from the parsed mask is elided in this listing)
11675 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
11676 # Verify mask is consistent with number of vCPUs. Can skip this
11677 # test if only 1 entry in the CPU mask, which means same mask
11678 # is applied to all vCPUs.
11679 if (len(cpu_list) > 1 and
11680 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
11681 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
11683 (self.be_proposed[constants.BE_VCPUS],
11684 self.hv_proposed[constants.HV_CPU_MASK]),
11685 errors.ECODE_INVAL)
11687 # Only perform this test if a new CPU mask is given
11688 if constants.HV_CPU_MASK in self.hv_new:
11689 # Calculate the largest CPU number requested
11690 max_requested_cpu = max(map(max, cpu_list))
11691 # Check that all of the instance's nodes have enough physical CPUs to
11692 # satisfy the requested CPU mask
11693 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
11694 max_requested_cpu + 1, instance.hypervisor)
11696 # osparams processing
11697 if self.op.osparams:
11698 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
11699 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
11700 self.os_inst = i_osdict # the new dict (without defaults)
11706 #TODO(dynmem): do the appropriate check involving MINMEM
# Memory increase: make sure the primary (and, with auto_balance, the
# secondaries) can accommodate the new maximum memory.
11707 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
11708 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
11709 mem_check_list = [pnode]
11710 if be_new[constants.BE_AUTO_BALANCE]:
11711 # either we changed auto_balance to yes or it was from before
11712 mem_check_list.extend(instance.secondary_nodes)
11713 instance_info = self.rpc.call_instance_info(pnode, instance.name,
11714 instance.hypervisor)
11715 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
11716 [instance.hypervisor])
11717 pninfo = nodeinfo[pnode]
11718 msg = pninfo.fail_msg
11720 # Assume the primary node is unreachable and go ahead
11721 self.warn.append("Can't get info from primary node %s: %s" %
11724 (_, _, (pnhvinfo, )) = pninfo.payload
11725 if not isinstance(pnhvinfo.get("memory_free", None), int):
11726 self.warn.append("Node data from primary node %s doesn't contain"
11727 " free memory information" % pnode)
11728 elif instance_info.fail_msg:
11729 self.warn.append("Can't get instance runtime information: %s" %
11730 instance_info.fail_msg)
11732 if instance_info.payload:
11733 current_mem = int(instance_info.payload["memory"])
11735 # Assume instance not running
11736 # (there is a slight race condition here, but it's not very
11737 # probable, and we have no other way to check)
11738 # TODO: Describe race condition
11740 #TODO(dynmem): do the appropriate check involving MINMEM
11741 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
11742 pnhvinfo["memory_free"])
# (the "if miss_mem > 0:" guard is elided in this listing)
11744 raise errors.OpPrereqError("This change will prevent the instance"
11745 " from starting, due to %d MB of memory"
11746 " missing on its primary node" %
11748 errors.ECODE_NORES)
11750 if be_new[constants.BE_AUTO_BALANCE]:
11751 for node, nres in nodeinfo.items():
11752 if node not in instance.secondary_nodes:
11754 nres.Raise("Can't get info from secondary node %s" % node,
11755 prereq=True, ecode=errors.ECODE_STATE)
11756 (_, _, (nhvinfo, )) = nres.payload
11757 if not isinstance(nhvinfo.get("memory_free", None), int):
11758 raise errors.OpPrereqError("Secondary node %s didn't return free"
11759 " memory information" % node,
11760 errors.ECODE_STATE)
11761 #TODO(dynmem): do the appropriate check involving MINMEM
11762 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
11763 raise errors.OpPrereqError("This change will prevent the instance"
11764 " from failover to its secondary node"
11765 " %s, due to not enough memory" % node,
11766 errors.ECODE_STATE)
# NIC prerequisite checks; nic_pinst holds the raw new params per index,
# nic_pnew (initialized in an elided line) the cluster-filled view.
11770 self.nic_pinst = {}
11771 for nic_op, nic_dict in self.op.nics:
11772 if nic_op == constants.DDM_REMOVE:
11773 if not instance.nics:
11774 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
11775 errors.ECODE_INVAL)
11777 if nic_op != constants.DDM_ADD:
# Index-based modification: validate the index range.
11779 if not instance.nics:
11780 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
11781 " no NICs" % nic_op,
11782 errors.ECODE_INVAL)
11783 if nic_op < 0 or nic_op >= len(instance.nics):
11784 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
11786 (nic_op, len(instance.nics) - 1),
11787 errors.ECODE_INVAL)
11788 old_nic_params = instance.nics[nic_op].nicparams
11789 old_nic_ip = instance.nics[nic_op].ip
11791 old_nic_params = {}
11794 update_params_dict = dict([(key, nic_dict[key])
11795 for key in constants.NICS_PARAMETERS
11796 if key in nic_dict])
# Legacy "bridge" key maps onto the link parameter.
11798 if "bridge" in nic_dict:
11799 update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
11801 new_nic_params = _GetUpdatedParams(old_nic_params,
11802 update_params_dict)
11803 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
11804 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
11805 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
11806 self.nic_pinst[nic_op] = new_nic_params
11807 self.nic_pnew[nic_op] = new_filled_nic_params
11808 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
# Bridged mode: verify the bridge exists on the primary node; a failed
# check is fatal unless forced (force branch elided in this listing).
11810 if new_nic_mode == constants.NIC_MODE_BRIDGED:
11811 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
11812 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
11814 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
11816 self.warn.append(msg)
11818 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
11819 if new_nic_mode == constants.NIC_MODE_ROUTED:
11820 if constants.INIC_IP in nic_dict:
11821 nic_ip = nic_dict[constants.INIC_IP]
11823 nic_ip = old_nic_ip
# (the "if nic_ip is None:" guard is elided in this listing)
11825 raise errors.OpPrereqError("Cannot set the nic ip to None"
11826 " on a routed nic", errors.ECODE_INVAL)
11827 if constants.INIC_MAC in nic_dict:
11828 nic_mac = nic_dict[constants.INIC_MAC]
11829 if nic_mac is None:
11830 raise errors.OpPrereqError("Cannot set the nic mac to None",
11831 errors.ECODE_INVAL)
11832 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11833 # otherwise generate the mac
11834 nic_dict[constants.INIC_MAC] = \
11835 self.cfg.GenerateMAC(self.proc.GetECId())
11837 # or validate/reserve the current one
11839 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
11840 except errors.ReservationError:
11841 raise errors.OpPrereqError("MAC address %s already in use"
11842 " in cluster" % nic_mac,
11843 errors.ECODE_NOTUNIQUE)
# Disk prerequisite checks.
11846 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
11847 raise errors.OpPrereqError("Disk operations not supported for"
11848 " diskless instances",
11849 errors.ECODE_INVAL)
11850 for disk_op, _ in self.op.disks:
11851 if disk_op == constants.DDM_REMOVE:
11852 if len(instance.disks) == 1:
11853 raise errors.OpPrereqError("Cannot remove the last disk of"
11854 " an instance", errors.ECODE_INVAL)
11855 _CheckInstanceState(self, instance, INSTANCE_DOWN,
11856 msg="cannot remove disks")
11858 if (disk_op == constants.DDM_ADD and
11859 len(instance.disks) >= constants.MAX_DISKS):
11860 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
11861 " add more" % constants.MAX_DISKS,
11862 errors.ECODE_STATE)
11863 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
11865 if disk_op < 0 or disk_op >= len(instance.disks):
11866 raise errors.OpPrereqError("Invalid disk index %s, valid values"
11868 (disk_op, len(instance.disks)),
11869 errors.ECODE_INVAL)
11871 # disabling the instance
11872 if self.op.offline_inst:
11873 _CheckInstanceState(self, instance, INSTANCE_DOWN,
11874 msg="cannot change instance state to offline")
11876 # enabling the instance
11877 if self.op.online_inst:
11878 _CheckInstanceState(self, instance, INSTANCE_OFFLINE,
11879 msg="cannot make instance go online")
11881 def _ConvertPlainToDrbd(self, feedback_fn):
11882 """Converts an instance from plain to drbd.
11885 feedback_fn("Converting template to drbd")
11886 instance = self.instance
11887 pnode = instance.primary_node
11888 snode = self.op.remote_node
11890 assert instance.disk_template == constants.DT_PLAIN
11892 # create a fake disk info for _GenerateDiskTemplate
11893 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
11894 constants.IDISK_VG: d.logical_id[0]}
11895 for d in instance.disks]
11896 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
11897 instance.name, pnode, [snode],
11898 disk_info, None, None, 0, feedback_fn,
11900 info = _GetInstanceInfoText(instance)
11901 feedback_fn("Creating aditional volumes...")
11902 # first, create the missing data and meta devices
11903 for disk in new_disks:
11904 # unfortunately this is... not too nice
# On the primary only the meta device (children[1]) is new; on the
# secondary both data and meta must be created.
11905 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
11907 for child in disk.children:
11908 _CreateSingleBlockDev(self, snode, instance, child, info, True)
11909 # at this stage, all new LVs have been created, we can rename the
11911 feedback_fn("Renaming original volumes...")
11912 rename_list = [(o, n.children[0].logical_id)
11913 for (o, n) in zip(instance.disks, new_disks)]
11914 result = self.rpc.call_blockdev_rename(pnode, rename_list)
11915 result.Raise("Failed to rename original LVs")
11917 feedback_fn("Initializing DRBD devices...")
11918 # all child devices are in place, we can now create the DRBD devices
11919 for disk in new_disks:
11920 for node in [pnode, snode]:
11921 f_create = node == pnode
11922 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
11924 # at this point, the instance has been modified
11925 instance.disk_template = constants.DT_DRBD8
11926 instance.disks = new_disks
11927 self.cfg.Update(instance, feedback_fn)
11929 # Release node locks while waiting for sync
11930 _ReleaseLocks(self, locking.LEVEL_NODE)
11932 # disks are created, waiting for sync
11933 disk_abort = not _WaitForSync(self, instance,
11934 oneshot=not self.op.wait_for_sync)
# (the "if disk_abort:" guard is elided in this listing)
11936 raise errors.OpExecError("There are some degraded disks for"
11937 " this instance, please cleanup manually")
11939 # Node resource locks will be released by caller
11941 def _ConvertDrbdToPlain(self, feedback_fn):
11942 """Converts an instance from drbd to plain.
11945 instance = self.instance
11947 assert len(instance.secondary_nodes) == 1
11948 assert instance.disk_template == constants.DT_DRBD8
11950 pnode = instance.primary_node
11951 snode = instance.secondary_nodes[0]
11952 feedback_fn("Converting template to plain")
# The plain disks are the DRBD data children (children[0]).
11954 old_disks = instance.disks
11955 new_disks = [d.children[0] for d in old_disks]
11957 # copy over size and mode
11958 for parent, child in zip(old_disks, new_disks):
11959 child.size = parent.size
11960 child.mode = parent.mode
11962 # update instance structure
11963 instance.disks = new_disks
11964 instance.disk_template = constants.DT_PLAIN
11965 self.cfg.Update(instance, feedback_fn)
11967 # Release locks in case removing disks takes a while
11968 _ReleaseLocks(self, locking.LEVEL_NODE)
11970 feedback_fn("Removing volumes on the secondary node...")
11971 for disk in old_disks:
11972 self.cfg.SetDiskID(disk, snode)
11973 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
# Removal failures only warn; the config was already updated above.
11975 self.LogWarning("Could not remove block device %s on node %s,"
11976 " continuing anyway: %s", disk.iv_name, snode, msg)
11978 feedback_fn("Removing unneeded volumes on the primary node...")
11979 for idx, disk in enumerate(old_disks):
11980 meta = disk.children[1]
11981 self.cfg.SetDiskID(meta, pnode)
11982 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
11984 self.LogWarning("Could not remove metadata for disk %d on node %s,"
11985 " continuing anyway: %s", idx, pnode, msg)
11987 # this is a DRBD disk, return its port to the pool
11988 for disk in old_disks:
11989 tcp_port = disk.logical_id[2]
11990 self.cfg.AddTcpUdpPort(tcp_port)
11992 # Node resource locks will be released by caller
11994 def Exec(self, feedback_fn):
11995 """Modifies an instance.
11997 All parameters take effect only at the next restart of the instance.
12000 # Process here the warnings from CheckPrereq, as we don't have a
12001 # feedback_fn there.
12002 for warn in self.warn:
12003 feedback_fn("WARNING: %s" % warn)
12005 assert ((self.op.disk_template is None) ^
12006 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
12007 "Not owning any node resource locks"
# ``result`` (list of applied changes; initialization elided in this
# listing) is returned to the caller for display.
12010 instance = self.instance
12012 for disk_op, disk_dict in self.op.disks:
12013 if disk_op == constants.DDM_REMOVE:
12014 # remove the last disk
12015 device = instance.disks.pop()
12016 device_idx = len(instance.disks)
12017 for node, disk in device.ComputeNodeTree(instance.primary_node):
12018 self.cfg.SetDiskID(disk, node)
12019 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
12021 self.LogWarning("Could not remove disk/%d on node %s: %s,"
12022 " continuing anyway", device_idx, node, msg)
12023 result.append(("disk/%d" % device_idx, "remove"))
12025 # if this is a DRBD disk, return its port to the pool
12026 if device.dev_type in constants.LDS_DRBD:
12027 tcp_port = device.logical_id[2]
12028 self.cfg.AddTcpUdpPort(tcp_port)
12029 elif disk_op == constants.DDM_ADD:
# File-based templates need the directory of the existing first disk.
12031 if instance.disk_template in (constants.DT_FILE,
12032 constants.DT_SHARED_FILE):
12033 file_driver, file_path = instance.disks[0].logical_id
12034 file_path = os.path.dirname(file_path)
12036 file_driver = file_path = None
12037 disk_idx_base = len(instance.disks)
12038 new_disk = _GenerateDiskTemplate(self,
12039 instance.disk_template,
12040 instance.name, instance.primary_node,
12041 instance.secondary_nodes,
12047 self.diskparams)[0]
12048 instance.disks.append(new_disk)
12049 info = _GetInstanceInfoText(instance)
12051 logging.info("Creating volume %s for instance %s",
12052 new_disk.iv_name, instance.name)
12053 # Note: this needs to be kept in sync with _CreateDisks
12055 for node in instance.all_nodes:
12056 f_create = node == instance.primary_node
12058 _CreateBlockDev(self, node, instance, new_disk,
12059 f_create, info, f_create)
12060 except errors.OpExecError, err:
12061 self.LogWarning("Failed to create volume %s (%s) on"
12063 new_disk.iv_name, new_disk, node, err)
12064 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
12065 (new_disk.size, new_disk.mode)))
12067 # change a given disk
12068 instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
12069 result.append(("disk.mode/%d" % disk_op,
12070 disk_dict[constants.IDISK_MODE]))
12072 if self.op.disk_template:
# Verify we hold all node and node-resource locks before converting.
12074 check_nodes = set(instance.all_nodes)
12075 if self.op.remote_node:
12076 check_nodes.add(self.op.remote_node)
12077 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
12078 owned = self.owned_locks(level)
12079 assert not (check_nodes - owned), \
12080 ("Not owning the correct locks, owning %r, expected at least %r" %
12081 (owned, check_nodes))
12083 r_shut = _ShutdownInstanceDisks(self, instance)
# (the "if not r_shut:" guard is elided in this listing)
12085 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
12086 " proceed with disk template conversion")
12087 mode = (instance.disk_template, self.op.disk_template)
# Dispatch to the conversion helper registered in _DISK_CONVERSIONS.
12089 self._DISK_CONVERSIONS[mode](self, feedback_fn)
12091 self.cfg.ReleaseDRBDMinors(instance.name)
12093 result.append(("disk_template", self.op.disk_template))
12095 assert instance.disk_template == self.op.disk_template, \
12096 ("Expected disk template '%s', found '%s'" %
12097 (self.op.disk_template, instance.disk_template))
12099 # Release node and resource locks if there are any (they might already have
12100 # been released during disk conversion)
12101 _ReleaseLocks(self, locking.LEVEL_NODE)
12102 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
# Apply NIC changes to the in-memory instance object.
12105 for nic_op, nic_dict in self.op.nics:
12106 if nic_op == constants.DDM_REMOVE:
12107 # remove the last nic
12108 del instance.nics[-1]
12109 result.append(("nic.%d" % len(instance.nics), "remove"))
12110 elif nic_op == constants.DDM_ADD:
12111 # mac and bridge should be set, by now
12112 mac = nic_dict[constants.INIC_MAC]
12113 ip = nic_dict.get(constants.INIC_IP, None)
12114 nicparams = self.nic_pinst[constants.DDM_ADD]
12115 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
12116 instance.nics.append(new_nic)
12117 result.append(("nic.%d" % (len(instance.nics) - 1),
12118 "add:mac=%s,ip=%s,mode=%s,link=%s" %
12119 (new_nic.mac, new_nic.ip,
12120 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
12121 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
12124 for key in (constants.INIC_MAC, constants.INIC_IP):
12125 if key in nic_dict:
12126 setattr(instance.nics[nic_op], key, nic_dict[key])
12127 if nic_op in self.nic_pinst:
12128 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
12129 for key, val in nic_dict.iteritems():
12130 result.append(("nic.%s/%d" % (key, nic_op), val))
# Parameter dictionaries computed in CheckPrereq are committed here.
12133 if self.op.hvparams:
12134 instance.hvparams = self.hv_inst
12135 for key, val in self.op.hvparams.iteritems():
12136 result.append(("hv/%s" % key, val))
12139 if self.op.beparams:
12140 instance.beparams = self.be_inst
12141 for key, val in self.op.beparams.iteritems():
12142 result.append(("be/%s" % key, val))
12145 if self.op.os_name:
12146 instance.os = self.op.os_name
12149 if self.op.osparams:
12150 instance.osparams = self.os_inst
12151 for key, val in self.op.osparams.iteritems():
12152 result.append(("os/%s" % key, val))
12154 # online/offline instance
12155 if self.op.online_inst:
12156 self.cfg.MarkInstanceDown(instance.name)
12157 result.append(("admin_state", constants.ADMINST_DOWN))
12158 if self.op.offline_inst:
12159 self.cfg.MarkInstanceOffline(instance.name)
12160 result.append(("admin_state", constants.ADMINST_OFFLINE))
# Persist all accumulated instance changes in one config update.
12162 self.cfg.Update(instance, feedback_fn)
12164 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
12165 self.owned_locks(locking.LEVEL_NODE)),  \
12166 "All node locks should have been released by now"
# Supported template conversions; keys are (old, new) template pairs.
12170 _DISK_CONVERSIONS = {
12171 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
12172 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
12176 class LUInstanceChangeGroup(LogicalUnit):
12177 HPATH = "instance-change-group"
12178 HTYPE = constants.HTYPE_INSTANCE
12181 def ExpandNames(self):
# All locks are taken in shared mode; the actual moves are done by the
# jobs this LU submits, not by the LU itself.
12182 self.share_locks = _ShareAll()
12183 self.needed_locks = {
12184 locking.LEVEL_NODEGROUP: [],
12185 locking.LEVEL_NODE: [],
12188 self._ExpandAndLockInstance()
# Resolve requested target group names to UUIDs up front; None means
# "any group except the instance's current ones".
12190 if self.op.target_groups:
12191 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12192 self.op.target_groups)
12194 self.req_target_uuids = None
12196 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12198 def DeclareLocks(self, level):
12199 if level == locking.LEVEL_NODEGROUP:
12200 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12202 if self.req_target_uuids:
12203 lock_groups = set(self.req_target_uuids)
12205 # Lock all groups used by instance optimistically; this requires going
12206 # via the node before it's locked, requiring verification later on
12207 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
12208 lock_groups.update(instance_groups)
12210 # No target groups, need to lock all of them
12211 lock_groups = locking.ALL_SET
12213 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12215 elif level == locking.LEVEL_NODE:
12216 if self.req_target_uuids:
12217 # Lock all nodes used by instances
12218 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12219 self._LockInstancesNodes()
# Also lock members of the candidate target groups that the instance
# is not already using.
12221 # Lock all nodes in all potential target groups
12222 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
12223 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
12224 member_nodes = [node_name
12225 for group in lock_groups
12226 for node_name in self.cfg.GetNodeGroup(group).members]
12227 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
# (elided else-branch in this listing)
12229 # Lock all nodes as all groups are potential targets
12230 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12232 def CheckPrereq(self):
12233 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12234 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12235 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12237 assert (self.req_target_uuids is None or
12238 owned_groups.issuperset(self.req_target_uuids))
12239 assert owned_instances == set([self.op.instance_name])
12241 # Get instance information
12242 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12244 # Check if node groups for locked instance are still correct
# The groups were looked up optimistically in DeclareLocks; verify they
# have not changed between lookup and locking.
12245 assert owned_nodes.issuperset(self.instance.all_nodes), \
12246 ("Instance %s's nodes changed while we kept the lock" %
12247 self.op.instance_name)
12249 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
12252 if self.req_target_uuids:
12253 # User requested specific target groups
12254 self.target_uuids = self.req_target_uuids
12256 # All groups except those used by the instance are potential targets
12257 self.target_uuids = owned_groups - inst_groups
# A requested target the instance already uses is an error, not a no-op.
12259 conflicting_groups = self.target_uuids & inst_groups
12260 if conflicting_groups:
12261 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
12262 " used by the instance '%s'" %
12263 (utils.CommaJoin(conflicting_groups),
12264 self.op.instance_name),
12265 errors.ECODE_INVAL)
12267 if not self.target_uuids:
12268 raise errors.OpPrereqError("There are no possible target groups",
12269 errors.ECODE_INVAL)
  def BuildHooksEnv(self):
    """Build hooks env.

    """
    assert self.target_uuids
    # NOTE(review): the ``env = {`` opener, the closing brace and the
    # ``return env`` of this method appear to be missing from this copy;
    # only the dict entry and the update call survive.
      "TARGET_GROUPS": " ".join(self.target_uuids),
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # Hooks for this LU run only on the master node (pre and post).
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])
  def Exec(self, feedback_fn):
    """Ask the iallocator for jobs moving the instance to a target group.

    @return: L{ResultWithJobs} wrapping the jobs computed by the iallocator

    NOTE(review): one continuation line of the OpPrereqError arguments
    (presumably ``ial.info``) appears to be missing from this copy.
    """
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
    assert instances == [self.op.instance_name], "Instance not locked"
    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
                     instances=instances, target_groups=list(self.target_uuids))
    ial.Run(self.op.iallocator)
    if not ial.success:
      raise errors.OpPrereqError("Can't compute solution for changing group of"
                                 " instance '%s' using iallocator '%s': %s" %
                                 (self.op.instance_name, self.op.iallocator,
                                 errors.ECODE_NORES)
    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
    self.LogInfo("Iallocator returned %s job(s) for changing group of"
                 " instance '%s'", len(jobs), self.op.instance_name)
    # Returning ResultWithJobs makes the job processor submit the jobs.
    return ResultWithJobs(jobs)
class LUBackupQuery(NoHooksLU):
  """Query the exports list.

  NOTE(review): this copy appears to be missing some lines (e.g. the
  ``else:`` branches in ExpandNames/Exec and the ``result = {}`` /
  ``return result`` lines in Exec); code is kept verbatim.
  """
  def ExpandNames(self):
    self.needed_locks = {}
    # Listing exports only needs shared node locks
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node; nodes whose RPC failed map to C{False}

    """
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
    rpcresult = self.rpc.call_export_list(self.nodes)
    for node in rpcresult:
      if rpcresult[node].fail_msg:
        result[node] = False
        result[node] = rpcresult[node].payload
class LUBackupPrepare(NoHooksLU):
  """Prepares an instance for an export and returns useful information.

  NOTE(review): the tail of Exec (the returned dict's opener/closer and the
  non-remote branch) appears to be missing from this copy; code is kept
  verbatim.
  """
  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    """
    instance_name = self.op.instance_name
    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)
    # Cluster domain secret, used to sign/verify remote-export data
    self._cds = _GetClusterDomainSecret()

  def Exec(self, feedback_fn):
    """Prepares an instance for an export.

    """
    instance = self.instance
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      # Salt for the HMAC over the X509 key name
      salt = utils.GenerateSecret(8)
      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
      # Create a throw-away key/certificate on the primary node for the
      # duration of the remote export
      result = self.rpc.call_x509_cert_create(instance.primary_node,
                                              constants.RIE_CERT_VALIDITY)
      result.Raise("Can't create X509 key and certificate on %s" % result.node)
      (name, cert_pem) = result.payload
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
class LUBackupExport(LogicalUnit):
  """Export an instance to an image in the cluster.

  NOTE(review): this copy appears to be missing a number of lines
  (``try:`` openers before the ``except`` clauses, several ``else:``
  branches, blank lines and some continuation lines); code is kept
  verbatim.
  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE

  def CheckArguments(self):
    """Check the arguments.

    """
    self.x509_key_name = self.op.x509_key_name
    self.dest_x509_ca_pem = self.op.destination_x509_ca
    # Remote exports require both the X509 key name and the destination CA
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      if not self.x509_key_name:
        raise errors.OpPrereqError("Missing X509 key name for encryption",
                                   errors.ECODE_INVAL)
      if not self.dest_x509_ca_pem:
        raise errors.OpPrereqError("Missing destination X509 CA",
                                   errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    # Lock all nodes for local exports
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      # FIXME: lock only instance primary and destination node
      # Sad but true, for now we have do lock all nodes, as we don't know where
      # the previous export might be, and in this LU we search for it and
      # remove it from its current node. In the future we could fix this by:
      # - making a tasklet to search (share-lock all), then create the
      #   new one, then one to remove, after
      # - removing the removal operation altogether
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Last minute lock declaration."""
    # All nodes are locked anyway, so nothing to do here.

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on the master, primary node and target node.

    """
    # NOTE(review): the ``env = {`` opener, closing brace and ``return env``
    # appear to be missing from this copy.
      "EXPORT_MODE": self.op.mode,
      "EXPORT_NODE": self.op.target_node,
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      # TODO: Generic function for boolean env variables
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # Master and primary node always run the hooks; for a local export the
    # destination node does too
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      nl.append(self.op.target_node)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance and node names are valid.

    """
    instance_name = self.op.instance_name
    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)
    # Removing a running instance without shutting it down first would leave
    # it in an inconsistent state
    if (self.op.remove_instance and
        self.instance.admin_state == constants.ADMINST_UP and
        not self.op.shutdown):
      raise errors.OpPrereqError("Can not remove instance without shutting it"
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
      assert self.dst_node is not None
      _CheckNodeOnline(self, self.dst_node.name)
      _CheckNodeNotDrained(self, self.dst_node.name)
      # Not used for local exports
      self.dest_disk_info = None
      self.dest_x509_ca = None
    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
      self.dst_node = None
      # For remote mode, target_node carries per-disk destination info
      if len(self.op.target_node) != len(self.instance.disks):
        raise errors.OpPrereqError(("Received destination information for %s"
                                    " disks, but instance %s has %s disks") %
                                   (len(self.op.target_node), instance_name,
                                    len(self.instance.disks)),
                                   errors.ECODE_INVAL)
      cds = _GetClusterDomainSecret()
      # Check X509 key name
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
                                   errors.ECODE_INVAL)
      # Load and verify CA
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
                                   (msg, ), errors.ECODE_INVAL)
      self.dest_x509_ca = cert
      # Verify target information
      for idx, disk_data in enumerate(self.op.target_node):
          (host, port, magic) = \
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
        except errors.GenericError, err:
          raise errors.OpPrereqError("Target info for disk %s: %s" %
                                     (idx, err), errors.ECODE_INVAL)
        disk_info.append((host, port, magic))
      assert len(disk_info) == len(self.op.target_node)
      self.dest_disk_info = disk_info
      raise errors.ProgrammerError("Unhandled export mode %r" %
    # instance disk type verification
    # TODO: Implement export support for file-based disks
    for disk in self.instance.disks:
      if disk.dev_type == constants.LD_FILE:
        raise errors.OpPrereqError("Export not supported for instances with"
                                   " file-based disks", errors.ECODE_INVAL)

  def _CleanupExports(self, feedback_fn):
    """Removes exports of current instance from all other nodes.

    If an instance in a cluster with nodes A..D was exported to node C, its
    exports will be removed from the nodes A, B and D.

    """
    assert self.op.mode != constants.EXPORT_MODE_REMOTE
    nodelist = self.cfg.GetNodeList()
    nodelist.remove(self.dst_node.name)
    # on one-node clusters nodelist will be empty after the removal
    # if we proceed the backup would be removed because OpBackupQuery
    # substitutes an empty list with the full cluster node list.
    iname = self.instance.name
    feedback_fn("Removing old exports for instance %s" % iname)
    exportlist = self.rpc.call_export_list(nodelist)
    for node in exportlist:
      if exportlist[node].fail_msg:
      if iname in exportlist[node].payload:
        msg = self.rpc.call_export_remove(node, iname).fail_msg
        self.LogWarning("Could not remove older export for instance %s"
                        " on node %s: %s", iname, node, msg)

  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    """
    assert self.op.mode in constants.EXPORT_MODES
    instance = self.instance
    src_node = instance.primary_node
    if self.op.shutdown:
      # shutdown the instance, but not the disks
      feedback_fn("Shutting down instance %s" % instance.name)
      result = self.rpc.call_instance_shutdown(src_node, instance,
                                               self.op.shutdown_timeout)
      # TODO: Maybe ignore failures if ignore_remove_failures is set
      result.Raise("Could not shutdown instance %s on"
                   " node %s" % (instance.name, src_node))
    # set the disks ID correctly since call_instance_start needs the
    # correct drbd minor to create the symlinks
    for disk in instance.disks:
      self.cfg.SetDiskID(disk, src_node)
    activate_disks = (instance.admin_state != constants.ADMINST_UP)
      # Activate the instance disks if we're exporting a stopped instance
      feedback_fn("Activating disks for %s" % instance.name)
      _StartInstanceDisks(self, instance, None)
    helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
    helper.CreateSnapshots()
    # Restart the instance if it was running and was shut down only for the
    # snapshot (and is not being removed)
    if (self.op.shutdown and
        instance.admin_state == constants.ADMINST_UP and
        not self.op.remove_instance):
      assert not activate_disks
      feedback_fn("Starting instance %s" % instance.name)
      result = self.rpc.call_instance_start(src_node,
                                            (instance, None, None), False)
      msg = result.fail_msg
        feedback_fn("Failed to start instance: %s" % msg)
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance: %s" % msg)
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      (fin_resu, dresults) = helper.LocalExport(self.dst_node)
    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
      connect_timeout = constants.RIE_CONNECT_TIMEOUT
      timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
      (key_name, _, _) = self.x509_key_name
        OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
      (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
                                                 key_name, dest_ca_pem,
    # Check for backwards compatibility
    assert len(dresults) == len(instance.disks)
    assert compat.all(isinstance(i, bool) for i in dresults), \
           "Not all results are boolean: %r" % dresults
      feedback_fn("Deactivating disks for %s" % instance.name)
      _ShutdownInstanceDisks(self, instance)
    # Collect and report all failures before raising
    if not (compat.all(dresults) and fin_resu):
        failures.append("export finalization")
      if not compat.all(dresults):
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
        failures.append("disk export: disk(s) %s" % fdsk)
      raise errors.OpExecError("Export failed, errors in %s" %
                               utils.CommaJoin(failures))
    # At this point, the export was successful, we can cleanup/finish
    # Remove instance if requested
    if self.op.remove_instance:
      feedback_fn("Removing instance %s" % instance.name)
      _RemoveInstance(self, feedback_fn, instance,
                      self.op.ignore_remove_failures)
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self._CleanupExports(feedback_fn)
    return fin_resu, dresults
class LUBackupRemove(NoHooksLU):
  """Remove exports related to the named instance.

  NOTE(review): this copy appears to be missing some lines (e.g. the
  ``fqdn_warn``/``found`` initialisations and several ``if msg:`` guards);
  code is kept verbatim.
  """
  def ExpandNames(self):
    self.needed_locks = {}
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Remove any export.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    if not instance_name:
      instance_name = self.op.instance_name
    locked_nodes = self.owned_locks(locking.LEVEL_NODE)
    exportlist = self.rpc.call_export_list(locked_nodes)
    for node in exportlist:
      msg = exportlist[node].fail_msg
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
      if instance_name in exportlist[node].payload:
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)
    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
class LUGroupAdd(LogicalUnit):
  """Logical unit for creating node groups.

  NOTE(review): a few lines (e.g. the ``try:``/``else:`` around the
  LookupNodeGroup probe and the ``env = {`` opener in BuildHooksEnv)
  appear to be missing from this copy; code is kept verbatim.
  """
  HPATH = "group-add"
  HTYPE = constants.HTYPE_GROUP

  def ExpandNames(self):
    # We need the new group's UUID here so that we can create and acquire the
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
    # that it should not check whether the UUID exists in the configuration.
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
    self.needed_locks = {}
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name is not an existing node group
    already.

    """
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    except errors.OpPrereqError:
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
                                 " node group (UUID: %s)" %
                                 (self.op.group_name, existing_uuid),
                                 errors.ECODE_EXISTS)
    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
    if self.op.diskparams:
      # Fill in empty dicts for templates without explicit parameters, then
      # type-check each template's parameter dict
      for templ in constants.DISK_TEMPLATES:
        if templ not in self.op.diskparams:
          self.op.diskparams[templ] = {}
        utils.ForceDictType(self.op.diskparams[templ], constants.DISK_DT_TYPES)
      self.op.diskparams = self.cfg.GetClusterInfo().diskparams
    if self.op.ipolicy:
      cluster = self.cfg.GetClusterInfo()
      full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
      objects.InstancePolicy.CheckParameterSyntax(full_ipolicy)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
      "GROUP_NAME": self.op.group_name,

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # Hooks run only on the master node.
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Add the node group to the cluster.

    """
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
                                  uuid=self.group_uuid,
                                  alloc_policy=self.op.alloc_policy,
                                  ndparams=self.op.ndparams,
                                  diskparams=self.op.diskparams,
                                  ipolicy=self.op.ipolicy)
    # UUID was generated by us above, so skip the existence check
    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
    del self.remove_locks[locking.LEVEL_NODEGROUP]
class LUGroupAssignNodes(NoHooksLU):
  """Logical unit for assigning nodes to groups.

  NOTE(review): some lines appear to be missing from this copy (e.g. the
  ``@staticmethod`` decorator presumably preceding
  CheckAssignmentForSplitInstances, and a few ``if``/``continue`` lines);
  code is kept verbatim.
  """
  def ExpandNames(self):
    # These raise errors.OpPrereqError on their own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
    # We want to lock all the affected nodes and groups. We have readily
    # available the list of nodes, and the *destination* group. To gather the
    # list of "source" groups, we need to fetch node information later on.
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: set([self.group_uuid]),
      locking.LEVEL_NODE: self.op.nodes,

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
      # Try to get all affected nodes' groups without having the group or node
      # lock yet. Needs verification later in the code flow.
      groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
      self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
            frozenset(self.op.nodes))
    # Verify the optimistically-acquired group locks against the current
    # configuration; nodes may have changed groups in the meantime
    expected_locks = (set([self.group_uuid]) |
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
    actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
    if actual_locks != expected_locks:
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
                               " current groups are '%s', used to be '%s'" %
                               (utils.CommaJoin(expected_locks),
                                utils.CommaJoin(actual_locks)))
    self.node_data = self.cfg.GetAllNodesInfo()
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    instance_data = self.cfg.GetAllInstancesInfo()
    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))
    (new_splits, previous_splits) = \
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
                                             for node in self.op.nodes],
                                            self.node_data, instance_data)
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
      if not self.op.force:
        raise errors.OpExecError("The following instances get split by this"
                                 " change and --force was not given: %s" %
        self.LogWarning("This operation will split the following instances: %s",
    if previous_splits:
      self.LogWarning("In addition, these already-split instances continue"
                      " to be split across groups: %s",
                      utils.CommaJoin(utils.NiceSort(previous_splits)))

  def Exec(self, feedback_fn):
    """Assign nodes to a new group.

    """
    mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
    self.cfg.AssignGroupNodes(mods)

  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
    """Check for split instances after a node assignment.

    This method considers a series of node assignments as an atomic operation,
    and returns information about split instances after applying the set of
    changes.

    In particular, it returns information about newly split instances, and
    instances that were already split, and remain so after the change.

    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
    considered.

    @type changes: list of (node_name, new_group_uuid) pairs.
    @param changes: list of node assignments to consider.
    @param node_data: a dict with data for all nodes
    @param instance_data: a dict with all instances to consider
    @rtype: a two-tuple
    @return: a list of instances that were previously okay and result split as a
      consequence of this change, and a list of instances that were previously
      split and this change does not fix.

    """
    # Only nodes actually changing group matter
    changed_nodes = dict((node, group) for node, group in changes
                         if node_data[node].group != group)
    all_split_instances = set()
    previously_split_instances = set()

    def InstanceNodes(instance):
      # All nodes the instance lives on (primary plus secondaries)
      return [instance.primary_node] + list(instance.secondary_nodes)

    for inst in instance_data.values():
      if inst.disk_template not in constants.DTS_INT_MIRROR:
      instance_nodes = InstanceNodes(inst)
      # Split before the change?
      if len(set(node_data[node].group for node in instance_nodes)) > 1:
        previously_split_instances.add(inst.name)
      # Split after applying the (hypothetical) changes?
      if len(set(changed_nodes.get(node, node_data[node].group)
                 for node in instance_nodes)) > 1:
        all_split_instances.add(inst.name)
    return (list(all_split_instances - previously_split_instances),
            list(previously_split_instances & all_split_instances))
class _GroupQuery(_QueryBase):
  """Query runner for node groups.

  NOTE(review): a few lines appear to be missing from this copy (e.g. the
  ``if self.names:``/``else:`` split in ExpandNames, initialisations of
  ``missing``/``node_to_group`` and some ``if``/``else`` lines in
  _GetQueryData); code is kept verbatim.
  """
  FIELDS = query.GROUP_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    # Snapshot all groups and cluster info up front
    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
    self._cluster = lu.cfg.GetClusterInfo()
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
      self.wanted = [name_to_uuid[name]
                     for name in utils.NiceSort(name_to_uuid.keys())]
      # Accept names to be either names or UUIDs.
      all_uuid = frozenset(self._all_groups.keys())
      for name in self.names:
        if name in all_uuid:
          self.wanted.append(name)
        elif name in name_to_uuid:
          self.wanted.append(name_to_uuid[name])
          missing.append(name)
        raise errors.OpPrereqError("Some groups do not exist: %s" %
                                   utils.CommaJoin(missing),
                                   errors.ECODE_NOENT)

  def DeclareLocks(self, lu, level):
    # No locks needed; queries work on the config snapshot taken above.

  def _GetQueryData(self, lu):
    """Computes the list of node groups and their attributes.

    """
    do_nodes = query.GQ_NODE in self.requested_data
    do_instances = query.GQ_INST in self.requested_data
    group_to_nodes = None
    group_to_instances = None
    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
    # latter GetAllInstancesInfo() is not enough, for we have to go through
    # instance->node. Hence, we will need to process nodes even if we only need
    # instance information.
    if do_nodes or do_instances:
      all_nodes = lu.cfg.GetAllNodesInfo()
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
      for node in all_nodes.values():
        if node.group in group_to_nodes:
          group_to_nodes[node.group].append(node.name)
          node_to_group[node.name] = node.group
        all_instances = lu.cfg.GetAllInstancesInfo()
        group_to_instances = dict((uuid, []) for uuid in self.wanted)
        for instance in all_instances.values():
          node = instance.primary_node
          if node in node_to_group:
            group_to_instances[node_to_group[node]].append(instance.name)
          # Do not pass on node information if it was not requested.
          group_to_nodes = None
    return query.GroupQueryData(self._cluster,
                                [self._all_groups[uuid]
                                 for uuid in self.wanted],
                                group_to_nodes, group_to_instances)
class LUGroupQuery(NoHooksLU):
  """Logical unit for querying node groups.

  Thin wrapper delegating all work to a L{_GroupQuery} instance.
  NOTE(review): this copy appears to be missing the ``REQ_BGL = False``
  class attribute line.
  """
  def CheckArguments(self):
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
                          self.op.output_fields, False)

  def ExpandNames(self):
    self.gq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.gq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.gq.OldStyleQuery(self)
class LUGroupSetParams(LogicalUnit):
  """Modifies the parameters of a node group.

  NOTE(review): some lines appear to be missing from this copy (e.g. the
  ``all_changes = [`` opener in CheckArguments, dict closers, the
  ``env = {`` opener in BuildHooksEnv and the ``result = []`` /
  ``return result`` lines in Exec); code is kept verbatim.
  """
  HPATH = "group-modify"
  HTYPE = constants.HTYPE_GROUP

  def CheckArguments(self):
      self.op.diskparams,
      self.op.alloc_policy,
      self.op.disk_state,
    # At least one parameter must actually be modified
    if all_changes.count(None) == len(all_changes):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))
    if self.op.ndparams:
      # Merge requested ndparams on top of the group's current ones
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams
    if self.op.diskparams:
      self.new_diskparams = dict()
      for templ in constants.DISK_TEMPLATES:
        if templ not in self.op.diskparams:
          self.op.diskparams[templ] = {}
        new_templ_params = _GetUpdatedParams(self.group.diskparams[templ],
                                             self.op.diskparams[templ])
        utils.ForceDictType(new_templ_params, constants.DISK_DT_TYPES)
        self.new_diskparams[templ] = new_templ_params
    if self.op.hv_state:
      self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
                                                 self.group.hv_state_static)
    if self.op.disk_state:
      self.new_disk_state = \
        _MergeAndVerifyDiskState(self.op.disk_state,
                                 self.group.disk_state_static)
    if self.op.ipolicy:
      for key, value in self.op.ipolicy.iteritems():
        g_ipolicy[key] = _GetUpdatedParams(self.group.ipolicy.get(key, {}),
        utils.ForceDictType(g_ipolicy[key], constants.ISPECS_PARAMETER_TYPES)
      self.new_ipolicy = g_ipolicy
      objects.InstancePolicy.CheckParameterSyntax(self.new_ipolicy)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
      "GROUP_NAME": self.op.group_name,
      "NEW_ALLOC_POLICY": self.op.alloc_policy,

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # Hooks run only on the master node.
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Modifies the node group.

    """
    # Apply each modification that was requested and validated in CheckPrereq
    if self.op.ndparams:
      self.group.ndparams = self.new_ndparams
      result.append(("ndparams", str(self.group.ndparams)))
    if self.op.diskparams:
      self.group.diskparams = self.new_diskparams
      result.append(("diskparams", str(self.group.diskparams)))
    if self.op.alloc_policy:
      self.group.alloc_policy = self.op.alloc_policy
    if self.op.hv_state:
      self.group.hv_state_static = self.new_hv_state
    if self.op.disk_state:
      self.group.disk_state_static = self.new_disk_state
    if self.op.ipolicy:
      self.group.ipolicy = self.new_ipolicy
    self.cfg.Update(self.group, feedback_fn)
class LUGroupRemove(LogicalUnit):
  """Logical unit for removing an (empty) node group.

  NOTE(review): some lines appear to be missing from this copy (e.g. the
  ``if group_nodes:`` guard, the ``env = {`` opener and the ``try:``
  before RemoveNodeGroup); code is kept verbatim.
  """
  HPATH = "group-remove"
  HTYPE = constants.HTYPE_GROUP

  def ExpandNames(self):
    # This will raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name exists as a node group, that is
    empty (i.e., contains no nodes), and that is not the last group of the
    cluster.

    """
    # Verify that the group is empty.
    group_nodes = [node.name
                   for node in self.cfg.GetAllNodesInfo().values()
                   if node.group == self.group_uuid]
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
                                 (self.op.group_name,
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
                                 errors.ECODE_STATE)
    # Verify the cluster would not be left group-less.
    if len(self.cfg.GetNodeGroupList()) == 1:
      raise errors.OpPrereqError("Group '%s' is the only group,"
                                 " cannot be removed" %
                                 self.op.group_name,
                                 errors.ECODE_STATE)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
      "GROUP_NAME": self.op.group_name,

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # Hooks run only on the master node.
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Remove the node group.

    """
      self.cfg.RemoveNodeGroup(self.group_uuid)
    except errors.ConfigurationError:
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
                               (self.op.group_name, self.group_uuid))
    # Release the lock of the removed group
    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
class LUGroupRename(LogicalUnit):
  """Logical unit for renaming a node group.

  NOTE(review): a few lines appear to be missing from this copy (e.g. the
  ``try:``/``else:`` around the new-name lookup, the ``env = {`` opener,
  ``run_nodes = [mn]`` and the ``if group is None:`` guard in Exec);
  code is kept verbatim.
  """
  HPATH = "group-rename"
  HTYPE = constants.HTYPE_GROUP

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],

  def CheckPrereq(self):
    """Check prerequisites.

    Ensures requested new name is not yet used.

    """
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
    except errors.OpPrereqError:
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
                                 " node group (UUID: %s)" %
                                 (self.op.new_name, new_name_uuid),
                                 errors.ECODE_EXISTS)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
      "OLD_NAME": self.op.group_name,
      "NEW_NAME": self.op.new_name,

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    # Run hooks on the master plus all member nodes of the renamed group
    all_nodes = self.cfg.GetAllNodesInfo()
    all_nodes.pop(mn, None)
    run_nodes.extend(node.name for node in all_nodes.values()
                     if node.group == self.group_uuid)
    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    """Rename the node group.

    """
    group = self.cfg.GetNodeGroup(self.group_uuid)
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))
    group.name = self.op.new_name
    self.cfg.Update(group, feedback_fn)
    return self.op.new_name
class LUGroupEvacuate(LogicalUnit):
  """Evacuate all instances out of a node group via the iallocator.

  """
  HPATH = "group-evacuate"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    if self.op.target_groups:
      self.req_target_uuids = [self.cfg.LookupNodeGroup(g)
                               for g in self.op.target_groups]
    else:
      self.req_target_uuids = []

    if self.group_uuid in self.req_target_uuids:
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
                                 " as a target group (targets are %s)" %
                                 (self.group_uuid,
                                  utils.CommaJoin(self.req_target_uuids)),
                                 errors.ECODE_INVAL)

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if self.req_target_uuids:
        lock_groups = set([self.group_uuid] + self.req_target_uuids)

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lock_groups.update(group_uuid
                           for instance_name in
                             self.owned_locks(locking.LEVEL_INSTANCE)
                           for group_uuid in
                             self.cfg.GetInstanceNodeGroups(instance_name))
      else:
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET

      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be evacuated which
      # contain actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be evacuated and target groups
      owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
      assert self.group_uuid in owned_groups
      member_nodes = [node_name
                      for group in owned_groups
                      for node_name in self.cfg.GetNodeGroup(group).members]
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)

  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert owned_groups.issuperset(self.req_target_uuids)
    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))

    # Check if node groups for locked instances are still correct
    for instance_name in owned_instances:
      inst = self.instances[instance_name]
      assert owned_nodes.issuperset(inst.all_nodes), \
        "Instance %s's nodes changed while we kept the lock" % instance_name

      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
                                             owned_groups)

      assert self.group_uuid in inst_groups, \
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)

    if self.req_target_uuids:
      # User requested specific target groups
      self.target_uuids = self.req_target_uuids
    else:
      # All groups except the one to be evacuated are potential targets
      self.target_uuids = [group_uuid for group_uuid in owned_groups
                           if group_uuid != self.group_uuid]

      if not self.target_uuids:
        raise errors.OpPrereqError("There are no possible target groups",
                                   errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()

    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)

    run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert self.group_uuid not in self.target_uuids

    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
                     instances=instances, target_groups=self.target_uuids)

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute group evacuation using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)

    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
                 len(jobs), self.op.group_name)

    return ResultWithJobs(jobs)
class TagsLU(NoHooksLU): # pylint: disable=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """
  def ExpandNames(self):
    self.group_uuid = None
    self.needed_locks = {}

    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)

    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
    # not possible to acquire the BGL based on opcode parameters)

  def CheckPrereq(self):
    """Check prerequisites.

    Resolves the tag target object for the requested kind.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)
class LUTagsGet(TagsLU):
  """Returns the tags of a given object.

  """
  REQ_BGL = False

  def ExpandNames(self):
    TagsLU.ExpandNames(self)

    # Share locks as this is only a read operation
    self.share_locks = _ShareAll()

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())
class LUTagsSearch(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error as err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err), errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    cfg = self.cfg
    # Collect (path, taggable object) pairs for every tag-bearing entity
    tgts = [("/cluster", cfg.GetClusterInfo())]
    tgts.extend([("/instances/%s" % inst.name, inst)
                 for inst in cfg.GetAllInstancesInfo().values()])
    tgts.extend([("/nodes/%s" % node.name, node)
                 for node in cfg.GetAllNodesInfo().values()])
    tgts.extend(("/nodegroup/%s" % group.name, group)
                for group in cfg.GetAllNodeGroupsInfo().values())

    return [(path, tag)
            for (path, target) in tgts
            for tag in target.GetTags()
            if self.re.search(tag)]
class LUTagsSet(TagsLU):
  """Sets a tag on a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag(s) on the target object.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError as err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    self.cfg.Update(self.target, feedback_fn)
class LUTagsDel(TagsLU):
  """Delete a list of tags from a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()

    # Every tag to be removed must currently exist on the target
    diff_tags = del_tags - cur_tags
    if diff_tags:
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (utils.CommaJoin(diff_names), ),
                                 errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)
class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()
class LUTestJqueue(NoHooksLU):
  """Utility LU to test some aspects of the job queue.

  """
  REQ_BGL = False

  # Must be lower than default timeout for WaitForJobChange to see whether it
  # notices changed jobs
  _CLIENT_CONNECT_TIMEOUT = 20.0
  _CLIENT_CONFIRM_TIMEOUT = 60.0

  @classmethod
  def _NotifyUsingSocket(cls, cb, errcls):
    """Opens a Unix socket and waits for another program to connect.

    @type cb: callable
    @param cb: Callback to send socket name to client
    @type errcls: class
    @param errcls: Exception class to use for errors

    """
    # Using a temporary directory as there's no easy way to create temporary
    # sockets without writing a custom loop around tempfile.mktemp and
    # socket.bind
    tmpdir = tempfile.mkdtemp()
    try:
      tmpsock = utils.PathJoin(tmpdir, "sock")

      logging.debug("Creating temporary socket at %s", tmpsock)
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
      try:
        sock.bind(tmpsock)
        sock.listen(1)

        # Send details to client
        cb(tmpsock)

        # Wait for client to connect before continuing
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
        try:
          (conn, _) = sock.accept()
        except socket.error as err:
          raise errcls("Client didn't connect in time (%s)" % err)
      finally:
        sock.close()
    finally:
      # Remove as soon as client is connected
      shutil.rmtree(tmpdir)

    # Wait for client to close
    try:
      try:
        # pylint: disable=E1101
        # Instance of '_socketobject' has no ... member
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
        conn.recv(1)
      except socket.error as err:
        raise errcls("Client failed to confirm notification (%s)" % err)
    finally:
      conn.close()

  def _SendNotification(self, test, arg, sockname):
    """Sends a notification to the client.

    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)
    @type sockname: string
    @param sockname: Socket path

    """
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))

  def _Notify(self, prereq, test, arg):
    """Notifies the client of a test.

    @type prereq: bool
    @param prereq: Whether this is a prereq-phase test
    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)

    """
    if prereq:
      errcls = errors.OpPrereqError
    else:
      errcls = errors.OpExecError

    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
                                                  test, arg),
                                   errcls)

  def CheckArguments(self):
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
    self.expandnames_calls = 0

  def ExpandNames(self):
    checkargs_calls = getattr(self, "checkargs_calls", 0)
    if checkargs_calls < 1:
      raise errors.ProgrammerError("CheckArguments was not called")

    self.expandnames_calls += 1

    if self.op.notify_waitlock:
      self._Notify(True, constants.JQT_EXPANDNAMES, None)

    self.LogInfo("Expanding names")

    # Get lock on master node (just to get a lock, not for a particular reason)
    self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
      }

  def Exec(self, feedback_fn):
    if self.expandnames_calls < 1:
      raise errors.ProgrammerError("ExpandNames was not called")

    if self.op.notify_exec:
      self._Notify(False, constants.JQT_EXEC, None)

    self.LogInfo("Executing")

    if self.op.log_messages:
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
        # Report how many test messages have been sent
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)

    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")

    return True
class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has three sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, nodes) for
      easy usage

  """
  # pylint: disable=R0902
  # lots of instance attributes

  def __init__(self, cfg, rpc_runner, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc_runner
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.memory = self.disks = self.disk_template = None
    self.name = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.instances = None
    self.evac_mode = None
    self.target_groups = []
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None

    try:
      (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
    except KeyError:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)

    keyset = [n for (n, _) in keydata]

    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(compat.partial(fn, self), keydata)

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    ninfo = cfg.GetAllNodesInfo()
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_list = [n.name for n in ninfo.values() if n.vm_capable]

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    else:
      hypervisor_name = cluster_info.primary_hypervisor

    node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
                                        [hypervisor_name])
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)

    data["nodegroups"] = self._ComputeNodeGroupData(cfg)

    config_ndata = self._ComputeBasicNodeData(ninfo)
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
                                                 i_list, config_ndata)
    assert len(data["nodes"]) == len(ninfo), \
        "Incomplete node data computed"

    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)

    self.in_data = data

  @staticmethod
  def _ComputeNodeGroupData(cfg):
    """Compute node groups data.

    """
    ng = dict((guuid, {
      "name": gdata.name,
      "alloc_policy": gdata.alloc_policy,
      })
      for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())

    return ng

  @staticmethod
  def _ComputeBasicNodeData(node_cfg):
    """Compute global node data.

    @returns: a dict of name: (node dict, node config)

    """
    # fill in static (config-based) values
    node_results = dict((ninfo.name, {
      "tags": list(ninfo.GetTags()),
      "primary_ip": ninfo.primary_ip,
      "secondary_ip": ninfo.secondary_ip,
      "offline": ninfo.offline,
      "drained": ninfo.drained,
      "master_candidate": ninfo.master_candidate,
      "group": ninfo.group,
      "master_capable": ninfo.master_capable,
      "vm_capable": ninfo.vm_capable,
      })
      for ninfo in node_cfg.values())

    return node_results

  @staticmethod
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
                              node_results):
    """Compute global node data.

    @param node_results: the basic node structures as filled from the config

    """
    #TODO(dynmem): compute the right data on MAX and MIN memory
    # make a copy of the current dict
    node_results = dict(node_results)
    for nname, nresult in node_data.items():
      assert nname in node_results, "Missing basic data for node %s" % nname
      ninfo = node_cfg[nname]

      # Offline/drained nodes contribute no dynamic (RPC-collected) data
      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = _MakeLegacyNodeInfo(nresult.payload)

        for attr in ["memory_total", "memory_free", "memory_dom0",
                     "vg_size", "vg_free", "cpu_total"]:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MAXMEM]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
            i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
            remote_info["memory_free"] -= max(0, i_mem_diff)

            if iinfo.admin_state == constants.ADMINST_UP:
              i_p_up_mem += beinfo[constants.BE_MAXMEM]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info["memory_total"],
          "reserved_memory": remote_info["memory_dom0"],
          "free_memory": remote_info["memory_free"],
          "total_disk": remote_info["vg_size"],
          "free_disk": remote_info["vg_free"],
          "total_cpus": remote_info["cpu_total"],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr_dyn.update(node_results[nname])
        node_results[nname] = pnr_dyn

    return node_results

  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.

    """
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {
          "mac": nic.mac,
          "ip": nic.ip,
          "mode": filled_params[constants.NIC_MODE],
          "link": filled_params[constants.NIC_LINK],
          }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_state": iinfo.admin_state,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MAXMEM],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{constants.IDISK_SIZE: dsk.size,
                   constants.IDISK_MODE: dsk.mode}
                  for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _AllocatorGetClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_INT_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1

    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.memory,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      "hypervisor": self.hypervisor,
      }

    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _IAllocatorGetClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_MIRRORED:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if instance.disk_template in constants.DTS_INT_MIRROR and \
        len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
                                 errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }

    return request

  def _AddNodeEvacuate(self):
    """Get data for node-evacuate requests.

    """
    return {
      "instances": self.instances,
      "evac_mode": self.evac_mode,
      }

  def _AddChangeGroup(self):
    """Get data for node-evacuate requests.

    """
    return {
      "instances": self.instances,
      "target_groups": self.target_groups,
      }

  def _BuildInputData(self, fn, keydata):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    for keyname, keytype in keydata:
      if keyname not in request:
        raise errors.ProgrammerError("Request parameter %s is missing" %
                                     keyname)
      val = request[keyname]
      if not keytype(val):
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
                                     " validation, value %s, expected"
                                     " type %s" % (keyname, val, keytype))
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)

  _STRING_LIST = ht.TListOf(ht.TString)
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
     # pylint: disable=E1101
     # Class '...' has no 'OP_ID' member
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
                          opcodes.OpInstanceMigrate.OP_ID,
                          opcodes.OpInstanceReplaceDisks.OP_ID])
     })))

  _NEVAC_MOVED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TNonEmptyString,
                                  ht.TListOf(ht.TNonEmptyString),
                                 ])))
  _NEVAC_FAILED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TMaybeString,
                                 ])))
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))

  #: Mode -> (request-building function, input key/validator pairs,
  #: result validator)
  _MODE_DATA = {
    constants.IALLOCATOR_MODE_ALLOC:
      (_AddNewInstance, [
        ("name", ht.TString),
        ("memory", ht.TInt),
        ("disks", ht.TListOf(ht.TDict)),
        ("disk_template", ht.TString),
        ("os", ht.TString),
        ("tags", _STRING_LIST),
        ("nics", ht.TListOf(ht.TDict)),
        ("vcpus", ht.TInt),
        ("hypervisor", ht.TString),
        ], ht.TList),
    constants.IALLOCATOR_MODE_RELOC:
      (_AddRelocateInstance,
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
       ht.TList),
    constants.IALLOCATOR_MODE_NODE_EVAC:
      (_AddNodeEvacuate, [
        ("instances", _STRING_LIST),
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
        ], _NEVAC_RESULT),
    constants.IALLOCATOR_MODE_CHG_GROUP:
      (_AddChangeGroup, [
        ("instances", _STRING_LIST),
        ("target_groups", _STRING_LIST),
        ], _NEVAC_RESULT),
    }

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception as err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatiblity in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not self._result_check(self.result):
      # Note: OpExecError takes no error-code argument; the former extra
      # errors.ECODE_INVAL argument leaked into the exception's args
      raise errors.OpExecError("Iallocator returned invalid result,"
                               " expected %s, got %s" %
                               (self._result_check, self.result))

    if self.mode == constants.IALLOCATOR_MODE_RELOC:
      assert self.relocate_from is not None
      assert self.required_nodes == 1

      node2group = dict((name, ndata["group"])
                        for (name, ndata) in self.in_data["nodes"].items())

      fn = compat.partial(self._NodesToGroups, node2group,
                          self.in_data["nodegroups"])

      instance = self.cfg.GetInstanceInfo(self.name)
      request_groups = fn(self.relocate_from + [instance.primary_node])
      result_groups = fn(rdict["result"] + [instance.primary_node])

      if self.success and not set(result_groups).issubset(request_groups):
        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
                                 " differ from original groups (%s)" %
                                 (utils.CommaJoin(result_groups),
                                  utils.CommaJoin(request_groups)))

    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES

    self.out_data = rdict

  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
    """Returns a list of unique group names for a list of nodes.

    @type node2group: dict
    @param node2group: Map from node name to group UUID
    @type groups: dict
    @param groups: Group information
    @type nodes: list
    @param nodes: Node names

    """
    result = set()

    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        pass
      else:
        try:
          group = groups[group_uuid]
        except KeyError:
          # Can't find group, let's use UUID
          group_name = group_uuid
        else:
          group_name = group["name"]

        result.add(group_name)

    return sorted(result)
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the director and mode test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
        list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      raise errors.ProgrammerError("Uncatched mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result
#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

# Every opcode-reachable query resource must have an implementation here
assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)