4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
45 from ganeti import ssh
46 from ganeti import utils
47 from ganeti import errors
48 from ganeti import hypervisor
49 from ganeti import locking
50 from ganeti import constants
51 from ganeti import objects
52 from ganeti import serializer
53 from ganeti import ssconf
54 from ganeti import uidpool
55 from ganeti import compat
56 from ganeti import masterd
57 from ganeti import netutils
58 from ganeti import query
59 from ganeti import qlang
60 from ganeti import opcodes
62 from ganeti import rpc
64 import ganeti.masterd.instance # pylint: disable=W0611
67 #: Size of DRBD meta block device
71 INSTANCE_UP = [constants.ADMINST_UP]
72 INSTANCE_DOWN = [constants.ADMINST_DOWN]
73 INSTANCE_OFFLINE = [constants.ADMINST_OFFLINE]
74 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
75 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
79 """Data container for LU results with jobs.
81 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
82 by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
83 contained in the C{jobs} attribute and include the job IDs in the opcode
87 def __init__(self, jobs, **kwargs):
88 """Initializes this class.
90 Additional return values can be specified as keyword arguments.
92 @type jobs: list of lists of L{opcode.OpCode}
93 @param jobs: A list of lists of opcode objects
100 class LogicalUnit(object):
101 """Logical Unit base class.
103 Subclasses must follow these rules:
104 - implement ExpandNames
105 - implement CheckPrereq (except when tasklets are used)
106 - implement Exec (except when tasklets are used)
107 - implement BuildHooksEnv
108 - implement BuildHooksNodes
109 - redefine HPATH and HTYPE
110 - optionally redefine their run requirements:
111 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
113 Note that all commands require root permissions.
115 @ivar dry_run_result: the value (if any) that will be returned to the caller
116 in dry-run mode (signalled by opcode dry_run parameter)
123 def __init__(self, processor, op, context, rpc_runner):
124 """Constructor for LogicalUnit.
126 This needs to be overridden in derived classes in order to check op
130 self.proc = processor
132 self.cfg = context.cfg
133 self.glm = context.glm
135 self.owned_locks = context.glm.list_owned
136 self.context = context
137 self.rpc = rpc_runner
138 # Dicts used to declare locking needs to mcpu
139 self.needed_locks = None
140 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
142 self.remove_locks = {}
143 # Used to force good behavior when calling helper functions
144 self.recalculate_locks = {}
146 self.Log = processor.Log # pylint: disable=C0103
147 self.LogWarning = processor.LogWarning # pylint: disable=C0103
148 self.LogInfo = processor.LogInfo # pylint: disable=C0103
149 self.LogStep = processor.LogStep # pylint: disable=C0103
150 # support for dry-run
151 self.dry_run_result = None
152 # support for generic debug attribute
153 if (not hasattr(self.op, "debug_level") or
154 not isinstance(self.op.debug_level, int)):
155 self.op.debug_level = 0
160 # Validate opcode parameters and set defaults
161 self.op.Validate(True)
163 self.CheckArguments()
165 def CheckArguments(self):
166 """Check syntactic validity for the opcode arguments.
168 This method is for doing a simple syntactic check and ensure
169 validity of opcode parameters, without any cluster-related
170 checks. While the same can be accomplished in ExpandNames and/or
171 CheckPrereq, doing these separate is better because:
173 - ExpandNames is left as as purely a lock-related function
174 - CheckPrereq is run after we have acquired locks (and possible
177 The function is allowed to change the self.op attribute so that
178 later methods can no longer worry about missing parameters.
183 def ExpandNames(self):
184 """Expand names for this LU.
186 This method is called before starting to execute the opcode, and it should
187 update all the parameters of the opcode to their canonical form (e.g. a
188 short node name must be fully expanded after this method has successfully
189 completed). This way locking, hooks, logging, etc. can work correctly.
191 LUs which implement this method must also populate the self.needed_locks
192 member, as a dict with lock levels as keys, and a list of needed lock names
195 - use an empty dict if you don't need any lock
196 - if you don't need any lock at a particular level omit that level
197 - don't put anything for the BGL level
198 - if you want all locks at a level use locking.ALL_SET as a value
200 If you need to share locks (rather than acquire them exclusively) at one
201 level you can modify self.share_locks, setting a true value (usually 1) for
202 that level. By default locks are not shared.
204 This function can also define a list of tasklets, which then will be
205 executed in order instead of the usual LU-level CheckPrereq and Exec
206 functions, if those are not defined by the LU.
210 # Acquire all nodes and one instance
211 self.needed_locks = {
212 locking.LEVEL_NODE: locking.ALL_SET,
213 locking.LEVEL_INSTANCE: ['instance1.example.com'],
215 # Acquire just two nodes
216 self.needed_locks = {
217 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
220 self.needed_locks = {} # No, you can't leave it to the default value None
223 # The implementation of this method is mandatory only if the new LU is
224 # concurrent, so that old LUs don't need to be changed all at the same
227 self.needed_locks = {} # Exclusive LUs don't need locks.
229 raise NotImplementedError
231 def DeclareLocks(self, level):
232 """Declare LU locking needs for a level
234 While most LUs can just declare their locking needs at ExpandNames time,
235 sometimes there's the need to calculate some locks after having acquired
236 the ones before. This function is called just before acquiring locks at a
237 particular level, but after acquiring the ones at lower levels, and permits
238 such calculations. It can be used to modify self.needed_locks, and by
239 default it does nothing.
241 This function is only called if you have something already set in
242 self.needed_locks for the level.
244 @param level: Locking level which is going to be locked
245 @type level: member of ganeti.locking.LEVELS
249 def CheckPrereq(self):
250 """Check prerequisites for this LU.
252 This method should check that the prerequisites for the execution
253 of this LU are fulfilled. It can do internode communication, but
254 it should be idempotent - no cluster or system changes are
257 The method should raise errors.OpPrereqError in case something is
258 not fulfilled. Its return value is ignored.
260 This method should also update all the parameters of the opcode to
261 their canonical form if it hasn't been done by ExpandNames before.
264 if self.tasklets is not None:
265 for (idx, tl) in enumerate(self.tasklets):
266 logging.debug("Checking prerequisites for tasklet %s/%s",
267 idx + 1, len(self.tasklets))
272 def Exec(self, feedback_fn):
275 This method should implement the actual work. It should raise
276 errors.OpExecError for failures that are somewhat dealt with in
280 if self.tasklets is not None:
281 for (idx, tl) in enumerate(self.tasklets):
282 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
285 raise NotImplementedError
287 def BuildHooksEnv(self):
288 """Build hooks environment for this LU.
291 @return: Dictionary containing the environment that will be used for
292 running the hooks for this LU. The keys of the dict must not be prefixed
293 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
294 will extend the environment with additional variables. If no environment
295 should be defined, an empty dictionary should be returned (not C{None}).
296 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
300 raise NotImplementedError
302 def BuildHooksNodes(self):
303 """Build list of nodes to run LU's hooks.
305 @rtype: tuple; (list, list)
306 @return: Tuple containing a list of node names on which the hook
307 should run before the execution and a list of node names on which the
308 hook should run after the execution. No nodes should be returned as an
309 empty list (and not None).
310 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
314 raise NotImplementedError
316 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
317 """Notify the LU about the results of its hooks.
319 This method is called every time a hooks phase is executed, and notifies
320 the Logical Unit about the hooks' result. The LU can then use it to alter
321 its result based on the hooks. By default the method does nothing and the
322 previous result is passed back unchanged but any LU can define it if it
323 wants to use the local cluster hook-scripts somehow.
325 @param phase: one of L{constants.HOOKS_PHASE_POST} or
326 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
327 @param hook_results: the results of the multi-node hooks rpc call
328 @param feedback_fn: function used send feedback back to the caller
329 @param lu_result: the previous Exec result this LU had, or None
331 @return: the new Exec result, based on the previous result
335 # API must be kept, thus we ignore the unused argument and could
336 # be a function warnings
337 # pylint: disable=W0613,R0201
340 def _ExpandAndLockInstance(self):
341 """Helper function to expand and lock an instance.
343 Many LUs that work on an instance take its name in self.op.instance_name
344 and need to expand it and then declare the expanded name for locking. This
345 function does it, and then updates self.op.instance_name to the expanded
346 name. It also initializes needed_locks as a dict, if this hasn't been done
350 if self.needed_locks is None:
351 self.needed_locks = {}
353 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
354 "_ExpandAndLockInstance called with instance-level locks set"
355 self.op.instance_name = _ExpandInstanceName(self.cfg,
356 self.op.instance_name)
357 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
359 def _LockInstancesNodes(self, primary_only=False,
360 level=locking.LEVEL_NODE):
361 """Helper function to declare instances' nodes for locking.
363 This function should be called after locking one or more instances to lock
364 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
365 with all primary or secondary nodes for instances already locked and
366 present in self.needed_locks[locking.LEVEL_INSTANCE].
368 It should be called from DeclareLocks, and for safety only works if
369 self.recalculate_locks[locking.LEVEL_NODE] is set.
371 In the future it may grow parameters to just lock some instance's nodes, or
372 to just lock primaries or secondary nodes, if needed.
374 If should be called in DeclareLocks in a way similar to::
376 if level == locking.LEVEL_NODE:
377 self._LockInstancesNodes()
379 @type primary_only: boolean
380 @param primary_only: only lock primary nodes of locked instances
381 @param level: Which lock level to use for locking nodes
384 assert level in self.recalculate_locks, \
385 "_LockInstancesNodes helper function called with no nodes to recalculate"
387 # TODO: check if we're really been called with the instance locks held
389 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
390 # future we might want to have different behaviors depending on the value
391 # of self.recalculate_locks[locking.LEVEL_NODE]
393 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
394 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
395 wanted_nodes.append(instance.primary_node)
397 wanted_nodes.extend(instance.secondary_nodes)
399 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
400 self.needed_locks[level] = wanted_nodes
401 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
402 self.needed_locks[level].extend(wanted_nodes)
404 raise errors.ProgrammerError("Unknown recalculation mode")
406 del self.recalculate_locks[level]
409 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
410 """Simple LU which runs no hooks.
412 This LU is intended as a parent for other LogicalUnits which will
413 run no hooks, in order to reduce duplicate code.
419 def BuildHooksEnv(self):
420 """Empty BuildHooksEnv for NoHooksLu.
422 This just raises an error.
425 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
427 def BuildHooksNodes(self):
428 """Empty BuildHooksNodes for NoHooksLU.
431 raise AssertionError("BuildHooksNodes called for NoHooksLU")
435 """Tasklet base class.
437 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
438 they can mix legacy code with tasklets. Locking needs to be done in the LU,
439 tasklets know nothing about locks.
441 Subclasses must follow these rules:
442 - Implement CheckPrereq
446 def __init__(self, lu):
453 def CheckPrereq(self):
454 """Check prerequisites for this tasklets.
456 This method should check whether the prerequisites for the execution of
457 this tasklet are fulfilled. It can do internode communication, but it
458 should be idempotent - no cluster or system changes are allowed.
460 The method should raise errors.OpPrereqError in case something is not
461 fulfilled. Its return value is ignored.
463 This method should also update all parameters to their canonical form if it
464 hasn't been done before.
469 def Exec(self, feedback_fn):
470 """Execute the tasklet.
472 This method should implement the actual work. It should raise
473 errors.OpExecError for failures that are somewhat dealt with in code, or
477 raise NotImplementedError
481 """Base for query utility classes.
484 #: Attribute holding field definitions
487 def __init__(self, qfilter, fields, use_locking):
488 """Initializes this class.
491 self.use_locking = use_locking
493 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
495 self.requested_data = self.query.RequestedData()
496 self.names = self.query.RequestedNames()
498 # Sort only if no names were requested
499 self.sort_by_name = not self.names
501 self.do_locking = None
504 def _GetNames(self, lu, all_names, lock_level):
505 """Helper function to determine names asked for in the query.
509 names = lu.owned_locks(lock_level)
513 if self.wanted == locking.ALL_SET:
514 assert not self.names
515 # caller didn't specify names, so ordering is not important
516 return utils.NiceSort(names)
518 # caller specified names and we must keep the same order
520 assert not self.do_locking or lu.glm.is_owned(lock_level)
522 missing = set(self.wanted).difference(names)
524 raise errors.OpExecError("Some items were removed before retrieving"
525 " their data: %s" % missing)
527 # Return expanded names
530 def ExpandNames(self, lu):
531 """Expand names for this query.
533 See L{LogicalUnit.ExpandNames}.
536 raise NotImplementedError()
538 def DeclareLocks(self, lu, level):
539 """Declare locks for this query.
541 See L{LogicalUnit.DeclareLocks}.
544 raise NotImplementedError()
546 def _GetQueryData(self, lu):
547 """Collects all data for this query.
549 @return: Query data object
552 raise NotImplementedError()
554 def NewStyleQuery(self, lu):
555 """Collect data and execute query.
558 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
559 sort_by_name=self.sort_by_name)
561 def OldStyleQuery(self, lu):
562 """Collect data and execute query.
565 return self.query.OldStyleQuery(self._GetQueryData(lu),
566 sort_by_name=self.sort_by_name)
570 """Returns a dict declaring all lock levels shared.
573 return dict.fromkeys(locking.LEVELS, 1)
576 def _MakeLegacyNodeInfo(data):
577 """Formats the data returned by L{rpc.RpcRunner.call_node_info}.
579 Converts the data into a single dictionary. This is fine for most use cases,
580 but some require information from more than one volume group or hypervisor.
583 (bootid, (vg_info, ), (hv_info, )) = data
585 return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
590 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
591 """Checks if the owned node groups are still correct for an instance.
593 @type cfg: L{config.ConfigWriter}
594 @param cfg: The cluster configuration
595 @type instance_name: string
596 @param instance_name: Instance name
597 @type owned_groups: set or frozenset
598 @param owned_groups: List of currently owned node groups
601 inst_groups = cfg.GetInstanceNodeGroups(instance_name)
603 if not owned_groups.issuperset(inst_groups):
604 raise errors.OpPrereqError("Instance %s's node groups changed since"
605 " locks were acquired, current groups are"
606 " are '%s', owning groups '%s'; retry the"
609 utils.CommaJoin(inst_groups),
610 utils.CommaJoin(owned_groups)),
616 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
617 """Checks if the instances in a node group are still correct.
619 @type cfg: L{config.ConfigWriter}
620 @param cfg: The cluster configuration
621 @type group_uuid: string
622 @param group_uuid: Node group UUID
623 @type owned_instances: set or frozenset
624 @param owned_instances: List of currently owned instances
627 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
628 if owned_instances != wanted_instances:
629 raise errors.OpPrereqError("Instances in node group '%s' changed since"
630 " locks were acquired, wanted '%s', have '%s';"
631 " retry the operation" %
633 utils.CommaJoin(wanted_instances),
634 utils.CommaJoin(owned_instances)),
637 return wanted_instances
640 def _SupportsOob(cfg, node):
641 """Tells if node supports OOB.
643 @type cfg: L{config.ConfigWriter}
644 @param cfg: The cluster configuration
645 @type node: L{objects.Node}
646 @param node: The node
647 @return: The OOB script if supported or an empty string otherwise
650 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
653 def _GetWantedNodes(lu, nodes):
654 """Returns list of checked and expanded node names.
656 @type lu: L{LogicalUnit}
657 @param lu: the logical unit on whose behalf we execute
659 @param nodes: list of node names or None for all nodes
661 @return: the list of nodes, sorted
662 @raise errors.ProgrammerError: if the nodes parameter is wrong type
666 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
668 return utils.NiceSort(lu.cfg.GetNodeList())
671 def _GetWantedInstances(lu, instances):
672 """Returns list of checked and expanded instance names.
674 @type lu: L{LogicalUnit}
675 @param lu: the logical unit on whose behalf we execute
676 @type instances: list
677 @param instances: list of instance names or None for all instances
679 @return: the list of instances, sorted
680 @raise errors.OpPrereqError: if the instances parameter is wrong type
681 @raise errors.OpPrereqError: if any of the passed instances is not found
685 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
687 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
691 def _GetUpdatedParams(old_params, update_dict,
692 use_default=True, use_none=False):
693 """Return the new version of a parameter dictionary.
695 @type old_params: dict
696 @param old_params: old parameters
697 @type update_dict: dict
698 @param update_dict: dict containing new parameter values, or
699 constants.VALUE_DEFAULT to reset the parameter to its default
701 @param use_default: boolean
702 @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
703 values as 'to be deleted' values
704 @param use_none: boolean
705 @type use_none: whether to recognise C{None} values as 'to be
708 @return: the new parameter dictionary
711 params_copy = copy.deepcopy(old_params)
712 for key, val in update_dict.iteritems():
713 if ((use_default and val == constants.VALUE_DEFAULT) or
714 (use_none and val is None)):
720 params_copy[key] = val
724 def _UpdateAndVerifySubDict(base, updates, type_check):
725 """Updates and verifies a dict with sub dicts of the same type.
727 @param base: The dict with the old data
728 @param updates: The dict with the new data
729 @param type_check: Dict suitable to ForceDictType to verify correct types
730 @returns: A new dict with updated and verified values
734 new = _GetUpdatedParams(old, value)
735 utils.ForceDictType(new, type_check)
738 ret = copy.deepcopy(base)
739 ret.update(dict((key, fn(base.get(key, {}), value))
740 for key, value in updates.items()))
744 def _MergeAndVerifyHvState(op_input, obj_input):
745 """Combines the hv state from an opcode with the one of the object
747 @param op_input: The input dict from the opcode
748 @param obj_input: The input dict from the objects
749 @return: The verified and updated dict
753 invalid_hvs = set(op_input) - constants.HYPER_TYPES
755 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
756 " %s" % utils.CommaJoin(invalid_hvs),
758 if obj_input is None:
760 type_check = constants.HVSTS_PARAMETER_TYPES
761 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
766 def _MergeAndVerifyDiskState(op_input, obj_input):
767 """Combines the disk state from an opcode with the one of the object
769 @param op_input: The input dict from the opcode
770 @param obj_input: The input dict from the objects
771 @return: The verified and updated dict
774 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
776 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
777 utils.CommaJoin(invalid_dst),
779 type_check = constants.DSS_PARAMETER_TYPES
780 if obj_input is None:
782 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
784 for key, value in op_input.items())
789 def _ReleaseLocks(lu, level, names=None, keep=None):
790 """Releases locks owned by an LU.
792 @type lu: L{LogicalUnit}
793 @param level: Lock level
794 @type names: list or None
795 @param names: Names of locks to release
796 @type keep: list or None
797 @param keep: Names of locks to retain
800 assert not (keep is not None and names is not None), \
801 "Only one of the 'names' and the 'keep' parameters can be given"
803 if names is not None:
804 should_release = names.__contains__
806 should_release = lambda name: name not in keep
808 should_release = None
810 owned = lu.owned_locks(level)
812 # Not owning any lock at this level, do nothing
819 # Determine which locks to release
821 if should_release(name):
826 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
828 # Release just some locks
829 lu.glm.release(level, names=release)
831 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
834 lu.glm.release(level)
836 assert not lu.glm.is_owned(level), "No locks should be owned"
839 def _MapInstanceDisksToNodes(instances):
840 """Creates a map from (node, volume) to instance name.
842 @type instances: list of L{objects.Instance}
843 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
846 return dict(((node, vol), inst.name)
847 for inst in instances
848 for (node, vols) in inst.MapLVsByNode().items()
852 def _RunPostHook(lu, node_name):
853 """Runs the post-hook for an opcode on a single node.
856 hm = lu.proc.BuildHooksManager(lu)
858 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
860 # pylint: disable=W0702
861 lu.LogWarning("Errors occurred running hooks on %s" % node_name)
864 def _CheckOutputFields(static, dynamic, selected):
865 """Checks whether all selected fields are valid.
867 @type static: L{utils.FieldSet}
868 @param static: static fields set
869 @type dynamic: L{utils.FieldSet}
870 @param dynamic: dynamic fields set
877 delta = f.NonMatching(selected)
879 raise errors.OpPrereqError("Unknown output fields selected: %s"
880 % ",".join(delta), errors.ECODE_INVAL)
883 def _CheckGlobalHvParams(params):
884 """Validates that given hypervisor params are not global ones.
886 This will ensure that instances don't get customised versions of
890 used_globals = constants.HVC_GLOBALS.intersection(params)
892 msg = ("The following hypervisor parameters are global and cannot"
893 " be customized at instance level, please modify them at"
894 " cluster level: %s" % utils.CommaJoin(used_globals))
895 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
898 def _CheckNodeOnline(lu, node, msg=None):
899 """Ensure that a given node is online.
901 @param lu: the LU on behalf of which we make the check
902 @param node: the node to check
903 @param msg: if passed, should be a message to replace the default one
904 @raise errors.OpPrereqError: if the node is offline
908 msg = "Can't use offline node"
909 if lu.cfg.GetNodeInfo(node).offline:
910 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
913 def _CheckNodeNotDrained(lu, node):
914 """Ensure that a given node is not drained.
916 @param lu: the LU on behalf of which we make the check
917 @param node: the node to check
918 @raise errors.OpPrereqError: if the node is drained
921 if lu.cfg.GetNodeInfo(node).drained:
922 raise errors.OpPrereqError("Can't use drained node %s" % node,
926 def _CheckNodeVmCapable(lu, node):
927 """Ensure that a given node is vm capable.
929 @param lu: the LU on behalf of which we make the check
930 @param node: the node to check
931 @raise errors.OpPrereqError: if the node is not vm capable
934 if not lu.cfg.GetNodeInfo(node).vm_capable:
935 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
939 def _CheckNodeHasOS(lu, node, os_name, force_variant):
940 """Ensure that a node supports a given OS.
942 @param lu: the LU on behalf of which we make the check
943 @param node: the node to check
944 @param os_name: the OS to query about
945 @param force_variant: whether to ignore variant errors
946 @raise errors.OpPrereqError: if the node is not supporting the OS
949 result = lu.rpc.call_os_get(node, os_name)
950 result.Raise("OS '%s' not in supported OS list for node %s" %
952 prereq=True, ecode=errors.ECODE_INVAL)
953 if not force_variant:
954 _CheckOSVariant(result.payload, os_name)
957 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
958 """Ensure that a node has the given secondary ip.
960 @type lu: L{LogicalUnit}
961 @param lu: the LU on behalf of which we make the check
963 @param node: the node to check
964 @type secondary_ip: string
965 @param secondary_ip: the ip to check
966 @type prereq: boolean
967 @param prereq: whether to throw a prerequisite or an execute error
968 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
969 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
972 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
973 result.Raise("Failure checking secondary ip on node %s" % node,
974 prereq=prereq, ecode=errors.ECODE_ENVIRON)
975 if not result.payload:
976 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
977 " please fix and re-run this command" % secondary_ip)
979 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
981 raise errors.OpExecError(msg)
984 def _GetClusterDomainSecret():
985 """Reads the cluster domain secret.
988 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
992 def _CheckInstanceState(lu, instance, req_states, msg=None):
993 """Ensure that an instance is in one of the required states.
995 @param lu: the LU on behalf of which we make the check
996 @param instance: the instance to check
997 @param msg: if passed, should be a message to replace the default one
998 @raise errors.OpPrereqError: if the instance is not in the required state
1002 msg = "can't use instance from outside %s states" % ", ".join(req_states)
1003 if instance.admin_state not in req_states:
1004 raise errors.OpPrereqError("Instance %s is marked to be %s, %s" %
1005 (instance, instance.admin_state, msg),
1008 if constants.ADMINST_UP not in req_states:
1009 pnode = instance.primary_node
1010 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1011 ins_l.Raise("Can't contact node %s for instance information" % pnode,
1012 prereq=True, ecode=errors.ECODE_ENVIRON)
1014 if instance.name in ins_l.payload:
1015 raise errors.OpPrereqError("Instance %s is running, %s" %
1016 (instance.name, msg), errors.ECODE_STATE)
1019 def _CheckMinMaxSpecs(name, ipolicy, value):
1020 """Checks if value is in the desired range.
1022 @param name: name of the parameter for which we perform the check
1023 @param ipolicy: dictionary containing min, max and std values
1024 @param value: actual value that we want to use
1025 @return: None or element not meeting the criteria
1029 if value in [None, constants.VALUE_AUTO]:
1031 max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1032 min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1033 if value > max_v or min_v > value:
1034 return ("%s value %s is not in range [%s, %s]" %
1035 (name, value, min_v, max_v))
1039 def _ExpandItemName(fn, name, kind):
1040 """Expand an item name.
1042 @param fn: the function to use for expansion
1043 @param name: requested item name
1044 @param kind: text description ('Node' or 'Instance')
1045 @return: the resolved (full) name
1046 @raise errors.OpPrereqError: if the item is not found
1049 full_name = fn(name)
1050 if full_name is None:
1051 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1056 def _ExpandNodeName(cfg, name):
1057 """Wrapper over L{_ExpandItemName} for nodes."""
1058 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1061 def _ExpandInstanceName(cfg, name):
1062 """Wrapper over L{_ExpandItemName} for instance."""
1063 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
1066 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1067 minmem, maxmem, vcpus, nics, disk_template, disks,
1068 bep, hvp, hypervisor_name, tags):
1069 """Builds instance related env variables for hooks
1071 This builds the hook environment from individual variables.
1074 @param name: the name of the instance
1075 @type primary_node: string
1076 @param primary_node: the name of the instance's primary node
1077 @type secondary_nodes: list
1078 @param secondary_nodes: list of secondary nodes as strings
1079 @type os_type: string
1080 @param os_type: the name of the instance's OS
1081 @type status: string
1082 @param status: the desired status of the instance
1083 @type minmem: string
1084 @param minmem: the minimum memory size of the instance
1085 @type maxmem: string
1086 @param maxmem: the maximum memory size of the instance
1088 @param vcpus: the count of VCPUs the instance has
1090 @param nics: list of tuples (ip, mac, mode, link) representing
1091 the NICs the instance has
1092 @type disk_template: string
1093 @param disk_template: the disk template of the instance
1095 @param disks: the list of (size, mode) pairs
1097 @param bep: the backend parameters for the instance
1099 @param hvp: the hypervisor parameters for the instance
1100 @type hypervisor_name: string
1101 @param hypervisor_name: the hypervisor for the instance
1103 @param tags: list of instance tags as strings
1105 @return: the hook environment for this instance
1110 "INSTANCE_NAME": name,
1111 "INSTANCE_PRIMARY": primary_node,
1112 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1113 "INSTANCE_OS_TYPE": os_type,
1114 "INSTANCE_STATUS": status,
1115 "INSTANCE_MINMEM": minmem,
1116 "INSTANCE_MAXMEM": maxmem,
1117 # TODO(2.7) remove deprecated "memory" value
1118 "INSTANCE_MEMORY": maxmem,
1119 "INSTANCE_VCPUS": vcpus,
1120 "INSTANCE_DISK_TEMPLATE": disk_template,
1121 "INSTANCE_HYPERVISOR": hypervisor_name,
1124 nic_count = len(nics)
1125 for idx, (ip, mac, mode, link) in enumerate(nics):
1128 env["INSTANCE_NIC%d_IP" % idx] = ip
1129 env["INSTANCE_NIC%d_MAC" % idx] = mac
1130 env["INSTANCE_NIC%d_MODE" % idx] = mode
1131 env["INSTANCE_NIC%d_LINK" % idx] = link
1132 if mode == constants.NIC_MODE_BRIDGED:
1133 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1137 env["INSTANCE_NIC_COUNT"] = nic_count
1140 disk_count = len(disks)
1141 for idx, (size, mode) in enumerate(disks):
1142 env["INSTANCE_DISK%d_SIZE" % idx] = size
1143 env["INSTANCE_DISK%d_MODE" % idx] = mode
1147 env["INSTANCE_DISK_COUNT"] = disk_count
1152 env["INSTANCE_TAGS"] = " ".join(tags)
1154 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1155 for key, value in source.items():
1156 env["INSTANCE_%s_%s" % (kind, key)] = value
1161 def _NICListToTuple(lu, nics):
1162 """Build a list of nic information tuples.
1164 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1165 value in LUInstanceQueryData.
1167 @type lu: L{LogicalUnit}
1168 @param lu: the logical unit on whose behalf we execute
1169 @type nics: list of L{objects.NIC}
1170 @param nics: list of nics to convert to hooks tuples
1174 cluster = lu.cfg.GetClusterInfo()
1178 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1179 mode = filled_params[constants.NIC_MODE]
1180 link = filled_params[constants.NIC_LINK]
1181 hooks_nics.append((ip, mac, mode, link))
1185 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1186 """Builds instance related env variables for hooks from an object.
1188 @type lu: L{LogicalUnit}
1189 @param lu: the logical unit on whose behalf we execute
1190 @type instance: L{objects.Instance}
1191 @param instance: the instance for which we should build the
1193 @type override: dict
1194 @param override: dictionary with key/values that will override
1197 @return: the hook environment dictionary
1200 cluster = lu.cfg.GetClusterInfo()
1201 bep = cluster.FillBE(instance)
1202 hvp = cluster.FillHV(instance)
1204 "name": instance.name,
1205 "primary_node": instance.primary_node,
1206 "secondary_nodes": instance.secondary_nodes,
1207 "os_type": instance.os,
1208 "status": instance.admin_state,
1209 "maxmem": bep[constants.BE_MAXMEM],
1210 "minmem": bep[constants.BE_MINMEM],
1211 "vcpus": bep[constants.BE_VCPUS],
1212 "nics": _NICListToTuple(lu, instance.nics),
1213 "disk_template": instance.disk_template,
1214 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1217 "hypervisor_name": instance.hypervisor,
1218 "tags": instance.tags,
1221 args.update(override)
1222 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1225 def _AdjustCandidatePool(lu, exceptions):
1226 """Adjust the candidate pool after node operations.
1229 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1231 lu.LogInfo("Promoted nodes to master candidate role: %s",
1232 utils.CommaJoin(node.name for node in mod_list))
1233 for name in mod_list:
1234 lu.context.ReaddNode(name)
1235 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1237 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1241 def _DecideSelfPromotion(lu, exceptions=None):
1242 """Decide whether I should promote myself as a master candidate.
1245 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1246 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1247 # the new node will increase mc_max with one, so:
1248 mc_should = min(mc_should + 1, cp_size)
1249 return mc_now < mc_should
1252 def _CalculateGroupIPolicy(cfg, group):
1253 """Calculate instance policy for group.
1256 cluster = cfg.GetClusterInfo()
1257 return cluster.SimpleFillIPolicy(group.ipolicy)
1260 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1261 """Check that the brigdes needed by a list of nics exist.
1264 cluster = lu.cfg.GetClusterInfo()
1265 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1266 brlist = [params[constants.NIC_LINK] for params in paramslist
1267 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1269 result = lu.rpc.call_bridges_exist(target_node, brlist)
1270 result.Raise("Error checking bridges on destination node '%s'" %
1271 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1274 def _CheckInstanceBridgesExist(lu, instance, node=None):
1275 """Check that the brigdes needed by an instance exist.
1279 node = instance.primary_node
1280 _CheckNicsBridgesExist(lu, instance.nics, node)
1283 def _CheckOSVariant(os_obj, name):
1284 """Check whether an OS name conforms to the os variants specification.
1286 @type os_obj: L{objects.OS}
1287 @param os_obj: OS object to check
1289 @param name: OS name passed by the user, to check for validity
1292 variant = objects.OS.GetVariant(name)
1293 if not os_obj.supported_variants:
1295 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1296 " passed)" % (os_obj.name, variant),
1300 raise errors.OpPrereqError("OS name must include a variant",
1303 if variant not in os_obj.supported_variants:
1304 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1307 def _GetNodeInstancesInner(cfg, fn):
1308 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1311 def _GetNodeInstances(cfg, node_name):
1312 """Returns a list of all primary and secondary instances on a node.
1316 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1319 def _GetNodePrimaryInstances(cfg, node_name):
1320 """Returns primary instances on a node.
1323 return _GetNodeInstancesInner(cfg,
1324 lambda inst: node_name == inst.primary_node)
1327 def _GetNodeSecondaryInstances(cfg, node_name):
1328 """Returns secondary instances on a node.
1331 return _GetNodeInstancesInner(cfg,
1332 lambda inst: node_name in inst.secondary_nodes)
1335 def _GetStorageTypeArgs(cfg, storage_type):
1336 """Returns the arguments for a storage type.
1339 # Special case for file storage
1340 if storage_type == constants.ST_FILE:
1341 # storage.FileStorage wants a list of storage directories
1342 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1347 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1350 for dev in instance.disks:
1351 cfg.SetDiskID(dev, node_name)
1353 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
1354 result.Raise("Failed to get disk status from node %s" % node_name,
1355 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1357 for idx, bdev_status in enumerate(result.payload):
1358 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1364 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1365 """Check the sanity of iallocator and node arguments and use the
1366 cluster-wide iallocator if appropriate.
1368 Check that at most one of (iallocator, node) is specified. If none is
1369 specified, then the LU's opcode's iallocator slot is filled with the
1370 cluster-wide default iallocator.
1372 @type iallocator_slot: string
1373 @param iallocator_slot: the name of the opcode iallocator slot
1374 @type node_slot: string
1375 @param node_slot: the name of the opcode target node slot
1378 node = getattr(lu.op, node_slot, None)
1379 iallocator = getattr(lu.op, iallocator_slot, None)
1381 if node is not None and iallocator is not None:
1382 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1384 elif node is None and iallocator is None:
1385 default_iallocator = lu.cfg.GetDefaultIAllocator()
1386 if default_iallocator:
1387 setattr(lu.op, iallocator_slot, default_iallocator)
1389 raise errors.OpPrereqError("No iallocator or node given and no"
1390 " cluster-wide default iallocator found;"
1391 " please specify either an iallocator or a"
1392 " node, or set a cluster-wide default"
1396 def _GetDefaultIAllocator(cfg, iallocator):
1397 """Decides on which iallocator to use.
1399 @type cfg: L{config.ConfigWriter}
1400 @param cfg: Cluster configuration object
1401 @type iallocator: string or None
1402 @param iallocator: Iallocator specified in opcode
1404 @return: Iallocator name
1408 # Use default iallocator
1409 iallocator = cfg.GetDefaultIAllocator()
1412 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1413 " opcode nor as a cluster-wide default",
1419 class LUClusterPostInit(LogicalUnit):
1420 """Logical unit for running hooks after cluster initialization.
1423 HPATH = "cluster-init"
1424 HTYPE = constants.HTYPE_CLUSTER
1426 def BuildHooksEnv(self):
1431 "OP_TARGET": self.cfg.GetClusterName(),
1434 def BuildHooksNodes(self):
1435 """Build hooks nodes.
1438 return ([], [self.cfg.GetMasterNode()])
1440 def Exec(self, feedback_fn):
1447 class LUClusterDestroy(LogicalUnit):
1448 """Logical unit for destroying the cluster.
1451 HPATH = "cluster-destroy"
1452 HTYPE = constants.HTYPE_CLUSTER
1454 def BuildHooksEnv(self):
1459 "OP_TARGET": self.cfg.GetClusterName(),
1462 def BuildHooksNodes(self):
1463 """Build hooks nodes.
1468 def CheckPrereq(self):
1469 """Check prerequisites.
1471 This checks whether the cluster is empty.
1473 Any errors are signaled by raising errors.OpPrereqError.
1476 master = self.cfg.GetMasterNode()
1478 nodelist = self.cfg.GetNodeList()
1479 if len(nodelist) != 1 or nodelist[0] != master:
1480 raise errors.OpPrereqError("There are still %d node(s) in"
1481 " this cluster." % (len(nodelist) - 1),
1483 instancelist = self.cfg.GetInstanceList()
1485 raise errors.OpPrereqError("There are still %d instance(s) in"
1486 " this cluster." % len(instancelist),
1489 def Exec(self, feedback_fn):
1490 """Destroys the cluster.
1493 master_params = self.cfg.GetMasterNetworkParameters()
1495 # Run post hooks on master node before it's removed
1496 _RunPostHook(self, master_params.name)
1498 ems = self.cfg.GetUseExternalMipScript()
1499 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1501 result.Raise("Could not disable the master role")
1503 return master_params.name
1506 def _VerifyCertificate(filename):
1507 """Verifies a certificate for L{LUClusterVerifyConfig}.
1509 @type filename: string
1510 @param filename: Path to PEM file
1514 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1515 utils.ReadFile(filename))
1516 except Exception, err: # pylint: disable=W0703
1517 return (LUClusterVerifyConfig.ETYPE_ERROR,
1518 "Failed to load X509 certificate %s: %s" % (filename, err))
1521 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1522 constants.SSL_CERT_EXPIRATION_ERROR)
1525 fnamemsg = "While verifying %s: %s" % (filename, msg)
1530 return (None, fnamemsg)
1531 elif errcode == utils.CERT_WARNING:
1532 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1533 elif errcode == utils.CERT_ERROR:
1534 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1536 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1539 def _GetAllHypervisorParameters(cluster, instances):
1540 """Compute the set of all hypervisor parameters.
1542 @type cluster: L{objects.Cluster}
1543 @param cluster: the cluster object
1544 @param instances: list of L{objects.Instance}
1545 @param instances: additional instances from which to obtain parameters
1546 @rtype: list of (origin, hypervisor, parameters)
1547 @return: a list with all parameters found, indicating the hypervisor they
1548 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1553 for hv_name in cluster.enabled_hypervisors:
1554 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1556 for os_name, os_hvp in cluster.os_hvp.items():
1557 for hv_name, hv_params in os_hvp.items():
1559 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1560 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1562 # TODO: collapse identical parameter values in a single one
1563 for instance in instances:
1564 if instance.hvparams:
1565 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1566 cluster.FillHV(instance)))
1571 class _VerifyErrors(object):
1572 """Mix-in for cluster/group verify LUs.
1574 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1575 self.op and self._feedback_fn to be available.)
1579 ETYPE_FIELD = "code"
1580 ETYPE_ERROR = "ERROR"
1581 ETYPE_WARNING = "WARNING"
1583 def _Error(self, ecode, item, msg, *args, **kwargs):
1584 """Format an error message.
1586 Based on the opcode's error_codes parameter, either format a
1587 parseable error code, or a simpler error string.
1589 This must be called only from Exec and functions called from Exec.
1592 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1593 itype, etxt, _ = ecode
1594 # first complete the msg
1597 # then format the whole message
1598 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1599 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1605 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1606 # and finally report it via the feedback_fn
1607 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
1609 def _ErrorIf(self, cond, ecode, *args, **kwargs):
1610 """Log an error message if the passed condition is True.
1614 or self.op.debug_simulate_errors) # pylint: disable=E1101
1616 # If the error code is in the list of ignored errors, demote the error to a
1618 (_, etxt, _) = ecode
1619 if etxt in self.op.ignore_errors: # pylint: disable=E1101
1620 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1623 self._Error(ecode, *args, **kwargs)
1625 # do not mark the operation as failed for WARN cases only
1626 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1627 self.bad = self.bad or cond
1630 class LUClusterVerify(NoHooksLU):
1631 """Submits all jobs necessary to verify the cluster.
1636 def ExpandNames(self):
1637 self.needed_locks = {}
1639 def Exec(self, feedback_fn):
1642 if self.op.group_name:
1643 groups = [self.op.group_name]
1644 depends_fn = lambda: None
1646 groups = self.cfg.GetNodeGroupList()
1648 # Verify global configuration
1650 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1653 # Always depend on global verification
1654 depends_fn = lambda: [(-len(jobs), [])]
1656 jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1657 ignore_errors=self.op.ignore_errors,
1658 depends=depends_fn())]
1659 for group in groups)
1661 # Fix up all parameters
1662 for op in itertools.chain(*jobs): # pylint: disable=W0142
1663 op.debug_simulate_errors = self.op.debug_simulate_errors
1664 op.verbose = self.op.verbose
1665 op.error_codes = self.op.error_codes
1667 op.skip_checks = self.op.skip_checks
1668 except AttributeError:
1669 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1671 return ResultWithJobs(jobs)
1674 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1675 """Verifies the cluster config.
1680 def _VerifyHVP(self, hvp_data):
1681 """Verifies locally the syntax of the hypervisor parameters.
1684 for item, hv_name, hv_params in hvp_data:
1685 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1688 hv_class = hypervisor.GetHypervisor(hv_name)
1689 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1690 hv_class.CheckParameterSyntax(hv_params)
1691 except errors.GenericError, err:
1692 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1694 def ExpandNames(self):
1695 # Information can be safely retrieved as the BGL is acquired in exclusive
1697 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
1698 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1699 self.all_node_info = self.cfg.GetAllNodesInfo()
1700 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1701 self.needed_locks = {}
1703 def Exec(self, feedback_fn):
1704 """Verify integrity of cluster, performing various test on nodes.
1708 self._feedback_fn = feedback_fn
1710 feedback_fn("* Verifying cluster config")
1712 for msg in self.cfg.VerifyConfig():
1713 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1715 feedback_fn("* Verifying cluster certificate files")
1717 for cert_filename in constants.ALL_CERT_FILES:
1718 (errcode, msg) = _VerifyCertificate(cert_filename)
1719 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1721 feedback_fn("* Verifying hypervisor parameters")
1723 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1724 self.all_inst_info.values()))
1726 feedback_fn("* Verifying all nodes belong to an existing group")
1728 # We do this verification here because, should this bogus circumstance
1729 # occur, it would never be caught by VerifyGroup, which only acts on
1730 # nodes/instances reachable from existing node groups.
1732 dangling_nodes = set(node.name for node in self.all_node_info.values()
1733 if node.group not in self.all_group_info)
1735 dangling_instances = {}
1736 no_node_instances = []
1738 for inst in self.all_inst_info.values():
1739 if inst.primary_node in dangling_nodes:
1740 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1741 elif inst.primary_node not in self.all_node_info:
1742 no_node_instances.append(inst.name)
1747 utils.CommaJoin(dangling_instances.get(node.name,
1749 for node in dangling_nodes]
1751 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
1753 "the following nodes (and their instances) belong to a non"
1754 " existing group: %s", utils.CommaJoin(pretty_dangling))
1756 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
1758 "the following instances have a non-existing primary-node:"
1759 " %s", utils.CommaJoin(no_node_instances))
1764 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1765 """Verifies the status of a node group.
1768 HPATH = "cluster-verify"
1769 HTYPE = constants.HTYPE_CLUSTER
1772 _HOOKS_INDENT_RE = re.compile("^", re.M)
1774 class NodeImage(object):
1775 """A class representing the logical and physical status of a node.
1778 @ivar name: the node name to which this object refers
1779 @ivar volumes: a structure as returned from
1780 L{ganeti.backend.GetVolumeList} (runtime)
1781 @ivar instances: a list of running instances (runtime)
1782 @ivar pinst: list of configured primary instances (config)
1783 @ivar sinst: list of configured secondary instances (config)
1784 @ivar sbp: dictionary of {primary-node: list of instances} for all
1785 instances for which this node is secondary (config)
1786 @ivar mfree: free memory, as reported by hypervisor (runtime)
1787 @ivar dfree: free disk, as reported by the node (runtime)
1788 @ivar offline: the offline status (config)
1789 @type rpc_fail: boolean
1790 @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1791 not whether the individual keys were correct) (runtime)
1792 @type lvm_fail: boolean
1793 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1794 @type hyp_fail: boolean
1795 @ivar hyp_fail: whether the RPC call didn't return the instance list
1796 @type ghost: boolean
1797 @ivar ghost: whether this is a known node or not (config)
1798 @type os_fail: boolean
1799 @ivar os_fail: whether the RPC call didn't return valid OS data
1801 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1802 @type vm_capable: boolean
1803 @ivar vm_capable: whether the node can host instances
1806 def __init__(self, offline=False, name=None, vm_capable=True):
1815 self.offline = offline
1816 self.vm_capable = vm_capable
1817 self.rpc_fail = False
1818 self.lvm_fail = False
1819 self.hyp_fail = False
1821 self.os_fail = False
1824 def ExpandNames(self):
1825 # This raises errors.OpPrereqError on its own:
1826 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1828 # Get instances in node group; this is unsafe and needs verification later
1829 inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
1831 self.needed_locks = {
1832 locking.LEVEL_INSTANCE: inst_names,
1833 locking.LEVEL_NODEGROUP: [self.group_uuid],
1834 locking.LEVEL_NODE: [],
1837 self.share_locks = _ShareAll()
1839 def DeclareLocks(self, level):
1840 if level == locking.LEVEL_NODE:
1841 # Get members of node group; this is unsafe and needs verification later
1842 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1844 all_inst_info = self.cfg.GetAllInstancesInfo()
1846 # In Exec(), we warn about mirrored instances that have primary and
1847 # secondary living in separate node groups. To fully verify that
1848 # volumes for these instances are healthy, we will need to do an
1849 # extra call to their secondaries. We ensure here those nodes will
1851 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
1852 # Important: access only the instances whose lock is owned
1853 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1854 nodes.update(all_inst_info[inst].secondary_nodes)
1856 self.needed_locks[locking.LEVEL_NODE] = nodes
1858 def CheckPrereq(self):
1859 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
1860 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
1862 group_nodes = set(self.group_info.members)
1863 group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
1866 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1868 unlocked_instances = \
1869 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
1872 raise errors.OpPrereqError("Missing lock for nodes: %s" %
1873 utils.CommaJoin(unlocked_nodes))
1875 if unlocked_instances:
1876 raise errors.OpPrereqError("Missing lock for instances: %s" %
1877 utils.CommaJoin(unlocked_instances))
1879 self.all_node_info = self.cfg.GetAllNodesInfo()
1880 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1882 self.my_node_names = utils.NiceSort(group_nodes)
1883 self.my_inst_names = utils.NiceSort(group_instances)
1885 self.my_node_info = dict((name, self.all_node_info[name])
1886 for name in self.my_node_names)
1888 self.my_inst_info = dict((name, self.all_inst_info[name])
1889 for name in self.my_inst_names)
1891 # We detect here the nodes that will need the extra RPC calls for verifying
1892 # split LV volumes; they should be locked.
1893 extra_lv_nodes = set()
1895 for inst in self.my_inst_info.values():
1896 if inst.disk_template in constants.DTS_INT_MIRROR:
1897 group = self.my_node_info[inst.primary_node].group
1898 for nname in inst.secondary_nodes:
1899 if self.all_node_info[nname].group != group:
1900 extra_lv_nodes.add(nname)
1902 unlocked_lv_nodes = \
1903 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1905 if unlocked_lv_nodes:
1906 raise errors.OpPrereqError("these nodes could be locked: %s" %
1907 utils.CommaJoin(unlocked_lv_nodes))
1908 self.extra_lv_nodes = list(extra_lv_nodes)
1910 def _VerifyNode(self, ninfo, nresult):
1911 """Perform some basic validation on data returned from a node.
1913 - check the result data structure is well formed and has all the
1915 - check ganeti version
1917 @type ninfo: L{objects.Node}
1918 @param ninfo: the node to check
1919 @param nresult: the results from the node
1921 @return: whether overall this call was successful (and we can expect
1922 reasonable values in the respose)
1926 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1928 # main result, nresult should be a non-empty dict
1929 test = not nresult or not isinstance(nresult, dict)
1930 _ErrorIf(test, constants.CV_ENODERPC, node,
1931 "unable to verify node: no data returned")
1935 # compares ganeti version
1936 local_version = constants.PROTOCOL_VERSION
1937 remote_version = nresult.get("version", None)
1938 test = not (remote_version and
1939 isinstance(remote_version, (list, tuple)) and
1940 len(remote_version) == 2)
1941 _ErrorIf(test, constants.CV_ENODERPC, node,
1942 "connection to node returned invalid data")
1946 test = local_version != remote_version[0]
1947 _ErrorIf(test, constants.CV_ENODEVERSION, node,
1948 "incompatible protocol versions: master %s,"
1949 " node %s", local_version, remote_version[0])
1953 # node seems compatible, we can actually try to look into its results
1955 # full package version
1956 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1957 constants.CV_ENODEVERSION, node,
1958 "software version mismatch: master %s, node %s",
1959 constants.RELEASE_VERSION, remote_version[1],
1960 code=self.ETYPE_WARNING)
1962 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1963 if ninfo.vm_capable and isinstance(hyp_result, dict):
1964 for hv_name, hv_result in hyp_result.iteritems():
1965 test = hv_result is not None
1966 _ErrorIf(test, constants.CV_ENODEHV, node,
1967 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1969 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1970 if ninfo.vm_capable and isinstance(hvp_result, list):
1971 for item, hv_name, hv_result in hvp_result:
1972 _ErrorIf(True, constants.CV_ENODEHV, node,
1973 "hypervisor %s parameter verify failure (source %s): %s",
1974 hv_name, item, hv_result)
1976 test = nresult.get(constants.NV_NODESETUP,
1977 ["Missing NODESETUP results"])
1978 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
1983 def _VerifyNodeTime(self, ninfo, nresult,
1984 nvinfo_starttime, nvinfo_endtime):
1985 """Check the node time.
1987 @type ninfo: L{objects.Node}
1988 @param ninfo: the node to check
1989 @param nresult: the remote results for the node
1990 @param nvinfo_starttime: the start time of the RPC call
1991 @param nvinfo_endtime: the end time of the RPC call
1995 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1997 ntime = nresult.get(constants.NV_TIME, None)
1999 ntime_merged = utils.MergeTime(ntime)
2000 except (ValueError, TypeError):
2001 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2004 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2005 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2006 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2007 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2011 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2012 "Node time diverges by at least %s from master node time",
2015 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2016 """Check the node LVM results.
2018 @type ninfo: L{objects.Node}
2019 @param ninfo: the node to check
2020 @param nresult: the remote results for the node
2021 @param vg_name: the configured VG name
2028 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2030 # checks vg existence and size > 20G
2031 vglist = nresult.get(constants.NV_VGLIST, None)
2033 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2035 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2036 constants.MIN_VG_SIZE)
2037 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2040 pvlist = nresult.get(constants.NV_PVLIST, None)
2041 test = pvlist is None
2042 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2044 # check that ':' is not present in PV names, since it's a
2045 # special character for lvcreate (denotes the range of PEs to
2047 for _, pvname, owner_vg in pvlist:
2048 test = ":" in pvname
2049 _ErrorIf(test, constants.CV_ENODELVM, node,
2050 "Invalid character ':' in PV '%s' of VG '%s'",
2053 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2054 """Check the node bridges.
2056 @type ninfo: L{objects.Node}
2057 @param ninfo: the node to check
2058 @param nresult: the remote results for the node
2059 @param bridges: the expected list of bridges
2066 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2068 missing = nresult.get(constants.NV_BRIDGES, None)
2069 test = not isinstance(missing, list)
2070 _ErrorIf(test, constants.CV_ENODENET, node,
2071 "did not return valid bridge information")
2073 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2074 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2076 def _VerifyNodeUserScripts(self, ninfo, nresult):
2077 """Check the results of user scripts presence and executability on the node
2079 @type ninfo: L{objects.Node}
2080 @param ninfo: the node to check
2081 @param nresult: the remote results for the node
2086 test = not constants.NV_USERSCRIPTS in nresult
2087 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2088 "did not return user scripts information")
2090 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2092 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2093 "user scripts not present or not executable: %s" %
2094 utils.CommaJoin(sorted(broken_scripts)))
2096 def _VerifyNodeNetwork(self, ninfo, nresult):
2097 """Check the node network connectivity results.
2099 @type ninfo: L{objects.Node}
2100 @param ninfo: the node to check
2101 @param nresult: the remote results for the node
2105 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2107 test = constants.NV_NODELIST not in nresult
2108 _ErrorIf(test, constants.CV_ENODESSH, node,
2109 "node hasn't returned node ssh connectivity data")
2111 if nresult[constants.NV_NODELIST]:
2112 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2113 _ErrorIf(True, constants.CV_ENODESSH, node,
2114 "ssh communication with node '%s': %s", a_node, a_msg)
2116 test = constants.NV_NODENETTEST not in nresult
2117 _ErrorIf(test, constants.CV_ENODENET, node,
2118 "node hasn't returned node tcp connectivity data")
2120 if nresult[constants.NV_NODENETTEST]:
2121 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2123 _ErrorIf(True, constants.CV_ENODENET, node,
2124 "tcp communication with node '%s': %s",
2125 anode, nresult[constants.NV_NODENETTEST][anode])
2127 test = constants.NV_MASTERIP not in nresult
2128 _ErrorIf(test, constants.CV_ENODENET, node,
2129 "node hasn't returned node master IP reachability data")
2131 if not nresult[constants.NV_MASTERIP]:
2132 if node == self.master_node:
2133 msg = "the master node cannot reach the master IP (not configured?)"
2135 msg = "cannot reach the master IP"
2136 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2138 def _VerifyInstancePolicy(self, instance):
2139 """Verify instance specs against instance policy set on node group level.
2143 cluster = self.cfg.GetClusterInfo()
2144 full_beparams = cluster.FillBE(instance)
2145 ipolicy = cluster.SimpleFillIPolicy(self.group_info.ipolicy)
2147 mem_size = full_beparams.get(constants.BE_MAXMEM, None)
2148 cpu_count = full_beparams.get(constants.BE_VCPUS, None)
2149 disk_count = len(instance.disks)
2150 disk_sizes = [disk.size for disk in instance.disks]
2151 nic_count = len(instance.nics)
2154 (constants.ISPEC_MEM_SIZE, mem_size),
2155 (constants.ISPEC_CPU_COUNT, cpu_count),
2156 (constants.ISPEC_DISK_COUNT, disk_count),
2157 (constants.ISPEC_NIC_COUNT, nic_count),
2158 ] + map((lambda d: (constants.ISPEC_DISK_SIZE, d)), disk_sizes)
2160 for (name, value) in test_settings:
2161 test_result = _CheckMinMaxSpecs(name, ipolicy, value)
2162 self._ErrorIf(test_result is not None,
2163 constants.CV_EINSTANCEPOLICY, instance.name,
2166 def _VerifyInstance(self, instance, instanceconfig, node_image,
2168 """Verify an instance.
2170 This function checks to see if the required block devices are
2171 available on the instance's node.
2174 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2175 node_current = instanceconfig.primary_node
2177 node_vol_should = {}
2178 instanceconfig.MapLVsByNode(node_vol_should)
2180 self._VerifyInstancePolicy(instanceconfig)
2182 for node in node_vol_should:
2183 n_img = node_image[node]
2184 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2185 # ignore missing volumes on offline or broken nodes
2187 for volume in node_vol_should[node]:
2188 test = volume not in n_img.volumes
2189 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2190 "volume %s missing on node %s", volume, node)
2192 if instanceconfig.admin_state == constants.ADMINST_UP:
2193 pri_img = node_image[node_current]
2194 test = instance not in pri_img.instances and not pri_img.offline
2195 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2196 "instance not running on its primary node %s",
2199 diskdata = [(nname, success, status, idx)
2200 for (nname, disks) in diskstatus.items()
2201 for idx, (success, status) in enumerate(disks)]
2203 for nname, success, bdev_status, idx in diskdata:
2204 # the 'ghost node' construction in Exec() ensures that we have a
2206 snode = node_image[nname]
2207 bad_snode = snode.ghost or snode.offline
2208 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2209 not success and not bad_snode,
2210 constants.CV_EINSTANCEFAULTYDISK, instance,
2211 "couldn't retrieve status for disk/%s on %s: %s",
2212 idx, nname, bdev_status)
2213 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2214 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2215 constants.CV_EINSTANCEFAULTYDISK, instance,
2216 "disk/%s on %s is faulty", idx, nname)
2218 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2219 """Verify if there are any unknown volumes in the cluster.
2221 The .os, .swap and backup volumes are ignored. All other volumes are
2222 reported as unknown.
2224 @type reserved: L{ganeti.utils.FieldSet}
2225 @param reserved: a FieldSet of reserved volume names
2228 for node, n_img in node_image.items():
2229 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2230 # skip non-healthy nodes
2232 for volume in n_img.volumes:
2233 test = ((node not in node_vol_should or
2234 volume not in node_vol_should[node]) and
2235 not reserved.Matches(volume))
2236 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2237 "volume %s is unknown", volume)
2239 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2240 """Verify N+1 Memory Resilience.
2242 Check that if one single node dies we can still start all the
2243 instances it was primary for.
2246 cluster_info = self.cfg.GetClusterInfo()
2247 for node, n_img in node_image.items():
2248 # This code checks that every node which is now listed as
2249 # secondary has enough memory to host all instances it is
2250 # supposed to should a single other node in the cluster fail.
2251 # FIXME: not ready for failover to an arbitrary node
2252 # FIXME: does not support file-backed instances
2253 # WARNING: we currently take into account down instances as well
2254 # as up ones, considering that even if they're down someone
2255 # might want to start them even in the event of a node failure.
2257 # we're skipping offline nodes from the N+1 warning, since
2258 # most likely we don't have good memory infromation from them;
2259 # we already list instances living on such nodes, and that's
2262 #TODO(dynmem): use MINMEM for checking
2263 #TODO(dynmem): also consider ballooning out other instances
2264 for prinode, instances in n_img.sbp.items():
2266 for instance in instances:
2267 bep = cluster_info.FillBE(instance_cfg[instance])
2268 if bep[constants.BE_AUTO_BALANCE]:
2269 needed_mem += bep[constants.BE_MAXMEM]
2270 test = n_img.mfree < needed_mem
2271 self._ErrorIf(test, constants.CV_ENODEN1, node,
2272 "not enough memory to accomodate instance failovers"
2273 " should node %s fail (%dMiB needed, %dMiB available)",
2274 prinode, needed_mem, n_img.mfree)
2277 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2278 (files_all, files_opt, files_mc, files_vm)):
2279 """Verifies file checksums collected from all nodes.
2281 @param errorif: Callback for reporting errors
2282 @param nodeinfo: List of L{objects.Node} objects
2283 @param master_node: Name of master node
2284 @param all_nvinfo: RPC results
2287 # Define functions determining which nodes to consider for a file
2290 (files_mc, lambda node: (node.master_candidate or
2291 node.name == master_node)),
2292 (files_vm, lambda node: node.vm_capable),
2295 # Build mapping from filename to list of nodes which should have the file
2297 for (files, fn) in files2nodefn:
2299 filenodes = nodeinfo
2301 filenodes = filter(fn, nodeinfo)
2302 nodefiles.update((filename,
2303 frozenset(map(operator.attrgetter("name"), filenodes)))
2304 for filename in files)
2306 assert set(nodefiles) == (files_all | files_mc | files_vm)
2308 fileinfo = dict((filename, {}) for filename in nodefiles)
2309 ignore_nodes = set()
2311 for node in nodeinfo:
2313 ignore_nodes.add(node.name)
2316 nresult = all_nvinfo[node.name]
2318 if nresult.fail_msg or not nresult.payload:
2321 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2323 test = not (node_files and isinstance(node_files, dict))
2324 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2325 "Node did not return file checksum data")
2327 ignore_nodes.add(node.name)
2330 # Build per-checksum mapping from filename to nodes having it
2331 for (filename, checksum) in node_files.items():
2332 assert filename in nodefiles
2333 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2335 for (filename, checksums) in fileinfo.items():
2336 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2338 # Nodes having the file
2339 with_file = frozenset(node_name
2340 for nodes in fileinfo[filename].values()
2341 for node_name in nodes) - ignore_nodes
2343 expected_nodes = nodefiles[filename] - ignore_nodes
2345 # Nodes missing file
2346 missing_file = expected_nodes - with_file
2348 if filename in files_opt:
2350 errorif(missing_file and missing_file != expected_nodes,
2351 constants.CV_ECLUSTERFILECHECK, None,
2352 "File %s is optional, but it must exist on all or no"
2353 " nodes (not found on %s)",
2354 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2356 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2357 "File %s is missing from node(s) %s", filename,
2358 utils.CommaJoin(utils.NiceSort(missing_file)))
2360 # Warn if a node has a file it shouldn't
2361 unexpected = with_file - expected_nodes
2363 constants.CV_ECLUSTERFILECHECK, None,
2364 "File %s should not exist on node(s) %s",
2365 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2367 # See if there are multiple versions of the file
2368 test = len(checksums) > 1
2370 variants = ["variant %s on %s" %
2371 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2372 for (idx, (checksum, nodes)) in
2373 enumerate(sorted(checksums.items()))]
2377 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2378 "File %s found with %s different checksums (%s)",
2379 filename, len(checksums), "; ".join(variants))
2381 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2383 """Verifies and the node DRBD status.
2385 @type ninfo: L{objects.Node}
2386 @param ninfo: the node to check
2387 @param nresult: the remote results for the node
2388 @param instanceinfo: the dict of instances
2389 @param drbd_helper: the configured DRBD usermode helper
2390 @param drbd_map: the DRBD map as returned by
2391 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2395 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2398 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2399 test = (helper_result == None)
2400 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2401 "no drbd usermode helper returned")
2403 status, payload = helper_result
2405 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2406 "drbd usermode helper check unsuccessful: %s", payload)
2407 test = status and (payload != drbd_helper)
2408 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2409 "wrong drbd usermode helper: %s", payload)
2411 # compute the DRBD minors
2413 for minor, instance in drbd_map[node].items():
2414 test = instance not in instanceinfo
2415 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2416 "ghost instance '%s' in temporary DRBD map", instance)
2417 # ghost instance should not be running, but otherwise we
2418 # don't give double warnings (both ghost instance and
2419 # unallocated minor in use)
2421 node_drbd[minor] = (instance, False)
2423 instance = instanceinfo[instance]
2424 node_drbd[minor] = (instance.name,
2425 instance.admin_state == constants.ADMINST_UP)
2427 # and now check them
2428 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2429 test = not isinstance(used_minors, (tuple, list))
2430 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2431 "cannot parse drbd status file: %s", str(used_minors))
2433 # we cannot check drbd status
2436 for minor, (iname, must_exist) in node_drbd.items():
2437 test = minor not in used_minors and must_exist
2438 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2439 "drbd minor %d of instance %s is not active", minor, iname)
2440 for minor in used_minors:
2441 test = minor not in node_drbd
2442 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2443 "unallocated drbd minor %d is in use", minor)
2445 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2446 """Builds the node OS structures.
2448 @type ninfo: L{objects.Node}
2449 @param ninfo: the node to check
2450 @param nresult: the remote results for the node
2451 @param nimg: the node image object
2455 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2457 remote_os = nresult.get(constants.NV_OSLIST, None)
2458 test = (not isinstance(remote_os, list) or
2459 not compat.all(isinstance(v, list) and len(v) == 7
2460 for v in remote_os))
2462 _ErrorIf(test, constants.CV_ENODEOS, node,
2463 "node hasn't returned valid OS data")
2472 for (name, os_path, status, diagnose,
2473 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2475 if name not in os_dict:
2478 # parameters is a list of lists instead of list of tuples due to
2479 # JSON lacking a real tuple type, fix it:
2480 parameters = [tuple(v) for v in parameters]
2481 os_dict[name].append((os_path, status, diagnose,
2482 set(variants), set(parameters), set(api_ver)))
2484 nimg.oslist = os_dict
2486 def _VerifyNodeOS(self, ninfo, nimg, base):
2487 """Verifies the node OS list.
2489 @type ninfo: L{objects.Node}
2490 @param ninfo: the node to check
2491 @param nimg: the node image object
2492 @param base: the 'template' node we match against (e.g. from the master)
2496 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2498 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2500 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2501 for os_name, os_data in nimg.oslist.items():
2502 assert os_data, "Empty OS status for OS %s?!" % os_name
2503 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2504 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2505 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2506 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2507 "OS '%s' has multiple entries (first one shadows the rest): %s",
2508 os_name, utils.CommaJoin([v[0] for v in os_data]))
2509 # comparisons with the 'base' image
2510 test = os_name not in base.oslist
2511 _ErrorIf(test, constants.CV_ENODEOS, node,
2512 "Extra OS %s not present on reference node (%s)",
2516 assert base.oslist[os_name], "Base node has empty OS status?"
2517 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2519 # base OS is invalid, skipping
2521 for kind, a, b in [("API version", f_api, b_api),
2522 ("variants list", f_var, b_var),
2523 ("parameters", beautify_params(f_param),
2524 beautify_params(b_param))]:
2525 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2526 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2527 kind, os_name, base.name,
2528 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2530 # check any missing OSes
2531 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2532 _ErrorIf(missing, constants.CV_ENODEOS, node,
2533 "OSes present on reference node %s but missing on this node: %s",
2534 base.name, utils.CommaJoin(missing))
2536 def _VerifyOob(self, ninfo, nresult):
2537 """Verifies out of band functionality of a node.
2539 @type ninfo: L{objects.Node}
2540 @param ninfo: the node to check
2541 @param nresult: the remote results for the node
2545 # We just have to verify the paths on master and/or master candidates
2546 # as the oob helper is invoked on the master
2547 if ((ninfo.master_candidate or ninfo.master_capable) and
2548 constants.NV_OOB_PATHS in nresult):
2549 for path_result in nresult[constants.NV_OOB_PATHS]:
2550 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2552 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2553 """Verifies and updates the node volume data.
2555 This function will update a L{NodeImage}'s internal structures
2556 with data from the remote call.
2558 @type ninfo: L{objects.Node}
2559 @param ninfo: the node to check
2560 @param nresult: the remote results for the node
2561 @param nimg: the node image object
2562 @param vg_name: the configured VG name
2566 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2568 nimg.lvm_fail = True
2569 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2572 elif isinstance(lvdata, basestring):
2573 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2574 utils.SafeEncode(lvdata))
2575 elif not isinstance(lvdata, dict):
2576 _ErrorIf(True, constants.CV_ENODELVM, node,
2577 "rpc call to node failed (lvlist)")
2579 nimg.volumes = lvdata
2580 nimg.lvm_fail = False
2582 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2583 """Verifies and updates the node instance list.
2585 If the listing was successful, then updates this node's instance
2586 list. Otherwise, it marks the RPC call as failed for the instance
2589 @type ninfo: L{objects.Node}
2590 @param ninfo: the node to check
2591 @param nresult: the remote results for the node
2592 @param nimg: the node image object
2595 idata = nresult.get(constants.NV_INSTANCELIST, None)
2596 test = not isinstance(idata, list)
2597 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2598 "rpc call to node failed (instancelist): %s",
2599 utils.SafeEncode(str(idata)))
2601 nimg.hyp_fail = True
2603 nimg.instances = idata
2605 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2606 """Verifies and computes a node information map
2608 @type ninfo: L{objects.Node}
2609 @param ninfo: the node to check
2610 @param nresult: the remote results for the node
2611 @param nimg: the node image object
2612 @param vg_name: the configured VG name
2616 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2618 # try to read free memory (from the hypervisor)
2619 hv_info = nresult.get(constants.NV_HVINFO, None)
2620 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2621 _ErrorIf(test, constants.CV_ENODEHV, node,
2622 "rpc call to node failed (hvinfo)")
2625 nimg.mfree = int(hv_info["memory_free"])
2626 except (ValueError, TypeError):
2627 _ErrorIf(True, constants.CV_ENODERPC, node,
2628 "node returned invalid nodeinfo, check hypervisor")
2630 # FIXME: devise a free space model for file based instances as well
2631 if vg_name is not None:
2632 test = (constants.NV_VGLIST not in nresult or
2633 vg_name not in nresult[constants.NV_VGLIST])
2634 _ErrorIf(test, constants.CV_ENODELVM, node,
2635 "node didn't return data for the volume group '%s'"
2636 " - it is either missing or broken", vg_name)
2639 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2640 except (ValueError, TypeError):
2641 _ErrorIf(True, constants.CV_ENODERPC, node,
2642 "node returned invalid LVM info, check LVM status")
2644 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2645 """Gets per-disk status information for all instances.
2647 @type nodelist: list of strings
2648 @param nodelist: Node names
2649 @type node_image: dict of (name, L{objects.Node})
2650 @param node_image: Node objects
2651 @type instanceinfo: dict of (name, L{objects.Instance})
2652 @param instanceinfo: Instance objects
2653 @rtype: {instance: {node: [(succes, payload)]}}
2654 @return: a dictionary of per-instance dictionaries with nodes as
2655 keys and disk information as values; the disk information is a
2656 list of tuples (success, payload)
2659 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2662 node_disks_devonly = {}
2663 diskless_instances = set()
2664 diskless = constants.DT_DISKLESS
2666 for nname in nodelist:
2667 node_instances = list(itertools.chain(node_image[nname].pinst,
2668 node_image[nname].sinst))
2669 diskless_instances.update(inst for inst in node_instances
2670 if instanceinfo[inst].disk_template == diskless)
2671 disks = [(inst, disk)
2672 for inst in node_instances
2673 for disk in instanceinfo[inst].disks]
2676 # No need to collect data
2679 node_disks[nname] = disks
2681 # Creating copies as SetDiskID below will modify the objects and that can
2682 # lead to incorrect data returned from nodes
2683 devonly = [dev.Copy() for (_, dev) in disks]
2686 self.cfg.SetDiskID(dev, nname)
2688 node_disks_devonly[nname] = devonly
2690 assert len(node_disks) == len(node_disks_devonly)
2692 # Collect data from all nodes with disks
2693 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2696 assert len(result) == len(node_disks)
2700 for (nname, nres) in result.items():
2701 disks = node_disks[nname]
2704 # No data from this node
2705 data = len(disks) * [(False, "node offline")]
2708 _ErrorIf(msg, constants.CV_ENODERPC, nname,
2709 "while getting disk information: %s", msg)
2711 # No data from this node
2712 data = len(disks) * [(False, msg)]
2715 for idx, i in enumerate(nres.payload):
2716 if isinstance(i, (tuple, list)) and len(i) == 2:
2719 logging.warning("Invalid result from node %s, entry %d: %s",
2721 data.append((False, "Invalid result from the remote node"))
2723 for ((inst, _), status) in zip(disks, data):
2724 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2726 # Add empty entries for diskless instances.
2727 for inst in diskless_instances:
2728 assert inst not in instdisk
2731 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2732 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2733 compat.all(isinstance(s, (tuple, list)) and
2734 len(s) == 2 for s in statuses)
2735 for inst, nnames in instdisk.items()
2736 for nname, statuses in nnames.items())
2737 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2742 def _SshNodeSelector(group_uuid, all_nodes):
2743 """Create endless iterators for all potential SSH check hosts.
2746 nodes = [node for node in all_nodes
2747 if (node.group != group_uuid and
2749 keyfunc = operator.attrgetter("group")
2751 return map(itertools.cycle,
2752 [sorted(map(operator.attrgetter("name"), names))
2753 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2757 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2758 """Choose which nodes should talk to which other nodes.
2760 We will make nodes contact all nodes in their group, and one node from
2763 @warning: This algorithm has a known issue if one node group is much
2764 smaller than others (e.g. just one node). In such a case all other
2765 nodes will talk to the single node.
2768 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2769 sel = cls._SshNodeSelector(group_uuid, all_nodes)
2771 return (online_nodes,
2772 dict((name, sorted([i.next() for i in sel]))
2773 for name in online_nodes))
2775 def BuildHooksEnv(self):
2778 Cluster-Verify hooks just ran in the post phase and their failure makes
2779 the output be logged in the verify output and the verification to fail.
2783 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2786 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2787 for node in self.my_node_info.values())
2791 def BuildHooksNodes(self):
2792 """Build hooks nodes.
2795 return ([], self.my_node_names)
2797 def Exec(self, feedback_fn):
2798 """Verify integrity of the node group, performing various test on nodes.
2801 # This method has too many local variables. pylint: disable=R0914
2802 feedback_fn("* Verifying group '%s'" % self.group_info.name)
2804 if not self.my_node_names:
2806 feedback_fn("* Empty node group, skipping verification")
2810 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2811 verbose = self.op.verbose
2812 self._feedback_fn = feedback_fn
2814 vg_name = self.cfg.GetVGName()
2815 drbd_helper = self.cfg.GetDRBDHelper()
2816 cluster = self.cfg.GetClusterInfo()
2817 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2818 hypervisors = cluster.enabled_hypervisors
2819 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2821 i_non_redundant = [] # Non redundant instances
2822 i_non_a_balanced = [] # Non auto-balanced instances
2823 i_offline = 0 # Count of offline instances
2824 n_offline = 0 # Count of offline nodes
2825 n_drained = 0 # Count of nodes being drained
2826 node_vol_should = {}
2828 # FIXME: verify OS list
2831 filemap = _ComputeAncillaryFiles(cluster, False)
2833 # do local checksums
2834 master_node = self.master_node = self.cfg.GetMasterNode()
2835 master_ip = self.cfg.GetMasterIP()
2837 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2840 if self.cfg.GetUseExternalMipScript():
2841 user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
2843 node_verify_param = {
2844 constants.NV_FILELIST:
2845 utils.UniqueSequence(filename
2846 for files in filemap
2847 for filename in files),
2848 constants.NV_NODELIST:
2849 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
2850 self.all_node_info.values()),
2851 constants.NV_HYPERVISOR: hypervisors,
2852 constants.NV_HVPARAMS:
2853 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2854 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2855 for node in node_data_list
2856 if not node.offline],
2857 constants.NV_INSTANCELIST: hypervisors,
2858 constants.NV_VERSION: None,
2859 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2860 constants.NV_NODESETUP: None,
2861 constants.NV_TIME: None,
2862 constants.NV_MASTERIP: (master_node, master_ip),
2863 constants.NV_OSLIST: None,
2864 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2865 constants.NV_USERSCRIPTS: user_scripts,
2868 if vg_name is not None:
2869 node_verify_param[constants.NV_VGLIST] = None
2870 node_verify_param[constants.NV_LVLIST] = vg_name
2871 node_verify_param[constants.NV_PVLIST] = [vg_name]
2872 node_verify_param[constants.NV_DRBDLIST] = None
2875 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2878 # FIXME: this needs to be changed per node-group, not cluster-wide
2880 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2881 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2882 bridges.add(default_nicpp[constants.NIC_LINK])
2883 for instance in self.my_inst_info.values():
2884 for nic in instance.nics:
2885 full_nic = cluster.SimpleFillNIC(nic.nicparams)
2886 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2887 bridges.add(full_nic[constants.NIC_LINK])
2890 node_verify_param[constants.NV_BRIDGES] = list(bridges)
2892 # Build our expected cluster state
2893 node_image = dict((node.name, self.NodeImage(offline=node.offline,
2895 vm_capable=node.vm_capable))
2896 for node in node_data_list)
2900 for node in self.all_node_info.values():
2901 path = _SupportsOob(self.cfg, node)
2902 if path and path not in oob_paths:
2903 oob_paths.append(path)
2906 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2908 for instance in self.my_inst_names:
2909 inst_config = self.my_inst_info[instance]
2911 for nname in inst_config.all_nodes:
2912 if nname not in node_image:
2913 gnode = self.NodeImage(name=nname)
2914 gnode.ghost = (nname not in self.all_node_info)
2915 node_image[nname] = gnode
2917 inst_config.MapLVsByNode(node_vol_should)
2919 pnode = inst_config.primary_node
2920 node_image[pnode].pinst.append(instance)
2922 for snode in inst_config.secondary_nodes:
2923 nimg = node_image[snode]
2924 nimg.sinst.append(instance)
2925 if pnode not in nimg.sbp:
2926 nimg.sbp[pnode] = []
2927 nimg.sbp[pnode].append(instance)
2929 # At this point, we have the in-memory data structures complete,
2930 # except for the runtime information, which we'll gather next
2932 # Due to the way our RPC system works, exact response times cannot be
2933 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2934 # time before and after executing the request, we can at least have a time
2936 nvinfo_starttime = time.time()
2937 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2939 self.cfg.GetClusterName())
2940 nvinfo_endtime = time.time()
2942 if self.extra_lv_nodes and vg_name is not None:
2944 self.rpc.call_node_verify(self.extra_lv_nodes,
2945 {constants.NV_LVLIST: vg_name},
2946 self.cfg.GetClusterName())
2948 extra_lv_nvinfo = {}
2950 all_drbd_map = self.cfg.ComputeDRBDMap()
2952 feedback_fn("* Gathering disk information (%s nodes)" %
2953 len(self.my_node_names))
2954 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2957 feedback_fn("* Verifying configuration file consistency")
2959 # If not all nodes are being checked, we need to make sure the master node
2960 # and a non-checked vm_capable node are in the list.
2961 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2963 vf_nvinfo = all_nvinfo.copy()
2964 vf_node_info = list(self.my_node_info.values())
2965 additional_nodes = []
2966 if master_node not in self.my_node_info:
2967 additional_nodes.append(master_node)
2968 vf_node_info.append(self.all_node_info[master_node])
2969 # Add the first vm_capable node we find which is not included
2970 for node in absent_nodes:
2971 nodeinfo = self.all_node_info[node]
2972 if nodeinfo.vm_capable and not nodeinfo.offline:
2973 additional_nodes.append(node)
2974 vf_node_info.append(self.all_node_info[node])
2976 key = constants.NV_FILELIST
2977 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2978 {key: node_verify_param[key]},
2979 self.cfg.GetClusterName()))
2981 vf_nvinfo = all_nvinfo
2982 vf_node_info = self.my_node_info.values()
2984 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2986 feedback_fn("* Verifying node status")
2990 for node_i in node_data_list:
2992 nimg = node_image[node]
2996 feedback_fn("* Skipping offline node %s" % (node,))
3000 if node == master_node:
3002 elif node_i.master_candidate:
3003 ntype = "master candidate"
3004 elif node_i.drained:
3010 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3012 msg = all_nvinfo[node].fail_msg
3013 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3016 nimg.rpc_fail = True
3019 nresult = all_nvinfo[node].payload
3021 nimg.call_ok = self._VerifyNode(node_i, nresult)
3022 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3023 self._VerifyNodeNetwork(node_i, nresult)
3024 self._VerifyNodeUserScripts(node_i, nresult)
3025 self._VerifyOob(node_i, nresult)
3028 self._VerifyNodeLVM(node_i, nresult, vg_name)
3029 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3032 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3033 self._UpdateNodeInstances(node_i, nresult, nimg)
3034 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3035 self._UpdateNodeOS(node_i, nresult, nimg)
3037 if not nimg.os_fail:
3038 if refos_img is None:
3040 self._VerifyNodeOS(node_i, nimg, refos_img)
3041 self._VerifyNodeBridges(node_i, nresult, bridges)
3043 # Check whether all running instancies are primary for the node. (This
3044 # can no longer be done from _VerifyInstance below, since some of the
3045 # wrong instances could be from other node groups.)
3046 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3048 for inst in non_primary_inst:
3049 # FIXME: investigate best way to handle offline insts
3050 if inst.admin_state == constants.ADMINST_OFFLINE:
3052 feedback_fn("* Skipping offline instance %s" % inst.name)
3055 test = inst in self.all_inst_info
3056 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3057 "instance should not run on node %s", node_i.name)
3058 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3059 "node is running unknown instance %s", inst)
3061 for node, result in extra_lv_nvinfo.items():
3062 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3063 node_image[node], vg_name)
3065 feedback_fn("* Verifying instance status")
3066 for instance in self.my_inst_names:
3068 feedback_fn("* Verifying instance %s" % instance)
3069 inst_config = self.my_inst_info[instance]
3070 self._VerifyInstance(instance, inst_config, node_image,
3072 inst_nodes_offline = []
3074 pnode = inst_config.primary_node
3075 pnode_img = node_image[pnode]
3076 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3077 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3078 " primary node failed", instance)
3080 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3082 constants.CV_EINSTANCEBADNODE, instance,
3083 "instance is marked as running and lives on offline node %s",
3084 inst_config.primary_node)
3086 # If the instance is non-redundant we cannot survive losing its primary
3087 # node, so we are not N+1 compliant. On the other hand we have no disk
3088 # templates with more than one secondary so that situation is not well
3090 # FIXME: does not support file-backed instances
3091 if not inst_config.secondary_nodes:
3092 i_non_redundant.append(instance)
3094 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3095 constants.CV_EINSTANCELAYOUT,
3096 instance, "instance has multiple secondary nodes: %s",
3097 utils.CommaJoin(inst_config.secondary_nodes),
3098 code=self.ETYPE_WARNING)
3100 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3101 pnode = inst_config.primary_node
3102 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3103 instance_groups = {}
3105 for node in instance_nodes:
3106 instance_groups.setdefault(self.all_node_info[node].group,
3110 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3111 # Sort so that we always list the primary node first.
3112 for group, nodes in sorted(instance_groups.items(),
3113 key=lambda (_, nodes): pnode in nodes,
3116 self._ErrorIf(len(instance_groups) > 1,
3117 constants.CV_EINSTANCESPLITGROUPS,
3118 instance, "instance has primary and secondary nodes in"
3119 " different groups: %s", utils.CommaJoin(pretty_list),
3120 code=self.ETYPE_WARNING)
3122 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3123 i_non_a_balanced.append(instance)
3125 for snode in inst_config.secondary_nodes:
3126 s_img = node_image[snode]
3127 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3128 snode, "instance %s, connection to secondary node failed",
3132 inst_nodes_offline.append(snode)
3134 # warn that the instance lives on offline nodes
3135 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3136 "instance has offline secondary node(s) %s",
3137 utils.CommaJoin(inst_nodes_offline))
3138 # ... or ghost/non-vm_capable nodes
3139 for node in inst_config.all_nodes:
3140 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3141 instance, "instance lives on ghost node %s", node)
3142 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3143 instance, "instance lives on non-vm_capable node %s", node)
3145 feedback_fn("* Verifying orphan volumes")
3146 reserved = utils.FieldSet(*cluster.reserved_lvs)
3148 # We will get spurious "unknown volume" warnings if any node of this group
3149 # is secondary for an instance whose primary is in another group. To avoid
3150 # them, we find these instances and add their volumes to node_vol_should.
3151 for inst in self.all_inst_info.values():
3152 for secondary in inst.secondary_nodes:
3153 if (secondary in self.my_node_info
3154 and inst.name not in self.my_inst_info):
3155 inst.MapLVsByNode(node_vol_should)
3158 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3160 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3161 feedback_fn("* Verifying N+1 Memory redundancy")
3162 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3164 feedback_fn("* Other Notes")
3166 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3167 % len(i_non_redundant))
3169 if i_non_a_balanced:
3170 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3171 % len(i_non_a_balanced))
3174 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3177 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3180 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3184 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3185 """Analyze the post-hooks' result
3187 This method analyses the hook result, handles it, and sends some
3188 nicely-formatted feedback back to the user.
3190 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3191 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3192 @param hooks_results: the results of the multi-node hooks rpc call
3193 @param feedback_fn: function used send feedback back to the caller
3194 @param lu_result: previous Exec result
3195 @return: the new Exec result, based on the previous result
3199 # We only really run POST phase hooks, only for non-empty groups,
3200 # and are only interested in their results
3201 if not self.my_node_names:
3204 elif phase == constants.HOOKS_PHASE_POST:
3205 # Used to change hooks' output to proper indentation
3206 feedback_fn("* Hooks Results")
3207 assert hooks_results, "invalid result from hooks"
3209 for node_name in hooks_results:
3210 res = hooks_results[node_name]
3212 test = msg and not res.offline
3213 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3214 "Communication failure in hooks execution: %s", msg)
3215 if res.offline or msg:
3216 # No need to investigate payload if node is offline or gave
3219 for script, hkr, output in res.payload:
3220 test = hkr == constants.HKR_FAIL
3221 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3222 "Script %s failed, output:", script)
3224 output = self._HOOKS_INDENT_RE.sub(" ", output)
3225 feedback_fn("%s" % output)
3231 class LUClusterVerifyDisks(NoHooksLU):
3232 """Verifies the cluster disks status.
3237 def ExpandNames(self):
3238 self.share_locks = _ShareAll()
3239 self.needed_locks = {
3240 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3243 def Exec(self, feedback_fn):
3244 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3246 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3247 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3248 for group in group_names])
3251 class LUGroupVerifyDisks(NoHooksLU):
3252 """Verifies the status of all disks in a node group.
3257 def ExpandNames(self):
3258 # Raises errors.OpPrereqError on its own if group can't be found
3259 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3261 self.share_locks = _ShareAll()
3262 self.needed_locks = {
3263 locking.LEVEL_INSTANCE: [],
3264 locking.LEVEL_NODEGROUP: [],
3265 locking.LEVEL_NODE: [],
3268 def DeclareLocks(self, level):
3269 if level == locking.LEVEL_INSTANCE:
3270 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3272 # Lock instances optimistically, needs verification once node and group
3273 # locks have been acquired
3274 self.needed_locks[locking.LEVEL_INSTANCE] = \
3275 self.cfg.GetNodeGroupInstances(self.group_uuid)
3277 elif level == locking.LEVEL_NODEGROUP:
3278 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3280 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3281 set([self.group_uuid] +
3282 # Lock all groups used by instances optimistically; this requires
3283 # going via the node before it's locked, requiring verification
3286 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3287 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3289 elif level == locking.LEVEL_NODE:
3290 # This will only lock the nodes in the group to be verified which contain
3292 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3293 self._LockInstancesNodes()
3295 # Lock all nodes in group to be verified
3296 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3297 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3298 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3300 def CheckPrereq(self):
3301 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3302 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3303 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3305 assert self.group_uuid in owned_groups
3307 # Check if locked instances are still correct
3308 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3310 # Get instance information
3311 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3313 # Check if node groups for locked instances are still correct
3314 for (instance_name, inst) in self.instances.items():
3315 assert owned_nodes.issuperset(inst.all_nodes), \
3316 "Instance %s's nodes changed while we kept the lock" % instance_name
3318 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3321 assert self.group_uuid in inst_groups, \
3322 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3324 def Exec(self, feedback_fn):
3325 """Verify integrity of cluster disks.
3327 @rtype: tuple of three items
3328 @return: a tuple of (dict of node-to-node_error, list of instances
3329 which need activate-disks, dict of instance: (node, volume) for
3334 res_instances = set()
3337 nv_dict = _MapInstanceDisksToNodes([inst
3338 for inst in self.instances.values()
3339 if inst.admin_state == constants.ADMINST_UP])
3342 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3343 set(self.cfg.GetVmCapableNodeList()))
3345 node_lvs = self.rpc.call_lv_list(nodes, [])
3347 for (node, node_res) in node_lvs.items():
3348 if node_res.offline:
3351 msg = node_res.fail_msg
3353 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3354 res_nodes[node] = msg
3357 for lv_name, (_, _, lv_online) in node_res.payload.items():
3358 inst = nv_dict.pop((node, lv_name), None)
3359 if not (lv_online or inst is None):
3360 res_instances.add(inst)
3362 # any leftover items in nv_dict are missing LVs, let's arrange the data
3364 for key, inst in nv_dict.iteritems():
3365 res_missing.setdefault(inst, []).append(list(key))
3367 return (res_nodes, list(res_instances), res_missing)
3370 class LUClusterRepairDiskSizes(NoHooksLU):
3371 """Verifies the cluster disks sizes.
3376 def ExpandNames(self):
3377 if self.op.instances:
3378 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3379 self.needed_locks = {
3380 locking.LEVEL_NODE_RES: [],
3381 locking.LEVEL_INSTANCE: self.wanted_names,
3383 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3385 self.wanted_names = None
3386 self.needed_locks = {
3387 locking.LEVEL_NODE_RES: locking.ALL_SET,
3388 locking.LEVEL_INSTANCE: locking.ALL_SET,
3390 self.share_locks = {
3391 locking.LEVEL_NODE_RES: 1,
3392 locking.LEVEL_INSTANCE: 0,
3395 def DeclareLocks(self, level):
3396 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3397 self._LockInstancesNodes(primary_only=True, level=level)
3399 def CheckPrereq(self):
3400 """Check prerequisites.
3402 This only checks the optional instance list against the existing names.
3405 if self.wanted_names is None:
3406 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3408 self.wanted_instances = \
3409 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3411 def _EnsureChildSizes(self, disk):
3412 """Ensure children of the disk have the needed disk size.
3414 This is valid mainly for DRBD8 and fixes an issue where the
3415 children have smaller disk size.
3417 @param disk: an L{ganeti.objects.Disk} object
3420 if disk.dev_type == constants.LD_DRBD8:
3421 assert disk.children, "Empty children for DRBD8?"
3422 fchild = disk.children[0]
3423 mismatch = fchild.size < disk.size
3425 self.LogInfo("Child disk has size %d, parent %d, fixing",
3426 fchild.size, disk.size)
3427 fchild.size = disk.size
3429 # and we recurse on this child only, not on the metadev
3430 return self._EnsureChildSizes(fchild) or mismatch
3434 def Exec(self, feedback_fn):
3435 """Verify the size of cluster disks.
3438 # TODO: check child disks too
3439 # TODO: check differences in size between primary/secondary nodes
3441 for instance in self.wanted_instances:
3442 pnode = instance.primary_node
3443 if pnode not in per_node_disks:
3444 per_node_disks[pnode] = []
3445 for idx, disk in enumerate(instance.disks):
3446 per_node_disks[pnode].append((instance, idx, disk))
3448 assert not (frozenset(per_node_disks.keys()) -
3449 self.owned_locks(locking.LEVEL_NODE_RES)), \
3450 "Not owning correct locks"
3451 assert not self.owned_locks(locking.LEVEL_NODE)
3454 for node, dskl in per_node_disks.items():
3455 newl = [v[2].Copy() for v in dskl]
3457 self.cfg.SetDiskID(dsk, node)
3458 result = self.rpc.call_blockdev_getsize(node, newl)
3460 self.LogWarning("Failure in blockdev_getsize call to node"
3461 " %s, ignoring", node)
3463 if len(result.payload) != len(dskl):
3464 logging.warning("Invalid result from node %s: len(dksl)=%d,"
3465 " result.payload=%s", node, len(dskl), result.payload)
3466 self.LogWarning("Invalid result from node %s, ignoring node results",
3469 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3471 self.LogWarning("Disk %d of instance %s did not return size"
3472 " information, ignoring", idx, instance.name)
3474 if not isinstance(size, (int, long)):
3475 self.LogWarning("Disk %d of instance %s did not return valid"
3476 " size information, ignoring", idx, instance.name)
3479 if size != disk.size:
3480 self.LogInfo("Disk %d of instance %s has mismatched size,"
3481 " correcting: recorded %d, actual %d", idx,
3482 instance.name, disk.size, size)
3484 self.cfg.Update(instance, feedback_fn)
3485 changed.append((instance.name, idx, size))
3486 if self._EnsureChildSizes(disk):
3487 self.cfg.Update(instance, feedback_fn)
3488 changed.append((instance.name, idx, disk.size))
3492 class LUClusterRename(LogicalUnit):
3493 """Rename the cluster.
3496 HPATH = "cluster-rename"
3497 HTYPE = constants.HTYPE_CLUSTER
3499 def BuildHooksEnv(self):
3504 "OP_TARGET": self.cfg.GetClusterName(),
3505 "NEW_NAME": self.op.name,
3508 def BuildHooksNodes(self):
3509 """Build hooks nodes.
3512 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3514 def CheckPrereq(self):
3515 """Verify that the passed name is a valid one.
3518 hostname = netutils.GetHostname(name=self.op.name,
3519 family=self.cfg.GetPrimaryIPFamily())
3521 new_name = hostname.name
3522 self.ip = new_ip = hostname.ip
3523 old_name = self.cfg.GetClusterName()
3524 old_ip = self.cfg.GetMasterIP()
3525 if new_name == old_name and new_ip == old_ip:
3526 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3527 " cluster has changed",
3529 if new_ip != old_ip:
3530 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3531 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3532 " reachable on the network" %
3533 new_ip, errors.ECODE_NOTUNIQUE)
3535 self.op.name = new_name
3537 def Exec(self, feedback_fn):
3538 """Rename the cluster.
3541 clustername = self.op.name
3544 # shutdown the master IP
3545 master_params = self.cfg.GetMasterNetworkParameters()
3546 ems = self.cfg.GetUseExternalMipScript()
3547 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3549 result.Raise("Could not disable the master role")
3552 cluster = self.cfg.GetClusterInfo()
3553 cluster.cluster_name = clustername
3554 cluster.master_ip = new_ip
3555 self.cfg.Update(cluster, feedback_fn)
3557 # update the known hosts file
3558 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3559 node_list = self.cfg.GetOnlineNodeList()
3561 node_list.remove(master_params.name)
3564 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3566 master_params.ip = new_ip
3567 result = self.rpc.call_node_activate_master_ip(master_params.name,
3569 msg = result.fail_msg
3571 self.LogWarning("Could not re-enable the master role on"
3572 " the master, please restart manually: %s", msg)
3577 def _ValidateNetmask(cfg, netmask):
3578 """Checks if a netmask is valid.
3580 @type cfg: L{config.ConfigWriter}
3581 @param cfg: The cluster configuration
3583 @param netmask: the netmask to be verified
3584 @raise errors.OpPrereqError: if the validation fails
3587 ip_family = cfg.GetPrimaryIPFamily()
3589 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3590 except errors.ProgrammerError:
3591 raise errors.OpPrereqError("Invalid primary ip family: %s." %
3593 if not ipcls.ValidateNetmask(netmask):
3594 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3598 class LUClusterSetParams(LogicalUnit):
3599 """Change the parameters of the cluster.
3602 HPATH = "cluster-modify"
3603 HTYPE = constants.HTYPE_CLUSTER
3606 def CheckArguments(self):
3610 if self.op.uid_pool:
3611 uidpool.CheckUidPool(self.op.uid_pool)
3613 if self.op.add_uids:
3614 uidpool.CheckUidPool(self.op.add_uids)
3616 if self.op.remove_uids:
3617 uidpool.CheckUidPool(self.op.remove_uids)
3619 if self.op.master_netmask is not None:
3620 _ValidateNetmask(self.cfg, self.op.master_netmask)
3622 if self.op.diskparams:
3623 for dt_params in self.op.diskparams.values():
3624 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3626 def ExpandNames(self):
3627 # FIXME: in the future maybe other cluster params won't require checking on
3628 # all nodes to be modified.
3629 self.needed_locks = {
3630 locking.LEVEL_NODE: locking.ALL_SET,
3632 self.share_locks[locking.LEVEL_NODE] = 1
3634 def BuildHooksEnv(self):
3639 "OP_TARGET": self.cfg.GetClusterName(),
3640 "NEW_VG_NAME": self.op.vg_name,
3643 def BuildHooksNodes(self):
3644 """Build hooks nodes.
3647 mn = self.cfg.GetMasterNode()
3650 def CheckPrereq(self):
3651 """Check prerequisites.
3653 This checks whether the given params don't conflict and
3654 if the given volume group is valid.
3657 if self.op.vg_name is not None and not self.op.vg_name:
3658 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3659 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3660 " instances exist", errors.ECODE_INVAL)
3662 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3663 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3664 raise errors.OpPrereqError("Cannot disable drbd helper while"
3665 " drbd-based instances exist",
3668 node_list = self.owned_locks(locking.LEVEL_NODE)
3670 # if vg_name not None, checks given volume group on all nodes
3672 vglist = self.rpc.call_vg_list(node_list)
3673 for node in node_list:
3674 msg = vglist[node].fail_msg
3676 # ignoring down node
3677 self.LogWarning("Error while gathering data on node %s"
3678 " (ignoring node): %s", node, msg)
3680 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3682 constants.MIN_VG_SIZE)
3684 raise errors.OpPrereqError("Error on node '%s': %s" %
3685 (node, vgstatus), errors.ECODE_ENVIRON)
3687 if self.op.drbd_helper:
3688 # checks given drbd helper on all nodes
3689 helpers = self.rpc.call_drbd_helper(node_list)
3690 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3692 self.LogInfo("Not checking drbd helper on offline node %s", node)
3694 msg = helpers[node].fail_msg
3696 raise errors.OpPrereqError("Error checking drbd helper on node"
3697 " '%s': %s" % (node, msg),
3698 errors.ECODE_ENVIRON)
3699 node_helper = helpers[node].payload
3700 if node_helper != self.op.drbd_helper:
3701 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3702 (node, node_helper), errors.ECODE_ENVIRON)
3704 self.cluster = cluster = self.cfg.GetClusterInfo()
3705 # validate params changes
3706 if self.op.beparams:
3707 objects.UpgradeBeParams(self.op.beparams)
3708 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3709 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3711 if self.op.ndparams:
3712 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3713 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3715 # TODO: we need a more general way to handle resetting
3716 # cluster-level parameters to default values
3717 if self.new_ndparams["oob_program"] == "":
3718 self.new_ndparams["oob_program"] = \
3719 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3721 if self.op.hv_state:
3722 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3723 self.cluster.hv_state_static)
3724 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3725 for hv, values in new_hv_state.items())
3727 if self.op.disk_state:
3728 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3729 self.cluster.disk_state_static)
3730 self.new_disk_state = \
3731 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3732 for name, values in svalues.items()))
3733 for storage, svalues in new_disk_state.items())
3737 for key, value in self.op.ipolicy.items():
3738 utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
3739 ipolicy[key] = _GetUpdatedParams(cluster.ipolicy.get(key, {}),
3741 objects.InstancePolicy.CheckParameterSyntax(ipolicy)
3742 self.new_ipolicy = ipolicy
3744 if self.op.nicparams:
3745 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3746 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3747 objects.NIC.CheckParameterSyntax(self.new_nicparams)
3750 # check all instances for consistency
3751 for instance in self.cfg.GetAllInstancesInfo().values():
3752 for nic_idx, nic in enumerate(instance.nics):
3753 params_copy = copy.deepcopy(nic.nicparams)
3754 params_filled = objects.FillDict(self.new_nicparams, params_copy)
3756 # check parameter syntax
3758 objects.NIC.CheckParameterSyntax(params_filled)
3759 except errors.ConfigurationError, err:
3760 nic_errors.append("Instance %s, nic/%d: %s" %
3761 (instance.name, nic_idx, err))
3763 # if we're moving instances to routed, check that they have an ip
3764 target_mode = params_filled[constants.NIC_MODE]
3765 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3766 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3767 " address" % (instance.name, nic_idx))
3769 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3770 "\n".join(nic_errors))
3772 # hypervisor list/parameters
3773 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3774 if self.op.hvparams:
3775 for hv_name, hv_dict in self.op.hvparams.items():
3776 if hv_name not in self.new_hvparams:
3777 self.new_hvparams[hv_name] = hv_dict
3779 self.new_hvparams[hv_name].update(hv_dict)
3781 # disk template parameters
3782 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
3783 if self.op.diskparams:
3784 for dt_name, dt_params in self.op.diskparams.items():
3785 if dt_name not in self.op.diskparams:
3786 self.new_diskparams[dt_name] = dt_params
3788 self.new_diskparams[dt_name].update(dt_params)
3790 # os hypervisor parameters
3791 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3793 for os_name, hvs in self.op.os_hvp.items():
3794 if os_name not in self.new_os_hvp:
3795 self.new_os_hvp[os_name] = hvs
3797 for hv_name, hv_dict in hvs.items():
3798 if hv_name not in self.new_os_hvp[os_name]:
3799 self.new_os_hvp[os_name][hv_name] = hv_dict
3801 self.new_os_hvp[os_name][hv_name].update(hv_dict)
3804 self.new_osp = objects.FillDict(cluster.osparams, {})
3805 if self.op.osparams:
3806 for os_name, osp in self.op.osparams.items():
3807 if os_name not in self.new_osp:
3808 self.new_osp[os_name] = {}
3810 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3813 if not self.new_osp[os_name]:
3814 # we removed all parameters
3815 del self.new_osp[os_name]
3817 # check the parameter validity (remote check)
3818 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3819 os_name, self.new_osp[os_name])
3821 # changes to the hypervisor list
3822 if self.op.enabled_hypervisors is not None:
3823 self.hv_list = self.op.enabled_hypervisors
3824 for hv in self.hv_list:
3825 # if the hypervisor doesn't already exist in the cluster
3826 # hvparams, we initialize it to empty, and then (in both
3827 # cases) we make sure to fill the defaults, as we might not
3828 # have a complete defaults list if the hypervisor wasn't
3830 if hv not in new_hvp:
3832 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3833 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3835 self.hv_list = cluster.enabled_hypervisors
3837 if self.op.hvparams or self.op.enabled_hypervisors is not None:
3838 # either the enabled list has changed, or the parameters have, validate
3839 for hv_name, hv_params in self.new_hvparams.items():
3840 if ((self.op.hvparams and hv_name in self.op.hvparams) or
3841 (self.op.enabled_hypervisors and
3842 hv_name in self.op.enabled_hypervisors)):
3843 # either this is a new hypervisor, or its parameters have changed
3844 hv_class = hypervisor.GetHypervisor(hv_name)
3845 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3846 hv_class.CheckParameterSyntax(hv_params)
3847 _CheckHVParams(self, node_list, hv_name, hv_params)
3850 # no need to check any newly-enabled hypervisors, since the
3851 # defaults have already been checked in the above code-block
3852 for os_name, os_hvp in self.new_os_hvp.items():
3853 for hv_name, hv_params in os_hvp.items():
3854 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3855 # we need to fill in the new os_hvp on top of the actual hv_p
3856 cluster_defaults = self.new_hvparams.get(hv_name, {})
3857 new_osp = objects.FillDict(cluster_defaults, hv_params)
3858 hv_class = hypervisor.GetHypervisor(hv_name)
3859 hv_class.CheckParameterSyntax(new_osp)
3860 _CheckHVParams(self, node_list, hv_name, new_osp)
3862 if self.op.default_iallocator:
3863 alloc_script = utils.FindFile(self.op.default_iallocator,
3864 constants.IALLOCATOR_SEARCH_PATH,
3866 if alloc_script is None:
3867 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3868 " specified" % self.op.default_iallocator,
3871 def Exec(self, feedback_fn):
3872 """Change the parameters of the cluster.
3875 if self.op.vg_name is not None:
3876 new_volume = self.op.vg_name
3879 if new_volume != self.cfg.GetVGName():
3880 self.cfg.SetVGName(new_volume)
3882 feedback_fn("Cluster LVM configuration already in desired"
3883 " state, not changing")
3884 if self.op.drbd_helper is not None:
3885 new_helper = self.op.drbd_helper
3888 if new_helper != self.cfg.GetDRBDHelper():
3889 self.cfg.SetDRBDHelper(new_helper)
3891 feedback_fn("Cluster DRBD helper already in desired state,"
3893 if self.op.hvparams:
3894 self.cluster.hvparams = self.new_hvparams
3896 self.cluster.os_hvp = self.new_os_hvp
3897 if self.op.enabled_hypervisors is not None:
3898 self.cluster.hvparams = self.new_hvparams
3899 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3900 if self.op.beparams:
3901 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3902 if self.op.nicparams:
3903 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3905 self.cluster.ipolicy = self.new_ipolicy
3906 if self.op.osparams:
3907 self.cluster.osparams = self.new_osp
3908 if self.op.ndparams:
3909 self.cluster.ndparams = self.new_ndparams
3910 if self.op.diskparams:
3911 self.cluster.diskparams = self.new_diskparams
3912 if self.op.hv_state:
3913 self.cluster.hv_state_static = self.new_hv_state
3914 if self.op.disk_state:
3915 self.cluster.disk_state_static = self.new_disk_state
3917 if self.op.candidate_pool_size is not None:
3918 self.cluster.candidate_pool_size = self.op.candidate_pool_size
3919 # we need to update the pool size here, otherwise the save will fail
3920 _AdjustCandidatePool(self, [])
3922 if self.op.maintain_node_health is not None:
3923 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
3924 feedback_fn("Note: CONFD was disabled at build time, node health"
3925 " maintenance is not useful (still enabling it)")
3926 self.cluster.maintain_node_health = self.op.maintain_node_health
3928 if self.op.prealloc_wipe_disks is not None:
3929 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3931 if self.op.add_uids is not None:
3932 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3934 if self.op.remove_uids is not None:
3935 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3937 if self.op.uid_pool is not None:
3938 self.cluster.uid_pool = self.op.uid_pool
3940 if self.op.default_iallocator is not None:
3941 self.cluster.default_iallocator = self.op.default_iallocator
3943 if self.op.reserved_lvs is not None:
3944 self.cluster.reserved_lvs = self.op.reserved_lvs
3946 if self.op.use_external_mip_script is not None:
3947 self.cluster.use_external_mip_script = self.op.use_external_mip_script
3949 def helper_os(aname, mods, desc):
3951 lst = getattr(self.cluster, aname)
3952 for key, val in mods:
3953 if key == constants.DDM_ADD:
3955 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3958 elif key == constants.DDM_REMOVE:
3962 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3964 raise errors.ProgrammerError("Invalid modification '%s'" % key)
3966 if self.op.hidden_os:
3967 helper_os("hidden_os", self.op.hidden_os, "hidden")
3969 if self.op.blacklisted_os:
3970 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3972 if self.op.master_netdev:
3973 master_params = self.cfg.GetMasterNetworkParameters()
3974 ems = self.cfg.GetUseExternalMipScript()
3975 feedback_fn("Shutting down master ip on the current netdev (%s)" %
3976 self.cluster.master_netdev)
3977 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3979 result.Raise("Could not disable the master ip")
3980 feedback_fn("Changing master_netdev from %s to %s" %
3981 (master_params.netdev, self.op.master_netdev))
3982 self.cluster.master_netdev = self.op.master_netdev
3984 if self.op.master_netmask:
3985 master_params = self.cfg.GetMasterNetworkParameters()
3986 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
3987 result = self.rpc.call_node_change_master_netmask(master_params.name,
3988 master_params.netmask,
3989 self.op.master_netmask,
3991 master_params.netdev)
3993 msg = "Could not change the master IP netmask: %s" % result.fail_msg
3996 self.cluster.master_netmask = self.op.master_netmask
3998 self.cfg.Update(self.cluster, feedback_fn)
4000 if self.op.master_netdev:
4001 master_params = self.cfg.GetMasterNetworkParameters()
4002 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4003 self.op.master_netdev)
4004 ems = self.cfg.GetUseExternalMipScript()
4005 result = self.rpc.call_node_activate_master_ip(master_params.name,
4008 self.LogWarning("Could not re-enable the master ip on"
4009 " the master, please restart manually: %s",
4013 def _UploadHelper(lu, nodes, fname):
4014 """Helper for uploading a file and showing warnings.
4017 if os.path.exists(fname):
4018 result = lu.rpc.call_upload_file(nodes, fname)
4019 for to_node, to_result in result.items():
4020 msg = to_result.fail_msg
4022 msg = ("Copy of file %s to node %s failed: %s" %
4023 (fname, to_node, msg))
4024 lu.proc.LogWarning(msg)
4027 def _ComputeAncillaryFiles(cluster, redist):
4028 """Compute files external to Ganeti which need to be consistent.
4030 @type redist: boolean
4031 @param redist: Whether to include files which need to be redistributed
4034 # Compute files for all nodes
4036 constants.SSH_KNOWN_HOSTS_FILE,
4037 constants.CONFD_HMAC_KEY,
4038 constants.CLUSTER_DOMAIN_SECRET_FILE,
4039 constants.SPICE_CERT_FILE,
4040 constants.SPICE_CACERT_FILE,
4041 constants.RAPI_USERS_FILE,
4045 files_all.update(constants.ALL_CERT_FILES)
4046 files_all.update(ssconf.SimpleStore().GetFileList())
4048 # we need to ship at least the RAPI certificate
4049 files_all.add(constants.RAPI_CERT_FILE)
4051 if cluster.modify_etc_hosts:
4052 files_all.add(constants.ETC_HOSTS)
4054 # Files which are optional, these must:
4055 # - be present in one other category as well
4056 # - either exist or not exist on all nodes of that category (mc, vm all)
4058 constants.RAPI_USERS_FILE,
4061 # Files which should only be on master candidates
4065 files_mc.add(constants.CLUSTER_CONF_FILE)
4067 # FIXME: this should also be replicated but Ganeti doesn't support files_mc
4069 files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)
4071 # Files which should only be on VM-capable nodes
4072 files_vm = set(filename
4073 for hv_name in cluster.enabled_hypervisors
4074 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4076 files_opt |= set(filename
4077 for hv_name in cluster.enabled_hypervisors
4078 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4080 # Filenames in each category must be unique
4081 all_files_set = files_all | files_mc | files_vm
4082 assert (len(all_files_set) ==
4083 sum(map(len, [files_all, files_mc, files_vm]))), \
4084 "Found file listed in more than one file list"
4086 # Optional files must be present in one other category
4087 assert all_files_set.issuperset(files_opt), \
4088 "Optional file not in a different required list"
4090 return (files_all, files_opt, files_mc, files_vm)
4093 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4094 """Distribute additional files which are part of the cluster configuration.
4096 ConfigWriter takes care of distributing the config and ssconf files, but
4097 there are more files which should be distributed to all nodes. This function
4098 makes sure those are copied.
4100 @param lu: calling logical unit
4101 @param additional_nodes: list of nodes not in the config to distribute to
4102 @type additional_vm: boolean
4103 @param additional_vm: whether the additional nodes are vm-capable or not
4106 # Gather target nodes
4107 cluster = lu.cfg.GetClusterInfo()
4108 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4110 online_nodes = lu.cfg.GetOnlineNodeList()
4111 vm_nodes = lu.cfg.GetVmCapableNodeList()
4113 if additional_nodes is not None:
4114 online_nodes.extend(additional_nodes)
4116 vm_nodes.extend(additional_nodes)
4118 # Never distribute to master node
4119 for nodelist in [online_nodes, vm_nodes]:
4120 if master_info.name in nodelist:
4121 nodelist.remove(master_info.name)
4124 (files_all, _, files_mc, files_vm) = \
4125 _ComputeAncillaryFiles(cluster, True)
4127 # Never re-distribute configuration file from here
4128 assert not (constants.CLUSTER_CONF_FILE in files_all or
4129 constants.CLUSTER_CONF_FILE in files_vm)
4130 assert not files_mc, "Master candidates not handled in this function"
4133 (online_nodes, files_all),
4134 (vm_nodes, files_vm),
4138 for (node_list, files) in filemap:
4140 _UploadHelper(lu, node_list, fname)
4143 class LUClusterRedistConf(NoHooksLU):
4144 """Force the redistribution of cluster configuration.
4146 This is a very simple LU.
4151 def ExpandNames(self):
4152 self.needed_locks = {
4153 locking.LEVEL_NODE: locking.ALL_SET,
4155 self.share_locks[locking.LEVEL_NODE] = 1
4157 def Exec(self, feedback_fn):
4158 """Redistribute the configuration.
4161 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4162 _RedistributeAncillaryFiles(self)
4165 class LUClusterActivateMasterIp(NoHooksLU):
4166 """Activate the master IP on the master node.
4169 def Exec(self, feedback_fn):
4170 """Activate the master IP.
4173 master_params = self.cfg.GetMasterNetworkParameters()
4174 ems = self.cfg.GetUseExternalMipScript()
4175 result = self.rpc.call_node_activate_master_ip(master_params.name,
4177 result.Raise("Could not activate the master IP")
4180 class LUClusterDeactivateMasterIp(NoHooksLU):
4181 """Deactivate the master IP on the master node.
4184 def Exec(self, feedback_fn):
4185 """Deactivate the master IP.
4188 master_params = self.cfg.GetMasterNetworkParameters()
4189 ems = self.cfg.GetUseExternalMipScript()
4190 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4192 result.Raise("Could not deactivate the master IP")
4195 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4196 """Sleep and poll for an instance's disk to sync.
4199 if not instance.disks or disks is not None and not disks:
4202 disks = _ExpandCheckDisks(instance, disks)
4205 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4207 node = instance.primary_node
4210 lu.cfg.SetDiskID(dev, node)
4212 # TODO: Convert to utils.Retry
4215 degr_retries = 10 # in seconds, as we sleep 1 second each time
4219 cumul_degraded = False
4220 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
4221 msg = rstats.fail_msg
4223 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4226 raise errors.RemoteError("Can't contact node %s for mirror data,"
4227 " aborting." % node)
4230 rstats = rstats.payload
4232 for i, mstat in enumerate(rstats):
4234 lu.LogWarning("Can't compute data for node %s/%s",
4235 node, disks[i].iv_name)
4238 cumul_degraded = (cumul_degraded or
4239 (mstat.is_degraded and mstat.sync_percent is None))
4240 if mstat.sync_percent is not None:
4242 if mstat.estimated_time is not None:
4243 rem_time = ("%s remaining (estimated)" %
4244 utils.FormatSeconds(mstat.estimated_time))
4245 max_time = mstat.estimated_time
4247 rem_time = "no time estimate"
4248 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4249 (disks[i].iv_name, mstat.sync_percent, rem_time))
4251 # if we're done but degraded, let's do a few small retries, to
4252 # make sure we see a stable and not transient situation; therefore
4253 # we force restart of the loop
4254 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4255 logging.info("Degraded disks found, %d retries left", degr_retries)
4263 time.sleep(min(60, max_time))
4266 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4267 return not cumul_degraded
4270 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
4271 """Check that mirrors are not degraded.
4273 The ldisk parameter, if True, will change the test from the
4274 is_degraded attribute (which represents overall non-ok status for
4275 the device(s)) to the ldisk (representing the local storage status).
4278 lu.cfg.SetDiskID(dev, node)
4282 if on_primary or dev.AssembleOnSecondary():
4283 rstats = lu.rpc.call_blockdev_find(node, dev)
4284 msg = rstats.fail_msg
4286 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4288 elif not rstats.payload:
4289 lu.LogWarning("Can't find disk on node %s", node)
4293 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4295 result = result and not rstats.payload.is_degraded
4298 for child in dev.children:
4299 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
4304 class LUOobCommand(NoHooksLU):
4305 """Logical unit for OOB handling.
4309 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4311 def ExpandNames(self):
4312 """Gather locks we need.
4315 if self.op.node_names:
4316 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4317 lock_names = self.op.node_names
4319 lock_names = locking.ALL_SET
4321 self.needed_locks = {
4322 locking.LEVEL_NODE: lock_names,
4325 def CheckPrereq(self):
4326 """Check prerequisites.
4329 - the node exists in the configuration
4332 Any errors are signaled by raising errors.OpPrereqError.
4336 self.master_node = self.cfg.GetMasterNode()
4338 assert self.op.power_delay >= 0.0
4340 if self.op.node_names:
4341 if (self.op.command in self._SKIP_MASTER and
4342 self.master_node in self.op.node_names):
4343 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4344 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4346 if master_oob_handler:
4347 additional_text = ("run '%s %s %s' if you want to operate on the"
4348 " master regardless") % (master_oob_handler,
4352 additional_text = "it does not support out-of-band operations"
4354 raise errors.OpPrereqError(("Operating on the master node %s is not"
4355 " allowed for %s; %s") %
4356 (self.master_node, self.op.command,
4357 additional_text), errors.ECODE_INVAL)
4359 self.op.node_names = self.cfg.GetNodeList()
4360 if self.op.command in self._SKIP_MASTER:
4361 self.op.node_names.remove(self.master_node)
4363 if self.op.command in self._SKIP_MASTER:
4364 assert self.master_node not in self.op.node_names
4366 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4368 raise errors.OpPrereqError("Node %s not found" % node_name,
4371 self.nodes.append(node)
4373 if (not self.op.ignore_status and
4374 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4375 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4376 " not marked offline") % node_name,
4379 def Exec(self, feedback_fn):
4380 """Execute OOB and return result if we expect any.
4383 master_node = self.master_node
4386 for idx, node in enumerate(utils.NiceSort(self.nodes,
4387 key=lambda node: node.name)):
4388 node_entry = [(constants.RS_NORMAL, node.name)]
4389 ret.append(node_entry)
4391 oob_program = _SupportsOob(self.cfg, node)
4394 node_entry.append((constants.RS_UNAVAIL, None))
4397 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4398 self.op.command, oob_program, node.name)
4399 result = self.rpc.call_run_oob(master_node, oob_program,
4400 self.op.command, node.name,
4404 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4405 node.name, result.fail_msg)
4406 node_entry.append((constants.RS_NODATA, None))
4409 self._CheckPayload(result)
4410 except errors.OpExecError, err:
4411 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4413 node_entry.append((constants.RS_NODATA, None))
4415 if self.op.command == constants.OOB_HEALTH:
4416 # For health we should log important events
4417 for item, status in result.payload:
4418 if status in [constants.OOB_STATUS_WARNING,
4419 constants.OOB_STATUS_CRITICAL]:
4420 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4421 item, node.name, status)
4423 if self.op.command == constants.OOB_POWER_ON:
4425 elif self.op.command == constants.OOB_POWER_OFF:
4426 node.powered = False
4427 elif self.op.command == constants.OOB_POWER_STATUS:
4428 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4429 if powered != node.powered:
4430 logging.warning(("Recorded power state (%s) of node '%s' does not"
4431 " match actual power state (%s)"), node.powered,
4434 # For configuration changing commands we should update the node
4435 if self.op.command in (constants.OOB_POWER_ON,
4436 constants.OOB_POWER_OFF):
4437 self.cfg.Update(node, feedback_fn)
4439 node_entry.append((constants.RS_NORMAL, result.payload))
4441 if (self.op.command == constants.OOB_POWER_ON and
4442 idx < len(self.nodes) - 1):
4443 time.sleep(self.op.power_delay)
4447 def _CheckPayload(self, result):
4448 """Checks if the payload is valid.
4450 @param result: RPC result
4451 @raises errors.OpExecError: If payload is not valid
4455 if self.op.command == constants.OOB_HEALTH:
4456 if not isinstance(result.payload, list):
4457 errs.append("command 'health' is expected to return a list but got %s" %
4458 type(result.payload))
4460 for item, status in result.payload:
4461 if status not in constants.OOB_STATUSES:
4462 errs.append("health item '%s' has invalid status '%s'" %
4465 if self.op.command == constants.OOB_POWER_STATUS:
4466 if not isinstance(result.payload, dict):
4467 errs.append("power-status is expected to return a dict but got %s" %
4468 type(result.payload))
4470 if self.op.command in [
4471 constants.OOB_POWER_ON,
4472 constants.OOB_POWER_OFF,
4473 constants.OOB_POWER_CYCLE,
4475 if result.payload is not None:
4476 errs.append("%s is expected to not return payload but got '%s'" %
4477 (self.op.command, result.payload))
4480 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4481 utils.CommaJoin(errs))
4484 class _OsQuery(_QueryBase):
4485 FIELDS = query.OS_FIELDS
4487 def ExpandNames(self, lu):
4488 # Lock all nodes in shared mode
4489 # Temporary removal of locks, should be reverted later
4490 # TODO: reintroduce locks when they are lighter-weight
4491 lu.needed_locks = {}
4492 #self.share_locks[locking.LEVEL_NODE] = 1
4493 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4495 # The following variables interact with _QueryBase._GetNames
4497 self.wanted = self.names
4499 self.wanted = locking.ALL_SET
4501 self.do_locking = self.use_locking
4503 def DeclareLocks(self, lu, level):
4507 def _DiagnoseByOS(rlist):
4508 """Remaps a per-node return list into an a per-os per-node dictionary
4510 @param rlist: a map with node names as keys and OS objects as values
4513 @return: a dictionary with osnames as keys and as value another
4514 map, with nodes as keys and tuples of (path, status, diagnose,
4515 variants, parameters, api_versions) as values, eg::
4517 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4518 (/srv/..., False, "invalid api")],
4519 "node2": [(/srv/..., True, "", [], [])]}
4524 # we build here the list of nodes that didn't fail the RPC (at RPC
4525 # level), so that nodes with a non-responding node daemon don't
4526 # make all OSes invalid
4527 good_nodes = [node_name for node_name in rlist
4528 if not rlist[node_name].fail_msg]
4529 for node_name, nr in rlist.items():
4530 if nr.fail_msg or not nr.payload:
4532 for (name, path, status, diagnose, variants,
4533 params, api_versions) in nr.payload:
4534 if name not in all_os:
4535 # build a list of nodes for this os containing empty lists
4536 # for each node in node_list
4538 for nname in good_nodes:
4539 all_os[name][nname] = []
4540 # convert params from [name, help] to (name, help)
4541 params = [tuple(v) for v in params]
4542 all_os[name][node_name].append((path, status, diagnose,
4543 variants, params, api_versions))
4546 def _GetQueryData(self, lu):
4547 """Computes the list of nodes and their attributes.
4550 # Locking is not used
4551 assert not (compat.any(lu.glm.is_owned(level)
4552 for level in locking.LEVELS
4553 if level != locking.LEVEL_CLUSTER) or
4554 self.do_locking or self.use_locking)
4556 valid_nodes = [node.name
4557 for node in lu.cfg.GetAllNodesInfo().values()
4558 if not node.offline and node.vm_capable]
4559 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4560 cluster = lu.cfg.GetClusterInfo()
4564 for (os_name, os_data) in pol.items():
4565 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4566 hidden=(os_name in cluster.hidden_os),
4567 blacklisted=(os_name in cluster.blacklisted_os))
4571 api_versions = set()
4573 for idx, osl in enumerate(os_data.values()):
4574 info.valid = bool(info.valid and osl and osl[0][1])
4578 (node_variants, node_params, node_api) = osl[0][3:6]
4581 variants.update(node_variants)
4582 parameters.update(node_params)
4583 api_versions.update(node_api)
4585 # Filter out inconsistent values
4586 variants.intersection_update(node_variants)
4587 parameters.intersection_update(node_params)
4588 api_versions.intersection_update(node_api)
4590 info.variants = list(variants)
4591 info.parameters = list(parameters)
4592 info.api_versions = list(api_versions)
4594 data[os_name] = info
4596 # Prepare data in requested order
4597 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4601 class LUOsDiagnose(NoHooksLU):
4602 """Logical unit for OS diagnose/query.
4608 def _BuildFilter(fields, names):
4609 """Builds a filter for querying OSes.
4612 name_filter = qlang.MakeSimpleFilter("name", names)
4614 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4615 # respective field is not requested
4616 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4617 for fname in ["hidden", "blacklisted"]
4618 if fname not in fields]
4619 if "valid" not in fields:
4620 status_filter.append([qlang.OP_TRUE, "valid"])
4623 status_filter.insert(0, qlang.OP_AND)
4625 status_filter = None
4627 if name_filter and status_filter:
4628 return [qlang.OP_AND, name_filter, status_filter]
4632 return status_filter
4634 def CheckArguments(self):
4635 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4636 self.op.output_fields, False)
4638 def ExpandNames(self):
4639 self.oq.ExpandNames(self)
4641 def Exec(self, feedback_fn):
4642 return self.oq.OldStyleQuery(self)
4645 class LUNodeRemove(LogicalUnit):
4646 """Logical unit for removing a node.
4649 HPATH = "node-remove"
4650 HTYPE = constants.HTYPE_NODE
4652 def BuildHooksEnv(self):
4655 This doesn't run on the target node in the pre phase as a failed
4656 node would then be impossible to remove.
4660 "OP_TARGET": self.op.node_name,
4661 "NODE_NAME": self.op.node_name,
4664 def BuildHooksNodes(self):
4665 """Build hooks nodes.
4668 all_nodes = self.cfg.GetNodeList()
4670 all_nodes.remove(self.op.node_name)
4672 logging.warning("Node '%s', which is about to be removed, was not found"
4673 " in the list of all nodes", self.op.node_name)
4674 return (all_nodes, all_nodes)
4676 def CheckPrereq(self):
4677 """Check prerequisites.
4680 - the node exists in the configuration
4681 - it does not have primary or secondary instances
4682 - it's not the master
4684 Any errors are signaled by raising errors.OpPrereqError.
4687 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4688 node = self.cfg.GetNodeInfo(self.op.node_name)
4689 assert node is not None
4691 masternode = self.cfg.GetMasterNode()
4692 if node.name == masternode:
4693 raise errors.OpPrereqError("Node is the master node, failover to another"
4694 " node is required", errors.ECODE_INVAL)
4696 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4697 if node.name in instance.all_nodes:
4698 raise errors.OpPrereqError("Instance %s is still running on the node,"
4699 " please remove first" % instance_name,
4701 self.op.node_name = node.name
4704 def Exec(self, feedback_fn):
4705 """Removes the node from the cluster.
4709 logging.info("Stopping the node daemon and removing configs from node %s",
4712 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4714 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
4717 # Promote nodes to master candidate as needed
4718 _AdjustCandidatePool(self, exceptions=[node.name])
4719 self.context.RemoveNode(node.name)
4721 # Run post hooks on the node before it's removed
4722 _RunPostHook(self, node.name)
4724 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4725 msg = result.fail_msg
4727 self.LogWarning("Errors encountered on the remote node while leaving"
4728 " the cluster: %s", msg)
4730 # Remove node from our /etc/hosts
4731 if self.cfg.GetClusterInfo().modify_etc_hosts:
4732 master_node = self.cfg.GetMasterNode()
4733 result = self.rpc.call_etc_hosts_modify(master_node,
4734 constants.ETC_HOSTS_REMOVE,
4736 result.Raise("Can't update hosts file with new host data")
4737 _RedistributeAncillaryFiles(self)
4740 class _NodeQuery(_QueryBase):
4741 FIELDS = query.NODE_FIELDS
4743 def ExpandNames(self, lu):
4744 lu.needed_locks = {}
4745 lu.share_locks = _ShareAll()
4748 self.wanted = _GetWantedNodes(lu, self.names)
4750 self.wanted = locking.ALL_SET
4752 self.do_locking = (self.use_locking and
4753 query.NQ_LIVE in self.requested_data)
4756 # If any non-static field is requested we need to lock the nodes
4757 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4759 def DeclareLocks(self, lu, level):
4762 def _GetQueryData(self, lu):
4763 """Computes the list of nodes and their attributes.
4766 all_info = lu.cfg.GetAllNodesInfo()
4768 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4770 # Gather data as requested
4771 if query.NQ_LIVE in self.requested_data:
4772 # filter out non-vm_capable nodes
4773 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4775 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
4776 [lu.cfg.GetHypervisorType()])
4777 live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
4778 for (name, nresult) in node_data.items()
4779 if not nresult.fail_msg and nresult.payload)
4783 if query.NQ_INST in self.requested_data:
4784 node_to_primary = dict([(name, set()) for name in nodenames])
4785 node_to_secondary = dict([(name, set()) for name in nodenames])
4787 inst_data = lu.cfg.GetAllInstancesInfo()
4789 for inst in inst_data.values():
4790 if inst.primary_node in node_to_primary:
4791 node_to_primary[inst.primary_node].add(inst.name)
4792 for secnode in inst.secondary_nodes:
4793 if secnode in node_to_secondary:
4794 node_to_secondary[secnode].add(inst.name)
4796 node_to_primary = None
4797 node_to_secondary = None
4799 if query.NQ_OOB in self.requested_data:
4800 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4801 for name, node in all_info.iteritems())
4805 if query.NQ_GROUP in self.requested_data:
4806 groups = lu.cfg.GetAllNodeGroupsInfo()
4810 return query.NodeQueryData([all_info[name] for name in nodenames],
4811 live_data, lu.cfg.GetMasterNode(),
4812 node_to_primary, node_to_secondary, groups,
4813 oob_support, lu.cfg.GetClusterInfo())
4816 class LUNodeQuery(NoHooksLU):
4817 """Logical unit for querying nodes.
4820 # pylint: disable=W0142
4823 def CheckArguments(self):
4824 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4825 self.op.output_fields, self.op.use_locking)
4827 def ExpandNames(self):
4828 self.nq.ExpandNames(self)
4830 def DeclareLocks(self, level):
4831 self.nq.DeclareLocks(self, level)
4833 def Exec(self, feedback_fn):
4834 return self.nq.OldStyleQuery(self)
4837 class LUNodeQueryvols(NoHooksLU):
4838 """Logical unit for getting volumes on node(s).
4842 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4843 _FIELDS_STATIC = utils.FieldSet("node")
4845 def CheckArguments(self):
4846 _CheckOutputFields(static=self._FIELDS_STATIC,
4847 dynamic=self._FIELDS_DYNAMIC,
4848 selected=self.op.output_fields)
4850 def ExpandNames(self):
4851 self.share_locks = _ShareAll()
4852 self.needed_locks = {}
4854 if not self.op.nodes:
4855 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4857 self.needed_locks[locking.LEVEL_NODE] = \
4858 _GetWantedNodes(self, self.op.nodes)
4860 def Exec(self, feedback_fn):
4861 """Computes the list of nodes and their attributes.
4864 nodenames = self.owned_locks(locking.LEVEL_NODE)
4865 volumes = self.rpc.call_node_volumes(nodenames)
4867 ilist = self.cfg.GetAllInstancesInfo()
4868 vol2inst = _MapInstanceDisksToNodes(ilist.values())
4871 for node in nodenames:
4872 nresult = volumes[node]
4875 msg = nresult.fail_msg
4877 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
4880 node_vols = sorted(nresult.payload,
4881 key=operator.itemgetter("dev"))
4883 for vol in node_vols:
4885 for field in self.op.output_fields:
4888 elif field == "phys":
4892 elif field == "name":
4894 elif field == "size":
4895 val = int(float(vol["size"]))
4896 elif field == "instance":
4897 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
4899 raise errors.ParameterError(field)
4900 node_output.append(str(val))
4902 output.append(node_output)
4907 class LUNodeQueryStorage(NoHooksLU):
4908 """Logical unit for getting information on storage units on node(s).
4911 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4914 def CheckArguments(self):
4915 _CheckOutputFields(static=self._FIELDS_STATIC,
4916 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4917 selected=self.op.output_fields)
4919 def ExpandNames(self):
4920 self.share_locks = _ShareAll()
4921 self.needed_locks = {}
4924 self.needed_locks[locking.LEVEL_NODE] = \
4925 _GetWantedNodes(self, self.op.nodes)
4927 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4929 def Exec(self, feedback_fn):
4930 """Computes the list of nodes and their attributes.
4933 self.nodes = self.owned_locks(locking.LEVEL_NODE)
4935 # Always get name to sort by
4936 if constants.SF_NAME in self.op.output_fields:
4937 fields = self.op.output_fields[:]
4939 fields = [constants.SF_NAME] + self.op.output_fields
4941 # Never ask for node or type as it's only known to the LU
4942 for extra in [constants.SF_NODE, constants.SF_TYPE]:
4943 while extra in fields:
4944 fields.remove(extra)
4946 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4947 name_idx = field_idx[constants.SF_NAME]
4949 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4950 data = self.rpc.call_storage_list(self.nodes,
4951 self.op.storage_type, st_args,
4952 self.op.name, fields)
4956 for node in utils.NiceSort(self.nodes):
4957 nresult = data[node]
4961 msg = nresult.fail_msg
4963 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4966 rows = dict([(row[name_idx], row) for row in nresult.payload])
4968 for name in utils.NiceSort(rows.keys()):
4973 for field in self.op.output_fields:
4974 if field == constants.SF_NODE:
4976 elif field == constants.SF_TYPE:
4977 val = self.op.storage_type
4978 elif field in field_idx:
4979 val = row[field_idx[field]]
4981 raise errors.ParameterError(field)
4990 class _InstanceQuery(_QueryBase):
4991 FIELDS = query.INSTANCE_FIELDS
4993 def ExpandNames(self, lu):
4994 lu.needed_locks = {}
4995 lu.share_locks = _ShareAll()
4998 self.wanted = _GetWantedInstances(lu, self.names)
5000 self.wanted = locking.ALL_SET
5002 self.do_locking = (self.use_locking and
5003 query.IQ_LIVE in self.requested_data)
5005 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5006 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5007 lu.needed_locks[locking.LEVEL_NODE] = []
5008 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5010 self.do_grouplocks = (self.do_locking and
5011 query.IQ_NODES in self.requested_data)
5013 def DeclareLocks(self, lu, level):
5015 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5016 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5018 # Lock all groups used by instances optimistically; this requires going
5019 # via the node before it's locked, requiring verification later on
5020 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5022 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5023 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5024 elif level == locking.LEVEL_NODE:
5025 lu._LockInstancesNodes() # pylint: disable=W0212
5028 def _CheckGroupLocks(lu):
5029 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5030 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5032 # Check if node groups for locked instances are still correct
5033 for instance_name in owned_instances:
5034 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5036 def _GetQueryData(self, lu):
5037 """Computes the list of instances and their attributes.
5040 if self.do_grouplocks:
5041 self._CheckGroupLocks(lu)
5043 cluster = lu.cfg.GetClusterInfo()
5044 all_info = lu.cfg.GetAllInstancesInfo()
5046 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5048 instance_list = [all_info[name] for name in instance_names]
5049 nodes = frozenset(itertools.chain(*(inst.all_nodes
5050 for inst in instance_list)))
5051 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5054 wrongnode_inst = set()
5056 # Gather data as requested
5057 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5059 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5061 result = node_data[name]
5063 # offline nodes will be in both lists
5064 assert result.fail_msg
5065 offline_nodes.append(name)
5067 bad_nodes.append(name)
5068 elif result.payload:
5069 for inst in result.payload:
5070 if inst in all_info:
5071 if all_info[inst].primary_node == name:
5072 live_data.update(result.payload)
5074 wrongnode_inst.add(inst)
5076 # orphan instance; we don't list it here as we don't
5077 # handle this case yet in the output of instance listing
5078 logging.warning("Orphan instance '%s' found on node %s",
5080 # else no instance is alive
5084 if query.IQ_DISKUSAGE in self.requested_data:
5085 disk_usage = dict((inst.name,
5086 _ComputeDiskSize(inst.disk_template,
5087 [{constants.IDISK_SIZE: disk.size}
5088 for disk in inst.disks]))
5089 for inst in instance_list)
5093 if query.IQ_CONSOLE in self.requested_data:
5095 for inst in instance_list:
5096 if inst.name in live_data:
5097 # Instance is running
5098 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5100 consinfo[inst.name] = None
5101 assert set(consinfo.keys()) == set(instance_names)
5105 if query.IQ_NODES in self.requested_data:
5106 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5108 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5109 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5110 for uuid in set(map(operator.attrgetter("group"),
5116 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5117 disk_usage, offline_nodes, bad_nodes,
5118 live_data, wrongnode_inst, consinfo,
5122 class LUQuery(NoHooksLU):
5123 """Query for resources/items of a certain kind.
5126 # pylint: disable=W0142
5129 def CheckArguments(self):
5130 qcls = _GetQueryImplementation(self.op.what)
5132 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5134 def ExpandNames(self):
5135 self.impl.ExpandNames(self)
5137 def DeclareLocks(self, level):
5138 self.impl.DeclareLocks(self, level)
5140 def Exec(self, feedback_fn):
5141 return self.impl.NewStyleQuery(self)
5144 class LUQueryFields(NoHooksLU):
5145 """Query for resources/items of a certain kind.
5148 # pylint: disable=W0142
5151 def CheckArguments(self):
5152 self.qcls = _GetQueryImplementation(self.op.what)
5154 def ExpandNames(self):
5155 self.needed_locks = {}
5157 def Exec(self, feedback_fn):
5158 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5161 class LUNodeModifyStorage(NoHooksLU):
5162 """Logical unit for modifying a storage volume on a node.
5167 def CheckArguments(self):
5168 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5170 storage_type = self.op.storage_type
5173 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5175 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5176 " modified" % storage_type,
5179 diff = set(self.op.changes.keys()) - modifiable
5181 raise errors.OpPrereqError("The following fields can not be modified for"
5182 " storage units of type '%s': %r" %
5183 (storage_type, list(diff)),
5186 def ExpandNames(self):
5187 self.needed_locks = {
5188 locking.LEVEL_NODE: self.op.node_name,
5191 def Exec(self, feedback_fn):
5192 """Computes the list of nodes and their attributes.
5195 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5196 result = self.rpc.call_storage_modify(self.op.node_name,
5197 self.op.storage_type, st_args,
5198 self.op.name, self.op.changes)
5199 result.Raise("Failed to modify storage unit '%s' on %s" %
5200 (self.op.name, self.op.node_name))
5203 class LUNodeAdd(LogicalUnit):
5204 """Logical unit for adding node to the cluster.
5208 HTYPE = constants.HTYPE_NODE
5209 _NFLAGS = ["master_capable", "vm_capable"]
5211 def CheckArguments(self):
5212 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5213 # validate/normalize the node name
5214 self.hostname = netutils.GetHostname(name=self.op.node_name,
5215 family=self.primary_ip_family)
5216 self.op.node_name = self.hostname.name
5218 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5219 raise errors.OpPrereqError("Cannot readd the master node",
5222 if self.op.readd and self.op.group:
5223 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5224 " being readded", errors.ECODE_INVAL)
5226 def BuildHooksEnv(self):
5229 This will run on all nodes before, and on all nodes + the new node after.
5233 "OP_TARGET": self.op.node_name,
5234 "NODE_NAME": self.op.node_name,
5235 "NODE_PIP": self.op.primary_ip,
5236 "NODE_SIP": self.op.secondary_ip,
5237 "MASTER_CAPABLE": str(self.op.master_capable),
5238 "VM_CAPABLE": str(self.op.vm_capable),
5241 def BuildHooksNodes(self):
5242 """Build hooks nodes.
5245 # Exclude added node
5246 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5247 post_nodes = pre_nodes + [self.op.node_name, ]
5249 return (pre_nodes, post_nodes)
5251 def CheckPrereq(self):
5252 """Check prerequisites.
5255 - the new node is not already in the config
5257 - its parameters (single/dual homed) matches the cluster
5259 Any errors are signaled by raising errors.OpPrereqError.
5263 hostname = self.hostname
5264 node = hostname.name
5265 primary_ip = self.op.primary_ip = hostname.ip
5266 if self.op.secondary_ip is None:
5267 if self.primary_ip_family == netutils.IP6Address.family:
5268 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5269 " IPv4 address must be given as secondary",
5271 self.op.secondary_ip = primary_ip
5273 secondary_ip = self.op.secondary_ip
5274 if not netutils.IP4Address.IsValid(secondary_ip):
5275 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5276 " address" % secondary_ip, errors.ECODE_INVAL)
5278 node_list = cfg.GetNodeList()
5279 if not self.op.readd and node in node_list:
5280 raise errors.OpPrereqError("Node %s is already in the configuration" %
5281 node, errors.ECODE_EXISTS)
5282 elif self.op.readd and node not in node_list:
5283 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5286 self.changed_primary_ip = False
5288 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5289 if self.op.readd and node == existing_node_name:
5290 if existing_node.secondary_ip != secondary_ip:
5291 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5292 " address configuration as before",
5294 if existing_node.primary_ip != primary_ip:
5295 self.changed_primary_ip = True
5299 if (existing_node.primary_ip == primary_ip or
5300 existing_node.secondary_ip == primary_ip or
5301 existing_node.primary_ip == secondary_ip or
5302 existing_node.secondary_ip == secondary_ip):
5303 raise errors.OpPrereqError("New node ip address(es) conflict with"
5304 " existing node %s" % existing_node.name,
5305 errors.ECODE_NOTUNIQUE)
5307 # After this 'if' block, None is no longer a valid value for the
5308 # _capable op attributes
5310 old_node = self.cfg.GetNodeInfo(node)
5311 assert old_node is not None, "Can't retrieve locked node %s" % node
5312 for attr in self._NFLAGS:
5313 if getattr(self.op, attr) is None:
5314 setattr(self.op, attr, getattr(old_node, attr))
5316 for attr in self._NFLAGS:
5317 if getattr(self.op, attr) is None:
5318 setattr(self.op, attr, True)
5320 if self.op.readd and not self.op.vm_capable:
5321 pri, sec = cfg.GetNodeInstances(node)
5323 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5324 " flag set to false, but it already holds"
5325 " instances" % node,
5328 # check that the type of the node (single versus dual homed) is the
5329 # same as for the master
5330 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5331 master_singlehomed = myself.secondary_ip == myself.primary_ip
5332 newbie_singlehomed = secondary_ip == primary_ip
5333 if master_singlehomed != newbie_singlehomed:
5334 if master_singlehomed:
5335 raise errors.OpPrereqError("The master has no secondary ip but the"
5336 " new node has one",
5339 raise errors.OpPrereqError("The master has a secondary ip but the"
5340 " new node doesn't have one",
5343 # checks reachability
5344 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5345 raise errors.OpPrereqError("Node not reachable by ping",
5346 errors.ECODE_ENVIRON)
5348 if not newbie_singlehomed:
5349 # check reachability from my secondary ip to newbie's secondary ip
5350 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5351 source=myself.secondary_ip):
5352 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5353 " based ping to node daemon port",
5354 errors.ECODE_ENVIRON)
5361 if self.op.master_capable:
5362 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5364 self.master_candidate = False
5367 self.new_node = old_node
5369 node_group = cfg.LookupNodeGroup(self.op.group)
5370 self.new_node = objects.Node(name=node,
5371 primary_ip=primary_ip,
5372 secondary_ip=secondary_ip,
5373 master_candidate=self.master_candidate,
5374 offline=False, drained=False,
5377 if self.op.ndparams:
5378 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5380 def Exec(self, feedback_fn):
5381 """Adds the new node to the cluster.
5384 new_node = self.new_node
5385 node = new_node.name
5387 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5390 # We adding a new node so we assume it's powered
5391 new_node.powered = True
5393 # for re-adds, reset the offline/drained/master-candidate flags;
5394 # we need to reset here, otherwise offline would prevent RPC calls
5395 # later in the procedure; this also means that if the re-add
5396 # fails, we are left with a non-offlined, broken node
5398 new_node.drained = new_node.offline = False # pylint: disable=W0201
5399 self.LogInfo("Readding a node, the offline/drained flags were reset")
5400 # if we demote the node, we do cleanup later in the procedure
5401 new_node.master_candidate = self.master_candidate
5402 if self.changed_primary_ip:
5403 new_node.primary_ip = self.op.primary_ip
5405 # copy the master/vm_capable flags
5406 for attr in self._NFLAGS:
5407 setattr(new_node, attr, getattr(self.op, attr))
5409 # notify the user about any possible mc promotion
5410 if new_node.master_candidate:
5411 self.LogInfo("Node will be a master candidate")
5413 if self.op.ndparams:
5414 new_node.ndparams = self.op.ndparams
5416 new_node.ndparams = {}
5418 # check connectivity
5419 result = self.rpc.call_version([node])[node]
5420 result.Raise("Can't get version information from node %s" % node)
5421 if constants.PROTOCOL_VERSION == result.payload:
5422 logging.info("Communication to node %s fine, sw version %s match",
5423 node, result.payload)
5425 raise errors.OpExecError("Version mismatch master version %s,"
5426 " node version %s" %
5427 (constants.PROTOCOL_VERSION, result.payload))
5429 # Add node to our /etc/hosts, and add key to known_hosts
5430 if self.cfg.GetClusterInfo().modify_etc_hosts:
5431 master_node = self.cfg.GetMasterNode()
5432 result = self.rpc.call_etc_hosts_modify(master_node,
5433 constants.ETC_HOSTS_ADD,
5436 result.Raise("Can't update hosts file with new host data")
5438 if new_node.secondary_ip != new_node.primary_ip:
5439 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5442 node_verify_list = [self.cfg.GetMasterNode()]
5443 node_verify_param = {
5444 constants.NV_NODELIST: ([node], {}),
5445 # TODO: do a node-net-test as well?
5448 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5449 self.cfg.GetClusterName())
5450 for verifier in node_verify_list:
5451 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5452 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5454 for failed in nl_payload:
5455 feedback_fn("ssh/hostname verification failed"
5456 " (checking from %s): %s" %
5457 (verifier, nl_payload[failed]))
5458 raise errors.OpExecError("ssh/hostname verification failed")
5461 _RedistributeAncillaryFiles(self)
5462 self.context.ReaddNode(new_node)
5463 # make sure we redistribute the config
5464 self.cfg.Update(new_node, feedback_fn)
5465 # and make sure the new node will not have old files around
5466 if not new_node.master_candidate:
5467 result = self.rpc.call_node_demote_from_mc(new_node.name)
5468 msg = result.fail_msg
5470 self.LogWarning("Node failed to demote itself from master"
5471 " candidate status: %s" % msg)
5473 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5474 additional_vm=self.op.vm_capable)
5475 self.context.AddNode(new_node, self.proc.GetECId())
5478 class LUNodeSetParams(LogicalUnit):
5479 """Modifies the parameters of a node.
5481 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5482 to the node role (as _ROLE_*)
5483 @cvar _R2F: a dictionary from node role to tuples of flags
5484 @cvar _FLAGS: a list of attribute names corresponding to the flags
5487 HPATH = "node-modify"
5488 HTYPE = constants.HTYPE_NODE
5490 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5492 (True, False, False): _ROLE_CANDIDATE,
5493 (False, True, False): _ROLE_DRAINED,
5494 (False, False, True): _ROLE_OFFLINE,
5495 (False, False, False): _ROLE_REGULAR,
5497 _R2F = dict((v, k) for k, v in _F2R.items())
5498 _FLAGS = ["master_candidate", "drained", "offline"]
5500 def CheckArguments(self):
5501 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5502 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5503 self.op.master_capable, self.op.vm_capable,
5504 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5506 if all_mods.count(None) == len(all_mods):
5507 raise errors.OpPrereqError("Please pass at least one modification",
5509 if all_mods.count(True) > 1:
5510 raise errors.OpPrereqError("Can't set the node into more than one"
5511 " state at the same time",
5514 # Boolean value that tells us whether we might be demoting from MC
5515 self.might_demote = (self.op.master_candidate == False or
5516 self.op.offline == True or
5517 self.op.drained == True or
5518 self.op.master_capable == False)
5520 if self.op.secondary_ip:
5521 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5522 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5523 " address" % self.op.secondary_ip,
5526 self.lock_all = self.op.auto_promote and self.might_demote
5527 self.lock_instances = self.op.secondary_ip is not None
5529 def _InstanceFilter(self, instance):
5530 """Filter for getting affected instances.
5533 return (instance.disk_template in constants.DTS_INT_MIRROR and
5534 self.op.node_name in instance.all_nodes)
5536 def ExpandNames(self):
5538 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5540 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5542 # Since modifying a node can have severe effects on currently running
5543 # operations the resource lock is at least acquired in shared mode
5544 self.needed_locks[locking.LEVEL_NODE_RES] = \
5545 self.needed_locks[locking.LEVEL_NODE]
5547 # Get node resource and instance locks in shared mode; they are not used
5548 # for anything but read-only access
5549 self.share_locks[locking.LEVEL_NODE_RES] = 1
5550 self.share_locks[locking.LEVEL_INSTANCE] = 1
5552 if self.lock_instances:
5553 self.needed_locks[locking.LEVEL_INSTANCE] = \
5554 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5556 def BuildHooksEnv(self):
5559 This runs on the master node.
5563 "OP_TARGET": self.op.node_name,
5564 "MASTER_CANDIDATE": str(self.op.master_candidate),
5565 "OFFLINE": str(self.op.offline),
5566 "DRAINED": str(self.op.drained),
5567 "MASTER_CAPABLE": str(self.op.master_capable),
5568 "VM_CAPABLE": str(self.op.vm_capable),
5571 def BuildHooksNodes(self):
5572 """Build hooks nodes.
5575 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5578 def CheckPrereq(self):
5579 """Check prerequisites.
5581 This only checks the instance list against the existing names.
5584 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5586 if self.lock_instances:
5587 affected_instances = \
5588 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5590 # Verify instance locks
5591 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5592 wanted_instances = frozenset(affected_instances.keys())
5593 if wanted_instances - owned_instances:
5594 raise errors.OpPrereqError("Instances affected by changing node %s's"
5595 " secondary IP address have changed since"
5596 " locks were acquired, wanted '%s', have"
5597 " '%s'; retry the operation" %
5599 utils.CommaJoin(wanted_instances),
5600 utils.CommaJoin(owned_instances)),
5603 affected_instances = None
5605 if (self.op.master_candidate is not None or
5606 self.op.drained is not None or
5607 self.op.offline is not None):
5608 # we can't change the master's node flags
5609 if self.op.node_name == self.cfg.GetMasterNode():
5610 raise errors.OpPrereqError("The master role can be changed"
5611 " only via master-failover",
5614 if self.op.master_candidate and not node.master_capable:
5615 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5616 " it a master candidate" % node.name,
5619 if self.op.vm_capable == False:
5620 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5622 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5623 " the vm_capable flag" % node.name,
5626 if node.master_candidate and self.might_demote and not self.lock_all:
5627 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5628 # check if after removing the current node, we're missing master
5630 (mc_remaining, mc_should, _) = \
5631 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5632 if mc_remaining < mc_should:
5633 raise errors.OpPrereqError("Not enough master candidates, please"
5634 " pass auto promote option to allow"
5635 " promotion", errors.ECODE_STATE)
5637 self.old_flags = old_flags = (node.master_candidate,
5638 node.drained, node.offline)
5639 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5640 self.old_role = old_role = self._F2R[old_flags]
5642 # Check for ineffective changes
5643 for attr in self._FLAGS:
5644 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5645 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5646 setattr(self.op, attr, None)
5648 # Past this point, any flag change to False means a transition
5649 # away from the respective state, as only real changes are kept
5651 # TODO: We might query the real power state if it supports OOB
5652 if _SupportsOob(self.cfg, node):
5653 if self.op.offline is False and not (node.powered or
5654 self.op.powered == True):
5655 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5656 " offline status can be reset") %
5658 elif self.op.powered is not None:
5659 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5660 " as it does not support out-of-band"
5661 " handling") % self.op.node_name)
5663 # If we're being deofflined/drained, we'll MC ourself if needed
5664 if (self.op.drained == False or self.op.offline == False or
5665 (self.op.master_capable and not node.master_capable)):
5666 if _DecideSelfPromotion(self):
5667 self.op.master_candidate = True
5668 self.LogInfo("Auto-promoting node to master candidate")
5670 # If we're no longer master capable, we'll demote ourselves from MC
5671 if self.op.master_capable == False and node.master_candidate:
5672 self.LogInfo("Demoting from master candidate")
5673 self.op.master_candidate = False
5676 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5677 if self.op.master_candidate:
5678 new_role = self._ROLE_CANDIDATE
5679 elif self.op.drained:
5680 new_role = self._ROLE_DRAINED
5681 elif self.op.offline:
5682 new_role = self._ROLE_OFFLINE
5683 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5684 # False is still in new flags, which means we're un-setting (the
5686 new_role = self._ROLE_REGULAR
5687 else: # no new flags, nothing, keep old role
5690 self.new_role = new_role
5692 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5693 # Trying to transition out of offline status
5694 # TODO: Use standard RPC runner, but make sure it works when the node is
5695 # still marked offline
5696 result = rpc.BootstrapRunner().call_version([node.name])[node.name]
5698 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5699 " to report its version: %s" %
5700 (node.name, result.fail_msg),
5703 self.LogWarning("Transitioning node from offline to online state"
5704 " without using re-add. Please make sure the node"
5707 if self.op.secondary_ip:
5708 # Ok even without locking, because this can't be changed by any LU
5709 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5710 master_singlehomed = master.secondary_ip == master.primary_ip
5711 if master_singlehomed and self.op.secondary_ip:
5712 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5713 " homed cluster", errors.ECODE_INVAL)
5715 assert not (frozenset(affected_instances) -
5716 self.owned_locks(locking.LEVEL_INSTANCE))
5719 if affected_instances:
5720 raise errors.OpPrereqError("Cannot change secondary IP address:"
5721 " offline node has instances (%s)"
5722 " configured to use it" %
5723 utils.CommaJoin(affected_instances.keys()))
5725 # On online nodes, check that no instances are running, and that
5726 # the node has the new ip and we can reach it.
5727 for instance in affected_instances.values():
5728 _CheckInstanceState(self, instance, INSTANCE_DOWN,
5729 msg="cannot change secondary ip")
5731 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5732 if master.name != node.name:
5733 # check reachability from master secondary ip to new secondary ip
5734 if not netutils.TcpPing(self.op.secondary_ip,
5735 constants.DEFAULT_NODED_PORT,
5736 source=master.secondary_ip):
5737 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5738 " based ping to node daemon port",
5739 errors.ECODE_ENVIRON)
5741 if self.op.ndparams:
5742 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5743 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5744 self.new_ndparams = new_ndparams
5746 if self.op.hv_state:
5747 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
5748 self.node.hv_state_static)
5750 if self.op.disk_state:
5751 self.new_disk_state = \
5752 _MergeAndVerifyDiskState(self.op.disk_state,
5753 self.node.disk_state_static)
5755 def Exec(self, feedback_fn):
5760 old_role = self.old_role
5761 new_role = self.new_role
5765 if self.op.ndparams:
5766 node.ndparams = self.new_ndparams
5768 if self.op.powered is not None:
5769 node.powered = self.op.powered
5771 if self.op.hv_state:
5772 node.hv_state_static = self.new_hv_state
5774 if self.op.disk_state:
5775 node.disk_state_static = self.new_disk_state
5777 for attr in ["master_capable", "vm_capable"]:
5778 val = getattr(self.op, attr)
5780 setattr(node, attr, val)
5781 result.append((attr, str(val)))
5783 if new_role != old_role:
5784 # Tell the node to demote itself, if no longer MC and not offline
5785 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5786 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5788 self.LogWarning("Node failed to demote itself: %s", msg)
5790 new_flags = self._R2F[new_role]
5791 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5793 result.append((desc, str(nf)))
5794 (node.master_candidate, node.drained, node.offline) = new_flags
5796 # we locked all nodes, we adjust the CP before updating this node
5798 _AdjustCandidatePool(self, [node.name])
5800 if self.op.secondary_ip:
5801 node.secondary_ip = self.op.secondary_ip
5802 result.append(("secondary_ip", self.op.secondary_ip))
5804 # this will trigger configuration file update, if needed
5805 self.cfg.Update(node, feedback_fn)
5807 # this will trigger job queue propagation or cleanup if the mc
5809 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5810 self.context.ReaddNode(node)
5815 class LUNodePowercycle(NoHooksLU):
5816 """Powercycles a node.
5821 def CheckArguments(self):
5822 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5823 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5824 raise errors.OpPrereqError("The node is the master and the force"
5825 " parameter was not set",
5828 def ExpandNames(self):
5829 """Locking for PowercycleNode.
5831 This is a last-resort option and shouldn't block on other
5832 jobs. Therefore, we grab no locks.
5835 self.needed_locks = {}
5837 def Exec(self, feedback_fn):
5841 result = self.rpc.call_node_powercycle(self.op.node_name,
5842 self.cfg.GetHypervisorType())
5843 result.Raise("Failed to schedule the reboot")
5844 return result.payload
5847 class LUClusterQuery(NoHooksLU):
5848 """Query cluster configuration.
5853 def ExpandNames(self):
5854 self.needed_locks = {}
5856 def Exec(self, feedback_fn):
5857 """Return cluster config.
5860 cluster = self.cfg.GetClusterInfo()
5863 # Filter just for enabled hypervisors
5864 for os_name, hv_dict in cluster.os_hvp.items():
5865 os_hvp[os_name] = {}
5866 for hv_name, hv_params in hv_dict.items():
5867 if hv_name in cluster.enabled_hypervisors:
5868 os_hvp[os_name][hv_name] = hv_params
5870 # Convert ip_family to ip_version
5871 primary_ip_version = constants.IP4_VERSION
5872 if cluster.primary_ip_family == netutils.IP6Address.family:
5873 primary_ip_version = constants.IP6_VERSION
5876 "software_version": constants.RELEASE_VERSION,
5877 "protocol_version": constants.PROTOCOL_VERSION,
5878 "config_version": constants.CONFIG_VERSION,
5879 "os_api_version": max(constants.OS_API_VERSIONS),
5880 "export_version": constants.EXPORT_VERSION,
5881 "architecture": (platform.architecture()[0], platform.machine()),
5882 "name": cluster.cluster_name,
5883 "master": cluster.master_node,
5884 "default_hypervisor": cluster.primary_hypervisor,
5885 "enabled_hypervisors": cluster.enabled_hypervisors,
5886 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
5887 for hypervisor_name in cluster.enabled_hypervisors]),
5889 "beparams": cluster.beparams,
5890 "osparams": cluster.osparams,
5891 "ipolicy": cluster.ipolicy,
5892 "nicparams": cluster.nicparams,
5893 "ndparams": cluster.ndparams,
5894 "candidate_pool_size": cluster.candidate_pool_size,
5895 "master_netdev": cluster.master_netdev,
5896 "master_netmask": cluster.master_netmask,
5897 "use_external_mip_script": cluster.use_external_mip_script,
5898 "volume_group_name": cluster.volume_group_name,
5899 "drbd_usermode_helper": cluster.drbd_usermode_helper,
5900 "file_storage_dir": cluster.file_storage_dir,
5901 "shared_file_storage_dir": cluster.shared_file_storage_dir,
5902 "maintain_node_health": cluster.maintain_node_health,
5903 "ctime": cluster.ctime,
5904 "mtime": cluster.mtime,
5905 "uuid": cluster.uuid,
5906 "tags": list(cluster.GetTags()),
5907 "uid_pool": cluster.uid_pool,
5908 "default_iallocator": cluster.default_iallocator,
5909 "reserved_lvs": cluster.reserved_lvs,
5910 "primary_ip_version": primary_ip_version,
5911 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
5912 "hidden_os": cluster.hidden_os,
5913 "blacklisted_os": cluster.blacklisted_os,
5919 class LUClusterConfigQuery(NoHooksLU):
5920 """Return configuration values.
5924 _FIELDS_DYNAMIC = utils.FieldSet()
5925 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
5926 "watcher_pause", "volume_group_name")
5928 def CheckArguments(self):
5929 _CheckOutputFields(static=self._FIELDS_STATIC,
5930 dynamic=self._FIELDS_DYNAMIC,
5931 selected=self.op.output_fields)
5933 def ExpandNames(self):
5934 self.needed_locks = {}
5936 def Exec(self, feedback_fn):
5937 """Dump a representation of the cluster config to the standard output.
5941 for field in self.op.output_fields:
5942 if field == "cluster_name":
5943 entry = self.cfg.GetClusterName()
5944 elif field == "master_node":
5945 entry = self.cfg.GetMasterNode()
5946 elif field == "drain_flag":
5947 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
5948 elif field == "watcher_pause":
5949 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
5950 elif field == "volume_group_name":
5951 entry = self.cfg.GetVGName()
5953 raise errors.ParameterError(field)
5954 values.append(entry)
5958 class LUInstanceActivateDisks(NoHooksLU):
5959 """Bring up an instance's disks.
5964 def ExpandNames(self):
5965 self._ExpandAndLockInstance()
5966 self.needed_locks[locking.LEVEL_NODE] = []
5967 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5969 def DeclareLocks(self, level):
5970 if level == locking.LEVEL_NODE:
5971 self._LockInstancesNodes()
5973 def CheckPrereq(self):
5974 """Check prerequisites.
5976 This checks that the instance is in the cluster.
5979 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5980 assert self.instance is not None, \
5981 "Cannot retrieve locked instance %s" % self.op.instance_name
5982 _CheckNodeOnline(self, self.instance.primary_node)
5984 def Exec(self, feedback_fn):
5985 """Activate the disks.
5988 disks_ok, disks_info = \
5989 _AssembleInstanceDisks(self, self.instance,
5990 ignore_size=self.op.ignore_size)
5992 raise errors.OpExecError("Cannot activate block devices")
5997 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5999 """Prepare the block devices for an instance.
6001 This sets up the block devices on all nodes.
6003 @type lu: L{LogicalUnit}
6004 @param lu: the logical unit on whose behalf we execute
6005 @type instance: L{objects.Instance}
6006 @param instance: the instance for whose disks we assemble
6007 @type disks: list of L{objects.Disk} or None
6008 @param disks: which disks to assemble (or all, if None)
6009 @type ignore_secondaries: boolean
6010 @param ignore_secondaries: if true, errors on secondary nodes
6011 won't result in an error return from the function
6012 @type ignore_size: boolean
6013 @param ignore_size: if true, the current known size of the disk
6014 will not be used during the disk activation, useful for cases
6015 when the size is wrong
6016 @return: False if the operation failed, otherwise a list of
6017 (host, instance_visible_name, node_visible_name)
6018 with the mapping from node devices to instance devices
6023 iname = instance.name
6024 disks = _ExpandCheckDisks(instance, disks)
6026 # With the two passes mechanism we try to reduce the window of
6027 # opportunity for the race condition of switching DRBD to primary
6028 # before handshaking occured, but we do not eliminate it
6030 # The proper fix would be to wait (with some limits) until the
6031 # connection has been made and drbd transitions from WFConnection
6032 # into any other network-connected state (Connected, SyncTarget,
6035 # 1st pass, assemble on all nodes in secondary mode
6036 for idx, inst_disk in enumerate(disks):
6037 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6039 node_disk = node_disk.Copy()
6040 node_disk.UnsetSize()
6041 lu.cfg.SetDiskID(node_disk, node)
6042 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
6043 msg = result.fail_msg
6045 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6046 " (is_primary=False, pass=1): %s",
6047 inst_disk.iv_name, node, msg)
6048 if not ignore_secondaries:
6051 # FIXME: race condition on drbd migration to primary
6053 # 2nd pass, do only the primary node
6054 for idx, inst_disk in enumerate(disks):
6057 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6058 if node != instance.primary_node:
6061 node_disk = node_disk.Copy()
6062 node_disk.UnsetSize()
6063 lu.cfg.SetDiskID(node_disk, node)
6064 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
6065 msg = result.fail_msg
6067 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6068 " (is_primary=True, pass=2): %s",
6069 inst_disk.iv_name, node, msg)
6072 dev_path = result.payload
6074 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6076 # leave the disks configured for the primary node
6077 # this is a workaround that would be fixed better by
6078 # improving the logical/physical id handling
6080 lu.cfg.SetDiskID(disk, instance.primary_node)
6082 return disks_ok, device_info
6085 def _StartInstanceDisks(lu, instance, force):
6086 """Start the disks of an instance.
6089 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6090 ignore_secondaries=force)
6092 _ShutdownInstanceDisks(lu, instance)
6093 if force is not None and not force:
6094 lu.proc.LogWarning("", hint="If the message above refers to a"
6096 " you can retry the operation using '--force'.")
6097 raise errors.OpExecError("Disk consistency error")
6100 class LUInstanceDeactivateDisks(NoHooksLU):
6101 """Shutdown an instance's disks.
6106 def ExpandNames(self):
6107 self._ExpandAndLockInstance()
6108 self.needed_locks[locking.LEVEL_NODE] = []
6109 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6111 def DeclareLocks(self, level):
6112 if level == locking.LEVEL_NODE:
6113 self._LockInstancesNodes()
6115 def CheckPrereq(self):
6116 """Check prerequisites.
6118 This checks that the instance is in the cluster.
6121 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6122 assert self.instance is not None, \
6123 "Cannot retrieve locked instance %s" % self.op.instance_name
6125 def Exec(self, feedback_fn):
6126 """Deactivate the disks
6129 instance = self.instance
6131 _ShutdownInstanceDisks(self, instance)
6133 _SafeShutdownInstanceDisks(self, instance)
6136 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6137 """Shutdown block devices of an instance.
6139 This function checks if an instance is running, before calling
6140 _ShutdownInstanceDisks.
6143 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6144 _ShutdownInstanceDisks(lu, instance, disks=disks)
6147 def _ExpandCheckDisks(instance, disks):
6148 """Return the instance disks selected by the disks list
6150 @type disks: list of L{objects.Disk} or None
6151 @param disks: selected disks
6152 @rtype: list of L{objects.Disk}
6153 @return: selected instance disks to act on
6157 return instance.disks
6159 if not set(disks).issubset(instance.disks):
6160 raise errors.ProgrammerError("Can only act on disks belonging to the"
6165 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6166 """Shutdown block devices of an instance.
6168 This does the shutdown on all nodes of the instance.
6170 If the ignore_primary is false, errors on the primary node are
6175 disks = _ExpandCheckDisks(instance, disks)
6178 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6179 lu.cfg.SetDiskID(top_disk, node)
6180 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
6181 msg = result.fail_msg
6183 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6184 disk.iv_name, node, msg)
6185 if ((node == instance.primary_node and not ignore_primary) or
6186 (node != instance.primary_node and not result.offline)):
6191 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6192 """Checks if a node has enough free memory.
6194 This function check if a given node has the needed amount of free
6195 memory. In case the node has less memory or we cannot get the
6196 information from the node, this function raise an OpPrereqError
6199 @type lu: C{LogicalUnit}
6200 @param lu: a logical unit from which we get configuration data
6202 @param node: the node to check
6203 @type reason: C{str}
6204 @param reason: string to use in the error message
6205 @type requested: C{int}
6206 @param requested: the amount of memory in MiB to check for
6207 @type hypervisor_name: C{str}
6208 @param hypervisor_name: the hypervisor to ask for memory stats
6209 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6210 we cannot check the node
6213 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6214 nodeinfo[node].Raise("Can't get data from node %s" % node,
6215 prereq=True, ecode=errors.ECODE_ENVIRON)
6216 (_, _, (hv_info, )) = nodeinfo[node].payload
6218 free_mem = hv_info.get("memory_free", None)
6219 if not isinstance(free_mem, int):
6220 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6221 " was '%s'" % (node, free_mem),
6222 errors.ECODE_ENVIRON)
6223 if requested > free_mem:
6224 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6225 " needed %s MiB, available %s MiB" %
6226 (node, reason, requested, free_mem),
6230 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6231 """Checks if nodes have enough free disk space in the all VGs.
6233 This function check if all given nodes have the needed amount of
6234 free disk. In case any node has less disk or we cannot get the
6235 information from the node, this function raise an OpPrereqError
6238 @type lu: C{LogicalUnit}
6239 @param lu: a logical unit from which we get configuration data
6240 @type nodenames: C{list}
6241 @param nodenames: the list of node names to check
6242 @type req_sizes: C{dict}
6243 @param req_sizes: the hash of vg and corresponding amount of disk in
6245 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6246 or we cannot check the node
6249 for vg, req_size in req_sizes.items():
6250 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
6253 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6254 """Checks if nodes have enough free disk space in the specified VG.
6256 This function check if all given nodes have the needed amount of
6257 free disk. In case any node has less disk or we cannot get the
6258 information from the node, this function raise an OpPrereqError
6261 @type lu: C{LogicalUnit}
6262 @param lu: a logical unit from which we get configuration data
6263 @type nodenames: C{list}
6264 @param nodenames: the list of node names to check
6266 @param vg: the volume group to check
6267 @type requested: C{int}
6268 @param requested: the amount of disk in MiB to check for
6269 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6270 or we cannot check the node
6273 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6274 for node in nodenames:
6275 info = nodeinfo[node]
6276 info.Raise("Cannot get current information from node %s" % node,
6277 prereq=True, ecode=errors.ECODE_ENVIRON)
6278 (_, (vg_info, ), _) = info.payload
6279 vg_free = vg_info.get("vg_free", None)
6280 if not isinstance(vg_free, int):
6281 raise errors.OpPrereqError("Can't compute free disk space on node"
6282 " %s for vg %s, result was '%s'" %
6283 (node, vg, vg_free), errors.ECODE_ENVIRON)
6284 if requested > vg_free:
6285 raise errors.OpPrereqError("Not enough disk space on target node %s"
6286 " vg %s: required %d MiB, available %d MiB" %
6287 (node, vg, requested, vg_free),
6291 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6292 """Checks if nodes have enough physical CPUs
6294 This function checks if all given nodes have the needed number of
6295 physical CPUs. In case any node has less CPUs or we cannot get the
6296 information from the node, this function raises an OpPrereqError
6299 @type lu: C{LogicalUnit}
6300 @param lu: a logical unit from which we get configuration data
6301 @type nodenames: C{list}
6302 @param nodenames: the list of node names to check
6303 @type requested: C{int}
6304 @param requested: the minimum acceptable number of physical CPUs
6305 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6306 or we cannot check the node
6309 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6310 for node in nodenames:
6311 info = nodeinfo[node]
6312 info.Raise("Cannot get current information from node %s" % node,
6313 prereq=True, ecode=errors.ECODE_ENVIRON)
6314 (_, _, (hv_info, )) = info.payload
6315 num_cpus = hv_info.get("cpu_total", None)
6316 if not isinstance(num_cpus, int):
6317 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6318 " on node %s, result was '%s'" %
6319 (node, num_cpus), errors.ECODE_ENVIRON)
6320 if requested > num_cpus:
6321 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6322 "required" % (node, num_cpus, requested),
6326 class LUInstanceStartup(LogicalUnit):
6327 """Starts an instance.
6330 HPATH = "instance-start"
6331 HTYPE = constants.HTYPE_INSTANCE
6334 def CheckArguments(self):
6336 if self.op.beparams:
6337 # fill the beparams dict
6338 objects.UpgradeBeParams(self.op.beparams)
6339 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6341 def ExpandNames(self):
6342 self._ExpandAndLockInstance()
6344 def BuildHooksEnv(self):
6347 This runs on master, primary and secondary nodes of the instance.
6351 "FORCE": self.op.force,
6354 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6358 def BuildHooksNodes(self):
6359 """Build hooks nodes.
6362 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6365 def CheckPrereq(self):
6366 """Check prerequisites.
6368 This checks that the instance is in the cluster.
6371 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6372 assert self.instance is not None, \
6373 "Cannot retrieve locked instance %s" % self.op.instance_name
6376 if self.op.hvparams:
6377 # check hypervisor parameter syntax (locally)
6378 cluster = self.cfg.GetClusterInfo()
6379 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6380 filled_hvp = cluster.FillHV(instance)
6381 filled_hvp.update(self.op.hvparams)
6382 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6383 hv_type.CheckParameterSyntax(filled_hvp)
6384 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6386 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6388 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6390 if self.primary_offline and self.op.ignore_offline_nodes:
6391 self.proc.LogWarning("Ignoring offline primary node")
6393 if self.op.hvparams or self.op.beparams:
6394 self.proc.LogWarning("Overridden parameters are ignored")
6396 _CheckNodeOnline(self, instance.primary_node)
6398 bep = self.cfg.GetClusterInfo().FillBE(instance)
6400 # check bridges existence
6401 _CheckInstanceBridgesExist(self, instance)
6403 remote_info = self.rpc.call_instance_info(instance.primary_node,
6405 instance.hypervisor)
6406 remote_info.Raise("Error checking node %s" % instance.primary_node,
6407 prereq=True, ecode=errors.ECODE_ENVIRON)
6408 if not remote_info.payload: # not running already
6409 _CheckNodeFreeMemory(self, instance.primary_node,
6410 "starting instance %s" % instance.name,
6411 bep[constants.BE_MAXMEM], instance.hypervisor)
6413 def Exec(self, feedback_fn):
6414 """Start the instance.
6417 instance = self.instance
6418 force = self.op.force
6420 if not self.op.no_remember:
6421 self.cfg.MarkInstanceUp(instance.name)
6423 if self.primary_offline:
6424 assert self.op.ignore_offline_nodes
6425 self.proc.LogInfo("Primary node offline, marked instance as started")
6427 node_current = instance.primary_node
6429 _StartInstanceDisks(self, instance, force)
6432 self.rpc.call_instance_start(node_current,
6433 (instance, self.op.hvparams,
6435 self.op.startup_paused)
6436 msg = result.fail_msg
6438 _ShutdownInstanceDisks(self, instance)
6439 raise errors.OpExecError("Could not start instance: %s" % msg)
6442 class LUInstanceReboot(LogicalUnit):
6443 """Reboot an instance.
6446 HPATH = "instance-reboot"
6447 HTYPE = constants.HTYPE_INSTANCE
6450 def ExpandNames(self):
6451 self._ExpandAndLockInstance()
6453 def BuildHooksEnv(self):
6456 This runs on master, primary and secondary nodes of the instance.
6460 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6461 "REBOOT_TYPE": self.op.reboot_type,
6462 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6465 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6469 def BuildHooksNodes(self):
6470 """Build hooks nodes.
6473 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6476 def CheckPrereq(self):
6477 """Check prerequisites.
6479 This checks that the instance is in the cluster.
6482 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6483 assert self.instance is not None, \
6484 "Cannot retrieve locked instance %s" % self.op.instance_name
6485 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6486 _CheckNodeOnline(self, instance.primary_node)
6488 # check bridges existence
6489 _CheckInstanceBridgesExist(self, instance)
6491 def Exec(self, feedback_fn):
6492 """Reboot the instance.
6495 instance = self.instance
6496 ignore_secondaries = self.op.ignore_secondaries
6497 reboot_type = self.op.reboot_type
6499 remote_info = self.rpc.call_instance_info(instance.primary_node,
6501 instance.hypervisor)
6502 remote_info.Raise("Error checking node %s" % instance.primary_node)
6503 instance_running = bool(remote_info.payload)
6505 node_current = instance.primary_node
6507 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6508 constants.INSTANCE_REBOOT_HARD]:
6509 for disk in instance.disks:
6510 self.cfg.SetDiskID(disk, node_current)
6511 result = self.rpc.call_instance_reboot(node_current, instance,
6513 self.op.shutdown_timeout)
6514 result.Raise("Could not reboot instance")
6516 if instance_running:
6517 result = self.rpc.call_instance_shutdown(node_current, instance,
6518 self.op.shutdown_timeout)
6519 result.Raise("Could not shutdown instance for full reboot")
6520 _ShutdownInstanceDisks(self, instance)
6522 self.LogInfo("Instance %s was already stopped, starting now",
6524 _StartInstanceDisks(self, instance, ignore_secondaries)
6525 result = self.rpc.call_instance_start(node_current,
6526 (instance, None, None), False)
6527 msg = result.fail_msg
6529 _ShutdownInstanceDisks(self, instance)
6530 raise errors.OpExecError("Could not start instance for"
6531 " full reboot: %s" % msg)
6533 self.cfg.MarkInstanceUp(instance.name)
6536 class LUInstanceShutdown(LogicalUnit):
6537 """Shutdown an instance.
6540 HPATH = "instance-stop"
6541 HTYPE = constants.HTYPE_INSTANCE
6544 def ExpandNames(self):
6545 self._ExpandAndLockInstance()
6547 def BuildHooksEnv(self):
6550 This runs on master, primary and secondary nodes of the instance.
6553 env = _BuildInstanceHookEnvByObject(self, self.instance)
6554 env["TIMEOUT"] = self.op.timeout
6557 def BuildHooksNodes(self):
6558 """Build hooks nodes.
6561 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6564 def CheckPrereq(self):
6565 """Check prerequisites.
6567 This checks that the instance is in the cluster.
6570 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6571 assert self.instance is not None, \
6572 "Cannot retrieve locked instance %s" % self.op.instance_name
6574 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6576 self.primary_offline = \
6577 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6579 if self.primary_offline and self.op.ignore_offline_nodes:
6580 self.proc.LogWarning("Ignoring offline primary node")
6582 _CheckNodeOnline(self, self.instance.primary_node)
6584 def Exec(self, feedback_fn):
6585 """Shutdown the instance.
6588 instance = self.instance
6589 node_current = instance.primary_node
6590 timeout = self.op.timeout
6592 if not self.op.no_remember:
6593 self.cfg.MarkInstanceDown(instance.name)
6595 if self.primary_offline:
6596 assert self.op.ignore_offline_nodes
6597 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6599 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6600 msg = result.fail_msg
6602 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6604 _ShutdownInstanceDisks(self, instance)
6607 class LUInstanceReinstall(LogicalUnit):
6608 """Reinstall an instance.
6611 HPATH = "instance-reinstall"
6612 HTYPE = constants.HTYPE_INSTANCE
6615 def ExpandNames(self):
6616 self._ExpandAndLockInstance()
6618 def BuildHooksEnv(self):
6621 This runs on master, primary and secondary nodes of the instance.
6624 return _BuildInstanceHookEnvByObject(self, self.instance)
6626 def BuildHooksNodes(self):
6627 """Build hooks nodes.
6630 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6633 def CheckPrereq(self):
6634 """Check prerequisites.
6636 This checks that the instance is in the cluster and is not running.
6639 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6640 assert instance is not None, \
6641 "Cannot retrieve locked instance %s" % self.op.instance_name
6642 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6643 " offline, cannot reinstall")
6644 for node in instance.secondary_nodes:
6645 _CheckNodeOnline(self, node, "Instance secondary node offline,"
6646 " cannot reinstall")
6648 if instance.disk_template == constants.DT_DISKLESS:
6649 raise errors.OpPrereqError("Instance '%s' has no disks" %
6650 self.op.instance_name,
6652 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
6654 if self.op.os_type is not None:
6656 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6657 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6658 instance_os = self.op.os_type
6660 instance_os = instance.os
6662 nodelist = list(instance.all_nodes)
6664 if self.op.osparams:
6665 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6666 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6667 self.os_inst = i_osdict # the new dict (without defaults)
6671 self.instance = instance
6673 def Exec(self, feedback_fn):
6674 """Reinstall the instance.
6677 inst = self.instance
6679 if self.op.os_type is not None:
6680 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6681 inst.os = self.op.os_type
6682 # Write to configuration
6683 self.cfg.Update(inst, feedback_fn)
6685 _StartInstanceDisks(self, inst, None)
6687 feedback_fn("Running the instance OS create scripts...")
6688 # FIXME: pass debug option from opcode to backend
6689 result = self.rpc.call_instance_os_add(inst.primary_node,
6690 (inst, self.os_inst), True,
6691 self.op.debug_level)
6692 result.Raise("Could not install OS for instance %s on node %s" %
6693 (inst.name, inst.primary_node))
6695 _ShutdownInstanceDisks(self, inst)
6698 class LUInstanceRecreateDisks(LogicalUnit):
6699 """Recreate an instance's missing disks.
6702 HPATH = "instance-recreate-disks"
6703 HTYPE = constants.HTYPE_INSTANCE
6706 def CheckArguments(self):
6707 # normalise the disk list
6708 self.op.disks = sorted(frozenset(self.op.disks))
6710 def ExpandNames(self):
6711 self._ExpandAndLockInstance()
6712 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6714 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6715 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6717 self.needed_locks[locking.LEVEL_NODE] = []
6719 def DeclareLocks(self, level):
6720 if level == locking.LEVEL_NODE:
6721 # if we replace the nodes, we only need to lock the old primary,
6722 # otherwise we need to lock all nodes for disk re-creation
6723 primary_only = bool(self.op.nodes)
6724 self._LockInstancesNodes(primary_only=primary_only)
6725 elif level == locking.LEVEL_NODE_RES:
6727 self.needed_locks[locking.LEVEL_NODE_RES] = \
6728 self.needed_locks[locking.LEVEL_NODE][:]
6730 def BuildHooksEnv(self):
6733 This runs on master, primary and secondary nodes of the instance.
6736 return _BuildInstanceHookEnvByObject(self, self.instance)
6738 def BuildHooksNodes(self):
6739 """Build hooks nodes.
6742 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6745 def CheckPrereq(self):
6746 """Check prerequisites.
6748 This checks that the instance is in the cluster and is not running.
6751 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6752 assert instance is not None, \
6753 "Cannot retrieve locked instance %s" % self.op.instance_name
6755 if len(self.op.nodes) != len(instance.all_nodes):
6756 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6757 " %d replacement nodes were specified" %
6758 (instance.name, len(instance.all_nodes),
6759 len(self.op.nodes)),
6761 assert instance.disk_template != constants.DT_DRBD8 or \
6762 len(self.op.nodes) == 2
6763 assert instance.disk_template != constants.DT_PLAIN or \
6764 len(self.op.nodes) == 1
6765 primary_node = self.op.nodes[0]
6767 primary_node = instance.primary_node
6768 _CheckNodeOnline(self, primary_node)
6770 if instance.disk_template == constants.DT_DISKLESS:
6771 raise errors.OpPrereqError("Instance '%s' has no disks" %
6772 self.op.instance_name, errors.ECODE_INVAL)
6773 # if we replace nodes *and* the old primary is offline, we don't
6775 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
6776 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
6777 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6778 if not (self.op.nodes and old_pnode.offline):
6779 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
6780 msg="cannot recreate disks")
6782 if not self.op.disks:
6783 self.op.disks = range(len(instance.disks))
6785 for idx in self.op.disks:
6786 if idx >= len(instance.disks):
6787 raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6789 if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6790 raise errors.OpPrereqError("Can't recreate disks partially and"
6791 " change the nodes at the same time",
6793 self.instance = instance
6795 def Exec(self, feedback_fn):
6796 """Recreate the disks.
6799 instance = self.instance
6801 assert (self.owned_locks(locking.LEVEL_NODE) ==
6802 self.owned_locks(locking.LEVEL_NODE_RES))
6805 mods = [] # keeps track of needed logical_id changes
6807 for idx, disk in enumerate(instance.disks):
6808 if idx not in self.op.disks: # disk idx has not been passed in
6811 # update secondaries for disks, if needed
6813 if disk.dev_type == constants.LD_DRBD8:
6814 # need to update the nodes and minors
6815 assert len(self.op.nodes) == 2
6816 assert len(disk.logical_id) == 6 # otherwise disk internals
6818 (_, _, old_port, _, _, old_secret) = disk.logical_id
6819 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6820 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6821 new_minors[0], new_minors[1], old_secret)
6822 assert len(disk.logical_id) == len(new_id)
6823 mods.append((idx, new_id))
6825 # now that we have passed all asserts above, we can apply the mods
6826 # in a single run (to avoid partial changes)
6827 for idx, new_id in mods:
6828 instance.disks[idx].logical_id = new_id
6830 # change primary node, if needed
6832 instance.primary_node = self.op.nodes[0]
6833 self.LogWarning("Changing the instance's nodes, you will have to"
6834 " remove any disks left on the older nodes manually")
6837 self.cfg.Update(instance, feedback_fn)
6839 _CreateDisks(self, instance, to_skip=to_skip)
6842 class LUInstanceRename(LogicalUnit):
6843 """Rename an instance.
6846 HPATH = "instance-rename"
6847 HTYPE = constants.HTYPE_INSTANCE
6849 def CheckArguments(self):
6853 if self.op.ip_check and not self.op.name_check:
6854 # TODO: make the ip check more flexible and not depend on the name check
6855 raise errors.OpPrereqError("IP address check requires a name check",
6858 def BuildHooksEnv(self):
6861 This runs on master, primary and secondary nodes of the instance.
6864 env = _BuildInstanceHookEnvByObject(self, self.instance)
6865 env["INSTANCE_NEW_NAME"] = self.op.new_name
6868 def BuildHooksNodes(self):
6869 """Build hooks nodes.
6872 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6875 def CheckPrereq(self):
6876 """Check prerequisites.
6878 This checks that the instance is in the cluster and is not running.
6881 self.op.instance_name = _ExpandInstanceName(self.cfg,
6882 self.op.instance_name)
6883 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6884 assert instance is not None
6885 _CheckNodeOnline(self, instance.primary_node)
6886 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
6887 msg="cannot rename")
6888 self.instance = instance
6890 new_name = self.op.new_name
6891 if self.op.name_check:
6892 hostname = netutils.GetHostname(name=new_name)
6893 if hostname.name != new_name:
6894 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6896 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6897 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6898 " same as given hostname '%s'") %
6899 (hostname.name, self.op.new_name),
6901 new_name = self.op.new_name = hostname.name
6902 if (self.op.ip_check and
6903 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6904 raise errors.OpPrereqError("IP %s of instance %s already in use" %
6905 (hostname.ip, new_name),
6906 errors.ECODE_NOTUNIQUE)
6908 instance_list = self.cfg.GetInstanceList()
6909 if new_name in instance_list and new_name != instance.name:
6910 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6911 new_name, errors.ECODE_EXISTS)
6913 def Exec(self, feedback_fn):
6914 """Rename the instance.
6917 inst = self.instance
6918 old_name = inst.name
6920 rename_file_storage = False
6921 if (inst.disk_template in constants.DTS_FILEBASED and
6922 self.op.new_name != inst.name):
6923 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6924 rename_file_storage = True
6926 self.cfg.RenameInstance(inst.name, self.op.new_name)
6927 # Change the instance lock. This is definitely safe while we hold the BGL.
6928 # Otherwise the new lock would have to be added in acquired mode.
6930 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6931 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6933 # re-read the instance from the configuration after rename
6934 inst = self.cfg.GetInstanceInfo(self.op.new_name)
6936 if rename_file_storage:
6937 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6938 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6939 old_file_storage_dir,
6940 new_file_storage_dir)
6941 result.Raise("Could not rename on node %s directory '%s' to '%s'"
6942 " (but the instance has been renamed in Ganeti)" %
6943 (inst.primary_node, old_file_storage_dir,
6944 new_file_storage_dir))
6946 _StartInstanceDisks(self, inst, None)
6948 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6949 old_name, self.op.debug_level)
6950 msg = result.fail_msg
6952 msg = ("Could not run OS rename script for instance %s on node %s"
6953 " (but the instance has been renamed in Ganeti): %s" %
6954 (inst.name, inst.primary_node, msg))
6955 self.proc.LogWarning(msg)
6957 _ShutdownInstanceDisks(self, inst)
6962 class LUInstanceRemove(LogicalUnit):
6963 """Remove an instance.
6966 HPATH = "instance-remove"
6967 HTYPE = constants.HTYPE_INSTANCE
6970 def ExpandNames(self):
6971 self._ExpandAndLockInstance()
6972 self.needed_locks[locking.LEVEL_NODE] = []
6973 self.needed_locks[locking.LEVEL_NODE_RES] = []
6974 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6976 def DeclareLocks(self, level):
6977 if level == locking.LEVEL_NODE:
6978 self._LockInstancesNodes()
6979 elif level == locking.LEVEL_NODE_RES:
6981 self.needed_locks[locking.LEVEL_NODE_RES] = \
6982 self.needed_locks[locking.LEVEL_NODE][:]
6984 def BuildHooksEnv(self):
6987 This runs on master, primary and secondary nodes of the instance.
6990 env = _BuildInstanceHookEnvByObject(self, self.instance)
6991 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6994 def BuildHooksNodes(self):
6995 """Build hooks nodes.
6998 nl = [self.cfg.GetMasterNode()]
6999 nl_post = list(self.instance.all_nodes) + nl
7000 return (nl, nl_post)
7002 def CheckPrereq(self):
7003 """Check prerequisites.
7005 This checks that the instance is in the cluster.
7008 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7009 assert self.instance is not None, \
7010 "Cannot retrieve locked instance %s" % self.op.instance_name
7012 def Exec(self, feedback_fn):
7013 """Remove the instance.
7016 instance = self.instance
7017 logging.info("Shutting down instance %s on node %s",
7018 instance.name, instance.primary_node)
7020 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7021 self.op.shutdown_timeout)
7022 msg = result.fail_msg
7024 if self.op.ignore_failures:
7025 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7027 raise errors.OpExecError("Could not shutdown instance %s on"
7029 (instance.name, instance.primary_node, msg))
7031 assert (self.owned_locks(locking.LEVEL_NODE) ==
7032 self.owned_locks(locking.LEVEL_NODE_RES))
7033 assert not (set(instance.all_nodes) -
7034 self.owned_locks(locking.LEVEL_NODE)), \
7035 "Not owning correct locks"
7037 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7040 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7041 """Utility function to remove an instance.
7044 logging.info("Removing block devices for instance %s", instance.name)
7046 if not _RemoveDisks(lu, instance):
7047 if not ignore_failures:
7048 raise errors.OpExecError("Can't remove instance's disks")
7049 feedback_fn("Warning: can't remove instance's disks")
7051 logging.info("Removing instance %s out of cluster config", instance.name)
7053 lu.cfg.RemoveInstance(instance.name)
7055 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7056 "Instance lock removal conflict"
7058 # Remove lock for the instance
7059 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7062 class LUInstanceQuery(NoHooksLU):
7063 """Logical unit for querying instances.
7066 # pylint: disable=W0142
7069 def CheckArguments(self):
7070 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7071 self.op.output_fields, self.op.use_locking)
7073 def ExpandNames(self):
7074 self.iq.ExpandNames(self)
7076 def DeclareLocks(self, level):
7077 self.iq.DeclareLocks(self, level)
7079 def Exec(self, feedback_fn):
7080 return self.iq.OldStyleQuery(self)
7083 class LUInstanceFailover(LogicalUnit):
7084 """Failover an instance.
7087 HPATH = "instance-failover"
7088 HTYPE = constants.HTYPE_INSTANCE
7091 def CheckArguments(self):
7092 """Check the arguments.
7095 self.iallocator = getattr(self.op, "iallocator", None)
7096 self.target_node = getattr(self.op, "target_node", None)
7098 def ExpandNames(self):
7099 self._ExpandAndLockInstance()
7101 if self.op.target_node is not None:
7102 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7104 self.needed_locks[locking.LEVEL_NODE] = []
7105 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7107 ignore_consistency = self.op.ignore_consistency
7108 shutdown_timeout = self.op.shutdown_timeout
7109 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7112 ignore_consistency=ignore_consistency,
7113 shutdown_timeout=shutdown_timeout)
7114 self.tasklets = [self._migrater]
7116 def DeclareLocks(self, level):
7117 if level == locking.LEVEL_NODE:
7118 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7119 if instance.disk_template in constants.DTS_EXT_MIRROR:
7120 if self.op.target_node is None:
7121 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7123 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7124 self.op.target_node]
7125 del self.recalculate_locks[locking.LEVEL_NODE]
7127 self._LockInstancesNodes()
7129 def BuildHooksEnv(self):
7132 This runs on master, primary and secondary nodes of the instance.
7135 instance = self._migrater.instance
7136 source_node = instance.primary_node
7137 target_node = self.op.target_node
7139 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7140 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7141 "OLD_PRIMARY": source_node,
7142 "NEW_PRIMARY": target_node,
7145 if instance.disk_template in constants.DTS_INT_MIRROR:
7146 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7147 env["NEW_SECONDARY"] = source_node
7149 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7151 env.update(_BuildInstanceHookEnvByObject(self, instance))
7155 def BuildHooksNodes(self):
7156 """Build hooks nodes.
7159 instance = self._migrater.instance
7160 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7161 return (nl, nl + [instance.primary_node])
7164 class LUInstanceMigrate(LogicalUnit):
7165 """Migrate an instance.
7167 This is migration without shutting down, compared to the failover,
7168 which is done with shutdown.
7171 HPATH = "instance-migrate"
7172 HTYPE = constants.HTYPE_INSTANCE
7175 def ExpandNames(self):
7176 self._ExpandAndLockInstance()
7178 if self.op.target_node is not None:
7179 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7181 self.needed_locks[locking.LEVEL_NODE] = []
7182 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7184 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7185 cleanup=self.op.cleanup,
7187 fallback=self.op.allow_failover)
7188 self.tasklets = [self._migrater]
7190 def DeclareLocks(self, level):
7191 if level == locking.LEVEL_NODE:
7192 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7193 if instance.disk_template in constants.DTS_EXT_MIRROR:
7194 if self.op.target_node is None:
7195 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7197 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7198 self.op.target_node]
7199 del self.recalculate_locks[locking.LEVEL_NODE]
7201 self._LockInstancesNodes()
7203 def BuildHooksEnv(self):
7206 This runs on master, primary and secondary nodes of the instance.
7209 instance = self._migrater.instance
7210 source_node = instance.primary_node
7211 target_node = self.op.target_node
7212 env = _BuildInstanceHookEnvByObject(self, instance)
7214 "MIGRATE_LIVE": self._migrater.live,
7215 "MIGRATE_CLEANUP": self.op.cleanup,
7216 "OLD_PRIMARY": source_node,
7217 "NEW_PRIMARY": target_node,
7220 if instance.disk_template in constants.DTS_INT_MIRROR:
7221 env["OLD_SECONDARY"] = target_node
7222 env["NEW_SECONDARY"] = source_node
7224 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7228 def BuildHooksNodes(self):
7229 """Build hooks nodes.
7232 instance = self._migrater.instance
7233 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7234 return (nl, nl + [instance.primary_node])
7237 class LUInstanceMove(LogicalUnit):
7238 """Move an instance by data-copying.
7241 HPATH = "instance-move"
7242 HTYPE = constants.HTYPE_INSTANCE
7245 def ExpandNames(self):
7246 self._ExpandAndLockInstance()
7247 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7248 self.op.target_node = target_node
7249 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7250 self.needed_locks[locking.LEVEL_NODE_RES] = []
7251 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7253 def DeclareLocks(self, level):
7254 if level == locking.LEVEL_NODE:
7255 self._LockInstancesNodes(primary_only=True)
7256 elif level == locking.LEVEL_NODE_RES:
7258 self.needed_locks[locking.LEVEL_NODE_RES] = \
7259 self.needed_locks[locking.LEVEL_NODE][:]
7261 def BuildHooksEnv(self):
7264 This runs on master, primary and secondary nodes of the instance.
7268 "TARGET_NODE": self.op.target_node,
7269 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7271 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7274 def BuildHooksNodes(self):
7275 """Build hooks nodes.
7279 self.cfg.GetMasterNode(),
7280 self.instance.primary_node,
7281 self.op.target_node,
7285 def CheckPrereq(self):
7286 """Check prerequisites.
7288 This checks that the instance is in the cluster.
7291 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7292 assert self.instance is not None, \
7293 "Cannot retrieve locked instance %s" % self.op.instance_name
7295 node = self.cfg.GetNodeInfo(self.op.target_node)
7296 assert node is not None, \
7297 "Cannot retrieve locked node %s" % self.op.target_node
7299 self.target_node = target_node = node.name
7301 if target_node == instance.primary_node:
7302 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7303 (instance.name, target_node),
7306 bep = self.cfg.GetClusterInfo().FillBE(instance)
7308 for idx, dsk in enumerate(instance.disks):
7309 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7310 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7311 " cannot copy" % idx, errors.ECODE_STATE)
7313 _CheckNodeOnline(self, target_node)
7314 _CheckNodeNotDrained(self, target_node)
7315 _CheckNodeVmCapable(self, target_node)
7317 if instance.admin_state == constants.ADMINST_UP:
7318 # check memory requirements on the secondary node
7319 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7320 instance.name, bep[constants.BE_MAXMEM],
7321 instance.hypervisor)
7323 self.LogInfo("Not checking memory on the secondary node as"
7324 " instance will not be started")
7326 # check bridge existance
7327 _CheckInstanceBridgesExist(self, instance, node=target_node)
7329 def Exec(self, feedback_fn):
7330 """Move an instance.
7332 The move is done by shutting it down on its present node, copying
7333 the data over (slow) and starting it on the new node.
7336 instance = self.instance
7338 source_node = instance.primary_node
7339 target_node = self.target_node
7341 self.LogInfo("Shutting down instance %s on source node %s",
7342 instance.name, source_node)
7344 assert (self.owned_locks(locking.LEVEL_NODE) ==
7345 self.owned_locks(locking.LEVEL_NODE_RES))
7347 result = self.rpc.call_instance_shutdown(source_node, instance,
7348 self.op.shutdown_timeout)
7349 msg = result.fail_msg
7351 if self.op.ignore_consistency:
7352 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7353 " Proceeding anyway. Please make sure node"
7354 " %s is down. Error details: %s",
7355 instance.name, source_node, source_node, msg)
7357 raise errors.OpExecError("Could not shutdown instance %s on"
7359 (instance.name, source_node, msg))
7361 # create the target disks
7363 _CreateDisks(self, instance, target_node=target_node)
7364 except errors.OpExecError:
7365 self.LogWarning("Device creation failed, reverting...")
7367 _RemoveDisks(self, instance, target_node=target_node)
7369 self.cfg.ReleaseDRBDMinors(instance.name)
7372 cluster_name = self.cfg.GetClusterInfo().cluster_name
7375 # activate, get path, copy the data over
7376 for idx, disk in enumerate(instance.disks):
7377 self.LogInfo("Copying data for disk %d", idx)
7378 result = self.rpc.call_blockdev_assemble(target_node, disk,
7379 instance.name, True, idx)
7381 self.LogWarning("Can't assemble newly created disk %d: %s",
7382 idx, result.fail_msg)
7383 errs.append(result.fail_msg)
7385 dev_path = result.payload
7386 result = self.rpc.call_blockdev_export(source_node, disk,
7387 target_node, dev_path,
7390 self.LogWarning("Can't copy data over for disk %d: %s",
7391 idx, result.fail_msg)
7392 errs.append(result.fail_msg)
7396 self.LogWarning("Some disks failed to copy, aborting")
7398 _RemoveDisks(self, instance, target_node=target_node)
7400 self.cfg.ReleaseDRBDMinors(instance.name)
7401 raise errors.OpExecError("Errors during disk copy: %s" %
7404 instance.primary_node = target_node
7405 self.cfg.Update(instance, feedback_fn)
7407 self.LogInfo("Removing the disks on the original node")
7408 _RemoveDisks(self, instance, target_node=source_node)
7410 # Only start the instance if it's marked as up
7411 if instance.admin_state == constants.ADMINST_UP:
7412 self.LogInfo("Starting instance %s on node %s",
7413 instance.name, target_node)
7415 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7416 ignore_secondaries=True)
7418 _ShutdownInstanceDisks(self, instance)
7419 raise errors.OpExecError("Can't activate the instance's disks")
7421 result = self.rpc.call_instance_start(target_node,
7422 (instance, None, None), False)
7423 msg = result.fail_msg
7425 _ShutdownInstanceDisks(self, instance)
7426 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7427 (instance.name, target_node, msg))
7430 class LUNodeMigrate(LogicalUnit):
7431 """Migrate all instances from a node.
7434 HPATH = "node-migrate"
7435 HTYPE = constants.HTYPE_NODE
7438 def CheckArguments(self):
7441 def ExpandNames(self):
7442 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7444 self.share_locks = _ShareAll()
7445 self.needed_locks = {
7446 locking.LEVEL_NODE: [self.op.node_name],
7449 def BuildHooksEnv(self):
7452 This runs on the master, the primary and all the secondaries.
7456 "NODE_NAME": self.op.node_name,
7459 def BuildHooksNodes(self):
7460 """Build hooks nodes.
7463 nl = [self.cfg.GetMasterNode()]
7466 def CheckPrereq(self):
7469 def Exec(self, feedback_fn):
7470 # Prepare jobs for migration instances
7472 [opcodes.OpInstanceMigrate(instance_name=inst.name,
7475 iallocator=self.op.iallocator,
7476 target_node=self.op.target_node)]
7477 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7480 # TODO: Run iallocator in this opcode and pass correct placement options to
7481 # OpInstanceMigrate. Since other jobs can modify the cluster between
7482 # running the iallocator and the actual migration, a good consistency model
7483 # will have to be found.
7485 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7486 frozenset([self.op.node_name]))
7488 return ResultWithJobs(jobs)
7491 class TLMigrateInstance(Tasklet):
7492 """Tasklet class for instance migration.
7495 @ivar live: whether the migration will be done live or non-live;
7496 this variable is initalized only after CheckPrereq has run
7497 @type cleanup: boolean
7498 @ivar cleanup: Wheater we cleanup from a failed migration
7499 @type iallocator: string
7500 @ivar iallocator: The iallocator used to determine target_node
7501 @type target_node: string
7502 @ivar target_node: If given, the target_node to reallocate the instance to
7503 @type failover: boolean
7504 @ivar failover: Whether operation results in failover or migration
7505 @type fallback: boolean
7506 @ivar fallback: Whether fallback to failover is allowed if migration not
7508 @type ignore_consistency: boolean
7509 @ivar ignore_consistency: Wheter we should ignore consistency between source
7511 @type shutdown_timeout: int
7512 @ivar shutdown_timeout: In case of failover timeout of the shutdown
7517 _MIGRATION_POLL_INTERVAL = 1 # seconds
7518 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7520 def __init__(self, lu, instance_name, cleanup=False,
7521 failover=False, fallback=False,
7522 ignore_consistency=False,
7523 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
7524 """Initializes this class.
7527 Tasklet.__init__(self, lu)
7530 self.instance_name = instance_name
7531 self.cleanup = cleanup
7532 self.live = False # will be overridden later
7533 self.failover = failover
7534 self.fallback = fallback
7535 self.ignore_consistency = ignore_consistency
7536 self.shutdown_timeout = shutdown_timeout
7538 def CheckPrereq(self):
7539 """Check prerequisites.
7541 This checks that the instance is in the cluster.
7544 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7545 instance = self.cfg.GetInstanceInfo(instance_name)
7546 assert instance is not None
7547 self.instance = instance
7549 if (not self.cleanup and
7550 not instance.admin_state == constants.ADMINST_UP and
7551 not self.failover and self.fallback):
7552 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7553 " switching to failover")
7554 self.failover = True
7556 if instance.disk_template not in constants.DTS_MIRRORED:
7561 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7562 " %s" % (instance.disk_template, text),
7565 if instance.disk_template in constants.DTS_EXT_MIRROR:
7566 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7568 if self.lu.op.iallocator:
7569 self._RunAllocator()
7571 # We set set self.target_node as it is required by
7573 self.target_node = self.lu.op.target_node
7575 # self.target_node is already populated, either directly or by the
7577 target_node = self.target_node
7578 if self.target_node == instance.primary_node:
7579 raise errors.OpPrereqError("Cannot migrate instance %s"
7580 " to its primary (%s)" %
7581 (instance.name, instance.primary_node))
7583 if len(self.lu.tasklets) == 1:
7584 # It is safe to release locks only when we're the only tasklet
7586 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7587 keep=[instance.primary_node, self.target_node])
7590 secondary_nodes = instance.secondary_nodes
7591 if not secondary_nodes:
7592 raise errors.ConfigurationError("No secondary node but using"
7593 " %s disk template" %
7594 instance.disk_template)
7595 target_node = secondary_nodes[0]
7596 if self.lu.op.iallocator or (self.lu.op.target_node and
7597 self.lu.op.target_node != target_node):
7599 text = "failed over"
7602 raise errors.OpPrereqError("Instances with disk template %s cannot"
7603 " be %s to arbitrary nodes"
7604 " (neither an iallocator nor a target"
7605 " node can be passed)" %
7606 (instance.disk_template, text),
7609 i_be = self.cfg.GetClusterInfo().FillBE(instance)
7611 # check memory requirements on the secondary node
7612 if not self.failover or instance.admin_state == constants.ADMINST_UP:
7613 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7614 instance.name, i_be[constants.BE_MAXMEM],
7615 instance.hypervisor)
7617 self.lu.LogInfo("Not checking memory on the secondary node as"
7618 " instance will not be started")
7620 # check if failover must be forced instead of migration
7621 if (not self.cleanup and not self.failover and
7622 i_be[constants.BE_ALWAYS_FAILOVER]):
7624 self.lu.LogInfo("Instance configured to always failover; fallback"
7626 self.failover = True
7628 raise errors.OpPrereqError("This instance has been configured to"
7629 " always failover, please allow failover",
7632 # check bridge existance
7633 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7635 if not self.cleanup:
7636 _CheckNodeNotDrained(self.lu, target_node)
7637 if not self.failover:
7638 result = self.rpc.call_instance_migratable(instance.primary_node,
7640 if result.fail_msg and self.fallback:
7641 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7643 self.failover = True
7645 result.Raise("Can't migrate, please use failover",
7646 prereq=True, ecode=errors.ECODE_STATE)
7648 assert not (self.failover and self.cleanup)
7650 if not self.failover:
7651 if self.lu.op.live is not None and self.lu.op.mode is not None:
7652 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7653 " parameters are accepted",
7655 if self.lu.op.live is not None:
7657 self.lu.op.mode = constants.HT_MIGRATION_LIVE
7659 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7660 # reset the 'live' parameter to None so that repeated
7661 # invocations of CheckPrereq do not raise an exception
7662 self.lu.op.live = None
7663 elif self.lu.op.mode is None:
7664 # read the default value from the hypervisor
7665 i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7667 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7669 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7671 # Failover is never live
7674 def _RunAllocator(self):
7675 """Run the allocator based on input opcode.
7678 ial = IAllocator(self.cfg, self.rpc,
7679 mode=constants.IALLOCATOR_MODE_RELOC,
7680 name=self.instance_name,
7681 # TODO See why hail breaks with a single node below
7682 relocate_from=[self.instance.primary_node,
7683 self.instance.primary_node],
7686 ial.Run(self.lu.op.iallocator)
7689 raise errors.OpPrereqError("Can't compute nodes using"
7690 " iallocator '%s': %s" %
7691 (self.lu.op.iallocator, ial.info),
7693 if len(ial.result) != ial.required_nodes:
7694 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7695 " of nodes (%s), required %s" %
7696 (self.lu.op.iallocator, len(ial.result),
7697 ial.required_nodes), errors.ECODE_FAULT)
7698 self.target_node = ial.result[0]
7699 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7700 self.instance_name, self.lu.op.iallocator,
7701 utils.CommaJoin(ial.result))
7703 def _WaitUntilSync(self):
7704 """Poll with custom rpc for disk sync.
7706 This uses our own step-based rpc call.
7709 self.feedback_fn("* wait until resync is done")
7713 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7715 self.instance.disks)
7717 for node, nres in result.items():
7718 nres.Raise("Cannot resync disks on node %s" % node)
7719 node_done, node_percent = nres.payload
7720 all_done = all_done and node_done
7721 if node_percent is not None:
7722 min_percent = min(min_percent, node_percent)
7724 if min_percent < 100:
7725 self.feedback_fn(" - progress: %.1f%%" % min_percent)
7728 def _EnsureSecondary(self, node):
7729 """Demote a node to secondary.
7732 self.feedback_fn("* switching node %s to secondary mode" % node)
7734 for dev in self.instance.disks:
7735 self.cfg.SetDiskID(dev, node)
7737 result = self.rpc.call_blockdev_close(node, self.instance.name,
7738 self.instance.disks)
7739 result.Raise("Cannot change disk to secondary on node %s" % node)
7741 def _GoStandalone(self):
7742 """Disconnect from the network.
7745 self.feedback_fn("* changing into standalone mode")
7746 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7747 self.instance.disks)
7748 for node, nres in result.items():
7749 nres.Raise("Cannot disconnect disks node %s" % node)
7751 def _GoReconnect(self, multimaster):
7752 """Reconnect to the network.
7758 msg = "single-master"
7759 self.feedback_fn("* changing disks into %s mode" % msg)
7760 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7761 self.instance.disks,
7762 self.instance.name, multimaster)
7763 for node, nres in result.items():
7764 nres.Raise("Cannot change disks config on node %s" % node)
7766 def _ExecCleanup(self):
7767 """Try to cleanup after a failed migration.
7769 The cleanup is done by:
7770 - check that the instance is running only on one node
7771 (and update the config if needed)
7772 - change disks on its secondary node to secondary
7773 - wait until disks are fully synchronized
7774 - disconnect from the network
7775 - change disks into single-master mode
7776 - wait again until disks are fully synchronized
7779 instance = self.instance
7780 target_node = self.target_node
7781 source_node = self.source_node
7783 # check running on only one node
7784 self.feedback_fn("* checking where the instance actually runs"
7785 " (if this hangs, the hypervisor might be in"
7787 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7788 for node, result in ins_l.items():
7789 result.Raise("Can't contact node %s" % node)
7791 runningon_source = instance.name in ins_l[source_node].payload
7792 runningon_target = instance.name in ins_l[target_node].payload
7794 if runningon_source and runningon_target:
7795 raise errors.OpExecError("Instance seems to be running on two nodes,"
7796 " or the hypervisor is confused; you will have"
7797 " to ensure manually that it runs only on one"
7798 " and restart this operation")
7800 if not (runningon_source or runningon_target):
7801 raise errors.OpExecError("Instance does not seem to be running at all;"
7802 " in this case it's safer to repair by"
7803 " running 'gnt-instance stop' to ensure disk"
7804 " shutdown, and then restarting it")
7806 if runningon_target:
7807 # the migration has actually succeeded, we need to update the config
7808 self.feedback_fn("* instance running on secondary node (%s),"
7809 " updating config" % target_node)
7810 instance.primary_node = target_node
7811 self.cfg.Update(instance, self.feedback_fn)
7812 demoted_node = source_node
7814 self.feedback_fn("* instance confirmed to be running on its"
7815 " primary node (%s)" % source_node)
7816 demoted_node = target_node
7818 if instance.disk_template in constants.DTS_INT_MIRROR:
7819 self._EnsureSecondary(demoted_node)
7821 self._WaitUntilSync()
7822 except errors.OpExecError:
7823 # we ignore here errors, since if the device is standalone, it
7824 # won't be able to sync
7826 self._GoStandalone()
7827 self._GoReconnect(False)
7828 self._WaitUntilSync()
7830 self.feedback_fn("* done")
7832 def _RevertDiskStatus(self):
7833 """Try to revert the disk status after a failed migration.
7836 target_node = self.target_node
7837 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7841 self._EnsureSecondary(target_node)
7842 self._GoStandalone()
7843 self._GoReconnect(False)
7844 self._WaitUntilSync()
7845 except errors.OpExecError, err:
7846 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7847 " please try to recover the instance manually;"
7848 " error '%s'" % str(err))
7850 def _AbortMigration(self):
7851 """Call the hypervisor code to abort a started migration.
7854 instance = self.instance
7855 target_node = self.target_node
7856 source_node = self.source_node
7857 migration_info = self.migration_info
7859 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
7863 abort_msg = abort_result.fail_msg
7865 logging.error("Aborting migration failed on target node %s: %s",
7866 target_node, abort_msg)
7867 # Don't raise an exception here, as we stil have to try to revert the
7868 # disk status, even if this step failed.
7870 abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
7871 instance, False, self.live)
7872 abort_msg = abort_result.fail_msg
7874 logging.error("Aborting migration failed on source node %s: %s",
7875 source_node, abort_msg)
7877 def _ExecMigration(self):
7878 """Migrate an instance.
7880 The migrate is done by:
7881 - change the disks into dual-master mode
7882 - wait until disks are fully synchronized again
7883 - migrate the instance
7884 - change disks on the new secondary node (the old primary) to secondary
7885 - wait until disks are fully synchronized
7886 - change disks into single-master mode
7889 instance = self.instance
7890 target_node = self.target_node
7891 source_node = self.source_node
7893 # Check for hypervisor version mismatch and warn the user.
7894 nodeinfo = self.rpc.call_node_info([source_node, target_node],
7895 None, [self.instance.hypervisor])
7896 for ninfo in nodeinfo.values():
7897 ninfo.Raise("Unable to retrieve node information from node '%s'" %
7899 (_, _, (src_info, )) = nodeinfo[source_node].payload
7900 (_, _, (dst_info, )) = nodeinfo[target_node].payload
7902 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
7903 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
7904 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
7905 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
7906 if src_version != dst_version:
7907 self.feedback_fn("* warning: hypervisor version mismatch between"
7908 " source (%s) and target (%s) node" %
7909 (src_version, dst_version))
7911 self.feedback_fn("* checking disk consistency between source and target")
7912 for dev in instance.disks:
7913 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7914 raise errors.OpExecError("Disk %s is degraded or not fully"
7915 " synchronized on target node,"
7916 " aborting migration" % dev.iv_name)
7918 # First get the migration information from the remote node
7919 result = self.rpc.call_migration_info(source_node, instance)
7920 msg = result.fail_msg
7922 log_err = ("Failed fetching source migration information from %s: %s" %
7924 logging.error(log_err)
7925 raise errors.OpExecError(log_err)
7927 self.migration_info = migration_info = result.payload
7929 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7930 # Then switch the disks to master/master mode
7931 self._EnsureSecondary(target_node)
7932 self._GoStandalone()
7933 self._GoReconnect(True)
7934 self._WaitUntilSync()
7936 self.feedback_fn("* preparing %s to accept the instance" % target_node)
7937 result = self.rpc.call_accept_instance(target_node,
7940 self.nodes_ip[target_node])
7942 msg = result.fail_msg
7944 logging.error("Instance pre-migration failed, trying to revert"
7945 " disk status: %s", msg)
7946 self.feedback_fn("Pre-migration failed, aborting")
7947 self._AbortMigration()
7948 self._RevertDiskStatus()
7949 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7950 (instance.name, msg))
7952 self.feedback_fn("* migrating instance to %s" % target_node)
7953 result = self.rpc.call_instance_migrate(source_node, instance,
7954 self.nodes_ip[target_node],
7956 msg = result.fail_msg
7958 logging.error("Instance migration failed, trying to revert"
7959 " disk status: %s", msg)
7960 self.feedback_fn("Migration failed, aborting")
7961 self._AbortMigration()
7962 self._RevertDiskStatus()
7963 raise errors.OpExecError("Could not migrate instance %s: %s" %
7964 (instance.name, msg))
7966 self.feedback_fn("* starting memory transfer")
7967 last_feedback = time.time()
7969 result = self.rpc.call_instance_get_migration_status(source_node,
7971 msg = result.fail_msg
7972 ms = result.payload # MigrationStatus instance
7973 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
7974 logging.error("Instance migration failed, trying to revert"
7975 " disk status: %s", msg)
7976 self.feedback_fn("Migration failed, aborting")
7977 self._AbortMigration()
7978 self._RevertDiskStatus()
7979 raise errors.OpExecError("Could not migrate instance %s: %s" %
7980 (instance.name, msg))
7982 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
7983 self.feedback_fn("* memory transfer complete")
7986 if (utils.TimeoutExpired(last_feedback,
7987 self._MIGRATION_FEEDBACK_INTERVAL) and
7988 ms.transferred_ram is not None):
7989 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
7990 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
7991 last_feedback = time.time()
7993 time.sleep(self._MIGRATION_POLL_INTERVAL)
7995 result = self.rpc.call_instance_finalize_migration_src(source_node,
7999 msg = result.fail_msg
8001 logging.error("Instance migration succeeded, but finalization failed"
8002 " on the source node: %s", msg)
8003 raise errors.OpExecError("Could not finalize instance migration: %s" %
8006 instance.primary_node = target_node
8008 # distribute new instance config to the other nodes
8009 self.cfg.Update(instance, self.feedback_fn)
8011 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8015 msg = result.fail_msg
8017 logging.error("Instance migration succeeded, but finalization failed"
8018 " on the target node: %s", msg)
8019 raise errors.OpExecError("Could not finalize instance migration: %s" %
8022 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8023 self._EnsureSecondary(source_node)
8024 self._WaitUntilSync()
8025 self._GoStandalone()
8026 self._GoReconnect(False)
8027 self._WaitUntilSync()
8029 self.feedback_fn("* done")
8031 def _ExecFailover(self):
8032 """Failover an instance.
8034 The failover is done by shutting it down on its present node and
8035 starting it on the secondary.
8038 instance = self.instance
8039 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8041 source_node = instance.primary_node
8042 target_node = self.target_node
8044 if instance.admin_state == constants.ADMINST_UP:
8045 self.feedback_fn("* checking disk consistency between source and target")
8046 for dev in instance.disks:
8047 # for drbd, these are drbd over lvm
8048 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
8049 if primary_node.offline:
8050 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8052 (primary_node.name, dev.iv_name, target_node))
8053 elif not self.ignore_consistency:
8054 raise errors.OpExecError("Disk %s is degraded on target node,"
8055 " aborting failover" % dev.iv_name)
8057 self.feedback_fn("* not checking disk consistency as instance is not"
8060 self.feedback_fn("* shutting down instance on source node")
8061 logging.info("Shutting down instance %s on node %s",
8062 instance.name, source_node)
8064 result = self.rpc.call_instance_shutdown(source_node, instance,
8065 self.shutdown_timeout)
8066 msg = result.fail_msg
8068 if self.ignore_consistency or primary_node.offline:
8069 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8070 " proceeding anyway; please make sure node"
8071 " %s is down; error details: %s",
8072 instance.name, source_node, source_node, msg)
8074 raise errors.OpExecError("Could not shutdown instance %s on"
8076 (instance.name, source_node, msg))
8078 self.feedback_fn("* deactivating the instance's disks on source node")
8079 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8080 raise errors.OpExecError("Can't shut down the instance's disks")
8082 instance.primary_node = target_node
8083 # distribute new instance config to the other nodes
8084 self.cfg.Update(instance, self.feedback_fn)
8086 # Only start the instance if it's marked as up
8087 if instance.admin_state == constants.ADMINST_UP:
8088 self.feedback_fn("* activating the instance's disks on target node %s" %
8090 logging.info("Starting instance %s on node %s",
8091 instance.name, target_node)
8093 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8094 ignore_secondaries=True)
8096 _ShutdownInstanceDisks(self.lu, instance)
8097 raise errors.OpExecError("Can't activate the instance's disks")
8099 self.feedback_fn("* starting the instance on the target node %s" %
8101 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8103 msg = result.fail_msg
8105 _ShutdownInstanceDisks(self.lu, instance)
8106 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8107 (instance.name, target_node, msg))
8109 def Exec(self, feedback_fn):
8110 """Perform the migration.
8113 self.feedback_fn = feedback_fn
8114 self.source_node = self.instance.primary_node
8116 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8117 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8118 self.target_node = self.instance.secondary_nodes[0]
8119 # Otherwise self.target_node has been populated either
8120 # directly, or through an iallocator.
8122 self.all_nodes = [self.source_node, self.target_node]
8123 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8124 in self.cfg.GetMultiNodeInfo(self.all_nodes))
8127 feedback_fn("Failover instance %s" % self.instance.name)
8128 self._ExecFailover()
8130 feedback_fn("Migrating instance %s" % self.instance.name)
8133 return self._ExecCleanup()
8135 return self._ExecMigration()
8138 def _CreateBlockDev(lu, node, instance, device, force_create,
8140 """Create a tree of block devices on a given node.
8142 If this device type has to be created on secondaries, create it and
8145 If not, just recurse to children keeping the same 'force' value.
8147 @param lu: the lu on whose behalf we execute
8148 @param node: the node on which to create the device
8149 @type instance: L{objects.Instance}
8150 @param instance: the instance which owns the device
8151 @type device: L{objects.Disk}
8152 @param device: the device to create
8153 @type force_create: boolean
8154 @param force_create: whether to force creation of this device; this
8155 will be change to True whenever we find a device which has
8156 CreateOnSecondary() attribute
8157 @param info: the extra 'metadata' we should attach to the device
8158 (this will be represented as a LVM tag)
8159 @type force_open: boolean
8160 @param force_open: this parameter will be passes to the
8161 L{backend.BlockdevCreate} function where it specifies
8162 whether we run on primary or not, and it affects both
8163 the child assembly and the device own Open() execution
8166 if device.CreateOnSecondary():
8170 for child in device.children:
8171 _CreateBlockDev(lu, node, instance, child, force_create,
8174 if not force_create:
8177 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8180 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8181 """Create a single block device on a given node.
8183 This will not recurse over children of the device, so they must be
8186 @param lu: the lu on whose behalf we execute
8187 @param node: the node on which to create the device
8188 @type instance: L{objects.Instance}
8189 @param instance: the instance which owns the device
8190 @type device: L{objects.Disk}
8191 @param device: the device to create
8192 @param info: the extra 'metadata' we should attach to the device
8193 (this will be represented as a LVM tag)
8194 @type force_open: boolean
8195 @param force_open: this parameter will be passes to the
8196 L{backend.BlockdevCreate} function where it specifies
8197 whether we run on primary or not, and it affects both
8198 the child assembly and the device own Open() execution
8201 lu.cfg.SetDiskID(device, node)
8202 result = lu.rpc.call_blockdev_create(node, device, device.size,
8203 instance.name, force_open, info)
8204 result.Raise("Can't create block device %s on"
8205 " node %s for instance %s" % (device, node, instance.name))
8206 if device.physical_id is None:
8207 device.physical_id = result.payload
8210 def _GenerateUniqueNames(lu, exts):
8211 """Generate a suitable LV name.
8213 This will generate a logical volume name for the given instance.
8218 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8219 results.append("%s%s" % (new_id, val))
8223 def _ComputeLDParams(disk_template, disk_params):
8224 """Computes Logical Disk parameters from Disk Template parameters.
8226 @type disk_template: string
8227 @param disk_template: disk template, one of L{constants.DISK_TEMPLATES}
8228 @type disk_params: dict
8229 @param disk_params: disk template parameters; dict(template_name -> parameters
8231 @return: a list of dicts, one for each node of the disk hierarchy. Each dict
8232 contains the LD parameters of the node. The tree is flattened in-order.
8235 if disk_template not in constants.DISK_TEMPLATES:
8236 raise errors.ProgrammerError("Unknown disk template %s" % disk_template)
8239 dt_params = disk_params[disk_template]
8240 if disk_template == constants.DT_DRBD8:
8242 constants.LDP_RESYNC_RATE: dt_params[constants.DRBD_RESYNC_RATE],
8243 constants.LDP_BARRIERS: dt_params[constants.DRBD_DISK_BARRIERS],
8244 constants.LDP_NO_META_FLUSH: dt_params[constants.DRBD_META_BARRIERS],
8245 constants.LDP_DEFAULT_METAVG: dt_params[constants.DRBD_DEFAULT_METAVG],
8246 constants.LDP_DISK_CUSTOM: dt_params[constants.DRBD_DISK_CUSTOM],
8247 constants.LDP_NET_CUSTOM: dt_params[constants.DRBD_NET_CUSTOM],
8248 constants.LDP_DYNAMIC_RESYNC: dt_params[constants.DRBD_DYNAMIC_RESYNC],
8249 constants.LDP_PLAN_AHEAD: dt_params[constants.DRBD_PLAN_AHEAD],
8250 constants.LDP_FILL_TARGET: dt_params[constants.DRBD_FILL_TARGET],
8251 constants.LDP_DELAY_TARGET: dt_params[constants.DRBD_DELAY_TARGET],
8252 constants.LDP_MAX_RATE: dt_params[constants.DRBD_MAX_RATE],
8253 constants.LDP_MIN_RATE: dt_params[constants.DRBD_MIN_RATE],
8257 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_DRBD8],
8260 result.append(drbd_params)
8264 constants.LDP_STRIPES: dt_params[constants.DRBD_DATA_STRIPES],
8267 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8269 result.append(data_params)
8273 constants.LDP_STRIPES: dt_params[constants.DRBD_META_STRIPES],
8276 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8278 result.append(meta_params)
8280 elif (disk_template == constants.DT_FILE or
8281 disk_template == constants.DT_SHARED_FILE):
8282 result.append(constants.DISK_LD_DEFAULTS[constants.LD_FILE])
8284 elif disk_template == constants.DT_PLAIN:
8286 constants.LDP_STRIPES: dt_params[constants.LV_STRIPES],
8289 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8291 result.append(params)
8293 elif disk_template == constants.DT_BLOCK:
8294 result.append(constants.DISK_LD_DEFAULTS[constants.LD_BLOCKDEV])
8299 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8300 iv_name, p_minor, s_minor, drbd_params, data_params,
8302 """Generate a drbd8 device complete with its children.
8305 assert len(vgnames) == len(names) == 2
8306 port = lu.cfg.AllocatePort()
8307 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8309 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8310 logical_id=(vgnames[0], names[0]),
8312 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8313 logical_id=(vgnames[1], names[1]),
8315 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8316 logical_id=(primary, secondary, port,
8319 children=[dev_data, dev_meta],
8320 iv_name=iv_name, params=drbd_params)
8324 def _GenerateDiskTemplate(lu, template_name,
8325 instance_name, primary_node,
8326 secondary_nodes, disk_info,
8327 file_storage_dir, file_driver,
8328 base_index, feedback_fn, disk_params):
8329 """Generate the entire disk layout for a given template type.
8332 #TODO: compute space requirements
8334 vgname = lu.cfg.GetVGName()
8335 disk_count = len(disk_info)
8337 ld_params = _ComputeLDParams(template_name, disk_params)
8338 if template_name == constants.DT_DISKLESS:
8340 elif template_name == constants.DT_PLAIN:
8341 if len(secondary_nodes) != 0:
8342 raise errors.ProgrammerError("Wrong template configuration")
8344 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8345 for i in range(disk_count)])
8346 for idx, disk in enumerate(disk_info):
8347 disk_index = idx + base_index
8348 vg = disk.get(constants.IDISK_VG, vgname)
8349 feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
8350 disk_dev = objects.Disk(dev_type=constants.LD_LV,
8351 size=disk[constants.IDISK_SIZE],
8352 logical_id=(vg, names[idx]),
8353 iv_name="disk/%d" % disk_index,
8354 mode=disk[constants.IDISK_MODE],
8355 params=ld_params[0])
8356 disks.append(disk_dev)
8357 elif template_name == constants.DT_DRBD8:
8358 drbd_params, data_params, meta_params = ld_params
8359 if len(secondary_nodes) != 1:
8360 raise errors.ProgrammerError("Wrong template configuration")
8361 remote_node = secondary_nodes[0]
8362 minors = lu.cfg.AllocateDRBDMinor(
8363 [primary_node, remote_node] * len(disk_info), instance_name)
8366 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8367 for i in range(disk_count)]):
8368 names.append(lv_prefix + "_data")
8369 names.append(lv_prefix + "_meta")
8370 for idx, disk in enumerate(disk_info):
8371 disk_index = idx + base_index
8372 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8373 data_vg = disk.get(constants.IDISK_VG, vgname)
8374 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8375 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8376 disk[constants.IDISK_SIZE],
8378 names[idx * 2:idx * 2 + 2],
8379 "disk/%d" % disk_index,
8380 minors[idx * 2], minors[idx * 2 + 1],
8381 drbd_params, data_params, meta_params)
8382 disk_dev.mode = disk[constants.IDISK_MODE]
8383 disks.append(disk_dev)
8384 elif template_name == constants.DT_FILE:
8385 if len(secondary_nodes) != 0:
8386 raise errors.ProgrammerError("Wrong template configuration")
8388 opcodes.RequireFileStorage()
8390 for idx, disk in enumerate(disk_info):
8391 disk_index = idx + base_index
8392 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8393 size=disk[constants.IDISK_SIZE],
8394 iv_name="disk/%d" % disk_index,
8395 logical_id=(file_driver,
8396 "%s/disk%d" % (file_storage_dir,
8398 mode=disk[constants.IDISK_MODE],
8399 params=ld_params[0])
8400 disks.append(disk_dev)
8401 elif template_name == constants.DT_SHARED_FILE:
8402 if len(secondary_nodes) != 0:
8403 raise errors.ProgrammerError("Wrong template configuration")
8405 opcodes.RequireSharedFileStorage()
8407 for idx, disk in enumerate(disk_info):
8408 disk_index = idx + base_index
8409 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8410 size=disk[constants.IDISK_SIZE],
8411 iv_name="disk/%d" % disk_index,
8412 logical_id=(file_driver,
8413 "%s/disk%d" % (file_storage_dir,
8415 mode=disk[constants.IDISK_MODE],
8416 params=ld_params[0])
8417 disks.append(disk_dev)
8418 elif template_name == constants.DT_BLOCK:
8419 if len(secondary_nodes) != 0:
8420 raise errors.ProgrammerError("Wrong template configuration")
8422 for idx, disk in enumerate(disk_info):
8423 disk_index = idx + base_index
8424 disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
8425 size=disk[constants.IDISK_SIZE],
8426 logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
8427 disk[constants.IDISK_ADOPT]),
8428 iv_name="disk/%d" % disk_index,
8429 mode=disk[constants.IDISK_MODE],
8430 params=ld_params[0])
8431 disks.append(disk_dev)
8434 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
8438 def _GetInstanceInfoText(instance):
8439 """Compute that text that should be added to the disk's metadata.
8442 return "originstname+%s" % instance.name
8445 def _CalcEta(time_taken, written, total_size):
8446 """Calculates the ETA based on size written and total size.
8448 @param time_taken: The time taken so far
8449 @param written: amount written so far
8450 @param total_size: The total size of data to be written
8451 @return: The remaining time in seconds
8454 avg_time = time_taken / float(written)
8455 return (total_size - written) * avg_time
8458 def _WipeDisks(lu, instance):
8459 """Wipes instance disks.
8461 @type lu: L{LogicalUnit}
8462 @param lu: the logical unit on whose behalf we execute
8463 @type instance: L{objects.Instance}
8464 @param instance: the instance whose disks we should create
8465 @return: the success of the wipe
8468 node = instance.primary_node
8470 for device in instance.disks:
8471 lu.cfg.SetDiskID(device, node)
8473 logging.info("Pause sync of instance %s disks", instance.name)
8474 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
8476 for idx, success in enumerate(result.payload):
8478 logging.warn("pause-sync of instance %s for disks %d failed",
8482 for idx, device in enumerate(instance.disks):
8483 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
8484 # MAX_WIPE_CHUNK at max
8485 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8486 constants.MIN_WIPE_CHUNK_PERCENT)
8487 # we _must_ make this an int, otherwise rounding errors will
8489 wipe_chunk_size = int(wipe_chunk_size)
8491 lu.LogInfo("* Wiping disk %d", idx)
8492 logging.info("Wiping disk %d for instance %s, node %s using"
8493 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8498 start_time = time.time()
8500 while offset < size:
8501 wipe_size = min(wipe_chunk_size, size - offset)
8502 logging.debug("Wiping disk %d, offset %s, chunk %s",
8503 idx, offset, wipe_size)
8504 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
8505 result.Raise("Could not wipe disk %d at offset %d for size %d" %
8506 (idx, offset, wipe_size))
8509 if now - last_output >= 60:
8510 eta = _CalcEta(now - start_time, offset, size)
8511 lu.LogInfo(" - done: %.1f%% ETA: %s" %
8512 (offset / float(size) * 100, utils.FormatSeconds(eta)))
8515 logging.info("Resume sync of instance %s disks", instance.name)
8517 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
8519 for idx, success in enumerate(result.payload):
8521 lu.LogWarning("Resume sync of disk %d failed, please have a"
8522 " look at the status and troubleshoot the issue", idx)
8523 logging.warn("resume-sync of instance %s for disks %d failed",
8527 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8528 """Create all disks for an instance.
8530 This abstracts away some work from AddInstance.
8532 @type lu: L{LogicalUnit}
8533 @param lu: the logical unit on whose behalf we execute
8534 @type instance: L{objects.Instance}
8535 @param instance: the instance whose disks we should create
8537 @param to_skip: list of indices to skip
8538 @type target_node: string
8539 @param target_node: if passed, overrides the target node for creation
8541 @return: the success of the creation
8544 info = _GetInstanceInfoText(instance)
8545 if target_node is None:
8546 pnode = instance.primary_node
8547 all_nodes = instance.all_nodes
8552 if instance.disk_template in constants.DTS_FILEBASED:
8553 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8554 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8556 result.Raise("Failed to create directory '%s' on"
8557 " node %s" % (file_storage_dir, pnode))
8559 # Note: this needs to be kept in sync with adding of disks in
8560 # LUInstanceSetParams
8561 for idx, device in enumerate(instance.disks):
8562 if to_skip and idx in to_skip:
8564 logging.info("Creating volume %s for instance %s",
8565 device.iv_name, instance.name)
8567 for node in all_nodes:
8568 f_create = node == pnode
8569 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
8572 def _RemoveDisks(lu, instance, target_node=None):
8573 """Remove all disks for an instance.
8575 This abstracts away some work from `AddInstance()` and
8576 `RemoveInstance()`. Note that in case some of the devices couldn't
8577 be removed, the removal will continue with the other ones (compare
8578 with `_CreateDisks()`).
8580 @type lu: L{LogicalUnit}
8581 @param lu: the logical unit on whose behalf we execute
8582 @type instance: L{objects.Instance}
8583 @param instance: the instance whose disks we should remove
8584 @type target_node: string
8585 @param target_node: used to override the node on which to remove the disks
8587 @return: the success of the removal
8590 logging.info("Removing block devices for instance %s", instance.name)
8593 for device in instance.disks:
8595 edata = [(target_node, device)]
8597 edata = device.ComputeNodeTree(instance.primary_node)
8598 for node, disk in edata:
8599 lu.cfg.SetDiskID(disk, node)
8600 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
8602 lu.LogWarning("Could not remove block device %s on node %s,"
8603 " continuing anyway: %s", device.iv_name, node, msg)
8606 # if this is a DRBD disk, return its port to the pool
8607 if device.dev_type in constants.LDS_DRBD:
8608 tcp_port = device.logical_id[2]
8609 lu.cfg.AddTcpUdpPort(tcp_port)
8611 if instance.disk_template == constants.DT_FILE:
8612 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8616 tgt = instance.primary_node
8617 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
8619 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
8620 file_storage_dir, instance.primary_node, result.fail_msg)
8626 def _ComputeDiskSizePerVG(disk_template, disks):
8627 """Compute disk size requirements in the volume group
8630 def _compute(disks, payload):
8631 """Universal algorithm.
8636 vgs[disk[constants.IDISK_VG]] = \
8637 vgs.get(constants.IDISK_VG, 0) + disk[constants.IDISK_SIZE] + payload
8641 # Required free disk space as a function of disk and swap space
8643 constants.DT_DISKLESS: {},
8644 constants.DT_PLAIN: _compute(disks, 0),
8645 # 128 MB are added for drbd metadata for each disk
8646 constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
8647 constants.DT_FILE: {},
8648 constants.DT_SHARED_FILE: {},
8651 if disk_template not in req_size_dict:
8652 raise errors.ProgrammerError("Disk template '%s' size requirement"
8653 " is unknown" % disk_template)
8655 return req_size_dict[disk_template]
8658 def _ComputeDiskSize(disk_template, disks):
8659 """Compute disk size requirements in the volume group
8662 # Required free disk space as a function of disk and swap space
8664 constants.DT_DISKLESS: None,
8665 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
8666 # 128 MB are added for drbd metadata for each disk
8668 sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
8669 constants.DT_FILE: None,
8670 constants.DT_SHARED_FILE: 0,
8671 constants.DT_BLOCK: 0,
8674 if disk_template not in req_size_dict:
8675 raise errors.ProgrammerError("Disk template '%s' size requirement"
8676 " is unknown" % disk_template)
8678 return req_size_dict[disk_template]
8681 def _FilterVmNodes(lu, nodenames):
8682 """Filters out non-vm_capable nodes from a list.
8684 @type lu: L{LogicalUnit}
8685 @param lu: the logical unit for which we check
8686 @type nodenames: list
8687 @param nodenames: the list of nodes on which we should check
8689 @return: the list of vm-capable nodes
8692 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
8693 return [name for name in nodenames if name not in vm_nodes]
8696 def _CheckHVParams(lu, nodenames, hvname, hvparams):
8697 """Hypervisor parameter validation.
8699 This function abstract the hypervisor parameter validation to be
8700 used in both instance create and instance modify.
8702 @type lu: L{LogicalUnit}
8703 @param lu: the logical unit for which we check
8704 @type nodenames: list
8705 @param nodenames: the list of nodes on which we should check
8706 @type hvname: string
8707 @param hvname: the name of the hypervisor we should use
8708 @type hvparams: dict
8709 @param hvparams: the parameters which we need to check
8710 @raise errors.OpPrereqError: if the parameters are not valid
8713 nodenames = _FilterVmNodes(lu, nodenames)
8715 cluster = lu.cfg.GetClusterInfo()
8716 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
8718 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
8719 for node in nodenames:
8723 info.Raise("Hypervisor parameter validation failed on node %s" % node)
8726 def _CheckOSParams(lu, required, nodenames, osname, osparams):
8727 """OS parameters validation.
8729 @type lu: L{LogicalUnit}
8730 @param lu: the logical unit for which we check
8731 @type required: boolean
8732 @param required: whether the validation should fail if the OS is not
8734 @type nodenames: list
8735 @param nodenames: the list of nodes on which we should check
8736 @type osname: string
8737 @param osname: the name of the hypervisor we should use
8738 @type osparams: dict
8739 @param osparams: the parameters which we need to check
8740 @raise errors.OpPrereqError: if the parameters are not valid
8743 nodenames = _FilterVmNodes(lu, nodenames)
8744 result = lu.rpc.call_os_validate(nodenames, required, osname,
8745 [constants.OS_VALIDATE_PARAMETERS],
8747 for node, nres in result.items():
8748 # we don't check for offline cases since this should be run only
8749 # against the master node and/or an instance's nodes
8750 nres.Raise("OS Parameters validation failed on node %s" % node)
8751 if not nres.payload:
8752 lu.LogInfo("OS %s not found on node %s, validation skipped",
8756 class LUInstanceCreate(LogicalUnit):
8757 """Create an instance.
8760 HPATH = "instance-add"
8761 HTYPE = constants.HTYPE_INSTANCE
8764 def CheckArguments(self):
8768 # do not require name_check to ease forward/backward compatibility
8770 if self.op.no_install and self.op.start:
8771 self.LogInfo("No-installation mode selected, disabling startup")
8772 self.op.start = False
8773 # validate/normalize the instance name
8774 self.op.instance_name = \
8775 netutils.Hostname.GetNormalizedName(self.op.instance_name)
8777 if self.op.ip_check and not self.op.name_check:
8778 # TODO: make the ip check more flexible and not depend on the name check
8779 raise errors.OpPrereqError("Cannot do IP address check without a name"
8780 " check", errors.ECODE_INVAL)
8782 # check nics' parameter names
8783 for nic in self.op.nics:
8784 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8786 # check disks. parameter names and consistent adopt/no-adopt strategy
8787 has_adopt = has_no_adopt = False
8788 for disk in self.op.disks:
8789 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8790 if constants.IDISK_ADOPT in disk:
8794 if has_adopt and has_no_adopt:
8795 raise errors.OpPrereqError("Either all disks are adopted or none is",
8798 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8799 raise errors.OpPrereqError("Disk adoption is not supported for the"
8800 " '%s' disk template" %
8801 self.op.disk_template,
8803 if self.op.iallocator is not None:
8804 raise errors.OpPrereqError("Disk adoption not allowed with an"
8805 " iallocator script", errors.ECODE_INVAL)
8806 if self.op.mode == constants.INSTANCE_IMPORT:
8807 raise errors.OpPrereqError("Disk adoption not allowed for"
8808 " instance import", errors.ECODE_INVAL)
8810 if self.op.disk_template in constants.DTS_MUST_ADOPT:
8811 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8812 " but no 'adopt' parameter given" %
8813 self.op.disk_template,
8816 self.adopt_disks = has_adopt
8818 # instance name verification
8819 if self.op.name_check:
8820 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8821 self.op.instance_name = self.hostname1.name
8822 # used in CheckPrereq for ip ping check
8823 self.check_ip = self.hostname1.ip
8825 self.check_ip = None
8827 # file storage checks
8828 if (self.op.file_driver and
8829 not self.op.file_driver in constants.FILE_DRIVER):
8830 raise errors.OpPrereqError("Invalid file driver name '%s'" %
8831 self.op.file_driver, errors.ECODE_INVAL)
8833 if self.op.disk_template == constants.DT_FILE:
8834 opcodes.RequireFileStorage()
8835 elif self.op.disk_template == constants.DT_SHARED_FILE:
8836 opcodes.RequireSharedFileStorage()
8838 ### Node/iallocator related checks
8839 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8841 if self.op.pnode is not None:
8842 if self.op.disk_template in constants.DTS_INT_MIRROR:
8843 if self.op.snode is None:
8844 raise errors.OpPrereqError("The networked disk templates need"
8845 " a mirror node", errors.ECODE_INVAL)
8847 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8849 self.op.snode = None
8851 self._cds = _GetClusterDomainSecret()
8853 if self.op.mode == constants.INSTANCE_IMPORT:
8854 # On import force_variant must be True, because if we forced it at
8855 # initial install, our only chance when importing it back is that it
8857 self.op.force_variant = True
8859 if self.op.no_install:
8860 self.LogInfo("No-installation mode has no effect during import")
8862 elif self.op.mode == constants.INSTANCE_CREATE:
8863 if self.op.os_type is None:
8864 raise errors.OpPrereqError("No guest OS specified",
8866 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
8867 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
8868 " installation" % self.op.os_type,
8870 if self.op.disk_template is None:
8871 raise errors.OpPrereqError("No disk template specified",
8874 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8875 # Check handshake to ensure both clusters have the same domain secret
8876 src_handshake = self.op.source_handshake
8877 if not src_handshake:
8878 raise errors.OpPrereqError("Missing source handshake",
8881 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
8884 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
8887 # Load and check source CA
8888 self.source_x509_ca_pem = self.op.source_x509_ca
8889 if not self.source_x509_ca_pem:
8890 raise errors.OpPrereqError("Missing source X509 CA",
8894 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
8896 except OpenSSL.crypto.Error, err:
8897 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
8898 (err, ), errors.ECODE_INVAL)
8900 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
8901 if errcode is not None:
8902 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
8905 self.source_x509_ca = cert
8907 src_instance_name = self.op.source_instance_name
8908 if not src_instance_name:
8909 raise errors.OpPrereqError("Missing source instance name",
8912 self.source_instance_name = \
8913 netutils.GetHostname(name=src_instance_name).name
8916 raise errors.OpPrereqError("Invalid instance creation mode %r" %
8917 self.op.mode, errors.ECODE_INVAL)
8919 def ExpandNames(self):
8920 """ExpandNames for CreateInstance.
8922 Figure out the right locks for instance creation.
8925 self.needed_locks = {}
8927 instance_name = self.op.instance_name
8928 # this is just a preventive check, but someone might still add this
8929 # instance in the meantime, and creation will fail at lock-add time
8930 if instance_name in self.cfg.GetInstanceList():
8931 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8932 instance_name, errors.ECODE_EXISTS)
8934 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
8936 if self.op.iallocator:
8937 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
8938 # specifying a group on instance creation and then selecting nodes from
8940 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8941 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
8943 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8944 nodelist = [self.op.pnode]
8945 if self.op.snode is not None:
8946 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8947 nodelist.append(self.op.snode)
8948 self.needed_locks[locking.LEVEL_NODE] = nodelist
8949 # Lock resources of instance's primary and secondary nodes (copy to
8950 # prevent accidential modification)
8951 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
8953 # in case of import lock the source node too
8954 if self.op.mode == constants.INSTANCE_IMPORT:
8955 src_node = self.op.src_node
8956 src_path = self.op.src_path
8958 if src_path is None:
8959 self.op.src_path = src_path = self.op.instance_name
8961 if src_node is None:
8962 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8963 self.op.src_node = None
8964 if os.path.isabs(src_path):
8965 raise errors.OpPrereqError("Importing an instance from a path"
8966 " requires a source node option",
8969 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8970 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8971 self.needed_locks[locking.LEVEL_NODE].append(src_node)
8972 if not os.path.isabs(src_path):
8973 self.op.src_path = src_path = \
8974 utils.PathJoin(constants.EXPORT_DIR, src_path)
8976 def _RunAllocator(self):
8977 """Run the allocator based on input opcode.
8980 nics = [n.ToDict() for n in self.nics]
8981 ial = IAllocator(self.cfg, self.rpc,
8982 mode=constants.IALLOCATOR_MODE_ALLOC,
8983 name=self.op.instance_name,
8984 disk_template=self.op.disk_template,
8987 vcpus=self.be_full[constants.BE_VCPUS],
8988 memory=self.be_full[constants.BE_MAXMEM],
8991 hypervisor=self.op.hypervisor,
8994 ial.Run(self.op.iallocator)
8997 raise errors.OpPrereqError("Can't compute nodes using"
8998 " iallocator '%s': %s" %
8999 (self.op.iallocator, ial.info),
9001 if len(ial.result) != ial.required_nodes:
9002 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9003 " of nodes (%s), required %s" %
9004 (self.op.iallocator, len(ial.result),
9005 ial.required_nodes), errors.ECODE_FAULT)
9006 self.op.pnode = ial.result[0]
9007 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9008 self.op.instance_name, self.op.iallocator,
9009 utils.CommaJoin(ial.result))
9010 if ial.required_nodes == 2:
9011 self.op.snode = ial.result[1]
9013 def BuildHooksEnv(self):
9016 This runs on master, primary and secondary nodes of the instance.
9020 "ADD_MODE": self.op.mode,
9022 if self.op.mode == constants.INSTANCE_IMPORT:
9023 env["SRC_NODE"] = self.op.src_node
9024 env["SRC_PATH"] = self.op.src_path
9025 env["SRC_IMAGES"] = self.src_images
9027 env.update(_BuildInstanceHookEnv(
9028 name=self.op.instance_name,
9029 primary_node=self.op.pnode,
9030 secondary_nodes=self.secondaries,
9031 status=self.op.start,
9032 os_type=self.op.os_type,
9033 minmem=self.be_full[constants.BE_MINMEM],
9034 maxmem=self.be_full[constants.BE_MAXMEM],
9035 vcpus=self.be_full[constants.BE_VCPUS],
9036 nics=_NICListToTuple(self, self.nics),
9037 disk_template=self.op.disk_template,
9038 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9039 for d in self.disks],
9042 hypervisor_name=self.op.hypervisor,
9048 def BuildHooksNodes(self):
9049 """Build hooks nodes.
9052 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9055 def _ReadExportInfo(self):
9056 """Reads the export information from disk.
9058 It will override the opcode source node and path with the actual
9059 information, if these two were not specified before.
9061 @return: the export information
9064 assert self.op.mode == constants.INSTANCE_IMPORT
9066 src_node = self.op.src_node
9067 src_path = self.op.src_path
9069 if src_node is None:
9070 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9071 exp_list = self.rpc.call_export_list(locked_nodes)
9073 for node in exp_list:
9074 if exp_list[node].fail_msg:
9076 if src_path in exp_list[node].payload:
9078 self.op.src_node = src_node = node
9079 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
9083 raise errors.OpPrereqError("No export found for relative path %s" %
9084 src_path, errors.ECODE_INVAL)
9086 _CheckNodeOnline(self, src_node)
9087 result = self.rpc.call_export_info(src_node, src_path)
9088 result.Raise("No export or invalid export found in dir %s" % src_path)
9090 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9091 if not export_info.has_section(constants.INISECT_EXP):
9092 raise errors.ProgrammerError("Corrupted export config",
9093 errors.ECODE_ENVIRON)
9095 ei_version = export_info.get(constants.INISECT_EXP, "version")
9096 if (int(ei_version) != constants.EXPORT_VERSION):
9097 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9098 (ei_version, constants.EXPORT_VERSION),
9099 errors.ECODE_ENVIRON)
9102 def _ReadExportParams(self, einfo):
9103 """Use export parameters as defaults.
9105 In case the opcode doesn't specify (as in override) some instance
9106 parameters, then try to use them from the export information, if
9110 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9112 if self.op.disk_template is None:
9113 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9114 self.op.disk_template = einfo.get(constants.INISECT_INS,
9116 if self.op.disk_template not in constants.DISK_TEMPLATES:
9117 raise errors.OpPrereqError("Disk template specified in configuration"
9118 " file is not one of the allowed values:"
9119 " %s" % " ".join(constants.DISK_TEMPLATES))
9121 raise errors.OpPrereqError("No disk template specified and the export"
9122 " is missing the disk_template information",
9125 if not self.op.disks:
9127 # TODO: import the disk iv_name too
9128 for idx in range(constants.MAX_DISKS):
9129 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9130 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9131 disks.append({constants.IDISK_SIZE: disk_sz})
9132 self.op.disks = disks
9133 if not disks and self.op.disk_template != constants.DT_DISKLESS:
9134 raise errors.OpPrereqError("No disk info specified and the export"
9135 " is missing the disk information",
9138 if not self.op.nics:
9140 for idx in range(constants.MAX_NICS):
9141 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9143 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9144 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9151 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9152 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9154 if (self.op.hypervisor is None and
9155 einfo.has_option(constants.INISECT_INS, "hypervisor")):
9156 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9158 if einfo.has_section(constants.INISECT_HYP):
9159 # use the export parameters but do not override the ones
9160 # specified by the user
9161 for name, value in einfo.items(constants.INISECT_HYP):
9162 if name not in self.op.hvparams:
9163 self.op.hvparams[name] = value
9165 if einfo.has_section(constants.INISECT_BEP):
9166 # use the parameters, without overriding
9167 for name, value in einfo.items(constants.INISECT_BEP):
9168 if name not in self.op.beparams:
9169 self.op.beparams[name] = value
9170 # Compatibility for the old "memory" be param
9171 if name == constants.BE_MEMORY:
9172 if constants.BE_MAXMEM not in self.op.beparams:
9173 self.op.beparams[constants.BE_MAXMEM] = value
9174 if constants.BE_MINMEM not in self.op.beparams:
9175 self.op.beparams[constants.BE_MINMEM] = value
9177 # try to read the parameters old style, from the main section
9178 for name in constants.BES_PARAMETERS:
9179 if (name not in self.op.beparams and
9180 einfo.has_option(constants.INISECT_INS, name)):
9181 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9183 if einfo.has_section(constants.INISECT_OSP):
9184 # use the parameters, without overriding
9185 for name, value in einfo.items(constants.INISECT_OSP):
9186 if name not in self.op.osparams:
9187 self.op.osparams[name] = value
9189 def _RevertToDefaults(self, cluster):
9190 """Revert the instance parameters to the default values.
9194 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9195 for name in self.op.hvparams.keys():
9196 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9197 del self.op.hvparams[name]
9199 be_defs = cluster.SimpleFillBE({})
9200 for name in self.op.beparams.keys():
9201 if name in be_defs and be_defs[name] == self.op.beparams[name]:
9202 del self.op.beparams[name]
9204 nic_defs = cluster.SimpleFillNIC({})
9205 for nic in self.op.nics:
9206 for name in constants.NICS_PARAMETERS:
9207 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9210 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9211 for name in self.op.osparams.keys():
9212 if name in os_defs and os_defs[name] == self.op.osparams[name]:
9213 del self.op.osparams[name]
9215 def _CalculateFileStorageDir(self):
9216 """Calculate final instance file storage dir.
9219 # file storage dir calculation/check
9220 self.instance_file_storage_dir = None
9221 if self.op.disk_template in constants.DTS_FILEBASED:
9222 # build the full file storage dir path
9225 if self.op.disk_template == constants.DT_SHARED_FILE:
9226 get_fsd_fn = self.cfg.GetSharedFileStorageDir
9228 get_fsd_fn = self.cfg.GetFileStorageDir
9230 cfg_storagedir = get_fsd_fn()
9231 if not cfg_storagedir:
9232 raise errors.OpPrereqError("Cluster file storage dir not defined")
9233 joinargs.append(cfg_storagedir)
9235 if self.op.file_storage_dir is not None:
9236 joinargs.append(self.op.file_storage_dir)
9238 joinargs.append(self.op.instance_name)
9240 # pylint: disable=W0142
9241 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
9243 def CheckPrereq(self):
9244 """Check prerequisites.
9247 self._CalculateFileStorageDir()
9249 if self.op.mode == constants.INSTANCE_IMPORT:
9250 export_info = self._ReadExportInfo()
9251 self._ReadExportParams(export_info)
9253 if (not self.cfg.GetVGName() and
9254 self.op.disk_template not in constants.DTS_NOT_LVM):
9255 raise errors.OpPrereqError("Cluster does not support lvm-based"
9256 " instances", errors.ECODE_STATE)
9258 if (self.op.hypervisor is None or
9259 self.op.hypervisor == constants.VALUE_AUTO):
9260 self.op.hypervisor = self.cfg.GetHypervisorType()
9262 cluster = self.cfg.GetClusterInfo()
9263 enabled_hvs = cluster.enabled_hypervisors
9264 if self.op.hypervisor not in enabled_hvs:
9265 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9266 " cluster (%s)" % (self.op.hypervisor,
9267 ",".join(enabled_hvs)),
9270 # Check tag validity
9271 for tag in self.op.tags:
9272 objects.TaggableObject.ValidateTag(tag)
9274 # check hypervisor parameter syntax (locally)
9275 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9276 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9278 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9279 hv_type.CheckParameterSyntax(filled_hvp)
9280 self.hv_full = filled_hvp
9281 # check that we don't specify global parameters on an instance
9282 _CheckGlobalHvParams(self.op.hvparams)
9284 # fill and remember the beparams dict
9285 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9286 for param, value in self.op.beparams.iteritems():
9287 if value == constants.VALUE_AUTO:
9288 self.op.beparams[param] = default_beparams[param]
9289 objects.UpgradeBeParams(self.op.beparams)
9290 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9291 self.be_full = cluster.SimpleFillBE(self.op.beparams)
9293 # build os parameters
9294 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9296 # now that hvp/bep are in final format, let's reset to defaults,
9298 if self.op.identify_defaults:
9299 self._RevertToDefaults(cluster)
9303 for idx, nic in enumerate(self.op.nics):
9304 nic_mode_req = nic.get(constants.INIC_MODE, None)
9305 nic_mode = nic_mode_req
9306 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9307 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9309 # in routed mode, for the first nic, the default ip is 'auto'
9310 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9311 default_ip_mode = constants.VALUE_AUTO
9313 default_ip_mode = constants.VALUE_NONE
9315 # ip validity checks
9316 ip = nic.get(constants.INIC_IP, default_ip_mode)
9317 if ip is None or ip.lower() == constants.VALUE_NONE:
9319 elif ip.lower() == constants.VALUE_AUTO:
9320 if not self.op.name_check:
9321 raise errors.OpPrereqError("IP address set to auto but name checks"
9322 " have been skipped",
9324 nic_ip = self.hostname1.ip
9326 if not netutils.IPAddress.IsValid(ip):
9327 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9331 # TODO: check the ip address for uniqueness
9332 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9333 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9336 # MAC address verification
9337 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9338 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9339 mac = utils.NormalizeAndValidateMac(mac)
9342 self.cfg.ReserveMAC(mac, self.proc.GetECId())
9343 except errors.ReservationError:
9344 raise errors.OpPrereqError("MAC address %s already in use"
9345 " in cluster" % mac,
9346 errors.ECODE_NOTUNIQUE)
9348 # Build nic parameters
9349 link = nic.get(constants.INIC_LINK, None)
9350 if link == constants.VALUE_AUTO:
9351 link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9354 nicparams[constants.NIC_MODE] = nic_mode
9356 nicparams[constants.NIC_LINK] = link
9358 check_params = cluster.SimpleFillNIC(nicparams)
9359 objects.NIC.CheckParameterSyntax(check_params)
9360 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9362 # disk checks/pre-build
9363 default_vg = self.cfg.GetVGName()
9365 for disk in self.op.disks:
9366 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9367 if mode not in constants.DISK_ACCESS_SET:
9368 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9369 mode, errors.ECODE_INVAL)
9370 size = disk.get(constants.IDISK_SIZE, None)
9372 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9375 except (TypeError, ValueError):
9376 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9379 data_vg = disk.get(constants.IDISK_VG, default_vg)
9381 constants.IDISK_SIZE: size,
9382 constants.IDISK_MODE: mode,
9383 constants.IDISK_VG: data_vg,
9385 if constants.IDISK_METAVG in disk:
9386 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9387 if constants.IDISK_ADOPT in disk:
9388 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9389 self.disks.append(new_disk)
9391 if self.op.mode == constants.INSTANCE_IMPORT:
9393 for idx in range(len(self.disks)):
9394 option = "disk%d_dump" % idx
9395 if export_info.has_option(constants.INISECT_INS, option):
9396 # FIXME: are the old os-es, disk sizes, etc. useful?
9397 export_name = export_info.get(constants.INISECT_INS, option)
9398 image = utils.PathJoin(self.op.src_path, export_name)
9399 disk_images.append(image)
9401 disk_images.append(False)
9403 self.src_images = disk_images
9405 old_name = export_info.get(constants.INISECT_INS, "name")
9406 if self.op.instance_name == old_name:
9407 for idx, nic in enumerate(self.nics):
9408 if nic.mac == constants.VALUE_AUTO:
9409 nic_mac_ini = "nic%d_mac" % idx
9410 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9412 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9414 # ip ping checks (we use the same ip that was resolved in ExpandNames)
9415 if self.op.ip_check:
9416 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9417 raise errors.OpPrereqError("IP %s of instance %s already in use" %
9418 (self.check_ip, self.op.instance_name),
9419 errors.ECODE_NOTUNIQUE)
9421 #### mac address generation
9422 # By generating here the mac address both the allocator and the hooks get
9423 # the real final mac address rather than the 'auto' or 'generate' value.
9424 # There is a race condition between the generation and the instance object
9425 # creation, which means that we know the mac is valid now, but we're not
9426 # sure it will be when we actually add the instance. If things go bad
9427 # adding the instance will abort because of a duplicate mac, and the
9428 # creation job will fail.
9429 for nic in self.nics:
9430 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9431 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9435 if self.op.iallocator is not None:
9436 self._RunAllocator()
9438 # Release all unneeded node locks
9439 _ReleaseLocks(self, locking.LEVEL_NODE,
9440 keep=filter(None, [self.op.pnode, self.op.snode,
9443 #### node related checks
9445 # check primary node
9446 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9447 assert self.pnode is not None, \
9448 "Cannot retrieve locked node %s" % self.op.pnode
9450 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9451 pnode.name, errors.ECODE_STATE)
9453 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9454 pnode.name, errors.ECODE_STATE)
9455 if not pnode.vm_capable:
9456 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9457 " '%s'" % pnode.name, errors.ECODE_STATE)
9459 self.secondaries = []
9461 # mirror node verification
9462 if self.op.disk_template in constants.DTS_INT_MIRROR:
9463 if self.op.snode == pnode.name:
9464 raise errors.OpPrereqError("The secondary node cannot be the"
9465 " primary node", errors.ECODE_INVAL)
9466 _CheckNodeOnline(self, self.op.snode)
9467 _CheckNodeNotDrained(self, self.op.snode)
9468 _CheckNodeVmCapable(self, self.op.snode)
9469 self.secondaries.append(self.op.snode)
9471 snode = self.cfg.GetNodeInfo(self.op.snode)
9472 if pnode.group != snode.group:
9473 self.LogWarning("The primary and secondary nodes are in two"
9474 " different node groups; the disk parameters"
9475 " from the first disk's node group will be"
9478 nodenames = [pnode.name] + self.secondaries
9480 # disk parameters (not customizable at instance or node level)
9481 # just use the primary node parameters, ignoring the secondary.
9482 self.diskparams = self.cfg.GetNodeGroup(pnode.group).diskparams
9484 if not self.adopt_disks:
9485 # Check lv size requirements, if not adopting
9486 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9487 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9489 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9490 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9491 disk[constants.IDISK_ADOPT])
9492 for disk in self.disks])
9493 if len(all_lvs) != len(self.disks):
9494 raise errors.OpPrereqError("Duplicate volume names given for adoption",
9496 for lv_name in all_lvs:
9498 # FIXME: lv_name here is "vg/lv" need to ensure that other calls
9499 # to ReserveLV uses the same syntax
9500 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9501 except errors.ReservationError:
9502 raise errors.OpPrereqError("LV named %s used by another instance" %
9503 lv_name, errors.ECODE_NOTUNIQUE)
9505 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9506 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9508 node_lvs = self.rpc.call_lv_list([pnode.name],
9509 vg_names.payload.keys())[pnode.name]
9510 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9511 node_lvs = node_lvs.payload
9513 delta = all_lvs.difference(node_lvs.keys())
9515 raise errors.OpPrereqError("Missing logical volume(s): %s" %
9516 utils.CommaJoin(delta),
9518 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9520 raise errors.OpPrereqError("Online logical volumes found, cannot"
9521 " adopt: %s" % utils.CommaJoin(online_lvs),
9523 # update the size of disk based on what is found
9524 for dsk in self.disks:
9525 dsk[constants.IDISK_SIZE] = \
9526 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9527 dsk[constants.IDISK_ADOPT])][0]))
9529 elif self.op.disk_template == constants.DT_BLOCK:
9530 # Normalize and de-duplicate device paths
9531 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9532 for disk in self.disks])
9533 if len(all_disks) != len(self.disks):
9534 raise errors.OpPrereqError("Duplicate disk names given for adoption",
9536 baddisks = [d for d in all_disks
9537 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9539 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9540 " cannot be adopted" %
9541 (", ".join(baddisks),
9542 constants.ADOPTABLE_BLOCKDEV_ROOT),
9545 node_disks = self.rpc.call_bdev_sizes([pnode.name],
9546 list(all_disks))[pnode.name]
9547 node_disks.Raise("Cannot get block device information from node %s" %
9549 node_disks = node_disks.payload
9550 delta = all_disks.difference(node_disks.keys())
9552 raise errors.OpPrereqError("Missing block device(s): %s" %
9553 utils.CommaJoin(delta),
9555 for dsk in self.disks:
9556 dsk[constants.IDISK_SIZE] = \
9557 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
9559 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
9561 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
9562 # check OS parameters (remotely)
9563 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
9565 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
9567 # memory check on primary node
9568 #TODO(dynmem): use MINMEM for checking
9570 _CheckNodeFreeMemory(self, self.pnode.name,
9571 "creating instance %s" % self.op.instance_name,
9572 self.be_full[constants.BE_MAXMEM],
9575 self.dry_run_result = list(nodenames)
9577 def Exec(self, feedback_fn):
9578 """Create and add the instance to the cluster.
9581 instance = self.op.instance_name
9582 pnode_name = self.pnode.name
9584 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
9585 self.owned_locks(locking.LEVEL_NODE)), \
9586 "Node locks differ from node resource locks"
9588 ht_kind = self.op.hypervisor
9589 if ht_kind in constants.HTS_REQ_PORT:
9590 network_port = self.cfg.AllocatePort()
9594 disks = _GenerateDiskTemplate(self,
9595 self.op.disk_template,
9596 instance, pnode_name,
9599 self.instance_file_storage_dir,
9600 self.op.file_driver,
9605 iobj = objects.Instance(name=instance, os=self.op.os_type,
9606 primary_node=pnode_name,
9607 nics=self.nics, disks=disks,
9608 disk_template=self.op.disk_template,
9609 admin_state=constants.ADMINST_DOWN,
9610 network_port=network_port,
9611 beparams=self.op.beparams,
9612 hvparams=self.op.hvparams,
9613 hypervisor=self.op.hypervisor,
9614 osparams=self.op.osparams,
9618 for tag in self.op.tags:
9621 if self.adopt_disks:
9622 if self.op.disk_template == constants.DT_PLAIN:
9623 # rename LVs to the newly-generated names; we need to construct
9624 # 'fake' LV disks with the old data, plus the new unique_id
9625 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
9627 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
9628 rename_to.append(t_dsk.logical_id)
9629 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
9630 self.cfg.SetDiskID(t_dsk, pnode_name)
9631 result = self.rpc.call_blockdev_rename(pnode_name,
9632 zip(tmp_disks, rename_to))
9633 result.Raise("Failed to rename adoped LVs")
9635 feedback_fn("* creating instance disks...")
9637 _CreateDisks(self, iobj)
9638 except errors.OpExecError:
9639 self.LogWarning("Device creation failed, reverting...")
9641 _RemoveDisks(self, iobj)
9643 self.cfg.ReleaseDRBDMinors(instance)
9646 feedback_fn("adding instance %s to cluster config" % instance)
9648 self.cfg.AddInstance(iobj, self.proc.GetECId())
9650 # Declare that we don't want to remove the instance lock anymore, as we've
9651 # added the instance to the config
9652 del self.remove_locks[locking.LEVEL_INSTANCE]
9654 if self.op.mode == constants.INSTANCE_IMPORT:
9655 # Release unused nodes
9656 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
9659 _ReleaseLocks(self, locking.LEVEL_NODE)
9662 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
9663 feedback_fn("* wiping instance disks...")
9665 _WipeDisks(self, iobj)
9666 except errors.OpExecError, err:
9667 logging.exception("Wiping disks failed")
9668 self.LogWarning("Wiping instance disks failed (%s)", err)
9672 # Something is already wrong with the disks, don't do anything else
9674 elif self.op.wait_for_sync:
9675 disk_abort = not _WaitForSync(self, iobj)
9676 elif iobj.disk_template in constants.DTS_INT_MIRROR:
9677 # make sure the disks are not degraded (still sync-ing is ok)
9678 feedback_fn("* checking mirrors status")
9679 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
9684 _RemoveDisks(self, iobj)
9685 self.cfg.RemoveInstance(iobj.name)
9686 # Make sure the instance lock gets removed
9687 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
9688 raise errors.OpExecError("There are some degraded disks for"
9691 # Release all node resource locks
9692 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
9694 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
9695 if self.op.mode == constants.INSTANCE_CREATE:
9696 if not self.op.no_install:
9697 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
9698 not self.op.wait_for_sync)
9700 feedback_fn("* pausing disk sync to install instance OS")
9701 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9703 for idx, success in enumerate(result.payload):
9705 logging.warn("pause-sync of instance %s for disk %d failed",
9708 feedback_fn("* running the instance OS create scripts...")
9709 # FIXME: pass debug option from opcode to backend
9711 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
9712 self.op.debug_level)
9714 feedback_fn("* resuming disk sync")
9715 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9717 for idx, success in enumerate(result.payload):
9719 logging.warn("resume-sync of instance %s for disk %d failed",
9722 os_add_result.Raise("Could not add os for instance %s"
9723 " on node %s" % (instance, pnode_name))
9725 elif self.op.mode == constants.INSTANCE_IMPORT:
9726 feedback_fn("* running the instance OS import scripts...")
9730 for idx, image in enumerate(self.src_images):
9734 # FIXME: pass debug option from opcode to backend
9735 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
9736 constants.IEIO_FILE, (image, ),
9737 constants.IEIO_SCRIPT,
9738 (iobj.disks[idx], idx),
9740 transfers.append(dt)
9743 masterd.instance.TransferInstanceData(self, feedback_fn,
9744 self.op.src_node, pnode_name,
9745 self.pnode.secondary_ip,
9747 if not compat.all(import_result):
9748 self.LogWarning("Some disks for instance %s on node %s were not"
9749 " imported successfully" % (instance, pnode_name))
9751 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9752 feedback_fn("* preparing remote import...")
9753 # The source cluster will stop the instance before attempting to make a
9754 # connection. In some cases stopping an instance can take a long time,
9755 # hence the shutdown timeout is added to the connection timeout.
9756 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
9757 self.op.source_shutdown_timeout)
9758 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9760 assert iobj.primary_node == self.pnode.name
9762 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
9763 self.source_x509_ca,
9764 self._cds, timeouts)
9765 if not compat.all(disk_results):
9766 # TODO: Should the instance still be started, even if some disks
9767 # failed to import (valid for local imports, too)?
9768 self.LogWarning("Some disks for instance %s on node %s were not"
9769 " imported successfully" % (instance, pnode_name))
9771 # Run rename script on newly imported instance
9772 assert iobj.name == instance
9773 feedback_fn("Running rename script for %s" % instance)
9774 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
9775 self.source_instance_name,
9776 self.op.debug_level)
9778 self.LogWarning("Failed to run rename script for %s on node"
9779 " %s: %s" % (instance, pnode_name, result.fail_msg))
9782 # also checked in the prereq part
9783 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
9786 assert not self.owned_locks(locking.LEVEL_NODE_RES)
9789 iobj.admin_state = constants.ADMINST_UP
9790 self.cfg.Update(iobj, feedback_fn)
9791 logging.info("Starting instance %s on node %s", instance, pnode_name)
9792 feedback_fn("* starting instance...")
9793 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
9795 result.Raise("Could not start instance")
9797 return list(iobj.all_nodes)
9800 class LUInstanceConsole(NoHooksLU):
9801 """Connect to an instance's console.
9803 This is somewhat special in that it returns the command line that
9804 you need to run on the master node in order to connect to the
9810 def ExpandNames(self):
9811 self.share_locks = _ShareAll()
9812 self._ExpandAndLockInstance()
9814 def CheckPrereq(self):
9815 """Check prerequisites.
9817 This checks that the instance is in the cluster.
9820 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9821 assert self.instance is not None, \
9822 "Cannot retrieve locked instance %s" % self.op.instance_name
9823 _CheckNodeOnline(self, self.instance.primary_node)
9825 def Exec(self, feedback_fn):
9826 """Connect to the console of an instance
9829 instance = self.instance
9830 node = instance.primary_node
9832 node_insts = self.rpc.call_instance_list([node],
9833 [instance.hypervisor])[node]
9834 node_insts.Raise("Can't get node information from %s" % node)
9836 if instance.name not in node_insts.payload:
9837 if instance.admin_state == constants.ADMINST_UP:
9838 state = constants.INSTST_ERRORDOWN
9839 elif instance.admin_state == constants.ADMINST_DOWN:
9840 state = constants.INSTST_ADMINDOWN
9842 state = constants.INSTST_ADMINOFFLINE
9843 raise errors.OpExecError("Instance %s is not running (state %s)" %
9844 (instance.name, state))
9846 logging.debug("Connecting to console of %s on %s", instance.name, node)
9848 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
9851 def _GetInstanceConsole(cluster, instance):
9852 """Returns console information for an instance.
9854 @type cluster: L{objects.Cluster}
9855 @type instance: L{objects.Instance}
9859 hyper = hypervisor.GetHypervisor(instance.hypervisor)
9860 # beparams and hvparams are passed separately, to avoid editing the
9861 # instance and then saving the defaults in the instance itself.
9862 hvparams = cluster.FillHV(instance)
9863 beparams = cluster.FillBE(instance)
9864 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
9866 assert console.instance == instance.name
9867 assert console.Validate()
9869 return console.ToDict()
9872 class LUInstanceReplaceDisks(LogicalUnit):
9873 """Replace the disks of an instance.
9876 HPATH = "mirrors-replace"
9877 HTYPE = constants.HTYPE_INSTANCE
9880 def CheckArguments(self):
9881 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
9884 def ExpandNames(self):
9885 self._ExpandAndLockInstance()
9887 assert locking.LEVEL_NODE not in self.needed_locks
9888 assert locking.LEVEL_NODE_RES not in self.needed_locks
9889 assert locking.LEVEL_NODEGROUP not in self.needed_locks
9891 assert self.op.iallocator is None or self.op.remote_node is None, \
9892 "Conflicting options"
9894 if self.op.remote_node is not None:
9895 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9897 # Warning: do not remove the locking of the new secondary here
9898 # unless DRBD8.AddChildren is changed to work in parallel;
9899 # currently it doesn't since parallel invocations of
9900 # FindUnusedMinor will conflict
9901 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
9902 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
9904 self.needed_locks[locking.LEVEL_NODE] = []
9905 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9907 if self.op.iallocator is not None:
9908 # iallocator will select a new node in the same group
9909 self.needed_locks[locking.LEVEL_NODEGROUP] = []
9911 self.needed_locks[locking.LEVEL_NODE_RES] = []
9913 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
9914 self.op.iallocator, self.op.remote_node,
9915 self.op.disks, False, self.op.early_release)
9917 self.tasklets = [self.replacer]
9919 def DeclareLocks(self, level):
9920 if level == locking.LEVEL_NODEGROUP:
9921 assert self.op.remote_node is None
9922 assert self.op.iallocator is not None
9923 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
9925 self.share_locks[locking.LEVEL_NODEGROUP] = 1
9926 # Lock all groups used by instance optimistically; this requires going
9927 # via the node before it's locked, requiring verification later on
9928 self.needed_locks[locking.LEVEL_NODEGROUP] = \
9929 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9931 elif level == locking.LEVEL_NODE:
9932 if self.op.iallocator is not None:
9933 assert self.op.remote_node is None
9934 assert not self.needed_locks[locking.LEVEL_NODE]
9936 # Lock member nodes of all locked groups
9937 self.needed_locks[locking.LEVEL_NODE] = [node_name
9938 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
9939 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
9941 self._LockInstancesNodes()
9942 elif level == locking.LEVEL_NODE_RES:
9944 self.needed_locks[locking.LEVEL_NODE_RES] = \
9945 self.needed_locks[locking.LEVEL_NODE]
9947 def BuildHooksEnv(self):
9950 This runs on the master, the primary and all the secondaries.
9953 instance = self.replacer.instance
9955 "MODE": self.op.mode,
9956 "NEW_SECONDARY": self.op.remote_node,
9957 "OLD_SECONDARY": instance.secondary_nodes[0],
9959 env.update(_BuildInstanceHookEnvByObject(self, instance))
9962 def BuildHooksNodes(self):
9963 """Build hooks nodes.
9966 instance = self.replacer.instance
9968 self.cfg.GetMasterNode(),
9969 instance.primary_node,
9971 if self.op.remote_node is not None:
9972 nl.append(self.op.remote_node)
9975 def CheckPrereq(self):
9976 """Check prerequisites.
9979 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
9980 self.op.iallocator is None)
9982 # Verify if node group locks are still correct
9983 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
9985 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
9987 return LogicalUnit.CheckPrereq(self)
9990 class TLReplaceDisks(Tasklet):
9991 """Replaces disks for an instance.
9993 Note: Locking is not within the scope of this class.
9996 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9997 disks, delay_iallocator, early_release):
9998 """Initializes this class.
10001 Tasklet.__init__(self, lu)
10004 self.instance_name = instance_name
10006 self.iallocator_name = iallocator_name
10007 self.remote_node = remote_node
10009 self.delay_iallocator = delay_iallocator
10010 self.early_release = early_release
10013 self.instance = None
10014 self.new_node = None
10015 self.target_node = None
10016 self.other_node = None
10017 self.remote_node_info = None
10018 self.node_secondary_ip = None
10021 def CheckArguments(mode, remote_node, iallocator):
10022 """Helper function for users of this class.
10025 # check for valid parameter combination
10026 if mode == constants.REPLACE_DISK_CHG:
10027 if remote_node is None and iallocator is None:
10028 raise errors.OpPrereqError("When changing the secondary either an"
10029 " iallocator script must be used or the"
10030 " new node given", errors.ECODE_INVAL)
10032 if remote_node is not None and iallocator is not None:
10033 raise errors.OpPrereqError("Give either the iallocator or the new"
10034 " secondary, not both", errors.ECODE_INVAL)
10036 elif remote_node is not None or iallocator is not None:
10037 # Not replacing the secondary
10038 raise errors.OpPrereqError("The iallocator and new node options can"
10039 " only be used when changing the"
10040 " secondary node", errors.ECODE_INVAL)
10043 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10044 """Compute a new secondary node using an IAllocator.
10047 ial = IAllocator(lu.cfg, lu.rpc,
10048 mode=constants.IALLOCATOR_MODE_RELOC,
10049 name=instance_name,
10050 relocate_from=list(relocate_from))
10052 ial.Run(iallocator_name)
10054 if not ial.success:
10055 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10056 " %s" % (iallocator_name, ial.info),
10057 errors.ECODE_NORES)
10059 if len(ial.result) != ial.required_nodes:
10060 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10061 " of nodes (%s), required %s" %
10063 len(ial.result), ial.required_nodes),
10064 errors.ECODE_FAULT)
10066 remote_node_name = ial.result[0]
10068 lu.LogInfo("Selected new secondary for instance '%s': %s",
10069 instance_name, remote_node_name)
10071 return remote_node_name
10073 def _FindFaultyDisks(self, node_name):
10074 """Wrapper for L{_FindFaultyInstanceDisks}.
10077 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10080 def _CheckDisksActivated(self, instance):
10081 """Checks if the instance disks are activated.
10083 @param instance: The instance to check disks
10084 @return: True if they are activated, False otherwise
10087 nodes = instance.all_nodes
10089 for idx, dev in enumerate(instance.disks):
10091 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10092 self.cfg.SetDiskID(dev, node)
10094 result = self.rpc.call_blockdev_find(node, dev)
10098 elif result.fail_msg or not result.payload:
10103 def CheckPrereq(self):
10104 """Check prerequisites.
10106 This checks that the instance is in the cluster.
10109 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10110 assert instance is not None, \
10111 "Cannot retrieve locked instance %s" % self.instance_name
10113 if instance.disk_template != constants.DT_DRBD8:
10114 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10115 " instances", errors.ECODE_INVAL)
10117 if len(instance.secondary_nodes) != 1:
10118 raise errors.OpPrereqError("The instance has a strange layout,"
10119 " expected one secondary but found %d" %
10120 len(instance.secondary_nodes),
10121 errors.ECODE_FAULT)
10123 if not self.delay_iallocator:
10124 self._CheckPrereq2()
10126 def _CheckPrereq2(self):
10127 """Check prerequisites, second part.
10129 This function should always be part of CheckPrereq. It was separated and is
10130 now called from Exec because during node evacuation iallocator was only
10131 called with an unmodified cluster model, not taking planned changes into
10135 instance = self.instance
10136 secondary_node = instance.secondary_nodes[0]
10138 if self.iallocator_name is None:
10139 remote_node = self.remote_node
10141 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10142 instance.name, instance.secondary_nodes)
10144 if remote_node is None:
10145 self.remote_node_info = None
10147 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10148 "Remote node '%s' is not locked" % remote_node
10150 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10151 assert self.remote_node_info is not None, \
10152 "Cannot retrieve locked node %s" % remote_node
10154 if remote_node == self.instance.primary_node:
10155 raise errors.OpPrereqError("The specified node is the primary node of"
10156 " the instance", errors.ECODE_INVAL)
10158 if remote_node == secondary_node:
10159 raise errors.OpPrereqError("The specified node is already the"
10160 " secondary node of the instance",
10161 errors.ECODE_INVAL)
10163 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10164 constants.REPLACE_DISK_CHG):
10165 raise errors.OpPrereqError("Cannot specify disks to be replaced",
10166 errors.ECODE_INVAL)
10168 if self.mode == constants.REPLACE_DISK_AUTO:
10169 if not self._CheckDisksActivated(instance):
10170 raise errors.OpPrereqError("Please run activate-disks on instance %s"
10171 " first" % self.instance_name,
10172 errors.ECODE_STATE)
10173 faulty_primary = self._FindFaultyDisks(instance.primary_node)
10174 faulty_secondary = self._FindFaultyDisks(secondary_node)
10176 if faulty_primary and faulty_secondary:
10177 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10178 " one node and can not be repaired"
10179 " automatically" % self.instance_name,
10180 errors.ECODE_STATE)
10183 self.disks = faulty_primary
10184 self.target_node = instance.primary_node
10185 self.other_node = secondary_node
10186 check_nodes = [self.target_node, self.other_node]
10187 elif faulty_secondary:
10188 self.disks = faulty_secondary
10189 self.target_node = secondary_node
10190 self.other_node = instance.primary_node
10191 check_nodes = [self.target_node, self.other_node]
10197 # Non-automatic modes
10198 if self.mode == constants.REPLACE_DISK_PRI:
10199 self.target_node = instance.primary_node
10200 self.other_node = secondary_node
10201 check_nodes = [self.target_node, self.other_node]
10203 elif self.mode == constants.REPLACE_DISK_SEC:
10204 self.target_node = secondary_node
10205 self.other_node = instance.primary_node
10206 check_nodes = [self.target_node, self.other_node]
10208 elif self.mode == constants.REPLACE_DISK_CHG:
10209 self.new_node = remote_node
10210 self.other_node = instance.primary_node
10211 self.target_node = secondary_node
10212 check_nodes = [self.new_node, self.other_node]
10214 _CheckNodeNotDrained(self.lu, remote_node)
10215 _CheckNodeVmCapable(self.lu, remote_node)
10217 old_node_info = self.cfg.GetNodeInfo(secondary_node)
10218 assert old_node_info is not None
10219 if old_node_info.offline and not self.early_release:
10220 # doesn't make sense to delay the release
10221 self.early_release = True
10222 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10223 " early-release mode", secondary_node)
10226 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10229 # If not specified all disks should be replaced
10231 self.disks = range(len(self.instance.disks))
10233 # TODO: compute disk parameters
10234 primary_node_info = self.cfg.GetNodeInfo(instance.primary_node)
10235 secondary_node_info = self.cfg.GetNodeInfo(secondary_node)
10236 if primary_node_info.group != secondary_node_info.group:
10237 self.lu.LogInfo("The instance primary and secondary nodes are in two"
10238 " different node groups; the disk parameters of the"
10239 " primary node's group will be applied.")
10241 self.diskparams = self.cfg.GetNodeGroup(primary_node_info.group).diskparams
10243 for node in check_nodes:
10244 _CheckNodeOnline(self.lu, node)
10246 touched_nodes = frozenset(node_name for node_name in [self.new_node,
10249 if node_name is not None)
10251 # Release unneeded node and node resource locks
10252 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10253 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10255 # Release any owned node group
10256 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10257 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10259 # Check whether disks are valid
10260 for disk_idx in self.disks:
10261 instance.FindDisk(disk_idx)
10263 # Get secondary node IP addresses
10264 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10265 in self.cfg.GetMultiNodeInfo(touched_nodes))
10267 def Exec(self, feedback_fn):
10268 """Execute disk replacement.
10270 This dispatches the disk replacement to the appropriate handler.
10273 if self.delay_iallocator:
10274 self._CheckPrereq2()
10277 # Verify owned locks before starting operation
10278 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10279 assert set(owned_nodes) == set(self.node_secondary_ip), \
10280 ("Incorrect node locks, owning %s, expected %s" %
10281 (owned_nodes, self.node_secondary_ip.keys()))
10282 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10283 self.lu.owned_locks(locking.LEVEL_NODE_RES))
10285 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10286 assert list(owned_instances) == [self.instance_name], \
10287 "Instance '%s' not locked" % self.instance_name
10289 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10290 "Should not own any node group lock at this point"
10293 feedback_fn("No disks need replacement")
10296 feedback_fn("Replacing disk(s) %s for %s" %
10297 (utils.CommaJoin(self.disks), self.instance.name))
10299 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10301 # Activate the instance disks if we're replacing them on a down instance
10303 _StartInstanceDisks(self.lu, self.instance, True)
10306 # Should we replace the secondary node?
10307 if self.new_node is not None:
10308 fn = self._ExecDrbd8Secondary
10310 fn = self._ExecDrbd8DiskOnly
10312 result = fn(feedback_fn)
10314 # Deactivate the instance disks if we're replacing them on a
10317 _SafeShutdownInstanceDisks(self.lu, self.instance)
10319 assert not self.lu.owned_locks(locking.LEVEL_NODE)
10322 # Verify owned locks
10323 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10324 nodes = frozenset(self.node_secondary_ip)
10325 assert ((self.early_release and not owned_nodes) or
10326 (not self.early_release and not (set(owned_nodes) - nodes))), \
10327 ("Not owning the correct locks, early_release=%s, owned=%r,"
10328 " nodes=%r" % (self.early_release, owned_nodes, nodes))
10332 def _CheckVolumeGroup(self, nodes):
10333 self.lu.LogInfo("Checking volume groups")
10335 vgname = self.cfg.GetVGName()
10337 # Make sure volume group exists on all involved nodes
10338 results = self.rpc.call_vg_list(nodes)
10340 raise errors.OpExecError("Can't list volume groups on the nodes")
10343 res = results[node]
10344 res.Raise("Error checking node %s" % node)
10345 if vgname not in res.payload:
10346 raise errors.OpExecError("Volume group '%s' not found on node %s" %
10349 def _CheckDisksExistence(self, nodes):
10350 # Check disk existence
10351 for idx, dev in enumerate(self.instance.disks):
10352 if idx not in self.disks:
10356 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10357 self.cfg.SetDiskID(dev, node)
10359 result = self.rpc.call_blockdev_find(node, dev)
10361 msg = result.fail_msg
10362 if msg or not result.payload:
10364 msg = "disk not found"
10365 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10368 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10369 for idx, dev in enumerate(self.instance.disks):
10370 if idx not in self.disks:
10373 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10376 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
10378 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10379 " replace disks for instance %s" %
10380 (node_name, self.instance.name))
10382 def _CreateNewStorage(self, node_name):
10383 """Create new storage on the primary or secondary node.
10385 This is only used for same-node replaces, not for changing the
10386 secondary node, hence we don't want to modify the existing disk.
10391 for idx, dev in enumerate(self.instance.disks):
10392 if idx not in self.disks:
10395 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10397 self.cfg.SetDiskID(dev, node_name)
10399 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10400 names = _GenerateUniqueNames(self.lu, lv_names)
10402 _, data_p, meta_p = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
10404 vg_data = dev.children[0].logical_id[0]
10405 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10406 logical_id=(vg_data, names[0]), params=data_p)
10407 vg_meta = dev.children[1].logical_id[0]
10408 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10409 logical_id=(vg_meta, names[1]), params=meta_p)
10411 new_lvs = [lv_data, lv_meta]
10412 old_lvs = [child.Copy() for child in dev.children]
10413 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10415 # we pass force_create=True to force the LVM creation
10416 for new_lv in new_lvs:
10417 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
10418 _GetInstanceInfoText(self.instance), False)
10422 def _CheckDevices(self, node_name, iv_names):
10423 for name, (dev, _, _) in iv_names.iteritems():
10424 self.cfg.SetDiskID(dev, node_name)
10426 result = self.rpc.call_blockdev_find(node_name, dev)
10428 msg = result.fail_msg
10429 if msg or not result.payload:
10431 msg = "disk not found"
10432 raise errors.OpExecError("Can't find DRBD device %s: %s" %
10435 if result.payload.is_degraded:
10436 raise errors.OpExecError("DRBD device %s is degraded!" % name)
10438 def _RemoveOldStorage(self, node_name, iv_names):
10439 for name, (_, old_lvs, _) in iv_names.iteritems():
10440 self.lu.LogInfo("Remove logical volumes for %s" % name)
10443 self.cfg.SetDiskID(lv, node_name)
10445 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10447 self.lu.LogWarning("Can't remove old LV: %s" % msg,
10448 hint="remove unused LVs manually")
10450 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10451 """Replace a disk on the primary or secondary for DRBD 8.
10453 The algorithm for replace is quite complicated:
10455 1. for each disk to be replaced:
10457 1. create new LVs on the target node with unique names
10458 1. detach old LVs from the drbd device
10459 1. rename old LVs to name_replaced.<time_t>
10460 1. rename new LVs to old LVs
10461 1. attach the new LVs (with the old names now) to the drbd device
10463 1. wait for sync across all devices
10465 1. for each modified disk:
10467 1. remove old LVs (which have the name name_replaces.<time_t>)
10469 Failures are not very well handled.
10474 # Step: check device activation
10475 self.lu.LogStep(1, steps_total, "Check device existence")
10476 self._CheckDisksExistence([self.other_node, self.target_node])
10477 self._CheckVolumeGroup([self.target_node, self.other_node])
10479 # Step: check other node consistency
10480 self.lu.LogStep(2, steps_total, "Check peer consistency")
10481 self._CheckDisksConsistency(self.other_node,
10482 self.other_node == self.instance.primary_node,
10485 # Step: create new storage
10486 self.lu.LogStep(3, steps_total, "Allocate new storage")
10487 iv_names = self._CreateNewStorage(self.target_node)
10489 # Step: for each lv, detach+rename*2+attach
10490 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10491 for dev, old_lvs, new_lvs in iv_names.itervalues():
10492 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10494 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10496 result.Raise("Can't detach drbd from local storage on node"
10497 " %s for device %s" % (self.target_node, dev.iv_name))
10499 #cfg.Update(instance)
10501 # ok, we created the new LVs, so now we know we have the needed
10502 # storage; as such, we proceed on the target node to rename
10503 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
10504 # using the assumption that logical_id == physical_id (which in
10505 # turn is the unique_id on that node)
10507 # FIXME(iustin): use a better name for the replaced LVs
10508 temp_suffix = int(time.time())
10509 ren_fn = lambda d, suff: (d.physical_id[0],
10510 d.physical_id[1] + "_replaced-%s" % suff)
10512 # Build the rename list based on what LVs exist on the node
10513 rename_old_to_new = []
10514 for to_ren in old_lvs:
10515 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
10516 if not result.fail_msg and result.payload:
10518 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
10520 self.lu.LogInfo("Renaming the old LVs on the target node")
10521 result = self.rpc.call_blockdev_rename(self.target_node,
10523 result.Raise("Can't rename old LVs on node %s" % self.target_node)
10525 # Now we rename the new LVs to the old LVs
10526 self.lu.LogInfo("Renaming the new LVs on the target node")
10527 rename_new_to_old = [(new, old.physical_id)
10528 for old, new in zip(old_lvs, new_lvs)]
10529 result = self.rpc.call_blockdev_rename(self.target_node,
10531 result.Raise("Can't rename new LVs on node %s" % self.target_node)
10533 # Intermediate steps of in memory modifications
10534 for old, new in zip(old_lvs, new_lvs):
10535 new.logical_id = old.logical_id
10536 self.cfg.SetDiskID(new, self.target_node)
10538 # We need to modify old_lvs so that removal later removes the
10539 # right LVs, not the newly added ones; note that old_lvs is a
10541 for disk in old_lvs:
10542 disk.logical_id = ren_fn(disk, temp_suffix)
10543 self.cfg.SetDiskID(disk, self.target_node)
10545 # Now that the new lvs have the old name, we can add them to the device
10546 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
10547 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
10549 msg = result.fail_msg
10551 for new_lv in new_lvs:
10552 msg2 = self.rpc.call_blockdev_remove(self.target_node,
10555 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
10556 hint=("cleanup manually the unused logical"
10558 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
10560 cstep = itertools.count(5)
10562 if self.early_release:
10563 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10564 self._RemoveOldStorage(self.target_node, iv_names)
10565 # TODO: Check if releasing locks early still makes sense
10566 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10568 # Release all resource locks except those used by the instance
10569 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10570 keep=self.node_secondary_ip.keys())
10572 # Release all node locks while waiting for sync
10573 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10575 # TODO: Can the instance lock be downgraded here? Take the optional disk
10576 # shutdown in the caller into consideration.
10579 # This can fail as the old devices are degraded and _WaitForSync
10580 # does a combined result over all disks, so we don't check its return value
10581 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10582 _WaitForSync(self.lu, self.instance)
10584 # Check all devices manually
10585 self._CheckDevices(self.instance.primary_node, iv_names)
10587 # Step: remove old storage
10588 if not self.early_release:
10589 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10590 self._RemoveOldStorage(self.target_node, iv_names)
10592 def _ExecDrbd8Secondary(self, feedback_fn):
10593 """Replace the secondary node for DRBD 8.
10595 The algorithm for replace is quite complicated:
10596 - for all disks of the instance:
10597 - create new LVs on the new node with same names
10598 - shutdown the drbd device on the old secondary
10599 - disconnect the drbd network on the primary
10600 - create the drbd device on the new secondary
10601 - network attach the drbd on the primary, using an artifice:
10602 the drbd code for Attach() will connect to the network if it
10603 finds a device which is connected to the good local disks but
10604 not network enabled
10605 - wait for sync across all devices
10606 - remove all disks from the old secondary
10608 Failures are not very well handled.
10613 pnode = self.instance.primary_node
10615 # Step: check device activation
10616 self.lu.LogStep(1, steps_total, "Check device existence")
10617 self._CheckDisksExistence([self.instance.primary_node])
10618 self._CheckVolumeGroup([self.instance.primary_node])
10620 # Step: check other node consistency
10621 self.lu.LogStep(2, steps_total, "Check peer consistency")
10622 self._CheckDisksConsistency(self.instance.primary_node, True, True)
10624 # Step: create new storage
10625 self.lu.LogStep(3, steps_total, "Allocate new storage")
10626 for idx, dev in enumerate(self.instance.disks):
10627 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
10628 (self.new_node, idx))
10629 # we pass force_create=True to force LVM creation
10630 for new_lv in dev.children:
10631 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
10632 _GetInstanceInfoText(self.instance), False)
10634 # Step 4: dbrd minors and drbd setups changes
10635 # after this, we must manually remove the drbd minors on both the
10636 # error and the success paths
10637 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10638 minors = self.cfg.AllocateDRBDMinor([self.new_node
10639 for dev in self.instance.disks],
10640 self.instance.name)
10641 logging.debug("Allocated minors %r", minors)
10644 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
10645 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
10646 (self.new_node, idx))
10647 # create new devices on new_node; note that we create two IDs:
10648 # one without port, so the drbd will be activated without
10649 # networking information on the new node at this stage, and one
10650 # with network, for the latter activation in step 4
10651 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
10652 if self.instance.primary_node == o_node1:
10655 assert self.instance.primary_node == o_node2, "Three-node instance?"
10658 new_alone_id = (self.instance.primary_node, self.new_node, None,
10659 p_minor, new_minor, o_secret)
10660 new_net_id = (self.instance.primary_node, self.new_node, o_port,
10661 p_minor, new_minor, o_secret)
10663 iv_names[idx] = (dev, dev.children, new_net_id)
10664 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
10666 drbd_params, _, _ = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
10667 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
10668 logical_id=new_alone_id,
10669 children=dev.children,
10671 params=drbd_params)
10673 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
10674 _GetInstanceInfoText(self.instance), False)
10675 except errors.GenericError:
10676 self.cfg.ReleaseDRBDMinors(self.instance.name)
10679 # We have new devices, shutdown the drbd on the old secondary
10680 for idx, dev in enumerate(self.instance.disks):
10681 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
10682 self.cfg.SetDiskID(dev, self.target_node)
10683 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
10685 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
10686 "node: %s" % (idx, msg),
10687 hint=("Please cleanup this device manually as"
10688 " soon as possible"))
10690 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
10691 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
10692 self.instance.disks)[pnode]
10694 msg = result.fail_msg
10696 # detaches didn't succeed (unlikely)
10697 self.cfg.ReleaseDRBDMinors(self.instance.name)
10698 raise errors.OpExecError("Can't detach the disks from the network on"
10699 " old node: %s" % (msg,))
10701 # if we managed to detach at least one, we update all the disks of
10702 # the instance to point to the new secondary
10703 self.lu.LogInfo("Updating instance configuration")
10704 for dev, _, new_logical_id in iv_names.itervalues():
10705 dev.logical_id = new_logical_id
10706 self.cfg.SetDiskID(dev, self.instance.primary_node)
10708 self.cfg.Update(self.instance, feedback_fn)
10710 # Release all node locks (the configuration has been updated)
10711 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10713 # and now perform the drbd attach
10714 self.lu.LogInfo("Attaching primary drbds to new secondary"
10715 " (standalone => connected)")
10716 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
10718 self.node_secondary_ip,
10719 self.instance.disks,
10720 self.instance.name,
10722 for to_node, to_result in result.items():
10723 msg = to_result.fail_msg
10725 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
10727 hint=("please do a gnt-instance info to see the"
10728 " status of disks"))
10730 cstep = itertools.count(5)
10732 if self.early_release:
10733 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10734 self._RemoveOldStorage(self.target_node, iv_names)
10735 # TODO: Check if releasing locks early still makes sense
10736 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10738 # Release all resource locks except those used by the instance
10739 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10740 keep=self.node_secondary_ip.keys())
10742 # TODO: Can the instance lock be downgraded here? Take the optional disk
10743 # shutdown in the caller into consideration.
10746 # This can fail as the old devices are degraded and _WaitForSync
10747 # does a combined result over all disks, so we don't check its return value
10748 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10749 _WaitForSync(self.lu, self.instance)
10751 # Check all devices manually
10752 self._CheckDevices(self.instance.primary_node, iv_names)
10754 # Step: remove old storage
10755 if not self.early_release:
10756 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10757 self._RemoveOldStorage(self.target_node, iv_names)
10760 class LURepairNodeStorage(NoHooksLU):
10761 """Repairs the volume group on a node.
10766 def CheckArguments(self):
10767 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10769 storage_type = self.op.storage_type
10771 if (constants.SO_FIX_CONSISTENCY not in
10772 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
10773 raise errors.OpPrereqError("Storage units of type '%s' can not be"
10774 " repaired" % storage_type,
10775 errors.ECODE_INVAL)
10777 def ExpandNames(self):
10778 self.needed_locks = {
10779 locking.LEVEL_NODE: [self.op.node_name],
10782 def _CheckFaultyDisks(self, instance, node_name):
10783 """Ensure faulty disks abort the opcode or at least warn."""
10785 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
10787 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
10788 " node '%s'" % (instance.name, node_name),
10789 errors.ECODE_STATE)
10790 except errors.OpPrereqError, err:
10791 if self.op.ignore_consistency:
10792 self.proc.LogWarning(str(err.args[0]))
10796 def CheckPrereq(self):
10797 """Check prerequisites.
10800 # Check whether any instance on this node has faulty disks
10801 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
10802 if inst.admin_state != constants.ADMINST_UP:
10804 check_nodes = set(inst.all_nodes)
10805 check_nodes.discard(self.op.node_name)
10806 for inst_node_name in check_nodes:
10807 self._CheckFaultyDisks(inst, inst_node_name)
10809 def Exec(self, feedback_fn):
10810 feedback_fn("Repairing storage unit '%s' on %s ..." %
10811 (self.op.name, self.op.node_name))
10813 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
10814 result = self.rpc.call_storage_execute(self.op.node_name,
10815 self.op.storage_type, st_args,
10817 constants.SO_FIX_CONSISTENCY)
10818 result.Raise("Failed to repair storage unit '%s' on %s" %
10819 (self.op.name, self.op.node_name))
10822 class LUNodeEvacuate(NoHooksLU):
10823 """Evacuates instances off a list of nodes.
10828 _MODE2IALLOCATOR = {
10829 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
10830 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
10831 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
10833 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
10834 assert (frozenset(_MODE2IALLOCATOR.values()) ==
10835 constants.IALLOCATOR_NEVAC_MODES)
10837 def CheckArguments(self):
10838 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10840 def ExpandNames(self):
10841 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10843 if self.op.remote_node is not None:
10844 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10845 assert self.op.remote_node
10847 if self.op.remote_node == self.op.node_name:
10848 raise errors.OpPrereqError("Can not use evacuated node as a new"
10849 " secondary node", errors.ECODE_INVAL)
10851 if self.op.mode != constants.NODE_EVAC_SEC:
10852 raise errors.OpPrereqError("Without the use of an iallocator only"
10853 " secondary instances can be evacuated",
10854 errors.ECODE_INVAL)
10857 self.share_locks = _ShareAll()
10858 self.needed_locks = {
10859 locking.LEVEL_INSTANCE: [],
10860 locking.LEVEL_NODEGROUP: [],
10861 locking.LEVEL_NODE: [],
10864 # Determine nodes (via group) optimistically, needs verification once locks
10865 # have been acquired
10866 self.lock_nodes = self._DetermineNodes()
10868 def _DetermineNodes(self):
10869 """Gets the list of nodes to operate on.
10872 if self.op.remote_node is None:
10873 # Iallocator will choose any node(s) in the same group
10874 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
10876 group_nodes = frozenset([self.op.remote_node])
10878 # Determine nodes to be locked
10879 return set([self.op.node_name]) | group_nodes
10881 def _DetermineInstances(self):
10882 """Builds list of instances to operate on.
10885 assert self.op.mode in constants.NODE_EVAC_MODES
10887 if self.op.mode == constants.NODE_EVAC_PRI:
10888 # Primary instances only
10889 inst_fn = _GetNodePrimaryInstances
10890 assert self.op.remote_node is None, \
10891 "Evacuating primary instances requires iallocator"
10892 elif self.op.mode == constants.NODE_EVAC_SEC:
10893 # Secondary instances only
10894 inst_fn = _GetNodeSecondaryInstances
10897 assert self.op.mode == constants.NODE_EVAC_ALL
10898 inst_fn = _GetNodeInstances
10899 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
10901 raise errors.OpPrereqError("Due to an issue with the iallocator"
10902 " interface it is not possible to evacuate"
10903 " all instances at once; specify explicitly"
10904 " whether to evacuate primary or secondary"
10906 errors.ECODE_INVAL)
10908 return inst_fn(self.cfg, self.op.node_name)
10910 def DeclareLocks(self, level):
10911 if level == locking.LEVEL_INSTANCE:
10912 # Lock instances optimistically, needs verification once node and group
10913 # locks have been acquired
10914 self.needed_locks[locking.LEVEL_INSTANCE] = \
10915 set(i.name for i in self._DetermineInstances())
10917 elif level == locking.LEVEL_NODEGROUP:
10918 # Lock node groups for all potential target nodes optimistically, needs
10919 # verification once nodes have been acquired
10920 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10921 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
10923 elif level == locking.LEVEL_NODE:
10924 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
10926 def CheckPrereq(self):
10928 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
10929 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
10930 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10932 need_nodes = self._DetermineNodes()
10934 if not owned_nodes.issuperset(need_nodes):
10935 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
10936 " locks were acquired, current nodes are"
10937 " are '%s', used to be '%s'; retry the"
10939 (self.op.node_name,
10940 utils.CommaJoin(need_nodes),
10941 utils.CommaJoin(owned_nodes)),
10942 errors.ECODE_STATE)
10944 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
10945 if owned_groups != wanted_groups:
10946 raise errors.OpExecError("Node groups changed since locks were acquired,"
10947 " current groups are '%s', used to be '%s';"
10948 " retry the operation" %
10949 (utils.CommaJoin(wanted_groups),
10950 utils.CommaJoin(owned_groups)))
10952 # Determine affected instances
10953 self.instances = self._DetermineInstances()
10954 self.instance_names = [i.name for i in self.instances]
10956 if set(self.instance_names) != owned_instances:
10957 raise errors.OpExecError("Instances on node '%s' changed since locks"
10958 " were acquired, current instances are '%s',"
10959 " used to be '%s'; retry the operation" %
10960 (self.op.node_name,
10961 utils.CommaJoin(self.instance_names),
10962 utils.CommaJoin(owned_instances)))
10964 if self.instance_names:
10965 self.LogInfo("Evacuating instances from node '%s': %s",
10967 utils.CommaJoin(utils.NiceSort(self.instance_names)))
10969 self.LogInfo("No instances to evacuate from node '%s'",
10972 if self.op.remote_node is not None:
10973 for i in self.instances:
10974 if i.primary_node == self.op.remote_node:
10975 raise errors.OpPrereqError("Node %s is the primary node of"
10976 " instance %s, cannot use it as"
10978 (self.op.remote_node, i.name),
10979 errors.ECODE_INVAL)
10981 def Exec(self, feedback_fn):
10982 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10984 if not self.instance_names:
10985 # No instances to evacuate
10988 elif self.op.iallocator is not None:
10989 # TODO: Implement relocation to other group
10990 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10991 evac_mode=self._MODE2IALLOCATOR[self.op.mode],
10992 instances=list(self.instance_names))
10994 ial.Run(self.op.iallocator)
10996 if not ial.success:
10997 raise errors.OpPrereqError("Can't compute node evacuation using"
10998 " iallocator '%s': %s" %
10999 (self.op.iallocator, ial.info),
11000 errors.ECODE_NORES)
11002 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
11004 elif self.op.remote_node is not None:
11005 assert self.op.mode == constants.NODE_EVAC_SEC
11007 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
11008 remote_node=self.op.remote_node,
11010 mode=constants.REPLACE_DISK_CHG,
11011 early_release=self.op.early_release)]
11012 for instance_name in self.instance_names
11016 raise errors.ProgrammerError("No iallocator or remote node")
11018 return ResultWithJobs(jobs)
11021 def _SetOpEarlyRelease(early_release, op):
11022 """Sets C{early_release} flag on opcodes if available.
11026 op.early_release = early_release
11027 except AttributeError:
11028 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
11033 def _NodeEvacDest(use_nodes, group, nodes):
11034 """Returns group or nodes depending on caller's choice.
11038 return utils.CommaJoin(nodes)
11043 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11044 """Unpacks the result of change-group and node-evacuate iallocator requests.
11046 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11047 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11049 @type lu: L{LogicalUnit}
11050 @param lu: Logical unit instance
11051 @type alloc_result: tuple/list
11052 @param alloc_result: Result from iallocator
11053 @type early_release: bool
11054 @param early_release: Whether to release locks early if possible
11055 @type use_nodes: bool
11056 @param use_nodes: Whether to display node names instead of groups
11059 (moved, failed, jobs) = alloc_result
11062 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11063 for (name, reason) in failed)
11064 lu.LogWarning("Unable to evacuate instances %s", failreason)
11065 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11068 lu.LogInfo("Instances to be moved: %s",
11069 utils.CommaJoin("%s (to %s)" %
11070 (name, _NodeEvacDest(use_nodes, group, nodes))
11071 for (name, group, nodes) in moved))
11073 return [map(compat.partial(_SetOpEarlyRelease, early_release),
11074 map(opcodes.OpCode.LoadOpCode, ops))
11078 class LUInstanceGrowDisk(LogicalUnit):
11079 """Grow a disk of an instance.
11082 HPATH = "disk-grow"
11083 HTYPE = constants.HTYPE_INSTANCE
11086 def ExpandNames(self):
11087 self._ExpandAndLockInstance()
11088 self.needed_locks[locking.LEVEL_NODE] = []
11089 self.needed_locks[locking.LEVEL_NODE_RES] = []
11090 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11092 def DeclareLocks(self, level):
11093 if level == locking.LEVEL_NODE:
11094 self._LockInstancesNodes()
11095 elif level == locking.LEVEL_NODE_RES:
11097 self.needed_locks[locking.LEVEL_NODE_RES] = \
11098 self.needed_locks[locking.LEVEL_NODE][:]
11100 def BuildHooksEnv(self):
11101 """Build hooks env.
11103 This runs on the master, the primary and all the secondaries.
11107 "DISK": self.op.disk,
11108 "AMOUNT": self.op.amount,
11110 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11113 def BuildHooksNodes(self):
11114 """Build hooks nodes.
11117 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11120 def CheckPrereq(self):
11121 """Check prerequisites.
11123 This checks that the instance is in the cluster.
11126 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11127 assert instance is not None, \
11128 "Cannot retrieve locked instance %s" % self.op.instance_name
11129 nodenames = list(instance.all_nodes)
11130 for node in nodenames:
11131 _CheckNodeOnline(self, node)
11133 self.instance = instance
11135 if instance.disk_template not in constants.DTS_GROWABLE:
11136 raise errors.OpPrereqError("Instance's disk layout does not support"
11137 " growing", errors.ECODE_INVAL)
11139 self.disk = instance.FindDisk(self.op.disk)
11141 if instance.disk_template not in (constants.DT_FILE,
11142 constants.DT_SHARED_FILE):
11143 # TODO: check the free disk space for file, when that feature will be
11145 _CheckNodesFreeDiskPerVG(self, nodenames,
11146 self.disk.ComputeGrowth(self.op.amount))
11148 def Exec(self, feedback_fn):
11149 """Execute disk grow.
11152 instance = self.instance
11155 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11156 assert (self.owned_locks(locking.LEVEL_NODE) ==
11157 self.owned_locks(locking.LEVEL_NODE_RES))
11159 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
11161 raise errors.OpExecError("Cannot activate block device to grow")
11163 feedback_fn("Growing disk %s of instance '%s' by %s" %
11164 (self.op.disk, instance.name,
11165 utils.FormatUnit(self.op.amount, "h")))
11167 # First run all grow ops in dry-run mode
11168 for node in instance.all_nodes:
11169 self.cfg.SetDiskID(disk, node)
11170 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
11171 result.Raise("Grow request failed to node %s" % node)
11173 # We know that (as far as we can test) operations across different
11174 # nodes will succeed, time to run it for real
11175 for node in instance.all_nodes:
11176 self.cfg.SetDiskID(disk, node)
11177 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
11178 result.Raise("Grow request failed to node %s" % node)
11180 # TODO: Rewrite code to work properly
11181 # DRBD goes into sync mode for a short amount of time after executing the
11182 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
11183 # calling "resize" in sync mode fails. Sleeping for a short amount of
11184 # time is a work-around.
11187 disk.RecordGrow(self.op.amount)
11188 self.cfg.Update(instance, feedback_fn)
11190 # Changes have been recorded, release node lock
11191 _ReleaseLocks(self, locking.LEVEL_NODE)
11193 # Downgrade lock while waiting for sync
11194 self.glm.downgrade(locking.LEVEL_INSTANCE)
11196 if self.op.wait_for_sync:
11197 disk_abort = not _WaitForSync(self, instance, disks=[disk])
11199 self.proc.LogWarning("Disk sync-ing has not returned a good"
11200 " status; please check the instance")
11201 if instance.admin_state != constants.ADMINST_UP:
11202 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
11203 elif instance.admin_state != constants.ADMINST_UP:
11204 self.proc.LogWarning("Not shutting down the disk even if the instance is"
11205 " not supposed to be running because no wait for"
11206 " sync mode was requested")
11208 assert self.owned_locks(locking.LEVEL_NODE_RES)
11209 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11212 class LUInstanceQueryData(NoHooksLU):
11213 """Query runtime instance data.
11218 def ExpandNames(self):
11219 self.needed_locks = {}
11221 # Use locking if requested or when non-static information is wanted
11222 if not (self.op.static or self.op.use_locking):
11223 self.LogWarning("Non-static data requested, locks need to be acquired")
11224 self.op.use_locking = True
11226 if self.op.instances or not self.op.use_locking:
11227 # Expand instance names right here
11228 self.wanted_names = _GetWantedInstances(self, self.op.instances)
11230 # Will use acquired locks
11231 self.wanted_names = None
11233 if self.op.use_locking:
11234 self.share_locks = _ShareAll()
11236 if self.wanted_names is None:
11237 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
11239 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
11241 self.needed_locks[locking.LEVEL_NODE] = []
11242 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11244 def DeclareLocks(self, level):
11245 if self.op.use_locking and level == locking.LEVEL_NODE:
11246 self._LockInstancesNodes()
11248 def CheckPrereq(self):
11249 """Check prerequisites.
11251 This only checks the optional instance list against the existing names.
11254 if self.wanted_names is None:
11255 assert self.op.use_locking, "Locking was not used"
11256 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
11258 self.wanted_instances = \
11259 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
11261 def _ComputeBlockdevStatus(self, node, instance_name, dev):
11262 """Returns the status of a block device
11265 if self.op.static or not node:
11268 self.cfg.SetDiskID(dev, node)
11270 result = self.rpc.call_blockdev_find(node, dev)
11274 result.Raise("Can't compute disk status for %s" % instance_name)
11276 status = result.payload
11280 return (status.dev_path, status.major, status.minor,
11281 status.sync_percent, status.estimated_time,
11282 status.is_degraded, status.ldisk_status)
11284 def _ComputeDiskStatus(self, instance, snode, dev):
11285 """Compute block device status.
11288 if dev.dev_type in constants.LDS_DRBD:
11289 # we change the snode then (otherwise we use the one passed in)
11290 if dev.logical_id[0] == instance.primary_node:
11291 snode = dev.logical_id[1]
11293 snode = dev.logical_id[0]
11295 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
11296 instance.name, dev)
11297 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
11300 dev_children = map(compat.partial(self._ComputeDiskStatus,
11307 "iv_name": dev.iv_name,
11308 "dev_type": dev.dev_type,
11309 "logical_id": dev.logical_id,
11310 "physical_id": dev.physical_id,
11311 "pstatus": dev_pstatus,
11312 "sstatus": dev_sstatus,
11313 "children": dev_children,
11318 def Exec(self, feedback_fn):
11319 """Gather and return data"""
11322 cluster = self.cfg.GetClusterInfo()
11324 pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
11325 for i in self.wanted_instances)
11326 for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
11327 if self.op.static or pnode.offline:
11328 remote_state = None
11330 self.LogWarning("Primary node %s is marked offline, returning static"
11331 " information only for instance %s" %
11332 (pnode.name, instance.name))
11334 remote_info = self.rpc.call_instance_info(instance.primary_node,
11336 instance.hypervisor)
11337 remote_info.Raise("Error checking node %s" % instance.primary_node)
11338 remote_info = remote_info.payload
11339 if remote_info and "state" in remote_info:
11340 remote_state = "up"
11342 if instance.admin_state == constants.ADMINST_UP:
11343 remote_state = "down"
11345 remote_state = instance.admin_state
11347 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
11350 result[instance.name] = {
11351 "name": instance.name,
11352 "config_state": instance.admin_state,
11353 "run_state": remote_state,
11354 "pnode": instance.primary_node,
11355 "snodes": instance.secondary_nodes,
11357 # this happens to be the same format used for hooks
11358 "nics": _NICListToTuple(self, instance.nics),
11359 "disk_template": instance.disk_template,
11361 "hypervisor": instance.hypervisor,
11362 "network_port": instance.network_port,
11363 "hv_instance": instance.hvparams,
11364 "hv_actual": cluster.FillHV(instance, skip_globals=True),
11365 "be_instance": instance.beparams,
11366 "be_actual": cluster.FillBE(instance),
11367 "os_instance": instance.osparams,
11368 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
11369 "serial_no": instance.serial_no,
11370 "mtime": instance.mtime,
11371 "ctime": instance.ctime,
11372 "uuid": instance.uuid,
11378 class LUInstanceSetParams(LogicalUnit):
11379 """Modifies an instances's parameters.
11382 HPATH = "instance-modify"
11383 HTYPE = constants.HTYPE_INSTANCE
11386 def CheckArguments(self):
11387 if not (self.op.nics or self.op.disks or self.op.disk_template or
11388 self.op.hvparams or self.op.beparams or self.op.os_name or
11389 self.op.online_inst or self.op.offline_inst):
11390 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
11392 if self.op.hvparams:
11393 _CheckGlobalHvParams(self.op.hvparams)
11397 for disk_op, disk_dict in self.op.disks:
11398 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
11399 if disk_op == constants.DDM_REMOVE:
11400 disk_addremove += 1
11402 elif disk_op == constants.DDM_ADD:
11403 disk_addremove += 1
11405 if not isinstance(disk_op, int):
11406 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
11407 if not isinstance(disk_dict, dict):
11408 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
11409 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11411 if disk_op == constants.DDM_ADD:
11412 mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
11413 if mode not in constants.DISK_ACCESS_SET:
11414 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
11415 errors.ECODE_INVAL)
11416 size = disk_dict.get(constants.IDISK_SIZE, None)
11418 raise errors.OpPrereqError("Required disk parameter size missing",
11419 errors.ECODE_INVAL)
11422 except (TypeError, ValueError), err:
11423 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
11424 str(err), errors.ECODE_INVAL)
11425 disk_dict[constants.IDISK_SIZE] = size
11427 # modification of disk
11428 if constants.IDISK_SIZE in disk_dict:
11429 raise errors.OpPrereqError("Disk size change not possible, use"
11430 " grow-disk", errors.ECODE_INVAL)
11432 if disk_addremove > 1:
11433 raise errors.OpPrereqError("Only one disk add or remove operation"
11434 " supported at a time", errors.ECODE_INVAL)
11436 if self.op.disks and self.op.disk_template is not None:
11437 raise errors.OpPrereqError("Disk template conversion and other disk"
11438 " changes not supported at the same time",
11439 errors.ECODE_INVAL)
11441 if (self.op.disk_template and
11442 self.op.disk_template in constants.DTS_INT_MIRROR and
11443 self.op.remote_node is None):
11444 raise errors.OpPrereqError("Changing the disk template to a mirrored"
11445 " one requires specifying a secondary node",
11446 errors.ECODE_INVAL)
11450 for nic_op, nic_dict in self.op.nics:
11451 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
11452 if nic_op == constants.DDM_REMOVE:
11455 elif nic_op == constants.DDM_ADD:
11458 if not isinstance(nic_op, int):
11459 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
11460 if not isinstance(nic_dict, dict):
11461 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
11462 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11464 # nic_dict should be a dict
11465 nic_ip = nic_dict.get(constants.INIC_IP, None)
11466 if nic_ip is not None:
11467 if nic_ip.lower() == constants.VALUE_NONE:
11468 nic_dict[constants.INIC_IP] = None
11470 if not netutils.IPAddress.IsValid(nic_ip):
11471 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
11472 errors.ECODE_INVAL)
11474 nic_bridge = nic_dict.get("bridge", None)
11475 nic_link = nic_dict.get(constants.INIC_LINK, None)
11476 if nic_bridge and nic_link:
11477 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
11478 " at the same time", errors.ECODE_INVAL)
11479 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
11480 nic_dict["bridge"] = None
11481 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
11482 nic_dict[constants.INIC_LINK] = None
11484 if nic_op == constants.DDM_ADD:
11485 nic_mac = nic_dict.get(constants.INIC_MAC, None)
11486 if nic_mac is None:
11487 nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
11489 if constants.INIC_MAC in nic_dict:
11490 nic_mac = nic_dict[constants.INIC_MAC]
11491 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11492 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
11494 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
11495 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
11496 " modifying an existing nic",
11497 errors.ECODE_INVAL)
11499 if nic_addremove > 1:
11500 raise errors.OpPrereqError("Only one NIC add or remove operation"
11501 " supported at a time", errors.ECODE_INVAL)
11503 def ExpandNames(self):
11504 self._ExpandAndLockInstance()
11505 # Can't even acquire node locks in shared mode as upcoming changes in
11506 # Ganeti 2.6 will start to modify the node object on disk conversion
11507 self.needed_locks[locking.LEVEL_NODE] = []
11508 self.needed_locks[locking.LEVEL_NODE_RES] = []
11509 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11511 def DeclareLocks(self, level):
11512 if level == locking.LEVEL_NODE:
11513 self._LockInstancesNodes()
11514 if self.op.disk_template and self.op.remote_node:
11515 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11516 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
11517 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
11519 self.needed_locks[locking.LEVEL_NODE_RES] = \
11520 self.needed_locks[locking.LEVEL_NODE][:]
11522 def BuildHooksEnv(self):
11523 """Build hooks env.
11525 This runs on the master, primary and secondaries.
11529 if constants.BE_MINMEM in self.be_new:
11530 args["minmem"] = self.be_new[constants.BE_MINMEM]
11531 if constants.BE_MAXMEM in self.be_new:
11532 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
11533 if constants.BE_VCPUS in self.be_new:
11534 args["vcpus"] = self.be_new[constants.BE_VCPUS]
11535 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
11536 # information at all.
11539 nic_override = dict(self.op.nics)
11540 for idx, nic in enumerate(self.instance.nics):
11541 if idx in nic_override:
11542 this_nic_override = nic_override[idx]
11544 this_nic_override = {}
11545 if constants.INIC_IP in this_nic_override:
11546 ip = this_nic_override[constants.INIC_IP]
11549 if constants.INIC_MAC in this_nic_override:
11550 mac = this_nic_override[constants.INIC_MAC]
11553 if idx in self.nic_pnew:
11554 nicparams = self.nic_pnew[idx]
11556 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
11557 mode = nicparams[constants.NIC_MODE]
11558 link = nicparams[constants.NIC_LINK]
11559 args["nics"].append((ip, mac, mode, link))
11560 if constants.DDM_ADD in nic_override:
11561 ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
11562 mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
11563 nicparams = self.nic_pnew[constants.DDM_ADD]
11564 mode = nicparams[constants.NIC_MODE]
11565 link = nicparams[constants.NIC_LINK]
11566 args["nics"].append((ip, mac, mode, link))
11567 elif constants.DDM_REMOVE in nic_override:
11568 del args["nics"][-1]
11570 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
11571 if self.op.disk_template:
11572 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
11576 def BuildHooksNodes(self):
11577 """Build hooks nodes.
11580 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11583 def CheckPrereq(self):
11584 """Check prerequisites.
11586 This only checks the instance list against the existing names.
11589 # checking the new params on the primary/secondary nodes
11591 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11592 cluster = self.cluster = self.cfg.GetClusterInfo()
11593 assert self.instance is not None, \
11594 "Cannot retrieve locked instance %s" % self.op.instance_name
11595 pnode = instance.primary_node
11596 nodelist = list(instance.all_nodes)
11597 pnode_info = self.cfg.GetNodeInfo(pnode)
11598 self.diskparams = self.cfg.GetNodeGroup(pnode_info.group).diskparams
11601 if self.op.os_name and not self.op.force:
11602 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
11603 self.op.force_variant)
11604 instance_os = self.op.os_name
11606 instance_os = instance.os
11608 if self.op.disk_template:
11609 if instance.disk_template == self.op.disk_template:
11610 raise errors.OpPrereqError("Instance already has disk template %s" %
11611 instance.disk_template, errors.ECODE_INVAL)
11613 if (instance.disk_template,
11614 self.op.disk_template) not in self._DISK_CONVERSIONS:
11615 raise errors.OpPrereqError("Unsupported disk template conversion from"
11616 " %s to %s" % (instance.disk_template,
11617 self.op.disk_template),
11618 errors.ECODE_INVAL)
11619 _CheckInstanceState(self, instance, INSTANCE_DOWN,
11620 msg="cannot change disk template")
11621 if self.op.disk_template in constants.DTS_INT_MIRROR:
11622 if self.op.remote_node == pnode:
11623 raise errors.OpPrereqError("Given new secondary node %s is the same"
11624 " as the primary node of the instance" %
11625 self.op.remote_node, errors.ECODE_STATE)
11626 _CheckNodeOnline(self, self.op.remote_node)
11627 _CheckNodeNotDrained(self, self.op.remote_node)
11628 # FIXME: here we assume that the old instance type is DT_PLAIN
11629 assert instance.disk_template == constants.DT_PLAIN
11630 disks = [{constants.IDISK_SIZE: d.size,
11631 constants.IDISK_VG: d.logical_id[0]}
11632 for d in instance.disks]
11633 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
11634 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
11636 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
11637 if pnode_info.group != snode_info.group:
11638 self.LogWarning("The primary and secondary nodes are in two"
11639 " different node groups; the disk parameters"
11640 " from the first disk's node group will be"
11643 # hvparams processing
11644 if self.op.hvparams:
11645 hv_type = instance.hypervisor
11646 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
11647 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
11648 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
11651 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
11652 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
11653 self.hv_proposed = self.hv_new = hv_new # the new actual values
11654 self.hv_inst = i_hvdict # the new dict (without defaults)
11656 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
11658 self.hv_new = self.hv_inst = {}
11660 # beparams processing
11661 if self.op.beparams:
11662 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
11664 objects.UpgradeBeParams(i_bedict)
11665 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
11666 be_new = cluster.SimpleFillBE(i_bedict)
11667 self.be_proposed = self.be_new = be_new # the new actual values
11668 self.be_inst = i_bedict # the new dict (without defaults)
11670 self.be_new = self.be_inst = {}
11671 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
11672 be_old = cluster.FillBE(instance)
11674 # CPU param validation -- checking every time a paramtere is
11675 # changed to cover all cases where either CPU mask or vcpus have
11677 if (constants.BE_VCPUS in self.be_proposed and
11678 constants.HV_CPU_MASK in self.hv_proposed):
11680 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
11681 # Verify mask is consistent with number of vCPUs. Can skip this
11682 # test if only 1 entry in the CPU mask, which means same mask
11683 # is applied to all vCPUs.
11684 if (len(cpu_list) > 1 and
11685 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
11686 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
11688 (self.be_proposed[constants.BE_VCPUS],
11689 self.hv_proposed[constants.HV_CPU_MASK]),
11690 errors.ECODE_INVAL)
11692 # Only perform this test if a new CPU mask is given
11693 if constants.HV_CPU_MASK in self.hv_new:
11694 # Calculate the largest CPU number requested
11695 max_requested_cpu = max(map(max, cpu_list))
11696 # Check that all of the instance's nodes have enough physical CPUs to
11697 # satisfy the requested CPU mask
11698 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
11699 max_requested_cpu + 1, instance.hypervisor)
11701 # osparams processing
11702 if self.op.osparams:
11703 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
11704 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
11705 self.os_inst = i_osdict # the new dict (without defaults)
11711 #TODO(dynmem): do the appropriate check involving MINMEM
11712 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
11713 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
11714 mem_check_list = [pnode]
11715 if be_new[constants.BE_AUTO_BALANCE]:
11716 # either we changed auto_balance to yes or it was from before
11717 mem_check_list.extend(instance.secondary_nodes)
11718 instance_info = self.rpc.call_instance_info(pnode, instance.name,
11719 instance.hypervisor)
11720 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
11721 [instance.hypervisor])
11722 pninfo = nodeinfo[pnode]
11723 msg = pninfo.fail_msg
11725 # Assume the primary node is unreachable and go ahead
11726 self.warn.append("Can't get info from primary node %s: %s" %
11729 (_, _, (pnhvinfo, )) = pninfo.payload
11730 if not isinstance(pnhvinfo.get("memory_free", None), int):
11731 self.warn.append("Node data from primary node %s doesn't contain"
11732 " free memory information" % pnode)
11733 elif instance_info.fail_msg:
11734 self.warn.append("Can't get instance runtime information: %s" %
11735 instance_info.fail_msg)
11737 if instance_info.payload:
11738 current_mem = int(instance_info.payload["memory"])
11740 # Assume instance not running
11741 # (there is a slight race condition here, but it's not very
11742 # probable, and we have no other way to check)
11743 # TODO: Describe race condition
11745 #TODO(dynmem): do the appropriate check involving MINMEM
11746 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
11747 pnhvinfo["memory_free"])
11749 raise errors.OpPrereqError("This change will prevent the instance"
11750 " from starting, due to %d MB of memory"
11751 " missing on its primary node" %
11753 errors.ECODE_NORES)
11755 if be_new[constants.BE_AUTO_BALANCE]:
11756 for node, nres in nodeinfo.items():
11757 if node not in instance.secondary_nodes:
11759 nres.Raise("Can't get info from secondary node %s" % node,
11760 prereq=True, ecode=errors.ECODE_STATE)
11761 (_, _, (nhvinfo, )) = nres.payload
11762 if not isinstance(nhvinfo.get("memory_free", None), int):
11763 raise errors.OpPrereqError("Secondary node %s didn't return free"
11764 " memory information" % node,
11765 errors.ECODE_STATE)
11766 #TODO(dynmem): do the appropriate check involving MINMEM
11767 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
11768 raise errors.OpPrereqError("This change will prevent the instance"
11769 " from failover to its secondary node"
11770 " %s, due to not enough memory" % node,
11771 errors.ECODE_STATE)
11775 self.nic_pinst = {}
11776 for nic_op, nic_dict in self.op.nics:
11777 if nic_op == constants.DDM_REMOVE:
11778 if not instance.nics:
11779 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
11780 errors.ECODE_INVAL)
11782 if nic_op != constants.DDM_ADD:
11784 if not instance.nics:
11785 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
11786 " no NICs" % nic_op,
11787 errors.ECODE_INVAL)
11788 if nic_op < 0 or nic_op >= len(instance.nics):
11789 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
11791 (nic_op, len(instance.nics) - 1),
11792 errors.ECODE_INVAL)
11793 old_nic_params = instance.nics[nic_op].nicparams
11794 old_nic_ip = instance.nics[nic_op].ip
11796 old_nic_params = {}
11799 update_params_dict = dict([(key, nic_dict[key])
11800 for key in constants.NICS_PARAMETERS
11801 if key in nic_dict])
11803 if "bridge" in nic_dict:
11804 update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
11806 new_nic_params = _GetUpdatedParams(old_nic_params,
11807 update_params_dict)
11808 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
11809 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
11810 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
11811 self.nic_pinst[nic_op] = new_nic_params
11812 self.nic_pnew[nic_op] = new_filled_nic_params
11813 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
11815 if new_nic_mode == constants.NIC_MODE_BRIDGED:
11816 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
11817 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
11819 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
11821 self.warn.append(msg)
11823 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
11824 if new_nic_mode == constants.NIC_MODE_ROUTED:
11825 if constants.INIC_IP in nic_dict:
11826 nic_ip = nic_dict[constants.INIC_IP]
11828 nic_ip = old_nic_ip
11830 raise errors.OpPrereqError("Cannot set the nic ip to None"
11831 " on a routed nic", errors.ECODE_INVAL)
11832 if constants.INIC_MAC in nic_dict:
11833 nic_mac = nic_dict[constants.INIC_MAC]
11834 if nic_mac is None:
11835 raise errors.OpPrereqError("Cannot set the nic mac to None",
11836 errors.ECODE_INVAL)
11837 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11838 # otherwise generate the mac
11839 nic_dict[constants.INIC_MAC] = \
11840 self.cfg.GenerateMAC(self.proc.GetECId())
11842 # or validate/reserve the current one
11844 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
11845 except errors.ReservationError:
11846 raise errors.OpPrereqError("MAC address %s already in use"
11847 " in cluster" % nic_mac,
11848 errors.ECODE_NOTUNIQUE)
11851 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
11852 raise errors.OpPrereqError("Disk operations not supported for"
11853 " diskless instances",
11854 errors.ECODE_INVAL)
11855 for disk_op, _ in self.op.disks:
11856 if disk_op == constants.DDM_REMOVE:
11857 if len(instance.disks) == 1:
11858 raise errors.OpPrereqError("Cannot remove the last disk of"
11859 " an instance", errors.ECODE_INVAL)
11860 _CheckInstanceState(self, instance, INSTANCE_DOWN,
11861 msg="cannot remove disks")
11863 if (disk_op == constants.DDM_ADD and
11864 len(instance.disks) >= constants.MAX_DISKS):
11865 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
11866 " add more" % constants.MAX_DISKS,
11867 errors.ECODE_STATE)
11868 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
11870 if disk_op < 0 or disk_op >= len(instance.disks):
11871 raise errors.OpPrereqError("Invalid disk index %s, valid values"
11873 (disk_op, len(instance.disks)),
11874 errors.ECODE_INVAL)
11876 # disabling the instance
11877 if self.op.offline_inst:
11878 _CheckInstanceState(self, instance, INSTANCE_DOWN,
11879 msg="cannot change instance state to offline")
11881 # enabling the instance
11882 if self.op.online_inst:
11883 _CheckInstanceState(self, instance, INSTANCE_OFFLINE,
11884 msg="cannot make instance go online")
11886 def _ConvertPlainToDrbd(self, feedback_fn):
11887 """Converts an instance from plain to drbd.
11890 feedback_fn("Converting template to drbd")
11891 instance = self.instance
11892 pnode = instance.primary_node
11893 snode = self.op.remote_node
11895 assert instance.disk_template == constants.DT_PLAIN
11897 # create a fake disk info for _GenerateDiskTemplate
11898 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
11899 constants.IDISK_VG: d.logical_id[0]}
11900 for d in instance.disks]
11901 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
11902 instance.name, pnode, [snode],
11903 disk_info, None, None, 0, feedback_fn,
11905 info = _GetInstanceInfoText(instance)
11906 feedback_fn("Creating aditional volumes...")
11907 # first, create the missing data and meta devices
11908 for disk in new_disks:
11909 # unfortunately this is... not too nice
11910 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
11912 for child in disk.children:
11913 _CreateSingleBlockDev(self, snode, instance, child, info, True)
11914 # at this stage, all new LVs have been created, we can rename the
11916 feedback_fn("Renaming original volumes...")
11917 rename_list = [(o, n.children[0].logical_id)
11918 for (o, n) in zip(instance.disks, new_disks)]
11919 result = self.rpc.call_blockdev_rename(pnode, rename_list)
11920 result.Raise("Failed to rename original LVs")
11922 feedback_fn("Initializing DRBD devices...")
11923 # all child devices are in place, we can now create the DRBD devices
11924 for disk in new_disks:
11925 for node in [pnode, snode]:
11926 f_create = node == pnode
11927 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
11929 # at this point, the instance has been modified
11930 instance.disk_template = constants.DT_DRBD8
11931 instance.disks = new_disks
11932 self.cfg.Update(instance, feedback_fn)
11934 # Release node locks while waiting for sync
11935 _ReleaseLocks(self, locking.LEVEL_NODE)
11937 # disks are created, waiting for sync
11938 disk_abort = not _WaitForSync(self, instance,
11939 oneshot=not self.op.wait_for_sync)
11941 raise errors.OpExecError("There are some degraded disks for"
11942 " this instance, please cleanup manually")
11944 # Node resource locks will be released by caller
11946 def _ConvertDrbdToPlain(self, feedback_fn):
11947 """Converts an instance from drbd to plain.
11950 instance = self.instance
11952 assert len(instance.secondary_nodes) == 1
11953 assert instance.disk_template == constants.DT_DRBD8
11955 pnode = instance.primary_node
11956 snode = instance.secondary_nodes[0]
11957 feedback_fn("Converting template to plain")
11959 old_disks = instance.disks
11960 new_disks = [d.children[0] for d in old_disks]
11962 # copy over size and mode
11963 for parent, child in zip(old_disks, new_disks):
11964 child.size = parent.size
11965 child.mode = parent.mode
11967 # update instance structure
11968 instance.disks = new_disks
11969 instance.disk_template = constants.DT_PLAIN
11970 self.cfg.Update(instance, feedback_fn)
11972 # Release locks in case removing disks takes a while
11973 _ReleaseLocks(self, locking.LEVEL_NODE)
11975 feedback_fn("Removing volumes on the secondary node...")
11976 for disk in old_disks:
11977 self.cfg.SetDiskID(disk, snode)
11978 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
11980 self.LogWarning("Could not remove block device %s on node %s,"
11981 " continuing anyway: %s", disk.iv_name, snode, msg)
11983 feedback_fn("Removing unneeded volumes on the primary node...")
11984 for idx, disk in enumerate(old_disks):
11985 meta = disk.children[1]
11986 self.cfg.SetDiskID(meta, pnode)
11987 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
11989 self.LogWarning("Could not remove metadata for disk %d on node %s,"
11990 " continuing anyway: %s", idx, pnode, msg)
11992 # this is a DRBD disk, return its port to the pool
11993 for disk in old_disks:
11994 tcp_port = disk.logical_id[2]
11995 self.cfg.AddTcpUdpPort(tcp_port)
11997 # Node resource locks will be released by caller
11999 def Exec(self, feedback_fn):
12000 """Modifies an instance.
12002 All parameters take effect only at the next restart of the instance.
12005 # Process here the warnings from CheckPrereq, as we don't have a
12006 # feedback_fn there.
12007 for warn in self.warn:
12008 feedback_fn("WARNING: %s" % warn)
12010 assert ((self.op.disk_template is None) ^
12011 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
12012 "Not owning any node resource locks"
12015 instance = self.instance
12017 for disk_op, disk_dict in self.op.disks:
12018 if disk_op == constants.DDM_REMOVE:
12019 # remove the last disk
12020 device = instance.disks.pop()
12021 device_idx = len(instance.disks)
12022 for node, disk in device.ComputeNodeTree(instance.primary_node):
12023 self.cfg.SetDiskID(disk, node)
12024 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
12026 self.LogWarning("Could not remove disk/%d on node %s: %s,"
12027 " continuing anyway", device_idx, node, msg)
12028 result.append(("disk/%d" % device_idx, "remove"))
12030 # if this is a DRBD disk, return its port to the pool
12031 if device.dev_type in constants.LDS_DRBD:
12032 tcp_port = device.logical_id[2]
12033 self.cfg.AddTcpUdpPort(tcp_port)
12034 elif disk_op == constants.DDM_ADD:
12036 if instance.disk_template in (constants.DT_FILE,
12037 constants.DT_SHARED_FILE):
12038 file_driver, file_path = instance.disks[0].logical_id
12039 file_path = os.path.dirname(file_path)
12041 file_driver = file_path = None
12042 disk_idx_base = len(instance.disks)
12043 new_disk = _GenerateDiskTemplate(self,
12044 instance.disk_template,
12045 instance.name, instance.primary_node,
12046 instance.secondary_nodes,
12052 self.diskparams)[0]
12053 instance.disks.append(new_disk)
12054 info = _GetInstanceInfoText(instance)
12056 logging.info("Creating volume %s for instance %s",
12057 new_disk.iv_name, instance.name)
12058 # Note: this needs to be kept in sync with _CreateDisks
12060 for node in instance.all_nodes:
12061 f_create = node == instance.primary_node
12063 _CreateBlockDev(self, node, instance, new_disk,
12064 f_create, info, f_create)
12065 except errors.OpExecError, err:
12066 self.LogWarning("Failed to create volume %s (%s) on"
12068 new_disk.iv_name, new_disk, node, err)
12069 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
12070 (new_disk.size, new_disk.mode)))
12072 # change a given disk
12073 instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
12074 result.append(("disk.mode/%d" % disk_op,
12075 disk_dict[constants.IDISK_MODE]))
12077 if self.op.disk_template:
12079 check_nodes = set(instance.all_nodes)
12080 if self.op.remote_node:
12081 check_nodes.add(self.op.remote_node)
12082 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
12083 owned = self.owned_locks(level)
12084 assert not (check_nodes - owned), \
12085 ("Not owning the correct locks, owning %r, expected at least %r" %
12086 (owned, check_nodes))
12088 r_shut = _ShutdownInstanceDisks(self, instance)
12090 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
12091 " proceed with disk template conversion")
12092 mode = (instance.disk_template, self.op.disk_template)
12094 self._DISK_CONVERSIONS[mode](self, feedback_fn)
12096 self.cfg.ReleaseDRBDMinors(instance.name)
12098 result.append(("disk_template", self.op.disk_template))
12100 assert instance.disk_template == self.op.disk_template, \
12101 ("Expected disk template '%s', found '%s'" %
12102 (self.op.disk_template, instance.disk_template))
12104 # Release node and resource locks if there are any (they might already have
12105 # been released during disk conversion)
12106 _ReleaseLocks(self, locking.LEVEL_NODE)
12107 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
12110 for nic_op, nic_dict in self.op.nics:
12111 if nic_op == constants.DDM_REMOVE:
12112 # remove the last nic
12113 del instance.nics[-1]
12114 result.append(("nic.%d" % len(instance.nics), "remove"))
12115 elif nic_op == constants.DDM_ADD:
12116 # mac and bridge should be set, by now
12117 mac = nic_dict[constants.INIC_MAC]
12118 ip = nic_dict.get(constants.INIC_IP, None)
12119 nicparams = self.nic_pinst[constants.DDM_ADD]
12120 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
12121 instance.nics.append(new_nic)
12122 result.append(("nic.%d" % (len(instance.nics) - 1),
12123 "add:mac=%s,ip=%s,mode=%s,link=%s" %
12124 (new_nic.mac, new_nic.ip,
12125 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
12126 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
12129 for key in (constants.INIC_MAC, constants.INIC_IP):
12130 if key in nic_dict:
12131 setattr(instance.nics[nic_op], key, nic_dict[key])
12132 if nic_op in self.nic_pinst:
12133 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
12134 for key, val in nic_dict.iteritems():
12135 result.append(("nic.%s/%d" % (key, nic_op), val))
12138 if self.op.hvparams:
12139 instance.hvparams = self.hv_inst
12140 for key, val in self.op.hvparams.iteritems():
12141 result.append(("hv/%s" % key, val))
12144 if self.op.beparams:
12145 instance.beparams = self.be_inst
12146 for key, val in self.op.beparams.iteritems():
12147 result.append(("be/%s" % key, val))
12150 if self.op.os_name:
12151 instance.os = self.op.os_name
12154 if self.op.osparams:
12155 instance.osparams = self.os_inst
12156 for key, val in self.op.osparams.iteritems():
12157 result.append(("os/%s" % key, val))
12159 # online/offline instance
12160 if self.op.online_inst:
12161 self.cfg.MarkInstanceDown(instance.name)
12162 result.append(("admin_state", constants.ADMINST_DOWN))
12163 if self.op.offline_inst:
12164 self.cfg.MarkInstanceOffline(instance.name)
12165 result.append(("admin_state", constants.ADMINST_OFFLINE))
12167 self.cfg.Update(instance, feedback_fn)
12169 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
12170 self.owned_locks(locking.LEVEL_NODE)), \
12171 "All node locks should have been released by now"
12175 _DISK_CONVERSIONS = {
12176 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
12177 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
12181 class LUInstanceChangeGroup(LogicalUnit):
12182 HPATH = "instance-change-group"
12183 HTYPE = constants.HTYPE_INSTANCE
12186 def ExpandNames(self):
12187 self.share_locks = _ShareAll()
12188 self.needed_locks = {
12189 locking.LEVEL_NODEGROUP: [],
12190 locking.LEVEL_NODE: [],
12193 self._ExpandAndLockInstance()
12195 if self.op.target_groups:
12196 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12197 self.op.target_groups)
12199 self.req_target_uuids = None
12201 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12203 def DeclareLocks(self, level):
12204 if level == locking.LEVEL_NODEGROUP:
12205 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12207 if self.req_target_uuids:
12208 lock_groups = set(self.req_target_uuids)
12210 # Lock all groups used by instance optimistically; this requires going
12211 # via the node before it's locked, requiring verification later on
12212 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
12213 lock_groups.update(instance_groups)
12215 # No target groups, need to lock all of them
12216 lock_groups = locking.ALL_SET
12218 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12220 elif level == locking.LEVEL_NODE:
12221 if self.req_target_uuids:
12222 # Lock all nodes used by instances
12223 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12224 self._LockInstancesNodes()
12226 # Lock all nodes in all potential target groups
12227 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
12228 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
12229 member_nodes = [node_name
12230 for group in lock_groups
12231 for node_name in self.cfg.GetNodeGroup(group).members]
12232 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12234 # Lock all nodes as all groups are potential targets
12235 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12237 def CheckPrereq(self):
12238 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12239 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12240 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12242 assert (self.req_target_uuids is None or
12243 owned_groups.issuperset(self.req_target_uuids))
12244 assert owned_instances == set([self.op.instance_name])
12246 # Get instance information
12247 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12249 # Check if node groups for locked instance are still correct
12250 assert owned_nodes.issuperset(self.instance.all_nodes), \
12251 ("Instance %s's nodes changed while we kept the lock" %
12252 self.op.instance_name)
12254 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
12257 if self.req_target_uuids:
12258 # User requested specific target groups
12259 self.target_uuids = self.req_target_uuids
12261 # All groups except those used by the instance are potential targets
12262 self.target_uuids = owned_groups - inst_groups
12264 conflicting_groups = self.target_uuids & inst_groups
12265 if conflicting_groups:
12266 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
12267 " used by the instance '%s'" %
12268 (utils.CommaJoin(conflicting_groups),
12269 self.op.instance_name),
12270 errors.ECODE_INVAL)
12272 if not self.target_uuids:
12273 raise errors.OpPrereqError("There are no possible target groups",
12274 errors.ECODE_INVAL)
12276 def BuildHooksEnv(self):
12277 """Build hooks env.
12280 assert self.target_uuids
12283 "TARGET_GROUPS": " ".join(self.target_uuids),
12286 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12290 def BuildHooksNodes(self):
12291 """Build hooks nodes.
12294 mn = self.cfg.GetMasterNode()
12295 return ([mn], [mn])
12297 def Exec(self, feedback_fn):
12298 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12300 assert instances == [self.op.instance_name], "Instance not locked"
12302 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12303 instances=instances, target_groups=list(self.target_uuids))
12305 ial.Run(self.op.iallocator)
12307 if not ial.success:
12308 raise errors.OpPrereqError("Can't compute solution for changing group of"
12309 " instance '%s' using iallocator '%s': %s" %
12310 (self.op.instance_name, self.op.iallocator,
12312 errors.ECODE_NORES)
12314 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12316 self.LogInfo("Iallocator returned %s job(s) for changing group of"
12317 " instance '%s'", len(jobs), self.op.instance_name)
12319 return ResultWithJobs(jobs)
12322 class LUBackupQuery(NoHooksLU):
12323 """Query the exports list
12328 def ExpandNames(self):
12329 self.needed_locks = {}
12330 self.share_locks[locking.LEVEL_NODE] = 1
12331 if not self.op.nodes:
12332 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12334 self.needed_locks[locking.LEVEL_NODE] = \
12335 _GetWantedNodes(self, self.op.nodes)
12337 def Exec(self, feedback_fn):
12338 """Compute the list of all the exported system images.
12341 @return: a dictionary with the structure node->(export-list)
12342 where export-list is a list of the instances exported on
12346 self.nodes = self.owned_locks(locking.LEVEL_NODE)
12347 rpcresult = self.rpc.call_export_list(self.nodes)
12349 for node in rpcresult:
12350 if rpcresult[node].fail_msg:
12351 result[node] = False
12353 result[node] = rpcresult[node].payload
12358 class LUBackupPrepare(NoHooksLU):
12359 """Prepares an instance for an export and returns useful information.
12364 def ExpandNames(self):
12365 self._ExpandAndLockInstance()
12367 def CheckPrereq(self):
12368 """Check prerequisites.
12371 instance_name = self.op.instance_name
12373 self.instance = self.cfg.GetInstanceInfo(instance_name)
12374 assert self.instance is not None, \
12375 "Cannot retrieve locked instance %s" % self.op.instance_name
12376 _CheckNodeOnline(self, self.instance.primary_node)
12378 self._cds = _GetClusterDomainSecret()
12380 def Exec(self, feedback_fn):
12381 """Prepares an instance for an export.
12384 instance = self.instance
12386 if self.op.mode == constants.EXPORT_MODE_REMOTE:
12387 salt = utils.GenerateSecret(8)
12389 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
12390 result = self.rpc.call_x509_cert_create(instance.primary_node,
12391 constants.RIE_CERT_VALIDITY)
12392 result.Raise("Can't create X509 key and certificate on %s" % result.node)
12394 (name, cert_pem) = result.payload
12396 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
12400 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
12401 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
12403 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
12409 class LUBackupExport(LogicalUnit):
12410 """Export an instance to an image in the cluster.
12413 HPATH = "instance-export"
12414 HTYPE = constants.HTYPE_INSTANCE
12417 def CheckArguments(self):
12418 """Check the arguments.
12421 self.x509_key_name = self.op.x509_key_name
12422 self.dest_x509_ca_pem = self.op.destination_x509_ca
12424 if self.op.mode == constants.EXPORT_MODE_REMOTE:
12425 if not self.x509_key_name:
12426 raise errors.OpPrereqError("Missing X509 key name for encryption",
12427 errors.ECODE_INVAL)
12429 if not self.dest_x509_ca_pem:
12430 raise errors.OpPrereqError("Missing destination X509 CA",
12431 errors.ECODE_INVAL)
12433 def ExpandNames(self):
12434 self._ExpandAndLockInstance()
12436 # Lock all nodes for local exports
12437 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12438 # FIXME: lock only instance primary and destination node
12440 # Sad but true, for now we have do lock all nodes, as we don't know where
12441 # the previous export might be, and in this LU we search for it and
12442 # remove it from its current node. In the future we could fix this by:
12443 # - making a tasklet to search (share-lock all), then create the
12444 # new one, then one to remove, after
12445 # - removing the removal operation altogether
12446 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12448 def DeclareLocks(self, level):
12449 """Last minute lock declaration."""
12450 # All nodes are locked anyway, so nothing to do here.
12452 def BuildHooksEnv(self):
12453 """Build hooks env.
12455 This will run on the master, primary node and target node.
12459 "EXPORT_MODE": self.op.mode,
12460 "EXPORT_NODE": self.op.target_node,
12461 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
12462 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
12463 # TODO: Generic function for boolean env variables
12464 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
12467 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12471 def BuildHooksNodes(self):
12472 """Build hooks nodes.
12475 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
12477 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12478 nl.append(self.op.target_node)
12482 def CheckPrereq(self):
12483 """Check prerequisites.
12485 This checks that the instance and node names are valid.
12488 instance_name = self.op.instance_name
12490 self.instance = self.cfg.GetInstanceInfo(instance_name)
12491 assert self.instance is not None, \
12492 "Cannot retrieve locked instance %s" % self.op.instance_name
12493 _CheckNodeOnline(self, self.instance.primary_node)
12495 if (self.op.remove_instance and
12496 self.instance.admin_state == constants.ADMINST_UP and
12497 not self.op.shutdown):
12498 raise errors.OpPrereqError("Can not remove instance without shutting it"
12501 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12502 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
12503 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
12504 assert self.dst_node is not None
12506 _CheckNodeOnline(self, self.dst_node.name)
12507 _CheckNodeNotDrained(self, self.dst_node.name)
12510 self.dest_disk_info = None
12511 self.dest_x509_ca = None
12513 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
12514 self.dst_node = None
12516 if len(self.op.target_node) != len(self.instance.disks):
12517 raise errors.OpPrereqError(("Received destination information for %s"
12518 " disks, but instance %s has %s disks") %
12519 (len(self.op.target_node), instance_name,
12520 len(self.instance.disks)),
12521 errors.ECODE_INVAL)
12523 cds = _GetClusterDomainSecret()
12525 # Check X509 key name
12527 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
12528 except (TypeError, ValueError), err:
12529 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
12531 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
12532 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
12533 errors.ECODE_INVAL)
12535 # Load and verify CA
12537 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
12538 except OpenSSL.crypto.Error, err:
12539 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
12540 (err, ), errors.ECODE_INVAL)
12542 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
12543 if errcode is not None:
12544 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
12545 (msg, ), errors.ECODE_INVAL)
12547 self.dest_x509_ca = cert
12549 # Verify target information
12551 for idx, disk_data in enumerate(self.op.target_node):
12553 (host, port, magic) = \
12554 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
12555 except errors.GenericError, err:
12556 raise errors.OpPrereqError("Target info for disk %s: %s" %
12557 (idx, err), errors.ECODE_INVAL)
12559 disk_info.append((host, port, magic))
12561 assert len(disk_info) == len(self.op.target_node)
12562 self.dest_disk_info = disk_info
12565 raise errors.ProgrammerError("Unhandled export mode %r" %
12568 # instance disk type verification
12569 # TODO: Implement export support for file-based disks
12570 for disk in self.instance.disks:
12571 if disk.dev_type == constants.LD_FILE:
12572 raise errors.OpPrereqError("Export not supported for instances with"
12573 " file-based disks", errors.ECODE_INVAL)
12575 def _CleanupExports(self, feedback_fn):
12576 """Removes exports of current instance from all other nodes.
12578 If an instance in a cluster with nodes A..D was exported to node C, its
12579 exports will be removed from the nodes A, B and D.
12582 assert self.op.mode != constants.EXPORT_MODE_REMOTE
12584 nodelist = self.cfg.GetNodeList()
12585 nodelist.remove(self.dst_node.name)
12587 # on one-node clusters nodelist will be empty after the removal
12588 # if we proceed the backup would be removed because OpBackupQuery
12589 # substitutes an empty list with the full cluster node list.
12590 iname = self.instance.name
12592 feedback_fn("Removing old exports for instance %s" % iname)
12593 exportlist = self.rpc.call_export_list(nodelist)
12594 for node in exportlist:
12595 if exportlist[node].fail_msg:
12597 if iname in exportlist[node].payload:
12598 msg = self.rpc.call_export_remove(node, iname).fail_msg
12600 self.LogWarning("Could not remove older export for instance %s"
12601 " on node %s: %s", iname, node, msg)
12603 def Exec(self, feedback_fn):
12604 """Export an instance to an image in the cluster.
12607 assert self.op.mode in constants.EXPORT_MODES
12609 instance = self.instance
12610 src_node = instance.primary_node
12612 if self.op.shutdown:
12613 # shutdown the instance, but not the disks
12614 feedback_fn("Shutting down instance %s" % instance.name)
12615 result = self.rpc.call_instance_shutdown(src_node, instance,
12616 self.op.shutdown_timeout)
12617 # TODO: Maybe ignore failures if ignore_remove_failures is set
12618 result.Raise("Could not shutdown instance %s on"
12619 " node %s" % (instance.name, src_node))
12621 # set the disks ID correctly since call_instance_start needs the
12622 # correct drbd minor to create the symlinks
12623 for disk in instance.disks:
12624 self.cfg.SetDiskID(disk, src_node)
12626 activate_disks = (instance.admin_state != constants.ADMINST_UP)
12629 # Activate the instance disks if we'exporting a stopped instance
12630 feedback_fn("Activating disks for %s" % instance.name)
12631 _StartInstanceDisks(self, instance, None)
12634 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
12637 helper.CreateSnapshots()
12639 if (self.op.shutdown and
12640 instance.admin_state == constants.ADMINST_UP and
12641 not self.op.remove_instance):
12642 assert not activate_disks
12643 feedback_fn("Starting instance %s" % instance.name)
12644 result = self.rpc.call_instance_start(src_node,
12645 (instance, None, None), False)
12646 msg = result.fail_msg
12648 feedback_fn("Failed to start instance: %s" % msg)
12649 _ShutdownInstanceDisks(self, instance)
12650 raise errors.OpExecError("Could not start instance: %s" % msg)
12652 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12653 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
12654 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
12655 connect_timeout = constants.RIE_CONNECT_TIMEOUT
12656 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
12658 (key_name, _, _) = self.x509_key_name
12661 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
12664 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
12665 key_name, dest_ca_pem,
12670 # Check for backwards compatibility
12671 assert len(dresults) == len(instance.disks)
12672 assert compat.all(isinstance(i, bool) for i in dresults), \
12673 "Not all results are boolean: %r" % dresults
12677 feedback_fn("Deactivating disks for %s" % instance.name)
12678 _ShutdownInstanceDisks(self, instance)
12680 if not (compat.all(dresults) and fin_resu):
12683 failures.append("export finalization")
12684 if not compat.all(dresults):
12685 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
12687 failures.append("disk export: disk(s) %s" % fdsk)
12689 raise errors.OpExecError("Export failed, errors in %s" %
12690 utils.CommaJoin(failures))
12692 # At this point, the export was successful, we can cleanup/finish
12694 # Remove instance if requested
12695 if self.op.remove_instance:
12696 feedback_fn("Removing instance %s" % instance.name)
12697 _RemoveInstance(self, feedback_fn, instance,
12698 self.op.ignore_remove_failures)
12700 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12701 self._CleanupExports(feedback_fn)
12703 return fin_resu, dresults
12706 class LUBackupRemove(NoHooksLU):
12707 """Remove exports related to the named instance.
12712 def ExpandNames(self):
12713 self.needed_locks = {}
12714 # We need all nodes to be locked in order for RemoveExport to work, but we
12715 # don't need to lock the instance itself, as nothing will happen to it (and
12716 # we can remove exports also for a removed instance)
12717 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12719 def Exec(self, feedback_fn):
12720 """Remove any export.
12723 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
12724 # If the instance was not found we'll try with the name that was passed in.
12725 # This will only work if it was an FQDN, though.
12727 if not instance_name:
12729 instance_name = self.op.instance_name
12731 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
12732 exportlist = self.rpc.call_export_list(locked_nodes)
12734 for node in exportlist:
12735 msg = exportlist[node].fail_msg
12737 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
12739 if instance_name in exportlist[node].payload:
12741 result = self.rpc.call_export_remove(node, instance_name)
12742 msg = result.fail_msg
12744 logging.error("Could not remove export for instance %s"
12745 " on node %s: %s", instance_name, node, msg)
12747 if fqdn_warn and not found:
12748 feedback_fn("Export not found. If trying to remove an export belonging"
12749 " to a deleted instance please use its Fully Qualified"
12753 class LUGroupAdd(LogicalUnit):
12754 """Logical unit for creating node groups.
12757 HPATH = "group-add"
12758 HTYPE = constants.HTYPE_GROUP
12761 def ExpandNames(self):
12762 # We need the new group's UUID here so that we can create and acquire the
12763 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
12764 # that it should not check whether the UUID exists in the configuration.
12765 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
12766 self.needed_locks = {}
12767 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12769 def CheckPrereq(self):
12770 """Check prerequisites.
12772 This checks that the given group name is not an existing node group
12777 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12778 except errors.OpPrereqError:
12781 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
12782 " node group (UUID: %s)" %
12783 (self.op.group_name, existing_uuid),
12784 errors.ECODE_EXISTS)
12786 if self.op.ndparams:
12787 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12789 if self.op.hv_state:
12790 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
12792 self.new_hv_state = None
12794 if self.op.disk_state:
12795 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
12797 self.new_disk_state = None
12799 if self.op.diskparams:
12800 for templ in constants.DISK_TEMPLATES:
12801 if templ not in self.op.diskparams:
12802 self.op.diskparams[templ] = {}
12803 utils.ForceDictType(self.op.diskparams[templ], constants.DISK_DT_TYPES)
12805 self.op.diskparams = self.cfg.GetClusterInfo().diskparams
12807 if self.op.ipolicy:
12808 cluster = self.cfg.GetClusterInfo()
12809 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
12810 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy)
12812 def BuildHooksEnv(self):
12813 """Build hooks env.
12817 "GROUP_NAME": self.op.group_name,
12820 def BuildHooksNodes(self):
12821 """Build hooks nodes.
12824 mn = self.cfg.GetMasterNode()
12825 return ([mn], [mn])
12827 def Exec(self, feedback_fn):
12828 """Add the node group to the cluster.
12831 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
12832 uuid=self.group_uuid,
12833 alloc_policy=self.op.alloc_policy,
12834 ndparams=self.op.ndparams,
12835 diskparams=self.op.diskparams,
12836 ipolicy=self.op.ipolicy,
12837 hv_state_static=self.new_hv_state,
12838 disk_state_static=self.new_disk_state)
12840 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
12841 del self.remove_locks[locking.LEVEL_NODEGROUP]
12844 class LUGroupAssignNodes(NoHooksLU):
12845 """Logical unit for assigning nodes to groups.
12850 def ExpandNames(self):
12851 # These raise errors.OpPrereqError on their own:
12852 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12853 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
12855 # We want to lock all the affected nodes and groups. We have readily
12856 # available the list of nodes, and the *destination* group. To gather the
12857 # list of "source" groups, we need to fetch node information later on.
12858 self.needed_locks = {
12859 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
12860 locking.LEVEL_NODE: self.op.nodes,
12863 def DeclareLocks(self, level):
12864 if level == locking.LEVEL_NODEGROUP:
12865 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
12867 # Try to get all affected nodes' groups without having the group or node
12868 # lock yet. Needs verification later in the code flow.
12869 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
12871 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
12873 def CheckPrereq(self):
12874 """Check prerequisites.
12877 assert self.needed_locks[locking.LEVEL_NODEGROUP]
12878 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
12879 frozenset(self.op.nodes))
12881 expected_locks = (set([self.group_uuid]) |
12882 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
12883 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
12884 if actual_locks != expected_locks:
12885 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
12886 " current groups are '%s', used to be '%s'" %
12887 (utils.CommaJoin(expected_locks),
12888 utils.CommaJoin(actual_locks)))
12890 self.node_data = self.cfg.GetAllNodesInfo()
12891 self.group = self.cfg.GetNodeGroup(self.group_uuid)
12892 instance_data = self.cfg.GetAllInstancesInfo()
12894 if self.group is None:
12895 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12896 (self.op.group_name, self.group_uuid))
12898 (new_splits, previous_splits) = \
12899 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
12900 for node in self.op.nodes],
12901 self.node_data, instance_data)
12904 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
12906 if not self.op.force:
12907 raise errors.OpExecError("The following instances get split by this"
12908 " change and --force was not given: %s" %
12911 self.LogWarning("This operation will split the following instances: %s",
12914 if previous_splits:
12915 self.LogWarning("In addition, these already-split instances continue"
12916 " to be split across groups: %s",
12917 utils.CommaJoin(utils.NiceSort(previous_splits)))
12919 def Exec(self, feedback_fn):
12920 """Assign nodes to a new group.
12923 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
12925 self.cfg.AssignGroupNodes(mods)
12928 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
12929 """Check for split instances after a node assignment.
12931 This method considers a series of node assignments as an atomic operation,
12932 and returns information about split instances after applying the set of
12935 In particular, it returns information about newly split instances, and
12936 instances that were already split, and remain so after the change.
12938 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
12941 @type changes: list of (node_name, new_group_uuid) pairs.
12942 @param changes: list of node assignments to consider.
12943 @param node_data: a dict with data for all nodes
12944 @param instance_data: a dict with all instances to consider
12945 @rtype: a two-tuple
12946 @return: a list of instances that were previously okay and result split as a
12947 consequence of this change, and a list of instances that were previously
12948 split and this change does not fix.
12951 changed_nodes = dict((node, group) for node, group in changes
12952 if node_data[node].group != group)
12954 all_split_instances = set()
12955 previously_split_instances = set()
12957 def InstanceNodes(instance):
12958 return [instance.primary_node] + list(instance.secondary_nodes)
12960 for inst in instance_data.values():
12961 if inst.disk_template not in constants.DTS_INT_MIRROR:
12964 instance_nodes = InstanceNodes(inst)
12966 if len(set(node_data[node].group for node in instance_nodes)) > 1:
12967 previously_split_instances.add(inst.name)
12969 if len(set(changed_nodes.get(node, node_data[node].group)
12970 for node in instance_nodes)) > 1:
12971 all_split_instances.add(inst.name)
12973 return (list(all_split_instances - previously_split_instances),
12974 list(previously_split_instances & all_split_instances))
12977 class _GroupQuery(_QueryBase):
12978 FIELDS = query.GROUP_FIELDS
12980 def ExpandNames(self, lu):
12981 lu.needed_locks = {}
12983 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
12984 self._cluster = lu.cfg.GetClusterInfo()
12985 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
12988 self.wanted = [name_to_uuid[name]
12989 for name in utils.NiceSort(name_to_uuid.keys())]
12991 # Accept names to be either names or UUIDs.
12994 all_uuid = frozenset(self._all_groups.keys())
12996 for name in self.names:
12997 if name in all_uuid:
12998 self.wanted.append(name)
12999 elif name in name_to_uuid:
13000 self.wanted.append(name_to_uuid[name])
13002 missing.append(name)
13005 raise errors.OpPrereqError("Some groups do not exist: %s" %
13006 utils.CommaJoin(missing),
13007 errors.ECODE_NOENT)
13009 def DeclareLocks(self, lu, level):
13012 def _GetQueryData(self, lu):
13013 """Computes the list of node groups and their attributes.
13016 do_nodes = query.GQ_NODE in self.requested_data
13017 do_instances = query.GQ_INST in self.requested_data
13019 group_to_nodes = None
13020 group_to_instances = None
13022 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
13023 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
13024 # latter GetAllInstancesInfo() is not enough, for we have to go through
13025 # instance->node. Hence, we will need to process nodes even if we only need
13026 # instance information.
13027 if do_nodes or do_instances:
13028 all_nodes = lu.cfg.GetAllNodesInfo()
13029 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
13032 for node in all_nodes.values():
13033 if node.group in group_to_nodes:
13034 group_to_nodes[node.group].append(node.name)
13035 node_to_group[node.name] = node.group
13038 all_instances = lu.cfg.GetAllInstancesInfo()
13039 group_to_instances = dict((uuid, []) for uuid in self.wanted)
13041 for instance in all_instances.values():
13042 node = instance.primary_node
13043 if node in node_to_group:
13044 group_to_instances[node_to_group[node]].append(instance.name)
13047 # Do not pass on node information if it was not requested.
13048 group_to_nodes = None
13050 return query.GroupQueryData(self._cluster,
13051 [self._all_groups[uuid]
13052 for uuid in self.wanted],
13053 group_to_nodes, group_to_instances)
13056 class LUGroupQuery(NoHooksLU):
13057 """Logical unit for querying node groups.
13062 def CheckArguments(self):
13063 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
13064 self.op.output_fields, False)
13066 def ExpandNames(self):
13067 self.gq.ExpandNames(self)
13069 def DeclareLocks(self, level):
13070 self.gq.DeclareLocks(self, level)
13072 def Exec(self, feedback_fn):
13073 return self.gq.OldStyleQuery(self)
13076 class LUGroupSetParams(LogicalUnit):
13077 """Modifies the parameters of a node group.
13080 HPATH = "group-modify"
13081 HTYPE = constants.HTYPE_GROUP
13084 def CheckArguments(self):
13087 self.op.diskparams,
13088 self.op.alloc_policy,
13090 self.op.disk_state,
13094 if all_changes.count(None) == len(all_changes):
13095 raise errors.OpPrereqError("Please pass at least one modification",
13096 errors.ECODE_INVAL)
13098 def ExpandNames(self):
13099 # This raises errors.OpPrereqError on its own:
13100 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13102 self.needed_locks = {
13103 locking.LEVEL_NODEGROUP: [self.group_uuid],
13106 def CheckPrereq(self):
13107 """Check prerequisites.
13110 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13112 if self.group is None:
13113 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13114 (self.op.group_name, self.group_uuid))
13116 if self.op.ndparams:
13117 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
13118 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13119 self.new_ndparams = new_ndparams
13121 if self.op.diskparams:
13122 self.new_diskparams = dict()
13123 for templ in constants.DISK_TEMPLATES:
13124 if templ not in self.op.diskparams:
13125 self.op.diskparams[templ] = {}
13126 new_templ_params = _GetUpdatedParams(self.group.diskparams[templ],
13127 self.op.diskparams[templ])
13128 utils.ForceDictType(new_templ_params, constants.DISK_DT_TYPES)
13129 self.new_diskparams[templ] = new_templ_params
13131 if self.op.hv_state:
13132 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
13133 self.group.hv_state_static)
13135 if self.op.disk_state:
13136 self.new_disk_state = \
13137 _MergeAndVerifyDiskState(self.op.disk_state,
13138 self.group.disk_state_static)
13140 if self.op.ipolicy:
13142 for key, value in self.op.ipolicy.iteritems():
13143 g_ipolicy[key] = _GetUpdatedParams(self.group.ipolicy.get(key, {}),
13146 utils.ForceDictType(g_ipolicy[key], constants.ISPECS_PARAMETER_TYPES)
13147 self.new_ipolicy = g_ipolicy
13148 objects.InstancePolicy.CheckParameterSyntax(self.new_ipolicy)
13150 def BuildHooksEnv(self):
13151 """Build hooks env.
13155 "GROUP_NAME": self.op.group_name,
13156 "NEW_ALLOC_POLICY": self.op.alloc_policy,
13159 def BuildHooksNodes(self):
13160 """Build hooks nodes.
13163 mn = self.cfg.GetMasterNode()
13164 return ([mn], [mn])
13166 def Exec(self, feedback_fn):
13167 """Modifies the node group.
13172 if self.op.ndparams:
13173 self.group.ndparams = self.new_ndparams
13174 result.append(("ndparams", str(self.group.ndparams)))
13176 if self.op.diskparams:
13177 self.group.diskparams = self.new_diskparams
13178 result.append(("diskparams", str(self.group.diskparams)))
13180 if self.op.alloc_policy:
13181 self.group.alloc_policy = self.op.alloc_policy
13183 if self.op.hv_state:
13184 self.group.hv_state_static = self.new_hv_state
13186 if self.op.disk_state:
13187 self.group.disk_state_static = self.new_disk_state
13189 if self.op.ipolicy:
13190 self.group.ipolicy = self.new_ipolicy
13192 self.cfg.Update(self.group, feedback_fn)
13196 class LUGroupRemove(LogicalUnit):
13197 HPATH = "group-remove"
13198 HTYPE = constants.HTYPE_GROUP
13201 def ExpandNames(self):
13202 # This will raises errors.OpPrereqError on its own:
13203 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13204 self.needed_locks = {
13205 locking.LEVEL_NODEGROUP: [self.group_uuid],
13208 def CheckPrereq(self):
13209 """Check prerequisites.
13211 This checks that the given group name exists as a node group, that is
13212 empty (i.e., contains no nodes), and that is not the last group of the
13216 # Verify that the group is empty.
13217 group_nodes = [node.name
13218 for node in self.cfg.GetAllNodesInfo().values()
13219 if node.group == self.group_uuid]
13222 raise errors.OpPrereqError("Group '%s' not empty, has the following"
13224 (self.op.group_name,
13225 utils.CommaJoin(utils.NiceSort(group_nodes))),
13226 errors.ECODE_STATE)
13228 # Verify the cluster would not be left group-less.
13229 if len(self.cfg.GetNodeGroupList()) == 1:
13230 raise errors.OpPrereqError("Group '%s' is the only group,"
13231 " cannot be removed" %
13232 self.op.group_name,
13233 errors.ECODE_STATE)
13235 def BuildHooksEnv(self):
13236 """Build hooks env.
13240 "GROUP_NAME": self.op.group_name,
13243 def BuildHooksNodes(self):
13244 """Build hooks nodes.
13247 mn = self.cfg.GetMasterNode()
13248 return ([mn], [mn])
13250 def Exec(self, feedback_fn):
13251 """Remove the node group.
13255 self.cfg.RemoveNodeGroup(self.group_uuid)
13256 except errors.ConfigurationError:
13257 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
13258 (self.op.group_name, self.group_uuid))
13260 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13263 class LUGroupRename(LogicalUnit):
13264 HPATH = "group-rename"
13265 HTYPE = constants.HTYPE_GROUP
13268 def ExpandNames(self):
13269 # This raises errors.OpPrereqError on its own:
13270 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13272 self.needed_locks = {
13273 locking.LEVEL_NODEGROUP: [self.group_uuid],
13276 def CheckPrereq(self):
13277 """Check prerequisites.
13279 Ensures requested new name is not yet used.
13283 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
13284 except errors.OpPrereqError:
13287 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
13288 " node group (UUID: %s)" %
13289 (self.op.new_name, new_name_uuid),
13290 errors.ECODE_EXISTS)
13292 def BuildHooksEnv(self):
13293 """Build hooks env.
13297 "OLD_NAME": self.op.group_name,
13298 "NEW_NAME": self.op.new_name,
13301 def BuildHooksNodes(self):
13302 """Build hooks nodes.
13305 mn = self.cfg.GetMasterNode()
13307 all_nodes = self.cfg.GetAllNodesInfo()
13308 all_nodes.pop(mn, None)
13311 run_nodes.extend(node.name for node in all_nodes.values()
13312 if node.group == self.group_uuid)
13314 return (run_nodes, run_nodes)
13316 def Exec(self, feedback_fn):
13317 """Rename the node group.
13320 group = self.cfg.GetNodeGroup(self.group_uuid)
13323 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13324 (self.op.group_name, self.group_uuid))
13326 group.name = self.op.new_name
13327 self.cfg.Update(group, feedback_fn)
13329 return self.op.new_name
13332 class LUGroupEvacuate(LogicalUnit):
13333 HPATH = "group-evacuate"
13334 HTYPE = constants.HTYPE_GROUP
13337 def ExpandNames(self):
13338 # This raises errors.OpPrereqError on its own:
13339 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13341 if self.op.target_groups:
13342 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
13343 self.op.target_groups)
13345 self.req_target_uuids = []
13347 if self.group_uuid in self.req_target_uuids:
13348 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
13349 " as a target group (targets are %s)" %
13351 utils.CommaJoin(self.req_target_uuids)),
13352 errors.ECODE_INVAL)
13354 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
13356 self.share_locks = _ShareAll()
13357 self.needed_locks = {
13358 locking.LEVEL_INSTANCE: [],
13359 locking.LEVEL_NODEGROUP: [],
13360 locking.LEVEL_NODE: [],
13363 def DeclareLocks(self, level):
13364 if level == locking.LEVEL_INSTANCE:
13365 assert not self.needed_locks[locking.LEVEL_INSTANCE]
13367 # Lock instances optimistically, needs verification once node and group
13368 # locks have been acquired
13369 self.needed_locks[locking.LEVEL_INSTANCE] = \
13370 self.cfg.GetNodeGroupInstances(self.group_uuid)
13372 elif level == locking.LEVEL_NODEGROUP:
13373 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13375 if self.req_target_uuids:
13376 lock_groups = set([self.group_uuid] + self.req_target_uuids)
13378 # Lock all groups used by instances optimistically; this requires going
13379 # via the node before it's locked, requiring verification later on
13380 lock_groups.update(group_uuid
13381 for instance_name in
13382 self.owned_locks(locking.LEVEL_INSTANCE)
13384 self.cfg.GetInstanceNodeGroups(instance_name))
13386 # No target groups, need to lock all of them
13387 lock_groups = locking.ALL_SET
13389 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13391 elif level == locking.LEVEL_NODE:
13392 # This will only lock the nodes in the group to be evacuated which
13393 # contain actual instances
13394 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13395 self._LockInstancesNodes()
13397 # Lock all nodes in group to be evacuated and target groups
13398 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13399 assert self.group_uuid in owned_groups
13400 member_nodes = [node_name
13401 for group in owned_groups
13402 for node_name in self.cfg.GetNodeGroup(group).members]
13403 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13405 def CheckPrereq(self):
13406 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13407 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13408 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13410 assert owned_groups.issuperset(self.req_target_uuids)
13411 assert self.group_uuid in owned_groups
13413 # Check if locked instances are still correct
13414 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
13416 # Get instance information
13417 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
13419 # Check if node groups for locked instances are still correct
13420 for instance_name in owned_instances:
13421 inst = self.instances[instance_name]
13422 assert owned_nodes.issuperset(inst.all_nodes), \
13423 "Instance %s's nodes changed while we kept the lock" % instance_name
13425 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
13428 assert self.group_uuid in inst_groups, \
13429 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
13431 if self.req_target_uuids:
13432 # User requested specific target groups
13433 self.target_uuids = self.req_target_uuids
13435 # All groups except the one to be evacuated are potential targets
13436 self.target_uuids = [group_uuid for group_uuid in owned_groups
13437 if group_uuid != self.group_uuid]
13439 if not self.target_uuids:
13440 raise errors.OpPrereqError("There are no possible target groups",
13441 errors.ECODE_INVAL)
13443 def BuildHooksEnv(self):
13444 """Build hooks env.
13448 "GROUP_NAME": self.op.group_name,
13449 "TARGET_GROUPS": " ".join(self.target_uuids),
13452 def BuildHooksNodes(self):
13453 """Build hooks nodes.
13456 mn = self.cfg.GetMasterNode()
13458 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
13460 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
13462 return (run_nodes, run_nodes)
13464 def Exec(self, feedback_fn):
13465 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13467 assert self.group_uuid not in self.target_uuids
13469 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
13470 instances=instances, target_groups=self.target_uuids)
13472 ial.Run(self.op.iallocator)
13474 if not ial.success:
13475 raise errors.OpPrereqError("Can't compute group evacuation using"
13476 " iallocator '%s': %s" %
13477 (self.op.iallocator, ial.info),
13478 errors.ECODE_NORES)
13480 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13482 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
13483 len(jobs), self.op.group_name)
13485 return ResultWithJobs(jobs)
13488 class TagsLU(NoHooksLU): # pylint: disable=W0223
13489 """Generic tags LU.
13491 This is an abstract class which is the parent of all the other tags LUs.
13494 def ExpandNames(self):
13495 self.group_uuid = None
13496 self.needed_locks = {}
13497 if self.op.kind == constants.TAG_NODE:
13498 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
13499 self.needed_locks[locking.LEVEL_NODE] = self.op.name
13500 elif self.op.kind == constants.TAG_INSTANCE:
13501 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
13502 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
13503 elif self.op.kind == constants.TAG_NODEGROUP:
13504 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
13506 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
13507 # not possible to acquire the BGL based on opcode parameters)
13509 def CheckPrereq(self):
13510 """Check prerequisites.
13513 if self.op.kind == constants.TAG_CLUSTER:
13514 self.target = self.cfg.GetClusterInfo()
13515 elif self.op.kind == constants.TAG_NODE:
13516 self.target = self.cfg.GetNodeInfo(self.op.name)
13517 elif self.op.kind == constants.TAG_INSTANCE:
13518 self.target = self.cfg.GetInstanceInfo(self.op.name)
13519 elif self.op.kind == constants.TAG_NODEGROUP:
13520 self.target = self.cfg.GetNodeGroup(self.group_uuid)
13522 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
13523 str(self.op.kind), errors.ECODE_INVAL)
13526 class LUTagsGet(TagsLU):
13527 """Returns the tags of a given object.
13532 def ExpandNames(self):
13533 TagsLU.ExpandNames(self)
13535 # Share locks as this is only a read operation
13536 self.share_locks = _ShareAll()
13538 def Exec(self, feedback_fn):
13539 """Returns the tag list.
13542 return list(self.target.GetTags())
13545 class LUTagsSearch(NoHooksLU):
13546 """Searches the tags for a given pattern.
13551 def ExpandNames(self):
13552 self.needed_locks = {}
13554 def CheckPrereq(self):
13555 """Check prerequisites.
13557 This checks the pattern passed for validity by compiling it.
13561 self.re = re.compile(self.op.pattern)
13562 except re.error, err:
13563 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
13564 (self.op.pattern, err), errors.ECODE_INVAL)
13566 def Exec(self, feedback_fn):
13567 """Returns the tag list.
13571 tgts = [("/cluster", cfg.GetClusterInfo())]
13572 ilist = cfg.GetAllInstancesInfo().values()
13573 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
13574 nlist = cfg.GetAllNodesInfo().values()
13575 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
13576 tgts.extend(("/nodegroup/%s" % n.name, n)
13577 for n in cfg.GetAllNodeGroupsInfo().values())
13579 for path, target in tgts:
13580 for tag in target.GetTags():
13581 if self.re.search(tag):
13582 results.append((path, tag))
13586 class LUTagsSet(TagsLU):
13587 """Sets a tag on a given object.
13592 def CheckPrereq(self):
13593 """Check prerequisites.
13595 This checks the type and length of the tag name and value.
13598 TagsLU.CheckPrereq(self)
13599 for tag in self.op.tags:
13600 objects.TaggableObject.ValidateTag(tag)
13602 def Exec(self, feedback_fn):
13607 for tag in self.op.tags:
13608 self.target.AddTag(tag)
13609 except errors.TagError, err:
13610 raise errors.OpExecError("Error while setting tag: %s" % str(err))
13611 self.cfg.Update(self.target, feedback_fn)
13614 class LUTagsDel(TagsLU):
13615 """Delete a list of tags from a given object.
13620 def CheckPrereq(self):
13621 """Check prerequisites.
13623 This checks that we have the given tag.
13626 TagsLU.CheckPrereq(self)
13627 for tag in self.op.tags:
13628 objects.TaggableObject.ValidateTag(tag)
13629 del_tags = frozenset(self.op.tags)
13630 cur_tags = self.target.GetTags()
13632 diff_tags = del_tags - cur_tags
13634 diff_names = ("'%s'" % i for i in sorted(diff_tags))
13635 raise errors.OpPrereqError("Tag(s) %s not found" %
13636 (utils.CommaJoin(diff_names), ),
13637 errors.ECODE_NOENT)
13639 def Exec(self, feedback_fn):
13640 """Remove the tag from the object.
13643 for tag in self.op.tags:
13644 self.target.RemoveTag(tag)
13645 self.cfg.Update(self.target, feedback_fn)
13648 class LUTestDelay(NoHooksLU):
13649 """Sleep for a specified amount of time.
13651 This LU sleeps on the master and/or nodes for a specified amount of
13657 def ExpandNames(self):
13658 """Expand names and set required locks.
13660 This expands the node list, if any.
13663 self.needed_locks = {}
13664 if self.op.on_nodes:
13665 # _GetWantedNodes can be used here, but is not always appropriate to use
13666 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
13667 # more information.
13668 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
13669 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
13671 def _TestDelay(self):
13672 """Do the actual sleep.
13675 if self.op.on_master:
13676 if not utils.TestDelay(self.op.duration):
13677 raise errors.OpExecError("Error during master delay test")
13678 if self.op.on_nodes:
13679 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
13680 for node, node_result in result.items():
13681 node_result.Raise("Failure during rpc call to node %s" % node)
13683 def Exec(self, feedback_fn):
13684 """Execute the test delay opcode, with the wanted repetitions.
13687 if self.op.repeat == 0:
13690 top_value = self.op.repeat - 1
13691 for i in range(self.op.repeat):
13692 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
13696 class LUTestJqueue(NoHooksLU):
13697 """Utility LU to test some aspects of the job queue.
13702 # Must be lower than default timeout for WaitForJobChange to see whether it
13703 # notices changed jobs
13704 _CLIENT_CONNECT_TIMEOUT = 20.0
13705 _CLIENT_CONFIRM_TIMEOUT = 60.0
13708 def _NotifyUsingSocket(cls, cb, errcls):
13709 """Opens a Unix socket and waits for another program to connect.
13712 @param cb: Callback to send socket name to client
13713 @type errcls: class
13714 @param errcls: Exception class to use for errors
13717 # Using a temporary directory as there's no easy way to create temporary
13718 # sockets without writing a custom loop around tempfile.mktemp and
13720 tmpdir = tempfile.mkdtemp()
13722 tmpsock = utils.PathJoin(tmpdir, "sock")
13724 logging.debug("Creating temporary socket at %s", tmpsock)
13725 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
13730 # Send details to client
13733 # Wait for client to connect before continuing
13734 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
13736 (conn, _) = sock.accept()
13737 except socket.error, err:
13738 raise errcls("Client didn't connect in time (%s)" % err)
13742 # Remove as soon as client is connected
13743 shutil.rmtree(tmpdir)
13745 # Wait for client to close
13748 # pylint: disable=E1101
13749 # Instance of '_socketobject' has no ... member
13750 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
13752 except socket.error, err:
13753 raise errcls("Client failed to confirm notification (%s)" % err)
13757 def _SendNotification(self, test, arg, sockname):
13758 """Sends a notification to the client.
13761 @param test: Test name
13762 @param arg: Test argument (depends on test)
13763 @type sockname: string
13764 @param sockname: Socket path
13767 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
13769 def _Notify(self, prereq, test, arg):
13770 """Notifies the client of a test.
13773 @param prereq: Whether this is a prereq-phase test
13775 @param test: Test name
13776 @param arg: Test argument (depends on test)
13780 errcls = errors.OpPrereqError
13782 errcls = errors.OpExecError
13784 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
13788 def CheckArguments(self):
13789 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
13790 self.expandnames_calls = 0
13792 def ExpandNames(self):
13793 checkargs_calls = getattr(self, "checkargs_calls", 0)
13794 if checkargs_calls < 1:
13795 raise errors.ProgrammerError("CheckArguments was not called")
13797 self.expandnames_calls += 1
13799 if self.op.notify_waitlock:
13800 self._Notify(True, constants.JQT_EXPANDNAMES, None)
13802 self.LogInfo("Expanding names")
13804 # Get lock on master node (just to get a lock, not for a particular reason)
13805 self.needed_locks = {
13806 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
13809 def Exec(self, feedback_fn):
13810 if self.expandnames_calls < 1:
13811 raise errors.ProgrammerError("ExpandNames was not called")
13813 if self.op.notify_exec:
13814 self._Notify(False, constants.JQT_EXEC, None)
13816 self.LogInfo("Executing")
13818 if self.op.log_messages:
13819 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
13820 for idx, msg in enumerate(self.op.log_messages):
13821 self.LogInfo("Sending log message %s", idx + 1)
13822 feedback_fn(constants.JQT_MSGPREFIX + msg)
13823 # Report how many test messages have been sent
13824 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
13827 raise errors.OpExecError("Opcode failure was requested")
13832 class IAllocator(object):
13833 """IAllocator framework.
13835 An IAllocator instance has three sets of attributes:
13836 - cfg that is needed to query the cluster
13837 - input data (all members of the _KEYS class attribute are required)
13838 - four buffer attributes (in|out_data|text), that represent the
13839 input (to the external script) in text and data structure format,
13840 and the output from it, again in two formats
13841 - the result variables from the script (success, info, nodes) for
13845 # pylint: disable=R0902
13846 # lots of instance attributes
13848 def __init__(self, cfg, rpc_runner, mode, **kwargs):
13850 self.rpc = rpc_runner
13851 # init buffer variables
13852 self.in_text = self.out_text = self.in_data = self.out_data = None
13853 # init all input fields so that pylint is happy
13855 self.memory = self.disks = self.disk_template = None
13856 self.os = self.tags = self.nics = self.vcpus = None
13857 self.hypervisor = None
13858 self.relocate_from = None
13860 self.instances = None
13861 self.evac_mode = None
13862 self.target_groups = []
13864 self.required_nodes = None
13865 # init result fields
13866 self.success = self.info = self.result = None
13869 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
13871 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
13872 " IAllocator" % self.mode)
13874 keyset = [n for (n, _) in keydata]
13877 if key not in keyset:
13878 raise errors.ProgrammerError("Invalid input parameter '%s' to"
13879 " IAllocator" % key)
13880 setattr(self, key, kwargs[key])
13883 if key not in kwargs:
13884 raise errors.ProgrammerError("Missing input parameter '%s' to"
13885 " IAllocator" % key)
13886 self._BuildInputData(compat.partial(fn, self), keydata)
13888 def _ComputeClusterData(self):
13889 """Compute the generic allocator input data.
13891 This is the data that is independent of the actual operation.
13895 cluster_info = cfg.GetClusterInfo()
13898 "version": constants.IALLOCATOR_VERSION,
13899 "cluster_name": cfg.GetClusterName(),
13900 "cluster_tags": list(cluster_info.GetTags()),
13901 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
13902 # we don't have job IDs
13904 ninfo = cfg.GetAllNodesInfo()
13905 iinfo = cfg.GetAllInstancesInfo().values()
13906 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
13909 node_list = [n.name for n in ninfo.values() if n.vm_capable]
13911 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
13912 hypervisor_name = self.hypervisor
13913 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
13914 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
13916 hypervisor_name = cluster_info.primary_hypervisor
13918 node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
13921 self.rpc.call_all_instances_info(node_list,
13922 cluster_info.enabled_hypervisors)
13924 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
13926 config_ndata = self._ComputeBasicNodeData(ninfo)
13927 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
13928 i_list, config_ndata)
13929 assert len(data["nodes"]) == len(ninfo), \
13930 "Incomplete node data computed"
13932 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
13934 self.in_data = data
13937 def _ComputeNodeGroupData(cfg):
13938 """Compute node groups data.
13941 ng = dict((guuid, {
13942 "name": gdata.name,
13943 "alloc_policy": gdata.alloc_policy,
13945 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
13950 def _ComputeBasicNodeData(node_cfg):
13951 """Compute global node data.
13954 @returns: a dict of name: (node dict, node config)
13957 # fill in static (config-based) values
13958 node_results = dict((ninfo.name, {
13959 "tags": list(ninfo.GetTags()),
13960 "primary_ip": ninfo.primary_ip,
13961 "secondary_ip": ninfo.secondary_ip,
13962 "offline": ninfo.offline,
13963 "drained": ninfo.drained,
13964 "master_candidate": ninfo.master_candidate,
13965 "group": ninfo.group,
13966 "master_capable": ninfo.master_capable,
13967 "vm_capable": ninfo.vm_capable,
13969 for ninfo in node_cfg.values())
13971 return node_results
13974 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
13976 """Compute global node data.
13978 @param node_results: the basic node structures as filled from the config
13981 #TODO(dynmem): compute the right data on MAX and MIN memory
13982 # make a copy of the current dict
13983 node_results = dict(node_results)
13984 for nname, nresult in node_data.items():
13985 assert nname in node_results, "Missing basic data for node %s" % nname
13986 ninfo = node_cfg[nname]
13988 if not (ninfo.offline or ninfo.drained):
13989 nresult.Raise("Can't get data for node %s" % nname)
13990 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
13992 remote_info = _MakeLegacyNodeInfo(nresult.payload)
13994 for attr in ["memory_total", "memory_free", "memory_dom0",
13995 "vg_size", "vg_free", "cpu_total"]:
13996 if attr not in remote_info:
13997 raise errors.OpExecError("Node '%s' didn't return attribute"
13998 " '%s'" % (nname, attr))
13999 if not isinstance(remote_info[attr], int):
14000 raise errors.OpExecError("Node '%s' returned invalid value"
14002 (nname, attr, remote_info[attr]))
14003 # compute memory used by primary instances
14004 i_p_mem = i_p_up_mem = 0
14005 for iinfo, beinfo in i_list:
14006 if iinfo.primary_node == nname:
14007 i_p_mem += beinfo[constants.BE_MAXMEM]
14008 if iinfo.name not in node_iinfo[nname].payload:
14011 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
14012 i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
14013 remote_info["memory_free"] -= max(0, i_mem_diff)
14015 if iinfo.admin_state == constants.ADMINST_UP:
14016 i_p_up_mem += beinfo[constants.BE_MAXMEM]
14018 # compute memory used by instances
14020 "total_memory": remote_info["memory_total"],
14021 "reserved_memory": remote_info["memory_dom0"],
14022 "free_memory": remote_info["memory_free"],
14023 "total_disk": remote_info["vg_size"],
14024 "free_disk": remote_info["vg_free"],
14025 "total_cpus": remote_info["cpu_total"],
14026 "i_pri_memory": i_p_mem,
14027 "i_pri_up_memory": i_p_up_mem,
14029 pnr_dyn.update(node_results[nname])
14030 node_results[nname] = pnr_dyn
14032 return node_results
14035 def _ComputeInstanceData(cluster_info, i_list):
14036 """Compute global instance data.
14040 for iinfo, beinfo in i_list:
14042 for nic in iinfo.nics:
14043 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
14047 "mode": filled_params[constants.NIC_MODE],
14048 "link": filled_params[constants.NIC_LINK],
14050 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
14051 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
14052 nic_data.append(nic_dict)
14054 "tags": list(iinfo.GetTags()),
14055 "admin_state": iinfo.admin_state,
14056 "vcpus": beinfo[constants.BE_VCPUS],
14057 "memory": beinfo[constants.BE_MAXMEM],
14059 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
14061 "disks": [{constants.IDISK_SIZE: dsk.size,
14062 constants.IDISK_MODE: dsk.mode}
14063 for dsk in iinfo.disks],
14064 "disk_template": iinfo.disk_template,
14065 "hypervisor": iinfo.hypervisor,
14067 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
14069 instance_data[iinfo.name] = pir
14071 return instance_data
14073 def _AddNewInstance(self):
14074 """Add new instance data to allocator structure.
14076 This in combination with _AllocatorGetClusterData will create the
14077 correct structure needed as input for the allocator.
14079 The checks for the completeness of the opcode must have already been
14083 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
14085 if self.disk_template in constants.DTS_INT_MIRROR:
14086 self.required_nodes = 2
14088 self.required_nodes = 1
14092 "disk_template": self.disk_template,
14095 "vcpus": self.vcpus,
14096 "memory": self.memory,
14097 "disks": self.disks,
14098 "disk_space_total": disk_space,
14100 "required_nodes": self.required_nodes,
14101 "hypervisor": self.hypervisor,
14106 def _AddRelocateInstance(self):
14107 """Add relocate instance data to allocator structure.
14109 This in combination with _IAllocatorGetClusterData will create the
14110 correct structure needed as input for the allocator.
14112 The checks for the completeness of the opcode must have already been
14116 instance = self.cfg.GetInstanceInfo(self.name)
14117 if instance is None:
14118 raise errors.ProgrammerError("Unknown instance '%s' passed to"
14119 " IAllocator" % self.name)
14121 if instance.disk_template not in constants.DTS_MIRRORED:
14122 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
14123 errors.ECODE_INVAL)
14125 if instance.disk_template in constants.DTS_INT_MIRROR and \
14126 len(instance.secondary_nodes) != 1:
14127 raise errors.OpPrereqError("Instance has not exactly one secondary node",
14128 errors.ECODE_STATE)
14130 self.required_nodes = 1
14131 disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
14132 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
14136 "disk_space_total": disk_space,
14137 "required_nodes": self.required_nodes,
14138 "relocate_from": self.relocate_from,
14142 def _AddNodeEvacuate(self):
14143 """Get data for node-evacuate requests.
14147 "instances": self.instances,
14148 "evac_mode": self.evac_mode,
14151 def _AddChangeGroup(self):
14152 """Get data for node-evacuate requests.
14156 "instances": self.instances,
14157 "target_groups": self.target_groups,
14160 def _BuildInputData(self, fn, keydata):
14161 """Build input data structures.
14164 self._ComputeClusterData()
14167 request["type"] = self.mode
14168 for keyname, keytype in keydata:
14169 if keyname not in request:
14170 raise errors.ProgrammerError("Request parameter %s is missing" %
14172 val = request[keyname]
14173 if not keytype(val):
14174 raise errors.ProgrammerError("Request parameter %s doesn't pass"
14175 " validation, value %s, expected"
14176 " type %s" % (keyname, val, keytype))
14177 self.in_data["request"] = request
14179 self.in_text = serializer.Dump(self.in_data)
14181 _STRING_LIST = ht.TListOf(ht.TString)
14182 _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
14183 # pylint: disable=E1101
14184 # Class '...' has no 'OP_ID' member
14185 "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
14186 opcodes.OpInstanceMigrate.OP_ID,
14187 opcodes.OpInstanceReplaceDisks.OP_ID])
14191 ht.TListOf(ht.TAnd(ht.TIsLength(3),
14192 ht.TItems([ht.TNonEmptyString,
14193 ht.TNonEmptyString,
14194 ht.TListOf(ht.TNonEmptyString),
14197 ht.TListOf(ht.TAnd(ht.TIsLength(2),
14198 ht.TItems([ht.TNonEmptyString,
14201 _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
14202 ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
14205 constants.IALLOCATOR_MODE_ALLOC:
14208 ("name", ht.TString),
14209 ("memory", ht.TInt),
14210 ("disks", ht.TListOf(ht.TDict)),
14211 ("disk_template", ht.TString),
14212 ("os", ht.TString),
14213 ("tags", _STRING_LIST),
14214 ("nics", ht.TListOf(ht.TDict)),
14215 ("vcpus", ht.TInt),
14216 ("hypervisor", ht.TString),
14218 constants.IALLOCATOR_MODE_RELOC:
14219 (_AddRelocateInstance,
14220 [("name", ht.TString), ("relocate_from", _STRING_LIST)],
14222 constants.IALLOCATOR_MODE_NODE_EVAC:
14223 (_AddNodeEvacuate, [
14224 ("instances", _STRING_LIST),
14225 ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
14227 constants.IALLOCATOR_MODE_CHG_GROUP:
14228 (_AddChangeGroup, [
14229 ("instances", _STRING_LIST),
14230 ("target_groups", _STRING_LIST),
14234 def Run(self, name, validate=True, call_fn=None):
14235 """Run an instance allocator and return the results.
14238 if call_fn is None:
14239 call_fn = self.rpc.call_iallocator_runner
14241 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
14242 result.Raise("Failure while running the iallocator script")
14244 self.out_text = result.payload
14246 self._ValidateResult()
14248 def _ValidateResult(self):
14249 """Process the allocator results.
14251 This will process and if successful save the result in
14252 self.out_data and the other parameters.
14256 rdict = serializer.Load(self.out_text)
14257 except Exception, err:
14258 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
14260 if not isinstance(rdict, dict):
14261 raise errors.OpExecError("Can't parse iallocator results: not a dict")
14263 # TODO: remove backwards compatiblity in later versions
14264 if "nodes" in rdict and "result" not in rdict:
14265 rdict["result"] = rdict["nodes"]
14268 for key in "success", "info", "result":
14269 if key not in rdict:
14270 raise errors.OpExecError("Can't parse iallocator results:"
14271 " missing key '%s'" % key)
14272 setattr(self, key, rdict[key])
14274 if not self._result_check(self.result):
14275 raise errors.OpExecError("Iallocator returned invalid result,"
14276 " expected %s, got %s" %
14277 (self._result_check, self.result),
14278 errors.ECODE_INVAL)
14280 if self.mode == constants.IALLOCATOR_MODE_RELOC:
14281 assert self.relocate_from is not None
14282 assert self.required_nodes == 1
14284 node2group = dict((name, ndata["group"])
14285 for (name, ndata) in self.in_data["nodes"].items())
14287 fn = compat.partial(self._NodesToGroups, node2group,
14288 self.in_data["nodegroups"])
14290 instance = self.cfg.GetInstanceInfo(self.name)
14291 request_groups = fn(self.relocate_from + [instance.primary_node])
14292 result_groups = fn(rdict["result"] + [instance.primary_node])
14294 if self.success and not set(result_groups).issubset(request_groups):
14295 raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
14296 " differ from original groups (%s)" %
14297 (utils.CommaJoin(result_groups),
14298 utils.CommaJoin(request_groups)))
14300 elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
14301 assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
14303 self.out_data = rdict
14306 def _NodesToGroups(node2group, groups, nodes):
14307 """Returns a list of unique group names for a list of nodes.
14309 @type node2group: dict
14310 @param node2group: Map from node name to group UUID
14312 @param groups: Group information
14314 @param nodes: Node names
14321 group_uuid = node2group[node]
14323 # Ignore unknown node
14327 group = groups[group_uuid]
14329 # Can't find group, let's use UUID
14330 group_name = group_uuid
14332 group_name = group["name"]
14334 result.add(group_name)
14336 return sorted(result)
14339 class LUTestAllocator(NoHooksLU):
14340 """Run allocator tests.
14342 This LU runs the allocator tests
14345 def CheckPrereq(self):
14346 """Check prerequisites.
14348 This checks the opcode parameters depending on the director and mode test.
14351 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
14352 for attr in ["memory", "disks", "disk_template",
14353 "os", "tags", "nics", "vcpus"]:
14354 if not hasattr(self.op, attr):
14355 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
14356 attr, errors.ECODE_INVAL)
14357 iname = self.cfg.ExpandInstanceName(self.op.name)
14358 if iname is not None:
14359 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
14360 iname, errors.ECODE_EXISTS)
14361 if not isinstance(self.op.nics, list):
14362 raise errors.OpPrereqError("Invalid parameter 'nics'",
14363 errors.ECODE_INVAL)
14364 if not isinstance(self.op.disks, list):
14365 raise errors.OpPrereqError("Invalid parameter 'disks'",
14366 errors.ECODE_INVAL)
14367 for row in self.op.disks:
14368 if (not isinstance(row, dict) or
14369 constants.IDISK_SIZE not in row or
14370 not isinstance(row[constants.IDISK_SIZE], int) or
14371 constants.IDISK_MODE not in row or
14372 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
14373 raise errors.OpPrereqError("Invalid contents of the 'disks'"
14374 " parameter", errors.ECODE_INVAL)
14375 if self.op.hypervisor is None:
14376 self.op.hypervisor = self.cfg.GetHypervisorType()
14377 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
14378 fname = _ExpandInstanceName(self.cfg, self.op.name)
14379 self.op.name = fname
14380 self.relocate_from = \
14381 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
14382 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
14383 constants.IALLOCATOR_MODE_NODE_EVAC):
14384 if not self.op.instances:
14385 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
14386 self.op.instances = _GetWantedInstances(self, self.op.instances)
14388 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
14389 self.op.mode, errors.ECODE_INVAL)
14391 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
14392 if self.op.allocator is None:
14393 raise errors.OpPrereqError("Missing allocator name",
14394 errors.ECODE_INVAL)
14395 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
14396 raise errors.OpPrereqError("Wrong allocator test '%s'" %
14397 self.op.direction, errors.ECODE_INVAL)
14399 def Exec(self, feedback_fn):
14400 """Run the allocator test.
14403 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
14404 ial = IAllocator(self.cfg, self.rpc,
14407 memory=self.op.memory,
14408 disks=self.op.disks,
14409 disk_template=self.op.disk_template,
14413 vcpus=self.op.vcpus,
14414 hypervisor=self.op.hypervisor,
14416 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
14417 ial = IAllocator(self.cfg, self.rpc,
14420 relocate_from=list(self.relocate_from),
14422 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
14423 ial = IAllocator(self.cfg, self.rpc,
14425 instances=self.op.instances,
14426 target_groups=self.op.target_groups)
14427 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
14428 ial = IAllocator(self.cfg, self.rpc,
14430 instances=self.op.instances,
14431 evac_mode=self.op.evac_mode)
14433 raise errors.ProgrammerError("Uncatched mode %s in"
14434 " LUTestAllocator.Exec", self.op.mode)
14436 if self.op.direction == constants.IALLOCATOR_DIR_IN:
14437 result = ial.in_text
14439 ial.Run(self.op.allocator, validate=False)
14440 result = ial.out_text
14444 #: Query type implementations
14446 constants.QR_INSTANCE: _InstanceQuery,
14447 constants.QR_NODE: _NodeQuery,
14448 constants.QR_GROUP: _GroupQuery,
14449 constants.QR_OS: _OsQuery,
14452 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
14455 def _GetQueryImplementation(name):
14456 """Returns the implemtnation for a query type.
14458 @param name: Query type, must be one of L{constants.QR_VIA_OP}
14462 return _QUERY_IMPL[name]
14464 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
14465 errors.ECODE_INVAL)