4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
45 from ganeti import ssh
46 from ganeti import utils
47 from ganeti import errors
48 from ganeti import hypervisor
49 from ganeti import locking
50 from ganeti import constants
51 from ganeti import objects
52 from ganeti import serializer
53 from ganeti import ssconf
54 from ganeti import uidpool
55 from ganeti import compat
56 from ganeti import masterd
57 from ganeti import netutils
58 from ganeti import query
59 from ganeti import qlang
60 from ganeti import opcodes
62 from ganeti import rpc
64 import ganeti.masterd.instance # pylint: disable=W0611
67 #: Size of DRBD meta block device
# Lists of admin states, used as the "required states" argument of
# _CheckInstanceState and similar checks.
INSTANCE_UP = [constants.ADMINST_UP]
INSTANCE_DOWN = [constants.ADMINST_DOWN]
INSTANCE_OFFLINE = [constants.ADMINST_OFFLINE]
# "online" here means known to the hypervisor layer (up or down, not offline)
INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
79 """Data container for LU results with jobs.
81 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
82 by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
83 contained in the C{jobs} attribute and include the job IDs in the opcode
87 def __init__(self, jobs, **kwargs):
88 """Initializes this class.
90 Additional return values can be specified as keyword arguments.
92 @type jobs: list of lists of L{opcode.OpCode}
93 @param jobs: A list of lists of opcode objects
100 class LogicalUnit(object):
101 """Logical Unit base class.
103 Subclasses must follow these rules:
104 - implement ExpandNames
105 - implement CheckPrereq (except when tasklets are used)
106 - implement Exec (except when tasklets are used)
107 - implement BuildHooksEnv
108 - implement BuildHooksNodes
109 - redefine HPATH and HTYPE
110 - optionally redefine their run requirements:
111 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
113 Note that all commands require root permissions.
115 @ivar dry_run_result: the value (if any) that will be returned to the caller
116 in dry-run mode (signalled by opcode dry_run parameter)
123 def __init__(self, processor, op, context, rpc_runner):
124 """Constructor for LogicalUnit.
126 This needs to be overridden in derived classes in order to check op
130 self.proc = processor
132 self.cfg = context.cfg
133 self.glm = context.glm
135 self.owned_locks = context.glm.list_owned
136 self.context = context
137 self.rpc = rpc_runner
138 # Dicts used to declare locking needs to mcpu
139 self.needed_locks = None
140 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
142 self.remove_locks = {}
143 # Used to force good behavior when calling helper functions
144 self.recalculate_locks = {}
146 self.Log = processor.Log # pylint: disable=C0103
147 self.LogWarning = processor.LogWarning # pylint: disable=C0103
148 self.LogInfo = processor.LogInfo # pylint: disable=C0103
149 self.LogStep = processor.LogStep # pylint: disable=C0103
150 # support for dry-run
151 self.dry_run_result = None
152 # support for generic debug attribute
153 if (not hasattr(self.op, "debug_level") or
154 not isinstance(self.op.debug_level, int)):
155 self.op.debug_level = 0
160 # Validate opcode parameters and set defaults
161 self.op.Validate(True)
163 self.CheckArguments()
165 def CheckArguments(self):
166 """Check syntactic validity for the opcode arguments.
168 This method is for doing a simple syntactic check and ensure
169 validity of opcode parameters, without any cluster-related
170 checks. While the same can be accomplished in ExpandNames and/or
171 CheckPrereq, doing these separate is better because:
173 - ExpandNames is left as as purely a lock-related function
174 - CheckPrereq is run after we have acquired locks (and possible
177 The function is allowed to change the self.op attribute so that
178 later methods can no longer worry about missing parameters.
183 def ExpandNames(self):
184 """Expand names for this LU.
186 This method is called before starting to execute the opcode, and it should
187 update all the parameters of the opcode to their canonical form (e.g. a
188 short node name must be fully expanded after this method has successfully
189 completed). This way locking, hooks, logging, etc. can work correctly.
191 LUs which implement this method must also populate the self.needed_locks
192 member, as a dict with lock levels as keys, and a list of needed lock names
195 - use an empty dict if you don't need any lock
196 - if you don't need any lock at a particular level omit that level
197 - don't put anything for the BGL level
198 - if you want all locks at a level use locking.ALL_SET as a value
200 If you need to share locks (rather than acquire them exclusively) at one
201 level you can modify self.share_locks, setting a true value (usually 1) for
202 that level. By default locks are not shared.
204 This function can also define a list of tasklets, which then will be
205 executed in order instead of the usual LU-level CheckPrereq and Exec
206 functions, if those are not defined by the LU.
210 # Acquire all nodes and one instance
211 self.needed_locks = {
212 locking.LEVEL_NODE: locking.ALL_SET,
213 locking.LEVEL_INSTANCE: ['instance1.example.com'],
215 # Acquire just two nodes
216 self.needed_locks = {
217 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
220 self.needed_locks = {} # No, you can't leave it to the default value None
223 # The implementation of this method is mandatory only if the new LU is
224 # concurrent, so that old LUs don't need to be changed all at the same
227 self.needed_locks = {} # Exclusive LUs don't need locks.
229 raise NotImplementedError
231 def DeclareLocks(self, level):
232 """Declare LU locking needs for a level
234 While most LUs can just declare their locking needs at ExpandNames time,
235 sometimes there's the need to calculate some locks after having acquired
236 the ones before. This function is called just before acquiring locks at a
237 particular level, but after acquiring the ones at lower levels, and permits
238 such calculations. It can be used to modify self.needed_locks, and by
239 default it does nothing.
241 This function is only called if you have something already set in
242 self.needed_locks for the level.
244 @param level: Locking level which is going to be locked
245 @type level: member of ganeti.locking.LEVELS
249 def CheckPrereq(self):
250 """Check prerequisites for this LU.
252 This method should check that the prerequisites for the execution
253 of this LU are fulfilled. It can do internode communication, but
254 it should be idempotent - no cluster or system changes are
257 The method should raise errors.OpPrereqError in case something is
258 not fulfilled. Its return value is ignored.
260 This method should also update all the parameters of the opcode to
261 their canonical form if it hasn't been done by ExpandNames before.
264 if self.tasklets is not None:
265 for (idx, tl) in enumerate(self.tasklets):
266 logging.debug("Checking prerequisites for tasklet %s/%s",
267 idx + 1, len(self.tasklets))
272 def Exec(self, feedback_fn):
275 This method should implement the actual work. It should raise
276 errors.OpExecError for failures that are somewhat dealt with in
280 if self.tasklets is not None:
281 for (idx, tl) in enumerate(self.tasklets):
282 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
285 raise NotImplementedError
287 def BuildHooksEnv(self):
288 """Build hooks environment for this LU.
291 @return: Dictionary containing the environment that will be used for
292 running the hooks for this LU. The keys of the dict must not be prefixed
293 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
294 will extend the environment with additional variables. If no environment
295 should be defined, an empty dictionary should be returned (not C{None}).
296 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
300 raise NotImplementedError
302 def BuildHooksNodes(self):
303 """Build list of nodes to run LU's hooks.
305 @rtype: tuple; (list, list)
306 @return: Tuple containing a list of node names on which the hook
307 should run before the execution and a list of node names on which the
308 hook should run after the execution. No nodes should be returned as an
309 empty list (and not None).
310 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
314 raise NotImplementedError
316 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
317 """Notify the LU about the results of its hooks.
319 This method is called every time a hooks phase is executed, and notifies
320 the Logical Unit about the hooks' result. The LU can then use it to alter
321 its result based on the hooks. By default the method does nothing and the
322 previous result is passed back unchanged but any LU can define it if it
323 wants to use the local cluster hook-scripts somehow.
325 @param phase: one of L{constants.HOOKS_PHASE_POST} or
326 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
327 @param hook_results: the results of the multi-node hooks rpc call
328 @param feedback_fn: function used send feedback back to the caller
329 @param lu_result: the previous Exec result this LU had, or None
331 @return: the new Exec result, based on the previous result
335 # API must be kept, thus we ignore the unused argument and could
336 # be a function warnings
337 # pylint: disable=W0613,R0201
340 def _ExpandAndLockInstance(self):
341 """Helper function to expand and lock an instance.
343 Many LUs that work on an instance take its name in self.op.instance_name
344 and need to expand it and then declare the expanded name for locking. This
345 function does it, and then updates self.op.instance_name to the expanded
346 name. It also initializes needed_locks as a dict, if this hasn't been done
350 if self.needed_locks is None:
351 self.needed_locks = {}
353 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
354 "_ExpandAndLockInstance called with instance-level locks set"
355 self.op.instance_name = _ExpandInstanceName(self.cfg,
356 self.op.instance_name)
357 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
359 def _LockInstancesNodes(self, primary_only=False,
360 level=locking.LEVEL_NODE):
361 """Helper function to declare instances' nodes for locking.
363 This function should be called after locking one or more instances to lock
364 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
365 with all primary or secondary nodes for instances already locked and
366 present in self.needed_locks[locking.LEVEL_INSTANCE].
368 It should be called from DeclareLocks, and for safety only works if
369 self.recalculate_locks[locking.LEVEL_NODE] is set.
371 In the future it may grow parameters to just lock some instance's nodes, or
372 to just lock primaries or secondary nodes, if needed.
374 If should be called in DeclareLocks in a way similar to::
376 if level == locking.LEVEL_NODE:
377 self._LockInstancesNodes()
379 @type primary_only: boolean
380 @param primary_only: only lock primary nodes of locked instances
381 @param level: Which lock level to use for locking nodes
384 assert level in self.recalculate_locks, \
385 "_LockInstancesNodes helper function called with no nodes to recalculate"
387 # TODO: check if we're really been called with the instance locks held
389 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
390 # future we might want to have different behaviors depending on the value
391 # of self.recalculate_locks[locking.LEVEL_NODE]
393 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
394 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
395 wanted_nodes.append(instance.primary_node)
397 wanted_nodes.extend(instance.secondary_nodes)
399 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
400 self.needed_locks[level] = wanted_nodes
401 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
402 self.needed_locks[level].extend(wanted_nodes)
404 raise errors.ProgrammerError("Unknown recalculation mode")
406 del self.recalculate_locks[level]
class NoHooksLU(LogicalUnit): # pylint: disable=W0223
  """Base class for logical units that never run hooks.

  Deriving from this class lets an LU opt out of the hook machinery
  entirely, instead of every such LU re-implementing the two
  hook-related methods itself.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Hook environment builder; must never be invoked for a no-hooks LU.

    """
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")

  def BuildHooksNodes(self):
    """Hook node-list builder; must never be invoked for a no-hooks LU.

    """
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
435 """Tasklet base class.
437 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
438 they can mix legacy code with tasklets. Locking needs to be done in the LU,
439 tasklets know nothing about locks.
441 Subclasses must follow these rules:
442 - Implement CheckPrereq
446 def __init__(self, lu):
453 def CheckPrereq(self):
454 """Check prerequisites for this tasklets.
456 This method should check whether the prerequisites for the execution of
457 this tasklet are fulfilled. It can do internode communication, but it
458 should be idempotent - no cluster or system changes are allowed.
460 The method should raise errors.OpPrereqError in case something is not
461 fulfilled. Its return value is ignored.
463 This method should also update all parameters to their canonical form if it
464 hasn't been done before.
469 def Exec(self, feedback_fn):
470 """Execute the tasklet.
472 This method should implement the actual work. It should raise
473 errors.OpExecError for failures that are somewhat dealt with in code, or
477 raise NotImplementedError
481 """Base for query utility classes.
484 #: Attribute holding field definitions
487 def __init__(self, qfilter, fields, use_locking):
488 """Initializes this class.
491 self.use_locking = use_locking
493 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
495 self.requested_data = self.query.RequestedData()
496 self.names = self.query.RequestedNames()
498 # Sort only if no names were requested
499 self.sort_by_name = not self.names
501 self.do_locking = None
504 def _GetNames(self, lu, all_names, lock_level):
505 """Helper function to determine names asked for in the query.
509 names = lu.owned_locks(lock_level)
513 if self.wanted == locking.ALL_SET:
514 assert not self.names
515 # caller didn't specify names, so ordering is not important
516 return utils.NiceSort(names)
518 # caller specified names and we must keep the same order
520 assert not self.do_locking or lu.glm.is_owned(lock_level)
522 missing = set(self.wanted).difference(names)
524 raise errors.OpExecError("Some items were removed before retrieving"
525 " their data: %s" % missing)
527 # Return expanded names
530 def ExpandNames(self, lu):
531 """Expand names for this query.
533 See L{LogicalUnit.ExpandNames}.
536 raise NotImplementedError()
538 def DeclareLocks(self, lu, level):
539 """Declare locks for this query.
541 See L{LogicalUnit.DeclareLocks}.
544 raise NotImplementedError()
546 def _GetQueryData(self, lu):
547 """Collects all data for this query.
549 @return: Query data object
552 raise NotImplementedError()
554 def NewStyleQuery(self, lu):
555 """Collect data and execute query.
558 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
559 sort_by_name=self.sort_by_name)
561 def OldStyleQuery(self, lu):
562 """Collect data and execute query.
565 return self.query.OldStyleQuery(self._GetQueryData(lu),
566 sort_by_name=self.sort_by_name)
def _ShareAll():
  """Returns a dict declaring all lock levels shared.

  """
  return dict.fromkeys(locking.LEVELS, 1)
def _MakeLegacyNodeInfo(data):
  """Flattens the result of L{rpc.RpcRunner.call_node_info}.

  The RPC returns the boot ID plus per-volume-group and per-hypervisor
  information; this helper merges everything into one flat dictionary.
  That is fine for most use cases, but loses information when more than
  one volume group or hypervisor was queried.

  """
  (bootid, (vg_info, ), (hv_info, )) = data

  merged = utils.JoinDisjointDicts(vg_info, hv_info)
  return utils.JoinDisjointDicts(merged, {
    "bootid": bootid,
    })
590 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
591 """Checks if the owned node groups are still correct for an instance.
593 @type cfg: L{config.ConfigWriter}
594 @param cfg: The cluster configuration
595 @type instance_name: string
596 @param instance_name: Instance name
597 @type owned_groups: set or frozenset
598 @param owned_groups: List of currently owned node groups
601 inst_groups = cfg.GetInstanceNodeGroups(instance_name)
603 if not owned_groups.issuperset(inst_groups):
604 raise errors.OpPrereqError("Instance %s's node groups changed since"
605 " locks were acquired, current groups are"
606 " are '%s', owning groups '%s'; retry the"
609 utils.CommaJoin(inst_groups),
610 utils.CommaJoin(owned_groups)),
616 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
617 """Checks if the instances in a node group are still correct.
619 @type cfg: L{config.ConfigWriter}
620 @param cfg: The cluster configuration
621 @type group_uuid: string
622 @param group_uuid: Node group UUID
623 @type owned_instances: set or frozenset
624 @param owned_instances: List of currently owned instances
627 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
628 if owned_instances != wanted_instances:
629 raise errors.OpPrereqError("Instances in node group '%s' changed since"
630 " locks were acquired, wanted '%s', have '%s';"
631 " retry the operation" %
633 utils.CommaJoin(wanted_instances),
634 utils.CommaJoin(owned_instances)),
637 return wanted_instances
def _SupportsOob(cfg, node):
  """Tells whether a node supports out-of-band management.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise

  """
  ndparams = cfg.GetNdParams(node)
  return ndparams[constants.ND_OOB_PROGRAM]
def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if not nodes:
    # No explicit selection: all known nodes, nicely sorted
    return utils.NiceSort(lu.cfg.GetNodeList())

  return [_ExpandNodeName(lu.cfg, name) for name in nodes]
def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    # No explicit selection: all known instances, nicely sorted
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())

  return wanted
def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  # Note: fixed the swapped @param/@type epydoc tags for the two
  # keyword arguments in the docstring above.
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      # "reset" markers remove the key; a missing key is not an error
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy
def _UpdateAndVerifySubDict(base, updates, type_check):
  """Updates and verifies a dict with sub dicts of the same type.

  @param base: The dict with the old data
  @param updates: The dict with the new data
  @param type_check: Dict suitable to ForceDictType to verify correct types
  @returns: A new dict with updated and verified values

  """
  def _merge_one(old, value):
    # merge one sub-dict and enforce its value types
    merged = _GetUpdatedParams(old, value)
    utils.ForceDictType(merged, type_check)
    return merged

  ret = copy.deepcopy(base)
  for key, value in updates.items():
    ret[key] = _merge_one(base.get(key, {}), value)
  return ret
744 def _MergeAndVerifyHvState(op_input, obj_input):
745 """Combines the hv state from an opcode with the one of the object
747 @param op_input: The input dict from the opcode
748 @param obj_input: The input dict from the objects
749 @return: The verified and updated dict
753 invalid_hvs = set(op_input) - constants.HYPER_TYPES
755 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
756 " %s" % utils.CommaJoin(invalid_hvs),
758 if obj_input is None:
760 type_check = constants.HVSTS_PARAMETER_TYPES
761 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
766 def _MergeAndVerifyDiskState(op_input, obj_input):
767 """Combines the disk state from an opcode with the one of the object
769 @param op_input: The input dict from the opcode
770 @param obj_input: The input dict from the objects
771 @return: The verified and updated dict
774 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
776 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
777 utils.CommaJoin(invalid_dst),
779 type_check = constants.DSS_PARAMETER_TYPES
780 if obj_input is None:
782 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
784 for key, value in op_input.items())
789 def _ReleaseLocks(lu, level, names=None, keep=None):
790 """Releases locks owned by an LU.
792 @type lu: L{LogicalUnit}
793 @param level: Lock level
794 @type names: list or None
795 @param names: Names of locks to release
796 @type keep: list or None
797 @param keep: Names of locks to retain
800 assert not (keep is not None and names is not None), \
801 "Only one of the 'names' and the 'keep' parameters can be given"
803 if names is not None:
804 should_release = names.__contains__
806 should_release = lambda name: name not in keep
808 should_release = None
810 owned = lu.owned_locks(level)
812 # Not owning any lock at this level, do nothing
819 # Determine which locks to release
821 if should_release(name):
826 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
828 # Release just some locks
829 lu.glm.release(level, names=release)
831 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
834 lu.glm.release(level)
836 assert not lu.glm.is_owned(level), "No locks should be owned"
839 def _MapInstanceDisksToNodes(instances):
840 """Creates a map from (node, volume) to instance name.
842 @type instances: list of L{objects.Instance}
843 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
846 return dict(((node, vol), inst.name)
847 for inst in instances
848 for (node, vols) in inst.MapLVsByNode().items()
def _RunPostHook(lu, node_name):
  """Runs the post-hook for an opcode on a single node.

  Hook failures are deliberately reduced to a warning: a failing hook
  must not abort the surrounding operation.

  """
  hooks_mgr = lu.proc.BuildHooksManager(lu)
  try:
    hooks_mgr.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
  except:
    # pylint: disable=W0702
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)
864 def _CheckOutputFields(static, dynamic, selected):
865 """Checks whether all selected fields are valid.
867 @type static: L{utils.FieldSet}
868 @param static: static fields set
869 @type dynamic: L{utils.FieldSet}
870 @param dynamic: dynamic fields set
877 delta = f.NonMatching(selected)
879 raise errors.OpPrereqError("Unknown output fields selected: %s"
880 % ",".join(delta), errors.ECODE_INVAL)
def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  cluster-wide hypervisor parameters.

  @raise errors.OpPrereqError: if any global parameter is present

  """
  globals_used = constants.HVC_GLOBALS.intersection(params)
  if globals_used:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(globals_used))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
898 def _CheckNodeOnline(lu, node, msg=None):
899 """Ensure that a given node is online.
901 @param lu: the LU on behalf of which we make the check
902 @param node: the node to check
903 @param msg: if passed, should be a message to replace the default one
904 @raise errors.OpPrereqError: if the node is offline
908 msg = "Can't use offline node"
909 if lu.cfg.GetNodeInfo(node).offline:
910 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
913 def _CheckNodeNotDrained(lu, node):
914 """Ensure that a given node is not drained.
916 @param lu: the LU on behalf of which we make the check
917 @param node: the node to check
918 @raise errors.OpPrereqError: if the node is drained
921 if lu.cfg.GetNodeInfo(node).drained:
922 raise errors.OpPrereqError("Can't use drained node %s" % node,
926 def _CheckNodeVmCapable(lu, node):
927 """Ensure that a given node is vm capable.
929 @param lu: the LU on behalf of which we make the check
930 @param node: the node to check
931 @raise errors.OpPrereqError: if the node is not vm capable
934 if not lu.cfg.GetNodeInfo(node).vm_capable:
935 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    # also check that the requested variant is valid for this OS
    _CheckOSVariant(result.payload, os_name)
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)
def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  secret_file = constants.CLUSTER_DOMAIN_SECRET_FILE
  return utils.ReadOneLineFile(secret_file, strict=True)
def _CheckInstanceState(lu, instance, req_states, msg=None):
  """Ensure that an instance is in one of the required states.

  @param lu: the LU on behalf of which we make the check
  @param instance: the instance to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the instance is not in the required state

  """
  if msg is None:
    msg = "can't use instance from outside %s states" % ", ".join(req_states)
  if instance.admin_state not in req_states:
    raise errors.OpPrereqError("Instance %s is marked to be %s, %s" %
                               (instance, instance.admin_state, msg),
                               errors.ECODE_STATE)

  if constants.ADMINST_UP not in req_states:
    # the instance must not be running; ask its primary node
    pnode = instance.primary_node
    ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
    ins_l.Raise("Can't contact node %s for instance information" % pnode,
                prereq=True, ecode=errors.ECODE_ENVIRON)

    if instance.name in ins_l.payload:
      raise errors.OpPrereqError("Instance %s is running, %s" %
                                 (instance.name, msg), errors.ECODE_STATE)
def _CheckMinMaxSpecs(name, ipolicy, value):
  """Checks if value is in the desired range.

  @param name: name of the parameter for which we perform the check
  @param ipolicy: dictionary containing min, max and std values
  @param value: actual value that we want to use
  @return: None or element not meeting the criteria

  """
  if value in [None, constants.VALUE_AUTO]:
    # unset / auto values are always acceptable
    return None
  max_v = ipolicy[constants.MAX_ISPECS].get(name, value)
  min_v = ipolicy[constants.MIN_ISPECS].get(name, value)
  if value > max_v or min_v > value:
    return ("%s value %s is not in range [%s, %s]" %
            (name, value, min_v, max_v))
  return None
1039 def _ExpandItemName(fn, name, kind):
1040 """Expand an item name.
1042 @param fn: the function to use for expansion
1043 @param name: requested item name
1044 @param kind: text description ('Node' or 'Instance')
1045 @return: the resolved (full) name
1046 @raise errors.OpPrereqError: if the item is not found
1049 full_name = fn(name)
1050 if full_name is None:
1051 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes.

  """
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instances.

  """
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
1066 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1067 minmem, maxmem, vcpus, nics, disk_template, disks,
1068 bep, hvp, hypervisor_name, tags):
1069 """Builds instance related env variables for hooks
1071 This builds the hook environment from individual variables.
1074 @param name: the name of the instance
1075 @type primary_node: string
1076 @param primary_node: the name of the instance's primary node
1077 @type secondary_nodes: list
1078 @param secondary_nodes: list of secondary nodes as strings
1079 @type os_type: string
1080 @param os_type: the name of the instance's OS
1081 @type status: string
1082 @param status: the desired status of the instance
1083 @type minmem: string
1084 @param minmem: the minimum memory size of the instance
1085 @type maxmem: string
1086 @param maxmem: the maximum memory size of the instance
1088 @param vcpus: the count of VCPUs the instance has
1090 @param nics: list of tuples (ip, mac, mode, link) representing
1091 the NICs the instance has
1092 @type disk_template: string
1093 @param disk_template: the disk template of the instance
1095 @param disks: the list of (size, mode) pairs
1097 @param bep: the backend parameters for the instance
1099 @param hvp: the hypervisor parameters for the instance
1100 @type hypervisor_name: string
1101 @param hypervisor_name: the hypervisor for the instance
1103 @param tags: list of instance tags as strings
1105 @return: the hook environment for this instance
1110 "INSTANCE_NAME": name,
1111 "INSTANCE_PRIMARY": primary_node,
1112 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1113 "INSTANCE_OS_TYPE": os_type,
1114 "INSTANCE_STATUS": status,
1115 "INSTANCE_MINMEM": minmem,
1116 "INSTANCE_MAXMEM": maxmem,
1117 # TODO(2.7) remove deprecated "memory" value
1118 "INSTANCE_MEMORY": maxmem,
1119 "INSTANCE_VCPUS": vcpus,
1120 "INSTANCE_DISK_TEMPLATE": disk_template,
1121 "INSTANCE_HYPERVISOR": hypervisor_name,
1124 nic_count = len(nics)
1125 for idx, (ip, mac, mode, link) in enumerate(nics):
1128 env["INSTANCE_NIC%d_IP" % idx] = ip
1129 env["INSTANCE_NIC%d_MAC" % idx] = mac
1130 env["INSTANCE_NIC%d_MODE" % idx] = mode
1131 env["INSTANCE_NIC%d_LINK" % idx] = link
1132 if mode == constants.NIC_MODE_BRIDGED:
1133 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1137 env["INSTANCE_NIC_COUNT"] = nic_count
1140 disk_count = len(disks)
1141 for idx, (size, mode) in enumerate(disks):
1142 env["INSTANCE_DISK%d_SIZE" % idx] = size
1143 env["INSTANCE_DISK%d_MODE" % idx] = mode
1147 env["INSTANCE_DISK_COUNT"] = disk_count
1152 env["INSTANCE_TAGS"] = " ".join(tags)
1154 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1155 for key, value in source.items():
1156 env["INSTANCE_%s_%s" % (kind, key)] = value
1161 def _NICListToTuple(lu, nics):
1162 """Build a list of nic information tuples.
1164 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1165 value in LUInstanceQueryData.
1167 @type lu: L{LogicalUnit}
1168 @param lu: the logical unit on whose behalf we execute
1169 @type nics: list of L{objects.NIC}
1170 @param nics: list of nics to convert to hooks tuples
1174 cluster = lu.cfg.GetClusterInfo()
1178 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1179 mode = filled_params[constants.NIC_MODE]
1180 link = filled_params[constants.NIC_LINK]
1181 hooks_nics.append((ip, mac, mode, link))
1185 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1186 """Builds instance related env variables for hooks from an object.
1188 @type lu: L{LogicalUnit}
1189 @param lu: the logical unit on whose behalf we execute
1190 @type instance: L{objects.Instance}
1191 @param instance: the instance for which we should build the
1193 @type override: dict
1194 @param override: dictionary with key/values that will override
1197 @return: the hook environment dictionary
1200 cluster = lu.cfg.GetClusterInfo()
1201 bep = cluster.FillBE(instance)
1202 hvp = cluster.FillHV(instance)
1204 "name": instance.name,
1205 "primary_node": instance.primary_node,
1206 "secondary_nodes": instance.secondary_nodes,
1207 "os_type": instance.os,
1208 "status": instance.admin_state,
1209 "maxmem": bep[constants.BE_MAXMEM],
1210 "minmem": bep[constants.BE_MINMEM],
1211 "vcpus": bep[constants.BE_VCPUS],
1212 "nics": _NICListToTuple(lu, instance.nics),
1213 "disk_template": instance.disk_template,
1214 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1217 "hypervisor_name": instance.hypervisor,
1218 "tags": instance.tags,
1221 args.update(override)
1222 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1225 def _AdjustCandidatePool(lu, exceptions):
1226 """Adjust the candidate pool after node operations.
1229 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1231 lu.LogInfo("Promoted nodes to master candidate role: %s",
1232 utils.CommaJoin(node.name for node in mod_list))
1233 for name in mod_list:
1234 lu.context.ReaddNode(name)
1235 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1237 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1241 def _DecideSelfPromotion(lu, exceptions=None):
1242 """Decide whether I should promote myself as a master candidate.
1245 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1246 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1247 # the new node will increase mc_max with one, so:
1248 mc_should = min(mc_should + 1, cp_size)
1249 return mc_now < mc_should
1252 def _CalculateGroupIPolicy(cfg, group):
1253 """Calculate instance policy for group.
1256 cluster = cfg.GetClusterInfo()
1257 return cluster.SimpleFillIPolicy(group.ipolicy)
1260 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1261 """Check that the brigdes needed by a list of nics exist.
1264 cluster = lu.cfg.GetClusterInfo()
1265 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1266 brlist = [params[constants.NIC_LINK] for params in paramslist
1267 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1269 result = lu.rpc.call_bridges_exist(target_node, brlist)
1270 result.Raise("Error checking bridges on destination node '%s'" %
1271 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1274 def _CheckInstanceBridgesExist(lu, instance, node=None):
1275 """Check that the brigdes needed by an instance exist.
1279 node = instance.primary_node
1280 _CheckNicsBridgesExist(lu, instance.nics, node)
1283 def _CheckOSVariant(os_obj, name):
1284 """Check whether an OS name conforms to the os variants specification.
1286 @type os_obj: L{objects.OS}
1287 @param os_obj: OS object to check
1289 @param name: OS name passed by the user, to check for validity
1292 variant = objects.OS.GetVariant(name)
1293 if not os_obj.supported_variants:
1295 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1296 " passed)" % (os_obj.name, variant),
1300 raise errors.OpPrereqError("OS name must include a variant",
1303 if variant not in os_obj.supported_variants:
1304 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1307 def _GetNodeInstancesInner(cfg, fn):
1308 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  @param cfg: cluster configuration
  @param node_name: name of the node to look up

  """
  def _OnNode(inst):
    # all_nodes covers both the primary and the secondary role
    return node_name in inst.all_nodes
  return _GetNodeInstancesInner(cfg, _OnNode)
def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  @param cfg: cluster configuration
  @param node_name: name of the node to look up

  """
  def _IsPrimary(inst):
    return inst.primary_node == node_name
  return _GetNodeInstancesInner(cfg, _IsPrimary)
def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  @param cfg: cluster configuration
  @param node_name: name of the node to look up

  """
  def _IsSecondary(inst):
    return node_name in inst.secondary_nodes
  return _GetNodeInstancesInner(cfg, _IsSecondary)
1335 def _GetStorageTypeArgs(cfg, storage_type):
1336 """Returns the arguments for a storage type.
1339 # Special case for file storage
1340 if storage_type == constants.ST_FILE:
1341 # storage.FileStorage wants a list of storage directories
1342 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1347 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1350 for dev in instance.disks:
1351 cfg.SetDiskID(dev, node_name)
1353 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
1354 result.Raise("Failed to get disk status from node %s" % node_name,
1355 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1357 for idx, bdev_status in enumerate(result.payload):
1358 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1364 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1365 """Check the sanity of iallocator and node arguments and use the
1366 cluster-wide iallocator if appropriate.
1368 Check that at most one of (iallocator, node) is specified. If none is
1369 specified, then the LU's opcode's iallocator slot is filled with the
1370 cluster-wide default iallocator.
1372 @type iallocator_slot: string
1373 @param iallocator_slot: the name of the opcode iallocator slot
1374 @type node_slot: string
1375 @param node_slot: the name of the opcode target node slot
1378 node = getattr(lu.op, node_slot, None)
1379 iallocator = getattr(lu.op, iallocator_slot, None)
1381 if node is not None and iallocator is not None:
1382 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1384 elif node is None and iallocator is None:
1385 default_iallocator = lu.cfg.GetDefaultIAllocator()
1386 if default_iallocator:
1387 setattr(lu.op, iallocator_slot, default_iallocator)
1389 raise errors.OpPrereqError("No iallocator or node given and no"
1390 " cluster-wide default iallocator found;"
1391 " please specify either an iallocator or a"
1392 " node, or set a cluster-wide default"
1396 def _GetDefaultIAllocator(cfg, iallocator):
1397 """Decides on which iallocator to use.
1399 @type cfg: L{config.ConfigWriter}
1400 @param cfg: Cluster configuration object
1401 @type iallocator: string or None
1402 @param iallocator: Iallocator specified in opcode
1404 @return: Iallocator name
1408 # Use default iallocator
1409 iallocator = cfg.GetDefaultIAllocator()
1412 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1413 " opcode nor as a cluster-wide default",
1419 class LUClusterPostInit(LogicalUnit):
1420 """Logical unit for running hooks after cluster initialization.
1423 HPATH = "cluster-init"
1424 HTYPE = constants.HTYPE_CLUSTER
1426 def BuildHooksEnv(self):
1431 "OP_TARGET": self.cfg.GetClusterName(),
1434 def BuildHooksNodes(self):
1435 """Build hooks nodes.
1438 return ([], [self.cfg.GetMasterNode()])
1440 def Exec(self, feedback_fn):
1447 class LUClusterDestroy(LogicalUnit):
1448 """Logical unit for destroying the cluster.
1451 HPATH = "cluster-destroy"
1452 HTYPE = constants.HTYPE_CLUSTER
1454 def BuildHooksEnv(self):
1459 "OP_TARGET": self.cfg.GetClusterName(),
1462 def BuildHooksNodes(self):
1463 """Build hooks nodes.
1468 def CheckPrereq(self):
1469 """Check prerequisites.
1471 This checks whether the cluster is empty.
1473 Any errors are signaled by raising errors.OpPrereqError.
1476 master = self.cfg.GetMasterNode()
1478 nodelist = self.cfg.GetNodeList()
1479 if len(nodelist) != 1 or nodelist[0] != master:
1480 raise errors.OpPrereqError("There are still %d node(s) in"
1481 " this cluster." % (len(nodelist) - 1),
1483 instancelist = self.cfg.GetInstanceList()
1485 raise errors.OpPrereqError("There are still %d instance(s) in"
1486 " this cluster." % len(instancelist),
1489 def Exec(self, feedback_fn):
1490 """Destroys the cluster.
1493 master_params = self.cfg.GetMasterNetworkParameters()
1495 # Run post hooks on master node before it's removed
1496 _RunPostHook(self, master_params.name)
1498 ems = self.cfg.GetUseExternalMipScript()
1499 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1501 result.Raise("Could not disable the master role")
1503 return master_params.name
1506 def _VerifyCertificate(filename):
1507 """Verifies a certificate for L{LUClusterVerifyConfig}.
1509 @type filename: string
1510 @param filename: Path to PEM file
1514 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1515 utils.ReadFile(filename))
1516 except Exception, err: # pylint: disable=W0703
1517 return (LUClusterVerifyConfig.ETYPE_ERROR,
1518 "Failed to load X509 certificate %s: %s" % (filename, err))
1521 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1522 constants.SSL_CERT_EXPIRATION_ERROR)
1525 fnamemsg = "While verifying %s: %s" % (filename, msg)
1530 return (None, fnamemsg)
1531 elif errcode == utils.CERT_WARNING:
1532 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1533 elif errcode == utils.CERT_ERROR:
1534 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1536 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1539 def _GetAllHypervisorParameters(cluster, instances):
1540 """Compute the set of all hypervisor parameters.
1542 @type cluster: L{objects.Cluster}
1543 @param cluster: the cluster object
1544 @param instances: list of L{objects.Instance}
1545 @param instances: additional instances from which to obtain parameters
1546 @rtype: list of (origin, hypervisor, parameters)
1547 @return: a list with all parameters found, indicating the hypervisor they
1548 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1553 for hv_name in cluster.enabled_hypervisors:
1554 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1556 for os_name, os_hvp in cluster.os_hvp.items():
1557 for hv_name, hv_params in os_hvp.items():
1559 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1560 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1562 # TODO: collapse identical parameter values in a single one
1563 for instance in instances:
1564 if instance.hvparams:
1565 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1566 cluster.FillHV(instance)))
1571 class _VerifyErrors(object):
1572 """Mix-in for cluster/group verify LUs.
1574 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1575 self.op and self._feedback_fn to be available.)
1579 ETYPE_FIELD = "code"
1580 ETYPE_ERROR = "ERROR"
1581 ETYPE_WARNING = "WARNING"
1583 def _Error(self, ecode, item, msg, *args, **kwargs):
1584 """Format an error message.
1586 Based on the opcode's error_codes parameter, either format a
1587 parseable error code, or a simpler error string.
1589 This must be called only from Exec and functions called from Exec.
1592 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1593 itype, etxt, _ = ecode
1594 # first complete the msg
1597 # then format the whole message
1598 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1599 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1605 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1606 # and finally report it via the feedback_fn
1607 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
1609 def _ErrorIf(self, cond, ecode, *args, **kwargs):
1610 """Log an error message if the passed condition is True.
1614 or self.op.debug_simulate_errors) # pylint: disable=E1101
1616 # If the error code is in the list of ignored errors, demote the error to a
1618 (_, etxt, _) = ecode
1619 if etxt in self.op.ignore_errors: # pylint: disable=E1101
1620 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1623 self._Error(ecode, *args, **kwargs)
1625 # do not mark the operation as failed for WARN cases only
1626 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1627 self.bad = self.bad or cond
1630 class LUClusterVerify(NoHooksLU):
1631 """Submits all jobs necessary to verify the cluster.
  def ExpandNames(self):
    # No locks are needed here: this LU only assembles and submits the
    # per-group verification jobs in Exec; the submitted opcodes do their
    # own locking.
    self.needed_locks = {}
1639 def Exec(self, feedback_fn):
1642 if self.op.group_name:
1643 groups = [self.op.group_name]
1644 depends_fn = lambda: None
1646 groups = self.cfg.GetNodeGroupList()
1648 # Verify global configuration
1650 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1653 # Always depend on global verification
1654 depends_fn = lambda: [(-len(jobs), [])]
1656 jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1657 ignore_errors=self.op.ignore_errors,
1658 depends=depends_fn())]
1659 for group in groups)
1661 # Fix up all parameters
1662 for op in itertools.chain(*jobs): # pylint: disable=W0142
1663 op.debug_simulate_errors = self.op.debug_simulate_errors
1664 op.verbose = self.op.verbose
1665 op.error_codes = self.op.error_codes
1667 op.skip_checks = self.op.skip_checks
1668 except AttributeError:
1669 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1671 return ResultWithJobs(jobs)
1674 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1675 """Verifies the cluster config.
1680 def _VerifyHVP(self, hvp_data):
1681 """Verifies locally the syntax of the hypervisor parameters.
1684 for item, hv_name, hv_params in hvp_data:
1685 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1688 hv_class = hypervisor.GetHypervisor(hv_name)
1689 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1690 hv_class.CheckParameterSyntax(hv_params)
1691 except errors.GenericError, err:
1692 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1694 def ExpandNames(self):
1695 # Information can be safely retrieved as the BGL is acquired in exclusive
1697 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
1698 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1699 self.all_node_info = self.cfg.GetAllNodesInfo()
1700 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1701 self.needed_locks = {}
1703 def Exec(self, feedback_fn):
1704 """Verify integrity of cluster, performing various test on nodes.
1708 self._feedback_fn = feedback_fn
1710 feedback_fn("* Verifying cluster config")
1712 for msg in self.cfg.VerifyConfig():
1713 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1715 feedback_fn("* Verifying cluster certificate files")
1717 for cert_filename in constants.ALL_CERT_FILES:
1718 (errcode, msg) = _VerifyCertificate(cert_filename)
1719 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1721 feedback_fn("* Verifying hypervisor parameters")
1723 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1724 self.all_inst_info.values()))
1726 feedback_fn("* Verifying all nodes belong to an existing group")
1728 # We do this verification here because, should this bogus circumstance
1729 # occur, it would never be caught by VerifyGroup, which only acts on
1730 # nodes/instances reachable from existing node groups.
1732 dangling_nodes = set(node.name for node in self.all_node_info.values()
1733 if node.group not in self.all_group_info)
1735 dangling_instances = {}
1736 no_node_instances = []
1738 for inst in self.all_inst_info.values():
1739 if inst.primary_node in dangling_nodes:
1740 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1741 elif inst.primary_node not in self.all_node_info:
1742 no_node_instances.append(inst.name)
1747 utils.CommaJoin(dangling_instances.get(node.name,
1749 for node in dangling_nodes]
1751 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
1753 "the following nodes (and their instances) belong to a non"
1754 " existing group: %s", utils.CommaJoin(pretty_dangling))
1756 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
1758 "the following instances have a non-existing primary-node:"
1759 " %s", utils.CommaJoin(no_node_instances))
1764 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1765 """Verifies the status of a node group.
1768 HPATH = "cluster-verify"
1769 HTYPE = constants.HTYPE_CLUSTER
1772 _HOOKS_INDENT_RE = re.compile("^", re.M)
1774 class NodeImage(object):
1775 """A class representing the logical and physical status of a node.
1778 @ivar name: the node name to which this object refers
1779 @ivar volumes: a structure as returned from
1780 L{ganeti.backend.GetVolumeList} (runtime)
1781 @ivar instances: a list of running instances (runtime)
1782 @ivar pinst: list of configured primary instances (config)
1783 @ivar sinst: list of configured secondary instances (config)
1784 @ivar sbp: dictionary of {primary-node: list of instances} for all
1785 instances for which this node is secondary (config)
1786 @ivar mfree: free memory, as reported by hypervisor (runtime)
1787 @ivar dfree: free disk, as reported by the node (runtime)
1788 @ivar offline: the offline status (config)
1789 @type rpc_fail: boolean
1790 @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1791 not whether the individual keys were correct) (runtime)
1792 @type lvm_fail: boolean
1793 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1794 @type hyp_fail: boolean
1795 @ivar hyp_fail: whether the RPC call didn't return the instance list
1796 @type ghost: boolean
1797 @ivar ghost: whether this is a known node or not (config)
1798 @type os_fail: boolean
1799 @ivar os_fail: whether the RPC call didn't return valid OS data
1801 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1802 @type vm_capable: boolean
1803 @ivar vm_capable: whether the node can host instances
1806 def __init__(self, offline=False, name=None, vm_capable=True):
1815 self.offline = offline
1816 self.vm_capable = vm_capable
1817 self.rpc_fail = False
1818 self.lvm_fail = False
1819 self.hyp_fail = False
1821 self.os_fail = False
1824 def ExpandNames(self):
1825 # This raises errors.OpPrereqError on its own:
1826 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1828 # Get instances in node group; this is unsafe and needs verification later
1829 inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
1831 self.needed_locks = {
1832 locking.LEVEL_INSTANCE: inst_names,
1833 locking.LEVEL_NODEGROUP: [self.group_uuid],
1834 locking.LEVEL_NODE: [],
1837 self.share_locks = _ShareAll()
1839 def DeclareLocks(self, level):
1840 if level == locking.LEVEL_NODE:
1841 # Get members of node group; this is unsafe and needs verification later
1842 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1844 all_inst_info = self.cfg.GetAllInstancesInfo()
1846 # In Exec(), we warn about mirrored instances that have primary and
1847 # secondary living in separate node groups. To fully verify that
1848 # volumes for these instances are healthy, we will need to do an
1849 # extra call to their secondaries. We ensure here those nodes will
1851 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
1852 # Important: access only the instances whose lock is owned
1853 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1854 nodes.update(all_inst_info[inst].secondary_nodes)
1856 self.needed_locks[locking.LEVEL_NODE] = nodes
1858 def CheckPrereq(self):
1859 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
1860 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
1862 group_nodes = set(self.group_info.members)
1863 group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
1866 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1868 unlocked_instances = \
1869 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
1872 raise errors.OpPrereqError("Missing lock for nodes: %s" %
1873 utils.CommaJoin(unlocked_nodes))
1875 if unlocked_instances:
1876 raise errors.OpPrereqError("Missing lock for instances: %s" %
1877 utils.CommaJoin(unlocked_instances))
1879 self.all_node_info = self.cfg.GetAllNodesInfo()
1880 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1882 self.my_node_names = utils.NiceSort(group_nodes)
1883 self.my_inst_names = utils.NiceSort(group_instances)
1885 self.my_node_info = dict((name, self.all_node_info[name])
1886 for name in self.my_node_names)
1888 self.my_inst_info = dict((name, self.all_inst_info[name])
1889 for name in self.my_inst_names)
1891 # We detect here the nodes that will need the extra RPC calls for verifying
1892 # split LV volumes; they should be locked.
1893 extra_lv_nodes = set()
1895 for inst in self.my_inst_info.values():
1896 if inst.disk_template in constants.DTS_INT_MIRROR:
1897 group = self.my_node_info[inst.primary_node].group
1898 for nname in inst.secondary_nodes:
1899 if self.all_node_info[nname].group != group:
1900 extra_lv_nodes.add(nname)
1902 unlocked_lv_nodes = \
1903 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1905 if unlocked_lv_nodes:
1906 raise errors.OpPrereqError("these nodes could be locked: %s" %
1907 utils.CommaJoin(unlocked_lv_nodes))
1908 self.extra_lv_nodes = list(extra_lv_nodes)
1910 def _VerifyNode(self, ninfo, nresult):
1911 """Perform some basic validation on data returned from a node.
1913 - check the result data structure is well formed and has all the
1915 - check ganeti version
1917 @type ninfo: L{objects.Node}
1918 @param ninfo: the node to check
1919 @param nresult: the results from the node
1921 @return: whether overall this call was successful (and we can expect
1922 reasonable values in the respose)
1926 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1928 # main result, nresult should be a non-empty dict
1929 test = not nresult or not isinstance(nresult, dict)
1930 _ErrorIf(test, constants.CV_ENODERPC, node,
1931 "unable to verify node: no data returned")
1935 # compares ganeti version
1936 local_version = constants.PROTOCOL_VERSION
1937 remote_version = nresult.get("version", None)
1938 test = not (remote_version and
1939 isinstance(remote_version, (list, tuple)) and
1940 len(remote_version) == 2)
1941 _ErrorIf(test, constants.CV_ENODERPC, node,
1942 "connection to node returned invalid data")
1946 test = local_version != remote_version[0]
1947 _ErrorIf(test, constants.CV_ENODEVERSION, node,
1948 "incompatible protocol versions: master %s,"
1949 " node %s", local_version, remote_version[0])
1953 # node seems compatible, we can actually try to look into its results
1955 # full package version
1956 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1957 constants.CV_ENODEVERSION, node,
1958 "software version mismatch: master %s, node %s",
1959 constants.RELEASE_VERSION, remote_version[1],
1960 code=self.ETYPE_WARNING)
1962 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1963 if ninfo.vm_capable and isinstance(hyp_result, dict):
1964 for hv_name, hv_result in hyp_result.iteritems():
1965 test = hv_result is not None
1966 _ErrorIf(test, constants.CV_ENODEHV, node,
1967 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1969 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1970 if ninfo.vm_capable and isinstance(hvp_result, list):
1971 for item, hv_name, hv_result in hvp_result:
1972 _ErrorIf(True, constants.CV_ENODEHV, node,
1973 "hypervisor %s parameter verify failure (source %s): %s",
1974 hv_name, item, hv_result)
1976 test = nresult.get(constants.NV_NODESETUP,
1977 ["Missing NODESETUP results"])
1978 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
1983 def _VerifyNodeTime(self, ninfo, nresult,
1984 nvinfo_starttime, nvinfo_endtime):
1985 """Check the node time.
1987 @type ninfo: L{objects.Node}
1988 @param ninfo: the node to check
1989 @param nresult: the remote results for the node
1990 @param nvinfo_starttime: the start time of the RPC call
1991 @param nvinfo_endtime: the end time of the RPC call
1995 _ErrorIf = self._ErrorIf # pylint: disable=C0103
1997 ntime = nresult.get(constants.NV_TIME, None)
1999 ntime_merged = utils.MergeTime(ntime)
2000 except (ValueError, TypeError):
2001 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2004 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2005 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2006 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2007 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2011 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2012 "Node time diverges by at least %s from master node time",
2015 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2016 """Check the node LVM results.
2018 @type ninfo: L{objects.Node}
2019 @param ninfo: the node to check
2020 @param nresult: the remote results for the node
2021 @param vg_name: the configured VG name
2028 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2030 # checks vg existence and size > 20G
2031 vglist = nresult.get(constants.NV_VGLIST, None)
2033 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2035 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2036 constants.MIN_VG_SIZE)
2037 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2040 pvlist = nresult.get(constants.NV_PVLIST, None)
2041 test = pvlist is None
2042 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2044 # check that ':' is not present in PV names, since it's a
2045 # special character for lvcreate (denotes the range of PEs to
2047 for _, pvname, owner_vg in pvlist:
2048 test = ":" in pvname
2049 _ErrorIf(test, constants.CV_ENODELVM, node,
2050 "Invalid character ':' in PV '%s' of VG '%s'",
2053 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2054 """Check the node bridges.
2056 @type ninfo: L{objects.Node}
2057 @param ninfo: the node to check
2058 @param nresult: the remote results for the node
2059 @param bridges: the expected list of bridges
2066 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2068 missing = nresult.get(constants.NV_BRIDGES, None)
2069 test = not isinstance(missing, list)
2070 _ErrorIf(test, constants.CV_ENODENET, node,
2071 "did not return valid bridge information")
2073 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2074 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2076 def _VerifyNodeUserScripts(self, ninfo, nresult):
2077 """Check the results of user scripts presence and executability on the node
2079 @type ninfo: L{objects.Node}
2080 @param ninfo: the node to check
2081 @param nresult: the remote results for the node
2086 test = not constants.NV_USERSCRIPTS in nresult
2087 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2088 "did not return user scripts information")
2090 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2092 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2093 "user scripts not present or not executable: %s" %
2094 utils.CommaJoin(sorted(broken_scripts)))
2096 def _VerifyNodeNetwork(self, ninfo, nresult):
2097 """Check the node network connectivity results.
2099 @type ninfo: L{objects.Node}
2100 @param ninfo: the node to check
2101 @param nresult: the remote results for the node
2105 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2107 test = constants.NV_NODELIST not in nresult
2108 _ErrorIf(test, constants.CV_ENODESSH, node,
2109 "node hasn't returned node ssh connectivity data")
2111 if nresult[constants.NV_NODELIST]:
2112 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2113 _ErrorIf(True, constants.CV_ENODESSH, node,
2114 "ssh communication with node '%s': %s", a_node, a_msg)
2116 test = constants.NV_NODENETTEST not in nresult
2117 _ErrorIf(test, constants.CV_ENODENET, node,
2118 "node hasn't returned node tcp connectivity data")
2120 if nresult[constants.NV_NODENETTEST]:
2121 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2123 _ErrorIf(True, constants.CV_ENODENET, node,
2124 "tcp communication with node '%s': %s",
2125 anode, nresult[constants.NV_NODENETTEST][anode])
2127 test = constants.NV_MASTERIP not in nresult
2128 _ErrorIf(test, constants.CV_ENODENET, node,
2129 "node hasn't returned node master IP reachability data")
2131 if not nresult[constants.NV_MASTERIP]:
2132 if node == self.master_node:
2133 msg = "the master node cannot reach the master IP (not configured?)"
2135 msg = "cannot reach the master IP"
2136 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2138 def _VerifyInstancePolicy(self, instance):
2139 """Verify instance specs against instance policy set on node group level.
2143 cluster = self.cfg.GetClusterInfo()
2144 full_beparams = cluster.FillBE(instance)
2145 ipolicy = cluster.SimpleFillIPolicy(self.group_info.ipolicy)
2147 mem_size = full_beparams.get(constants.BE_MAXMEM, None)
2148 cpu_count = full_beparams.get(constants.BE_VCPUS, None)
2149 disk_count = len(instance.disks)
2150 disk_sizes = [disk.size for disk in instance.disks]
2151 nic_count = len(instance.nics)
2154 (constants.MEM_SIZE_SPEC, mem_size),
2155 (constants.CPU_COUNT_SPEC, cpu_count),
2156 (constants.DISK_COUNT_SPEC, disk_count),
2157 (constants.NIC_COUNT_SPEC, nic_count),
2158 ] + map((lambda d: (constants.DISK_SIZE_SPEC, d)), disk_sizes)
2160 for (name, value) in test_settings:
2161 test_result = _CheckMinMaxSpecs(name, ipolicy, value)
2162 self._ErrorIf(test_result is not None,
2163 constants.CV_EINSTANCEPOLICY, instance.name,
  def _VerifyInstance(self, instance, instanceconfig, node_image,
                      diskstatus):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    @param instance: the instance name
    @param instanceconfig: the instance's configuration object
    @param node_image: dict of node name to L{NodeImage}
    @param diskstatus: per-node disk status as collected by
        L{_CollectDiskInfo} for this instance

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    self._VerifyInstancePolicy(instanceconfig)

    for node in node_vol_should:
      n_img = node_image[node]
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
      for volume in node_vol_should[node]:
        test = volume not in n_img.volumes
        _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_state == constants.ADMINST_UP:
      pri_img = node_image[node_current]
      test = instance not in pri_img.instances and not pri_img.offline
      _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    # flatten the per-node disk status into (node, success, status, idx)
    diskdata = [(nname, success, status, idx)
                for (nname, disks) in diskstatus.items()
                for idx, (success, status) in enumerate(disks)]

    for nname, success, bdev_status, idx in diskdata:
      # the 'ghost node' construction in Exec() ensures that we have a
      # node here
      snode = node_image[nname]
      bad_snode = snode.ghost or snode.offline
      _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
               not success and not bad_snode,
               constants.CV_EINSTANCEFAULTYDISK, instance,
               "couldn't retrieve status for disk/%s on %s: %s",
               idx, nname, bdev_status)
      _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
                success and bdev_status.ldisk_status == constants.LDS_FAULTY),
               constants.CV_EINSTANCEFAULTYDISK, instance,
               "disk/%s on %s is faulty", idx, nname)
  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    @param node_vol_should: dict of node name to expected LV names
    @param node_image: dict of node name to L{NodeImage}
    @type reserved: L{ganeti.utils.FieldSet}
    @param reserved: a FieldSet of reserved volume names

    """
    for node, n_img in node_image.items():
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # skip non-healthy nodes
        continue
      for volume in n_img.volumes:
        # a volume is orphaned if no instance should have it on this node
        # and it does not match any reserved-name pattern
        test = ((node not in node_vol_should or
                volume not in node_vol_should[node]) and
                not reserved.Matches(volume))
        self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
                      "volume %s is unknown", volume)
2239 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2240 """Verify N+1 Memory Resilience.
2242 Check that if one single node dies we can still start all the
2243 instances it was primary for.
2246 cluster_info = self.cfg.GetClusterInfo()
2247 for node, n_img in node_image.items():
2248 # This code checks that every node which is now listed as
2249 # secondary has enough memory to host all instances it is
2250 # supposed to should a single other node in the cluster fail.
2251 # FIXME: not ready for failover to an arbitrary node
2252 # FIXME: does not support file-backed instances
2253 # WARNING: we currently take into account down instances as well
2254 # as up ones, considering that even if they're down someone
2255 # might want to start them even in the event of a node failure.
2257 # we're skipping offline nodes from the N+1 warning, since
2258 # most likely we don't have good memory infromation from them;
2259 # we already list instances living on such nodes, and that's
2262 #TODO(dynmem): use MINMEM for checking
2263 #TODO(dynmem): also consider ballooning out other instances
2264 for prinode, instances in n_img.sbp.items():
2266 for instance in instances:
2267 bep = cluster_info.FillBE(instance_cfg[instance])
2268 if bep[constants.BE_AUTO_BALANCE]:
2269 needed_mem += bep[constants.BE_MAXMEM]
2270 test = n_img.mfree < needed_mem
2271 self._ErrorIf(test, constants.CV_ENODEN1, node,
2272 "not enough memory to accomodate instance failovers"
2273 " should node %s fail (%dMiB needed, %dMiB available)",
2274 prinode, needed_mem, n_img.mfree)
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
                   (files_all, files_opt, files_mc, files_vm)):
    """Verifies file checksums collected from all nodes.

    @param errorif: Callback for reporting errors
    @param nodeinfo: List of L{objects.Node} objects
    @param master_node: Name of master node
    @param all_nvinfo: RPC results
    @param files_all: set of files that must exist on all nodes
    @param files_opt: set of optional files (all-or-nothing per cluster)
    @param files_mc: set of files for master candidates only
    @param files_vm: set of files for vm_capable nodes only

    """
    # Define functions determining which nodes to consider for a file
    files2nodefn = [
      (files_all, None),
      (files_mc, lambda node: (node.master_candidate or
                               node.name == master_node)),
      (files_vm, lambda node: node.vm_capable),
      ]

    # Build mapping from filename to list of nodes which should have the file
    nodefiles = {}
    for (files, fn) in files2nodefn:
      if fn is None:
        filenodes = nodeinfo
      else:
        filenodes = filter(fn, nodeinfo)
      nodefiles.update((filename,
                        frozenset(map(operator.attrgetter("name"), filenodes)))
                       for filename in files)

    assert set(nodefiles) == (files_all | files_mc | files_vm)

    fileinfo = dict((filename, {}) for filename in nodefiles)
    ignore_nodes = set()

    for node in nodeinfo:
      if node.offline:
        # offline nodes are excluded from all file checks
        ignore_nodes.add(node.name)
        continue

      nresult = all_nvinfo[node.name]

      if nresult.fail_msg or not nresult.payload:
        node_files = None
      else:
        node_files = nresult.payload.get(constants.NV_FILELIST, None)

      test = not (node_files and isinstance(node_files, dict))
      errorif(test, constants.CV_ENODEFILECHECK, node.name,
              "Node did not return file checksum data")
      if test:
        ignore_nodes.add(node.name)
        continue

      # Build per-checksum mapping from filename to nodes having it
      for (filename, checksum) in node_files.items():
        assert filename in nodefiles
        fileinfo[filename].setdefault(checksum, set()).add(node.name)

    for (filename, checksums) in fileinfo.items():
      assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"

      # Nodes having the file
      with_file = frozenset(node_name
                            for nodes in fileinfo[filename].values()
                            for node_name in nodes) - ignore_nodes

      expected_nodes = nodefiles[filename] - ignore_nodes

      # Nodes missing file
      missing_file = expected_nodes - with_file

      if filename in files_opt:
        # Optional files must exist everywhere or nowhere
        errorif(missing_file and missing_file != expected_nodes,
                constants.CV_ECLUSTERFILECHECK, None,
                "File %s is optional, but it must exist on all or no"
                " nodes (not found on %s)",
                filename, utils.CommaJoin(utils.NiceSort(missing_file)))
      else:
        errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
                "File %s is missing from node(s) %s", filename,
                utils.CommaJoin(utils.NiceSort(missing_file)))

        # Warn if a node has a file it shouldn't
        unexpected = with_file - expected_nodes
        errorif(unexpected,
                constants.CV_ECLUSTERFILECHECK, None,
                "File %s should not exist on node(s) %s",
                filename, utils.CommaJoin(utils.NiceSort(unexpected)))

      # See if there are multiple versions of the file
      test = len(checksums) > 1
      if test:
        variants = ["variant %s on %s" %
                    (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
                    for (idx, (checksum, nodes)) in
                    enumerate(sorted(checksums.items()))]
      else:
        variants = []

      errorif(test, constants.CV_ECLUSTERFILECHECK, None,
              "File %s found with %s different checksums (%s)",
              filename, len(checksums), "; ".join(variants))
2381 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2383 """Verifies and the node DRBD status.
2385 @type ninfo: L{objects.Node}
2386 @param ninfo: the node to check
2387 @param nresult: the remote results for the node
2388 @param instanceinfo: the dict of instances
2389 @param drbd_helper: the configured DRBD usermode helper
2390 @param drbd_map: the DRBD map as returned by
2391 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2395 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2398 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2399 test = (helper_result == None)
2400 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2401 "no drbd usermode helper returned")
2403 status, payload = helper_result
2405 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2406 "drbd usermode helper check unsuccessful: %s", payload)
2407 test = status and (payload != drbd_helper)
2408 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2409 "wrong drbd usermode helper: %s", payload)
2411 # compute the DRBD minors
2413 for minor, instance in drbd_map[node].items():
2414 test = instance not in instanceinfo
2415 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2416 "ghost instance '%s' in temporary DRBD map", instance)
2417 # ghost instance should not be running, but otherwise we
2418 # don't give double warnings (both ghost instance and
2419 # unallocated minor in use)
2421 node_drbd[minor] = (instance, False)
2423 instance = instanceinfo[instance]
2424 node_drbd[minor] = (instance.name,
2425 instance.admin_state == constants.ADMINST_UP)
2427 # and now check them
2428 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2429 test = not isinstance(used_minors, (tuple, list))
2430 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2431 "cannot parse drbd status file: %s", str(used_minors))
2433 # we cannot check drbd status
2436 for minor, (iname, must_exist) in node_drbd.items():
2437 test = minor not in used_minors and must_exist
2438 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2439 "drbd minor %d of instance %s is not active", minor, iname)
2440 for minor in used_minors:
2441 test = minor not in node_drbd
2442 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2443 "unallocated drbd minor %d is in use", minor)
  def _UpdateNodeOS(self, ninfo, nresult, nimg):
    """Builds the node OS structures.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    remote_os = nresult.get(constants.NV_OSLIST, None)
    # each OS entry must be a 7-element list (see the unpacking below)
    test = (not isinstance(remote_os, list) or
            not compat.all(isinstance(v, list) and len(v) == 7
                           for v in remote_os))

    _ErrorIf(test, constants.CV_ENODEOS, node,
             "node hasn't returned valid OS data")

    nimg.os_fail = test
    if test:
      return

    os_dict = {}

    for (name, os_path, status, diagnose,
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:

      if name not in os_dict:
        os_dict[name] = []

      # parameters is a list of lists instead of list of tuples due to
      # JSON lacking a real tuple type, fix it:
      parameters = [tuple(v) for v in parameters]
      os_dict[name].append((os_path, status, diagnose,
                            set(variants), set(parameters), set(api_ver)))

    nimg.oslist = os_dict
  def _VerifyNodeOS(self, ninfo, nimg, base):
    """Verifies the node OS list.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nimg: the node image object
    @param base: the 'template' node we match against (e.g. from the master)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"

    beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
    for os_name, os_data in nimg.oslist.items():
      assert os_data, "Empty OS status for OS %s?!" % os_name
      # only the first entry is authoritative; later ones are shadowed
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
      _ErrorIf(not f_status, constants.CV_ENODEOS, node,
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
      _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
               "OS '%s' has multiple entries (first one shadows the rest): %s",
               os_name, utils.CommaJoin([v[0] for v in os_data]))
      # comparisons with the 'base' image
      test = os_name not in base.oslist
      _ErrorIf(test, constants.CV_ENODEOS, node,
               "Extra OS %s not present on reference node (%s)",
               base.name)
      if test:
        continue
      assert base.oslist[os_name], "Base node has empty OS status?"
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
      if not b_status:
        # base OS is invalid, skipping
        continue
      for kind, a, b in [("API version", f_api, b_api),
                         ("variants list", f_var, b_var),
                         ("parameters", beautify_params(f_param),
                          beautify_params(b_param))]:
        _ErrorIf(a != b, constants.CV_ENODEOS, node,
                 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
                 kind, os_name, base.name,
                 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))

    # check any missing OSes
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
    _ErrorIf(missing, constants.CV_ENODEOS, node,
             "OSes present on reference node %s but missing on this node: %s",
             base.name, utils.CommaJoin(missing))
  def _VerifyOob(self, ninfo, nresult):
    """Verifies out of band functionality of a node.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    # We just have to verify the paths on master and/or master candidates
    # as the oob helper is invoked on the master
    if ((ninfo.master_candidate or ninfo.master_capable) and
        constants.NV_OOB_PATHS in nresult):
      for path_result in nresult[constants.NV_OOB_PATHS]:
        # a non-empty path_result is the error message itself
        self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
    """Verifies and updates the node volume data.

    This function will update a L{NodeImage}'s internal structures
    with data from the remote call.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # pessimistic default, cleared only on fully valid data
    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    if vg_name is None:
      pass
    elif isinstance(lvdata, basestring):
      # a string payload is an error message from the node
      _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
               utils.SafeEncode(lvdata))
    elif not isinstance(lvdata, dict):
      _ErrorIf(True, constants.CV_ENODELVM, node,
               "rpc call to node failed (lvlist)")
    else:
      nimg.volumes = lvdata
      nimg.lvm_fail = False
  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
    """Verifies and updates the node instance list.

    If the listing was successful, then updates this node's instance
    list. Otherwise, it marks the RPC call as failed for the instance
    list.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    idata = nresult.get(constants.NV_INSTANCELIST, None)
    test = not isinstance(idata, list)
    self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
                  "rpc call to node failed (instancelist): %s",
                  utils.SafeEncode(str(idata)))
    if test:
      nimg.hyp_fail = True
    else:
      nimg.instances = idata
  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
    """Verifies and computes a node information map

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # try to read free memory (from the hypervisor)
    hv_info = nresult.get(constants.NV_HVINFO, None)
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
    _ErrorIf(test, constants.CV_ENODEHV, node,
             "rpc call to node failed (hvinfo)")
    if not test:
      try:
        nimg.mfree = int(hv_info["memory_free"])
      except (ValueError, TypeError):
        _ErrorIf(True, constants.CV_ENODERPC, node,
                 "node returned invalid nodeinfo, check hypervisor")

    # FIXME: devise a free space model for file based instances as well
    if vg_name is not None:
      test = (constants.NV_VGLIST not in nresult or
              vg_name not in nresult[constants.NV_VGLIST])
      _ErrorIf(test, constants.CV_ENODELVM, node,
               "node didn't return data for the volume group '%s'"
               " - it is either missing or broken", vg_name)
      if not test:
        try:
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
        except (ValueError, TypeError):
          _ErrorIf(True, constants.CV_ENODERPC, node,
                   "node returned invalid LVM info, check LVM status")
  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
    """Gets per-disk status information for all instances.

    @type nodelist: list of strings
    @param nodelist: Node names
    @type node_image: dict of (name, L{objects.Node})
    @param node_image: Node objects
    @type instanceinfo: dict of (name, L{objects.Instance})
    @param instanceinfo: Instance objects
    @rtype: {instance: {node: [(success, payload)]}}
    @return: a dictionary of per-instance dictionaries with nodes as
        keys and disk information as values; the disk information is a
        list of tuples (success, payload)

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    node_disks = {}
    node_disks_devonly = {}
    diskless_instances = set()
    diskless = constants.DT_DISKLESS

    for nname in nodelist:
      node_instances = list(itertools.chain(node_image[nname].pinst,
                                            node_image[nname].sinst))
      diskless_instances.update(inst for inst in node_instances
                                if instanceinfo[inst].disk_template == diskless)
      disks = [(inst, disk)
               for inst in node_instances
               for disk in instanceinfo[inst].disks]

      if not disks:
        # No need to collect data
        continue

      node_disks[nname] = disks

      # Creating copies as SetDiskID below will modify the objects and that can
      # lead to incorrect data returned from nodes
      devonly = [dev.Copy() for (_, dev) in disks]

      for dev in devonly:
        self.cfg.SetDiskID(dev, nname)

      node_disks_devonly[nname] = devonly

    assert len(node_disks) == len(node_disks_devonly)

    # Collect data from all nodes with disks
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
                                                          node_disks_devonly)

    assert len(result) == len(node_disks)

    instdisk = {}

    for (nname, nres) in result.items():
      disks = node_disks[nname]

      if nres.offline:
        # No data from this node
        data = len(disks) * [(False, "node offline")]
      else:
        msg = nres.fail_msg
        _ErrorIf(msg, constants.CV_ENODERPC, nname,
                 "while getting disk information: %s", msg)
        if msg:
          # No data from this node
          data = len(disks) * [(False, msg)]
        else:
          data = []
          for idx, i in enumerate(nres.payload):
            if isinstance(i, (tuple, list)) and len(i) == 2:
              data.append(i)
            else:
              logging.warning("Invalid result from node %s, entry %d: %s",
                              nname, idx, i)
              data.append((False, "Invalid result from the remote node"))

      for ((inst, _), status) in zip(disks, data):
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)

    # Add empty entries for diskless instances.
    for inst in diskless_instances:
      assert inst not in instdisk
      instdisk[inst] = {}

    # sanity-check the collected structure before returning it
    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
                      compat.all(isinstance(s, (tuple, list)) and
                                 len(s) == 2 for s in statuses)
                      for inst, nnames in instdisk.items()
                      for nname, statuses in nnames.items())
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"

    return instdisk
  def _SshNodeSelector(group_uuid, all_nodes):
    """Create endless iterators for all potential SSH check hosts.

    @param group_uuid: UUID of the node group being verified; its own
        nodes are excluded from the selection
    @param all_nodes: iterable of L{objects.Node} objects for the cluster
    @return: one C{itertools.cycle} iterator per foreign node group,
        each cycling over that group's (sorted) online node names

    """
    nodes = [node for node in all_nodes
             if (node.group != group_uuid and
                 not node.offline)]
    keyfunc = operator.attrgetter("group")

    # NOTE: relies on Python 2 map() returning a list; groupby requires
    # the input to be pre-sorted by the same key
    return map(itertools.cycle,
               [sorted(map(operator.attrgetter("name"), names))
                for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
                                                  keyfunc)])
  def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
    """Choose which nodes should talk to which other nodes.

    We will make nodes contact all nodes in their group, and one node from
    every other group.

    @warning: This algorithm has a known issue if one node group is much
      smaller than others (e.g. just one node). In such a case all other
      nodes will talk to the single node.

    """
    online_nodes = sorted(node.name for node in group_nodes if not node.offline)
    sel = cls._SshNodeSelector(group_uuid, all_nodes)

    # each online node gets the next name from every per-group cycle,
    # distributing the cross-group checks round-robin
    return (online_nodes,
            dict((name, sorted([i.next() for i in sel]))
                 for name in online_nodes))
2775 def BuildHooksEnv(self):
2778 Cluster-Verify hooks just ran in the post phase and their failure makes
2779 the output be logged in the verify output and the verification to fail.
2783 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2786 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2787 for node in self.my_node_info.values())
2791 def BuildHooksNodes(self):
2792 """Build hooks nodes.
2795 return ([], self.my_node_names)
  def Exec(self, feedback_fn):
    """Verify integrity of the node group, performing various test on nodes.

    @param feedback_fn: callback used to report progress to the caller
    @return: True if the group verified cleanly, False otherwise

    """
    # This method has too many local variables. pylint: disable=R0914
    feedback_fn("* Verifying group '%s'" % self.group_info.name)

    if not self.my_node_names:
      # empty node group
      feedback_fn("* Empty node group, skipping verification")
      return True

    self.bad = False

    _ErrorIf = self._ErrorIf # pylint: disable=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn

    vg_name = self.cfg.GetVGName()
    drbd_helper = self.cfg.GetDRBDHelper()
    cluster = self.cfg.GetClusterInfo()
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
    hypervisors = cluster.enabled_hypervisors
    node_data_list = [self.my_node_info[name] for name in self.my_node_names]

    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    i_offline = 0 # Count of offline instances
    n_offline = 0 # Count of offline nodes
    n_drained = 0 # Count of nodes being drained
    node_vol_should = {}

    # FIXME: verify OS list

    # File verification
    filemap = _ComputeAncillaryFiles(cluster, False)

    # do local checksums
    master_node = self.master_node = self.cfg.GetMasterNode()
    master_ip = self.cfg.GetMasterIP()

    feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))

    user_scripts = []
    if self.cfg.GetUseExternalMipScript():
      user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)

    # parameters for the node-verify RPC issued to all group nodes
    node_verify_param = {
      constants.NV_FILELIST:
        utils.UniqueSequence(filename
                             for files in filemap
                             for filename in files),
      constants.NV_NODELIST:
        self._SelectSshCheckNodes(node_data_list, self.group_uuid,
                                  self.all_node_info.values()),
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_HVPARAMS:
        _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
      constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
                                 for node in node_data_list
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      constants.NV_NODESETUP: None,
      constants.NV_TIME: None,
      constants.NV_MASTERIP: (master_node, master_ip),
      constants.NV_OSLIST: None,
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
      constants.NV_USERSCRIPTS: user_scripts,
      }

    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_PVLIST] = [vg_name]
      node_verify_param[constants.NV_DRBDLIST] = None

    if drbd_helper:
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper

    # bridge checks
    # FIXME: this needs to be changed per node-group, not cluster-wide
    bridges = set()
    default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
    if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
      bridges.add(default_nicpp[constants.NIC_LINK])
    for instance in self.my_inst_info.values():
      for nic in instance.nics:
        full_nic = cluster.SimpleFillNIC(nic.nicparams)
        if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          bridges.add(full_nic[constants.NIC_LINK])

    if bridges:
      node_verify_param[constants.NV_BRIDGES] = list(bridges)

    # Build our expected cluster state
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
                                                 name=node.name,
                                                 vm_capable=node.vm_capable))
                      for node in node_data_list)

    # Gather OOB paths
    oob_paths = []
    for node in self.all_node_info.values():
      path = _SupportsOob(self.cfg, node)
      if path and path not in oob_paths:
        oob_paths.append(path)

    if oob_paths:
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths

    for instance in self.my_inst_names:
      inst_config = self.my_inst_info[instance]

      for nname in inst_config.all_nodes:
        if nname not in node_image:
          # ghost node: referenced by an instance but not in our group's data
          gnode = self.NodeImage(name=nname)
          gnode.ghost = (nname not in self.all_node_info)
          node_image[nname] = gnode

      inst_config.MapLVsByNode(node_vol_should)

      pnode = inst_config.primary_node
      node_image[pnode].pinst.append(instance)

      for snode in inst_config.secondary_nodes:
        nimg = node_image[snode]
        nimg.sinst.append(instance)
        if pnode not in nimg.sbp:
          nimg.sbp[pnode] = []
        nimg.sbp[pnode].append(instance)

    # At this point, we have the in-memory data structures complete,
    # except for the runtime information, which we'll gather next

    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
    nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
                                           node_verify_param,
                                           self.cfg.GetClusterName())
    nvinfo_endtime = time.time()

    if self.extra_lv_nodes and vg_name is not None:
      extra_lv_nvinfo = \
          self.rpc.call_node_verify(self.extra_lv_nodes,
                                    {constants.NV_LVLIST: vg_name},
                                    self.cfg.GetClusterName())
    else:
      extra_lv_nvinfo = {}

    all_drbd_map = self.cfg.ComputeDRBDMap()

    feedback_fn("* Gathering disk information (%s nodes)" %
                len(self.my_node_names))
    instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
                                     self.my_inst_info)

    feedback_fn("* Verifying configuration file consistency")

    # If not all nodes are being checked, we need to make sure the master node
    # and a non-checked vm_capable node are in the list.
    absent_nodes = set(self.all_node_info).difference(self.my_node_info)
    if absent_nodes:
      vf_nvinfo = all_nvinfo.copy()
      vf_node_info = list(self.my_node_info.values())
      additional_nodes = []
      if master_node not in self.my_node_info:
        additional_nodes.append(master_node)
        vf_node_info.append(self.all_node_info[master_node])
      # Add the first vm_capable node we find which is not included
      for node in absent_nodes:
        nodeinfo = self.all_node_info[node]
        if nodeinfo.vm_capable and not nodeinfo.offline:
          additional_nodes.append(node)
          vf_node_info.append(self.all_node_info[node])
          break

      key = constants.NV_FILELIST
      vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
                                                 {key: node_verify_param[key]},
                                                 self.cfg.GetClusterName()))
    else:
      vf_nvinfo = all_nvinfo
      vf_node_info = self.my_node_info.values()

    self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)

    feedback_fn("* Verifying node status")

    refos_img = None

    for node_i in node_data_list:
      node = node_i.name
      nimg = node_image[node]

      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline += 1
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained += 1
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
               msg)
      if msg:
        nimg.rpc_fail = True
        continue

      nresult = all_nvinfo[node].payload

      nimg.call_ok = self._VerifyNode(node_i, nresult)
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
      self._VerifyNodeNetwork(node_i, nresult)
      self._VerifyNodeUserScripts(node_i, nresult)
      self._VerifyOob(node_i, nresult)

      if nimg.vm_capable:
        self._VerifyNodeLVM(node_i, nresult, vg_name)
        self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
                             all_drbd_map)

        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
        self._UpdateNodeInstances(node_i, nresult, nimg)
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
        self._UpdateNodeOS(node_i, nresult, nimg)

        if not nimg.os_fail:
          if refos_img is None:
            # first node with valid OS data becomes the reference
            refos_img = nimg
          self._VerifyNodeOS(node_i, nimg, refos_img)
        self._VerifyNodeBridges(node_i, nresult, bridges)

        # Check whether all running instances are primary for the node. (This
        # can no longer be done from _VerifyInstance below, since some of the
        # wrong instances could be from other node groups.)
        non_primary_inst = set(nimg.instances).difference(nimg.pinst)

        for inst in non_primary_inst:
          # FIXME: investigate best way to handle offline insts
          if inst.admin_state == constants.ADMINST_OFFLINE:
            if verbose:
              feedback_fn("* Skipping offline instance %s" % inst.name)
            i_offline += 1
            continue
          test = inst in self.all_inst_info
          _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
                   "instance should not run on node %s", node_i.name)
          _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
                   "node is running unknown instance %s", inst)

    for node, result in extra_lv_nvinfo.items():
      self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
                              node_image[node], vg_name)

    feedback_fn("* Verifying instance status")
    for instance in self.my_inst_names:
      if verbose:
        feedback_fn("* Verifying instance %s" % instance)
      inst_config = self.my_inst_info[instance]
      self._VerifyInstance(instance, inst_config, node_image,
                           instdisk[instance])
      inst_nodes_offline = []

      pnode = inst_config.primary_node
      pnode_img = node_image[pnode]
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
               constants.CV_ENODERPC, pnode, "instance %s, connection to"
               " primary node failed", instance)

      _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
               pnode_img.offline,
               constants.CV_EINSTANCEBADNODE, instance,
               "instance is marked as running and lives on offline node %s",
               inst_config.primary_node)

      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
      # FIXME: does not support file-backed instances
      if not inst_config.secondary_nodes:
        i_non_redundant.append(instance)

      _ErrorIf(len(inst_config.secondary_nodes) > 1,
               constants.CV_EINSTANCELAYOUT,
               instance, "instance has multiple secondary nodes: %s",
               utils.CommaJoin(inst_config.secondary_nodes),
               code=self.ETYPE_WARNING)

      if inst_config.disk_template in constants.DTS_INT_MIRROR:
        pnode = inst_config.primary_node
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
        instance_groups = {}

        for node in instance_nodes:
          instance_groups.setdefault(self.all_node_info[node].group,
                                     []).append(node)

        pretty_list = [
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
          # Sort so that we always list the primary node first.
          for group, nodes in sorted(instance_groups.items(),
                                     key=lambda (_, nodes): pnode in nodes,
                                     reverse=True)]

        self._ErrorIf(len(instance_groups) > 1,
                      constants.CV_EINSTANCESPLITGROUPS,
                      instance, "instance has primary and secondary nodes in"
                      " different groups: %s", utils.CommaJoin(pretty_list),
                      code=self.ETYPE_WARNING)

      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)

      for snode in inst_config.secondary_nodes:
        s_img = node_image[snode]
        _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
                 snode, "instance %s, connection to secondary node failed",
                 instance)

        if s_img.offline:
          inst_nodes_offline.append(snode)

      # warn that the instance lives on offline nodes
      _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
               "instance has offline secondary node(s) %s",
               utils.CommaJoin(inst_nodes_offline))
      # ... or ghost/non-vm_capable nodes
      for node in inst_config.all_nodes:
        _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
                 instance, "instance lives on ghost node %s", node)
        _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
                 instance, "instance lives on non-vm_capable node %s", node)

    feedback_fn("* Verifying orphan volumes")
    reserved = utils.FieldSet(*cluster.reserved_lvs)

    # We will get spurious "unknown volume" warnings if any node of this group
    # is secondary for an instance whose primary is in another group. To avoid
    # them, we find these instances and add their volumes to node_vol_should.
    for inst in self.all_inst_info.values():
      for secondary in inst.secondary_nodes:
        if (secondary in self.my_node_info
            and inst.name not in self.my_inst_info):
          inst.MapLVsByNode(node_vol_should)
          break

    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)

    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
      feedback_fn("* Verifying N+1 Memory redundancy")
      self._VerifyNPlusOneMemory(node_image, self.my_inst_info)

    feedback_fn("* Other Notes")
    if i_non_redundant:
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
                  % len(i_non_redundant))

    if i_non_a_balanced:
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
                  % len(i_non_a_balanced))

    if i_offline:
      feedback_fn("  - NOTICE: %d offline instance(s) found." % i_offline)

    if n_offline:
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)

    if n_drained:
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)

    return not self.bad
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # We only really run POST phase hooks, only for non-empty groups,
    # and are only interested in their results
    if not self.my_node_names:
      # empty node group
      pass
    elif phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      feedback_fn("* Hooks Results")
      assert hooks_results, "invalid result from hooks"

      for node_name in hooks_results:
        res = hooks_results[node_name]
        # res is an RPC result object; fail_msg is non-empty on RPC failure
        msg = res.fail_msg
        test = msg and not res.offline
        self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
                      "Communication failure in hooks execution: %s", msg)
        if res.offline or msg:
          # No need to investigate payload if node is offline or gave
          # an error message
          continue
        for script, hkr, output in res.payload:
          test = hkr == constants.HKR_FAIL
          self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
                        "Script %s failed, output:", script)
          if test:
            # re-indent the script's output so it nests under the error line
            output = self._HOOKS_INDENT_RE.sub("      ", output)
            feedback_fn("%s" % output)
            # a failed hook script downgrades the overall verify result
            lu_result = False

    return lu_result
class LUClusterVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  This LU does no work itself; it submits one L{opcodes.OpGroupVerifyDisks}
  child job per node group and lets those do the actual checking.

  """
  REQ_BGL = False

  def ExpandNames(self):
    # Only the node-group list is read, so everything can be shared
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
      }

  def Exec(self, feedback_fn):
    # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
    per_group_jobs = [[opcodes.OpGroupVerifyDisks(group_name=name)]
                      for name in self.owned_locks(locking.LEVEL_NODEGROUP)]
    return ResultWithJobs(per_group_jobs)
class LUGroupVerifyDisks(NoHooksLU):
  """Verifies the status of all disks in a node group.

  """
  REQ_BGL = False

  def ExpandNames(self):
    # Raises errors.OpPrereqError on its own if group can't be found
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.share_locks = _ShareAll()
    # All three levels start empty and are filled in DeclareLocks, level
    # by level, as the lower-level lock sets depend on what was acquired
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        set([self.group_uuid] +
            # Lock all groups used by instances optimistically; this requires
            # going via the node before it's locked, requiring verification
            # later on
            [group_uuid
             for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
             for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be verified which contain
      # actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be verified
      assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
      member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)

  def CheckPrereq(self):
    # Re-check that the optimistically-acquired locks still match reality
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))

    # Check if node groups for locked instances are still correct
    for (instance_name, inst) in self.instances.items():
      assert owned_nodes.issuperset(inst.all_nodes), \
        "Instance %s's nodes changed while we kept the lock" % instance_name

      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
                                             owned_groups)

      assert self.group_uuid in inst_groups, \
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes

    """
    res_nodes = {}
    res_instances = set()
    res_missing = {}

    # Map of (node, lv_name) -> instance, for instances that should be running
    nv_dict = _MapInstanceDisksToNodes([inst
      for inst in self.instances.values()
      if inst.admin_state == constants.ADMINST_UP])

    if nv_dict:
      # only query vm-capable nodes we actually hold the lock for
      nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
                             set(self.cfg.GetVmCapableNodeList()))

      node_lvs = self.rpc.call_lv_list(nodes, [])

      for (node, node_res) in node_lvs.items():
        if node_res.offline:
          continue

        msg = node_res.fail_msg
        if msg:
          logging.warning("Error enumerating LVs on node %s: %s", node, msg)
          res_nodes[node] = msg
          continue

        for lv_name, (_, _, lv_online) in node_res.payload.items():
          inst = nv_dict.pop((node, lv_name), None)
          if not (lv_online or inst is None):
            # LV exists but is offline: instance needs activate-disks
            res_instances.add(inst)

      # any leftover items in nv_dict are missing LVs, let's arrange the data
      # better
      for key, inst in nv_dict.iteritems():
        res_missing.setdefault(inst, []).append(list(key))

    return (res_nodes, list(res_instances), res_missing)
class LUClusterRepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  """
  REQ_BGL = False

  def ExpandNames(self):
    if self.op.instances:
      # only the named instances (and their primary nodes) are locked
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
      self.needed_locks = {
        locking.LEVEL_NODE_RES: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
    else:
      # no instance list given: lock everything
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE_RES: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    self.share_locks = {
      locking.LEVEL_NODE_RES: 1,
      locking.LEVEL_INSTANCE: 0,
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
      # only the primary nodes are queried for disk sizes
      self._LockInstancesNodes(primary_only=True, level=level)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)

    self.wanted_instances = \
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object
    @return: True if a size mismatch was found and fixed in this disk
        or any of its (data) children

    """
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
      pnode = instance.primary_node
      if pnode not in per_node_disks:
        per_node_disks[pnode] = []
      for idx, disk in enumerate(instance.disks):
        per_node_disks[pnode].append((instance, idx, disk))

    assert not (frozenset(per_node_disks.keys()) -
                self.owned_locks(locking.LEVEL_NODE_RES)), \
      "Not owning correct locks"
    assert not self.owned_locks(locking.LEVEL_NODE)

    changed = []
    for node, dskl in per_node_disks.items():
      # query with copies so the originals keep their recorded sizes
      newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsize(node, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsize call to node"
                        " %s, ignoring", node)
        continue
      if len(result.payload) != len(dskl):
        logging.warning("Invalid result from node %s: len(dksl)=%d,"
                        " result.payload=%s", node, len(dskl), result.payload)
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
        # sizes are reported in bytes, recorded in MiB
        size = size >> 20
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))

    return changed
class LUClusterRename(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    """
    hostname = netutils.GetHostname(name=self.op.name,
                                    family=self.cfg.GetPrimaryIPFamily())

    new_name = hostname.name
    self.ip = new_ip = hostname.ip
    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)
    if new_ip != old_ip:
      # the new master IP must not already be in use on the network
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                   " reachable on the network" %
                                   new_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    """
    clustername = self.op.name
    new_ip = self.ip

    # shutdown the master IP
    master_params = self.cfg.GetMasterNetworkParameters()
    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
    result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = clustername
      cluster.master_ip = new_ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetOnlineNodeList()
      try:
        node_list.remove(master_params.name)
      except ValueError:
        pass
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
    finally:
      # always try to restore the master IP, even if the rename failed
      master_params.ip = new_ip
      result = self.rpc.call_node_activate_master_ip(master_params.name,
                                                     master_params, ems)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)

    return clustername
def _ValidateNetmask(cfg, netmask):
  """Checks if a netmask is valid.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type netmask: int
  @param netmask: the netmask to be verified
  @raise errors.OpPrereqError: if the validation fails

  """
  family = cfg.GetPrimaryIPFamily()
  try:
    addr_class = netutils.IPAddress.GetClassFromIpFamily(family)
  except errors.ProgrammerError:
    raise errors.OpPrereqError("Invalid primary ip family: %s." %
                               family, errors.ECODE_INVAL)
  if addr_class.ValidateNetmask(netmask):
    return
  raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
                             (netmask), errors.ECODE_INVAL)
class LUClusterSetParams(LogicalUnit):
  """Change the parameters of the cluster.

  """
  HPATH = "cluster-modify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  def CheckArguments(self):
    """Check parameters

    """
    if self.op.uid_pool:
      uidpool.CheckUidPool(self.op.uid_pool)

    if self.op.add_uids:
      uidpool.CheckUidPool(self.op.add_uids)

    if self.op.remove_uids:
      uidpool.CheckUidPool(self.op.remove_uids)

    if self.op.master_netmask is not None:
      _ValidateNetmask(self.cfg, self.op.master_netmask)

    if self.op.diskparams:
      for dt_params in self.op.diskparams.values():
        utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)

  def ExpandNames(self):
    # FIXME: in the future maybe other cluster params won't require checking on
    # all nodes to be modified.
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      }
    self.share_locks[locking.LEVEL_NODE] = 1

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_VG_NAME": self.op.vg_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the given params don't conflict and
    if the given volume group is valid.

    """
    if self.op.vg_name is not None and not self.op.vg_name:
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
                                   " instances exist", errors.ECODE_INVAL)

    if self.op.drbd_helper is not None and not self.op.drbd_helper:
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
        raise errors.OpPrereqError("Cannot disable drbd helper while"
                                   " drbd-based instances exist",
                                   errors.ECODE_INVAL)

    node_list = self.owned_locks(locking.LEVEL_NODE)

    # if vg_name not None, checks given volume group on all nodes
    if self.op.vg_name:
      vglist = self.rpc.call_vg_list(node_list)
      for node in node_list:
        msg = vglist[node].fail_msg
        if msg:
          # ignoring down node
          self.LogWarning("Error while gathering data on node %s"
                          " (ignoring node): %s", node, msg)
          continue
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
                                              self.op.vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          raise errors.OpPrereqError("Error on node '%s': %s" %
                                     (node, vgstatus), errors.ECODE_ENVIRON)

    if self.op.drbd_helper:
      # checks given drbd helper on all nodes
      helpers = self.rpc.call_drbd_helper(node_list)
      for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
        if ninfo.offline:
          self.LogInfo("Not checking drbd helper on offline node %s", node)
          continue
        msg = helpers[node].fail_msg
        if msg:
          raise errors.OpPrereqError("Error checking drbd helper on node"
                                     " '%s': %s" % (node, msg),
                                     errors.ECODE_ENVIRON)
        node_helper = helpers[node].payload
        if node_helper != self.op.drbd_helper:
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
                                     (node, node_helper), errors.ECODE_ENVIRON)

    self.cluster = cluster = self.cfg.GetClusterInfo()
    # validate params changes
    if self.op.beparams:
      objects.UpgradeBeParams(self.op.beparams)
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)

      # TODO: we need a more general way to handle resetting
      # cluster-level parameters to default values
      if self.new_ndparams["oob_program"] == "":
        self.new_ndparams["oob_program"] = \
          constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]

    if self.op.hv_state:
      new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
                                            self.cluster.hv_state_static)
      self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
                               for hv, values in new_hv_state.items())

    if self.op.disk_state:
      new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
                                                self.cluster.disk_state_static)
      self.new_disk_state = \
        dict((storage, dict((name, cluster.SimpleFillDiskState(values))
                            for name, values in svalues.items()))
             for storage, svalues in new_disk_state.items())

    if self.op.ipolicy:
      ipolicy = {}
      for key, value in self.op.ipolicy.items():
        utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
        ipolicy[key] = _GetUpdatedParams(cluster.ipolicy.get(key, {}),
                                         value)
      objects.InstancePolicy.CheckParameterSyntax(ipolicy)
      self.new_ipolicy = ipolicy

    if self.op.nicparams:
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
      nic_errors = []

      # check all instances for consistency
      for instance in self.cfg.GetAllInstancesInfo().values():
        for nic_idx, nic in enumerate(instance.nics):
          params_copy = copy.deepcopy(nic.nicparams)
          params_filled = objects.FillDict(self.new_nicparams, params_copy)

          # check parameter syntax
          try:
            objects.NIC.CheckParameterSyntax(params_filled)
          except errors.ConfigurationError as err:
            nic_errors.append("Instance %s, nic/%d: %s" %
                              (instance.name, nic_idx, err))

          # if we're moving instances to routed, check that they have an ip
          target_mode = params_filled[constants.NIC_MODE]
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
            nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
                              " address" % (instance.name, nic_idx))
      if nic_errors:
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
                                   "\n".join(nic_errors))

    # hypervisor list/parameters
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
    if self.op.hvparams:
      for hv_name, hv_dict in self.op.hvparams.items():
        if hv_name not in self.new_hvparams:
          self.new_hvparams[hv_name] = hv_dict
        else:
          self.new_hvparams[hv_name].update(hv_dict)

    # disk template parameters
    self.new_diskparams = objects.FillDict(cluster.diskparams, {})
    if self.op.diskparams:
      for dt_name, dt_params in self.op.diskparams.items():
        # FIX: the membership test previously checked "self.op.diskparams",
        # the very dict this loop iterates over, so it was always false;
        # the update() branch then raised KeyError for disk templates with
        # no pre-existing cluster-level parameters. Check the target dict
        # instead, mirroring the hvparams handling above.
        if dt_name not in self.new_diskparams:
          self.new_diskparams[dt_name] = dt_params
        else:
          self.new_diskparams[dt_name].update(dt_params)

    # os hypervisor parameters
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
    if self.op.os_hvp:
      for os_name, hvs in self.op.os_hvp.items():
        if os_name not in self.new_os_hvp:
          self.new_os_hvp[os_name] = hvs
        else:
          for hv_name, hv_dict in hvs.items():
            if hv_name not in self.new_os_hvp[os_name]:
              self.new_os_hvp[os_name][hv_name] = hv_dict
            else:
              self.new_os_hvp[os_name][hv_name].update(hv_dict)

    # os parameters
    self.new_osp = objects.FillDict(cluster.osparams, {})
    if self.op.osparams:
      for os_name, osp in self.op.osparams.items():
        if os_name not in self.new_osp:
          self.new_osp[os_name] = {}

        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
                                                  use_none=True)

        if not self.new_osp[os_name]:
          # we removed all parameters
          del self.new_osp[os_name]
        else:
          # check the parameter validity (remote check)
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
                         os_name, self.new_osp[os_name])

    # changes to the hypervisor list
    if self.op.enabled_hypervisors is not None:
      self.hv_list = self.op.enabled_hypervisors
      for hv in self.hv_list:
        # if the hypervisor doesn't already exist in the cluster
        # hvparams, we initialize it to empty, and then (in both
        # cases) we make sure to fill the defaults, as we might not
        # have a complete defaults list if the hypervisor wasn't
        # enabled before
        if hv not in new_hvp:
          new_hvp[hv] = {}
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
    else:
      self.hv_list = cluster.enabled_hypervisors

    if self.op.hvparams or self.op.enabled_hypervisors is not None:
      # either the enabled list has changed, or the parameters have, validate
      for hv_name, hv_params in self.new_hvparams.items():
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
            (self.op.enabled_hypervisors and
             hv_name in self.op.enabled_hypervisors)):
          # either this is a new hypervisor, or its parameters have changed
          hv_class = hypervisor.GetHypervisor(hv_name)
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          hv_class.CheckParameterSyntax(hv_params)
          _CheckHVParams(self, node_list, hv_name, hv_params)

    if self.op.os_hvp:
      # no need to check any newly-enabled hypervisors, since the
      # defaults have already been checked in the above code-block
      for os_name, os_hvp in self.new_os_hvp.items():
        for hv_name, hv_params in os_hvp.items():
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          # we need to fill in the new os_hvp on top of the actual hv_p
          cluster_defaults = self.new_hvparams.get(hv_name, {})
          new_osp = objects.FillDict(cluster_defaults, hv_params)
          hv_class = hypervisor.GetHypervisor(hv_name)
          hv_class.CheckParameterSyntax(new_osp)
          _CheckHVParams(self, node_list, hv_name, new_osp)

    if self.op.default_iallocator:
      alloc_script = utils.FindFile(self.op.default_iallocator,
                                    constants.IALLOCATOR_SEARCH_PATH,
                                    os.path.isfile)
      if alloc_script is None:
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
                                   " specified" % self.op.default_iallocator,
                                   errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Change the parameters of the cluster.

    """
    if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      if not new_volume:
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
    if self.op.drbd_helper is not None:
      new_helper = self.op.drbd_helper
      if not new_helper:
        new_helper = None
      if new_helper != self.cfg.GetDRBDHelper():
        self.cfg.SetDRBDHelper(new_helper)
      else:
        feedback_fn("Cluster DRBD helper already in desired state,"
                    " not changing")
    if self.op.hvparams:
      self.cluster.hvparams = self.new_hvparams
    if self.op.os_hvp:
      self.cluster.os_hvp = self.new_os_hvp
    if self.op.enabled_hypervisors is not None:
      self.cluster.hvparams = self.new_hvparams
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
    if self.op.beparams:
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
    if self.op.nicparams:
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
    if self.op.ipolicy:
      self.cluster.ipolicy = self.new_ipolicy
    if self.op.osparams:
      self.cluster.osparams = self.new_osp
    if self.op.ndparams:
      self.cluster.ndparams = self.new_ndparams
    if self.op.diskparams:
      self.cluster.diskparams = self.new_diskparams
    if self.op.hv_state:
      self.cluster.hv_state_static = self.new_hv_state
    if self.op.disk_state:
      self.cluster.disk_state_static = self.new_disk_state

    if self.op.candidate_pool_size is not None:
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
      # we need to update the pool size here, otherwise the save will fail
      _AdjustCandidatePool(self, [])

    if self.op.maintain_node_health is not None:
      if self.op.maintain_node_health and not constants.ENABLE_CONFD:
        feedback_fn("Note: CONFD was disabled at build time, node health"
                    " maintenance is not useful (still enabling it)")
      self.cluster.maintain_node_health = self.op.maintain_node_health

    if self.op.prealloc_wipe_disks is not None:
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks

    if self.op.add_uids is not None:
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)

    if self.op.remove_uids is not None:
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)

    if self.op.uid_pool is not None:
      self.cluster.uid_pool = self.op.uid_pool

    if self.op.default_iallocator is not None:
      self.cluster.default_iallocator = self.op.default_iallocator

    if self.op.reserved_lvs is not None:
      self.cluster.reserved_lvs = self.op.reserved_lvs

    if self.op.use_external_mip_script is not None:
      self.cluster.use_external_mip_script = self.op.use_external_mip_script

    def helper_os(aname, mods, desc):
      # apply DDM_ADD/DDM_REMOVE modifications to the named OS list attribute
      desc += " OS list"
      lst = getattr(self.cluster, aname)
      for key, val in mods:
        if key == constants.DDM_ADD:
          if val in lst:
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
          else:
            lst.append(val)
        elif key == constants.DDM_REMOVE:
          if val in lst:
            lst.remove(val)
          else:
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
        else:
          raise errors.ProgrammerError("Invalid modification '%s'" % key)

    if self.op.hidden_os:
      helper_os("hidden_os", self.op.hidden_os, "hidden")

    if self.op.blacklisted_os:
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")

    if self.op.master_netdev:
      # the master IP must be turned down on the old netdev first
      master_params = self.cfg.GetMasterNetworkParameters()
      ems = self.cfg.GetUseExternalMipScript()
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
                  self.cluster.master_netdev)
      result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                       master_params, ems)
      result.Raise("Could not disable the master ip")
      feedback_fn("Changing master_netdev from %s to %s" %
                  (master_params.netdev, self.op.master_netdev))
      self.cluster.master_netdev = self.op.master_netdev

    if self.op.master_netmask:
      master_params = self.cfg.GetMasterNetworkParameters()
      feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
      result = self.rpc.call_node_change_master_netmask(master_params.name,
                                                        master_params.netmask,
                                                        self.op.master_netmask,
                                                        master_params.ip,
                                                        master_params.netdev)
      if result.fail_msg:
        msg = "Could not change the master IP netmask: %s" % result.fail_msg
        feedback_fn(msg)

      self.cluster.master_netmask = self.op.master_netmask

    self.cfg.Update(self.cluster, feedback_fn)

    if self.op.master_netdev:
      master_params = self.cfg.GetMasterNetworkParameters()
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
                  self.op.master_netdev)
      ems = self.cfg.GetUseExternalMipScript()
      result = self.rpc.call_node_activate_master_ip(master_params.name,
                                                     master_params, ems)
      if result.fail_msg:
        self.LogWarning("Could not re-enable the master ip on"
                        " the master, please restart manually: %s",
                        result.fail_msg)
4013 def _UploadHelper(lu, nodes, fname):
4014 """Helper for uploading a file and showing warnings.
4017 if os.path.exists(fname):
4018 result = lu.rpc.call_upload_file(nodes, fname)
4019 for to_node, to_result in result.items():
4020 msg = to_result.fail_msg
4022 msg = ("Copy of file %s to node %s failed: %s" %
4023 (fname, to_node, msg))
4024 lu.proc.LogWarning(msg)
def _ComputeAncillaryFiles(cluster, redist):
  """Compute files external to Ganeti which need to be consistent.

  @type redist: boolean
  @param redist: Whether to include files which need to be redistributed

  """
  # Compute files for all nodes
  files_all = set([
    constants.SSH_KNOWN_HOSTS_FILE,
    constants.CONFD_HMAC_KEY,
    constants.CLUSTER_DOMAIN_SECRET_FILE,
    constants.SPICE_CERT_FILE,
    constants.SPICE_CACERT_FILE,
    constants.RAPI_USERS_FILE,
    ])

  if not redist:
    files_all.update(constants.ALL_CERT_FILES)
    files_all.update(ssconf.SimpleStore().GetFileList())
  else:
    # we need to ship at least the RAPI certificate
    files_all.add(constants.RAPI_CERT_FILE)

  if cluster.modify_etc_hosts:
    files_all.add(constants.ETC_HOSTS)

  # Files which are optional, these must:
  # - be present in one other category as well
  # - either exist or not exist on all nodes of that category (mc, vm all)
  files_opt = set([
    constants.RAPI_USERS_FILE,
    ])

  # Files which should only be on master candidates
  files_mc = set()

  if not redist:
    files_mc.add(constants.CLUSTER_CONF_FILE)

    # FIXME: this should also be replicated but Ganeti doesn't support files_mc
    # replication
    files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)

  # Files which should only be on VM-capable nodes
  files_vm = set(filename
    for hv_name in cluster.enabled_hypervisors
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])

  files_opt |= set(filename
    for hv_name in cluster.enabled_hypervisors
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])

  # Filenames in each category must be unique
  all_files_set = files_all | files_mc | files_vm
  assert (len(all_files_set) ==
          sum(map(len, [files_all, files_mc, files_vm]))), \
    "Found file listed in more than one file list"

  # Optional files must be present in one other category
  assert all_files_set.issuperset(files_opt), \
    "Optional file not in a different required list"

  return (files_all, files_opt, files_mc, files_vm)
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to
  @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
  # Gather target nodes
  cluster = lu.cfg.GetClusterInfo()
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())

  online_nodes = lu.cfg.GetOnlineNodeList()
  vm_nodes = lu.cfg.GetVmCapableNodeList()

  if additional_nodes is not None:
    online_nodes.extend(additional_nodes)
    if additional_vm:
      vm_nodes.extend(additional_nodes)

  # Never distribute to master node
  for nodelist in [online_nodes, vm_nodes]:
    if master_info.name in nodelist:
      nodelist.remove(master_info.name)

  # Gather file lists
  (files_all, _, files_mc, files_vm) = \
    _ComputeAncillaryFiles(cluster, True)

  # Never re-distribute configuration file from here
  assert not (constants.CLUSTER_CONF_FILE in files_all or
              constants.CLUSTER_CONF_FILE in files_vm)
  assert not files_mc, "Master candidates not handled in this function"

  filemap = [
    (online_nodes, files_all),
    (vm_nodes, files_vm),
    ]

  # Upload the files
  for (node_list, files) in filemap:
    for fname in files:
      _UploadHelper(lu, node_list, fname)
class LUClusterRedistConf(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      }
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    # Updating the cluster object triggers config/ssconf distribution;
    # ancillary files are pushed explicitly afterwards
    cluster_info = self.cfg.GetClusterInfo()
    self.cfg.Update(cluster_info, feedback_fn)
    _RedistributeAncillaryFiles(self)
class LUClusterActivateMasterIp(NoHooksLU):
  """Activate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Activate the master IP.

    """
    params = self.cfg.GetMasterNetworkParameters()
    use_ext_script = self.cfg.GetUseExternalMipScript()
    rpc_result = self.rpc.call_node_activate_master_ip(params.name,
                                                       params,
                                                       use_ext_script)
    rpc_result.Raise("Could not activate the master IP")
class LUClusterDeactivateMasterIp(NoHooksLU):
  """Deactivate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Deactivate the master IP.

    """
    master_params = self.cfg.GetMasterNetworkParameters()
    # Whether an external script manages the master IP instead of noded
    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
    result.Raise("Could not deactivate the master IP")
def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  @param lu: the calling logical unit
  @param instance: the instance whose disks should be polled
  @param disks: optional subset of disks to wait for (None means all)
  @param oneshot: if True, only poll once instead of waiting for sync
  @return: True if the disks are not degraded, False otherwise

  """
  # Nothing to do for diskless instances or an explicitly empty subset
  if not instance.disks or disks is not None and not disks:
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    retries = 0
    rstats = rstats.payload
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                      node, disks[i].iv_name)
        continue

      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (disks[i].iv_name, mstat.sync_percent, rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  @param lu: the calling logical unit
  @param dev: the disk object to check (children are checked recursively)
  @param node: the node on which to check the disk
  @param on_primary: whether C{node} is the instance's primary node
  @return: True if the disk (and its children) are consistent

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)

  return result
# NOTE(review): this listing has elided lines (the embedded numbering jumps,
# e.g. 4305->4309); the code below is incomplete. Restore missing lines from
# VCS before building. Comments below only describe what is visible.
4304 class LUOobCommand(NoHooksLU):
4305 """Logical unit for OOB handling.
# Commands for which the master node must be excluded from the target list
4309 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4311 def ExpandNames(self):
4312 """Gather locks we need.
# Lock only the requested nodes, or all nodes when none were given
4315 if self.op.node_names:
4316 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4317 lock_names = self.op.node_names
4319 lock_names = locking.ALL_SET
4321 self.needed_locks = {
4322 locking.LEVEL_NODE: lock_names,
4325 def CheckPrereq(self):
4326 """Check prerequisites.
4329 - the node exists in the configuration
4332 Any errors are signaled by raising errors.OpPrereqError.
4336 self.master_node = self.cfg.GetMasterNode()
4338 assert self.op.power_delay >= 0.0
# Power-off/cycle on the master is refused; the message suggests running the
# OOB helper manually if the master does support out-of-band operations
4340 if self.op.node_names:
4341 if (self.op.command in self._SKIP_MASTER and
4342 self.master_node in self.op.node_names):
4343 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4344 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4346 if master_oob_handler:
4347 additional_text = ("run '%s %s %s' if you want to operate on the"
4348 " master regardless") % (master_oob_handler,
4352 additional_text = "it does not support out-of-band operations"
4354 raise errors.OpPrereqError(("Operating on the master node %s is not"
4355 " allowed for %s; %s") %
4356 (self.master_node, self.op.command,
4357 additional_text), errors.ECODE_INVAL)
# No explicit node list: operate on all nodes, minus the master for
# power-off/cycle commands
4359 self.op.node_names = self.cfg.GetNodeList()
4360 if self.op.command in self._SKIP_MASTER:
4361 self.op.node_names.remove(self.master_node)
4363 if self.op.command in self._SKIP_MASTER:
4364 assert self.master_node not in self.op.node_names
4366 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4368 raise errors.OpPrereqError("Node %s not found" % node_name,
4371 self.nodes.append(node)
# Powering off an online node is refused unless explicitly overridden
4373 if (not self.op.ignore_status and
4374 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4375 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4376 " not marked offline") % node_name,
4379 def Exec(self, feedback_fn):
4380 """Execute OOB and return result if we expect any.
4383 master_node = self.master_node
# One result entry per node, starting with the node name itself
4386 for idx, node in enumerate(utils.NiceSort(self.nodes,
4387 key=lambda node: node.name)):
4388 node_entry = [(constants.RS_NORMAL, node.name)]
4389 ret.append(node_entry)
4391 oob_program = _SupportsOob(self.cfg, node)
4394 node_entry.append((constants.RS_UNAVAIL, None))
# The OOB helper is always executed on the master node via RPC
4397 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4398 self.op.command, oob_program, node.name)
4399 result = self.rpc.call_run_oob(master_node, oob_program,
4400 self.op.command, node.name,
4403 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4405 node.name, result.fail_msg)
4406 node_entry.append((constants.RS_NODATA, None))
4409 self._CheckPayload(result)
4410 except errors.OpExecError, err:
4411 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4413 node_entry.append((constants.RS_NODATA, None))
4415 if self.op.command == constants.OOB_HEALTH:
4416 # For health we should log important events
4417 for item, status in result.payload:
4418 if status in [constants.OOB_STATUS_WARNING,
4419 constants.OOB_STATUS_CRITICAL]:
4420 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4421 item, node.name, status)
# Track the recorded power state; warn when it disagrees with reality
4423 if self.op.command == constants.OOB_POWER_ON:
4425 elif self.op.command == constants.OOB_POWER_OFF:
4426 node.powered = False
4427 elif self.op.command == constants.OOB_POWER_STATUS:
4428 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4429 if powered != node.powered:
4430 logging.warning(("Recorded power state (%s) of node '%s' does not"
4431 " match actual power state (%s)"), node.powered,
4434 # For configuration changing commands we should update the node
4435 if self.op.command in (constants.OOB_POWER_ON,
4436 constants.OOB_POWER_OFF):
4437 self.cfg.Update(node, feedback_fn)
4439 node_entry.append((constants.RS_NORMAL, result.payload))
# Stagger power-on between nodes to avoid inrush, except after the last one
4441 if (self.op.command == constants.OOB_POWER_ON and
4442 idx < len(self.nodes) - 1):
4443 time.sleep(self.op.power_delay)
4447 def _CheckPayload(self, result):
4448 """Checks if the payload is valid.
4450 @param result: RPC result
4451 @raises errors.OpExecError: If payload is not valid
# Per-command payload shape validation; errors are collected and raised once
4455 if self.op.command == constants.OOB_HEALTH:
4456 if not isinstance(result.payload, list):
4457 errs.append("command 'health' is expected to return a list but got %s" %
4458 type(result.payload))
4460 for item, status in result.payload:
4461 if status not in constants.OOB_STATUSES:
4462 errs.append("health item '%s' has invalid status '%s'" %
4465 if self.op.command == constants.OOB_POWER_STATUS:
4466 if not isinstance(result.payload, dict):
4467 errs.append("power-status is expected to return a dict but got %s" %
4468 type(result.payload))
4470 if self.op.command in [
4471 constants.OOB_POWER_ON,
4472 constants.OOB_POWER_OFF,
4473 constants.OOB_POWER_CYCLE,
4475 if result.payload is not None:
4476 errs.append("%s is expected to not return payload but got '%s'" %
4477 (self.op.command, result.payload))
4480 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4481 utils.CommaJoin(errs))
# NOTE(review): this listing has elided lines (numbering jumps); the code
# below is incomplete (e.g. accumulator initializations and else-branches
# are missing). Restore from VCS before building.
4484 class _OsQuery(_QueryBase):
4485 FIELDS = query.OS_FIELDS
4487 def ExpandNames(self, lu):
4488 # Lock all nodes in shared mode
4489 # Temporary removal of locks, should be reverted later
4490 # TODO: reintroduce locks when they are lighter-weight
4491 lu.needed_locks = {}
4492 #self.share_locks[locking.LEVEL_NODE] = 1
4493 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4495 # The following variables interact with _QueryBase._GetNames
4497 self.wanted = self.names
4499 self.wanted = locking.ALL_SET
4501 self.do_locking = self.use_locking
4503 def DeclareLocks(self, lu, level):
# Staticmethod helper (decorator elided in this listing — confirm in VCS)
4507 def _DiagnoseByOS(rlist):
4508 """Remaps a per-node return list into an a per-os per-node dictionary
4510 @param rlist: a map with node names as keys and OS objects as values
4513 @return: a dictionary with osnames as keys and as value another
4514 map, with nodes as keys and tuples of (path, status, diagnose,
4515 variants, parameters, api_versions) as values, eg::
4517 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4518 (/srv/..., False, "invalid api")],
4519 "node2": [(/srv/..., True, "", [], [])]}
4524 # we build here the list of nodes that didn't fail the RPC (at RPC
4525 # level), so that nodes with a non-responding node daemon don't
4526 # make all OSes invalid
4527 good_nodes = [node_name for node_name in rlist
4528 if not rlist[node_name].fail_msg]
4529 for node_name, nr in rlist.items():
4530 if nr.fail_msg or not nr.payload:
4532 for (name, path, status, diagnose, variants,
4533 params, api_versions) in nr.payload:
4534 if name not in all_os:
4535 # build a list of nodes for this os containing empty lists
4536 # for each node in node_list
4538 for nname in good_nodes:
4539 all_os[name][nname] = []
4540 # convert params from [name, help] to (name, help)
4541 params = [tuple(v) for v in params]
4542 all_os[name][node_name].append((path, status, diagnose,
4543 variants, params, api_versions))
4546 def _GetQueryData(self, lu):
4547 """Computes the list of nodes and their attributes.
4550 # Locking is not used
4551 assert not (compat.any(lu.glm.is_owned(level)
4552 for level in locking.LEVELS
4553 if level != locking.LEVEL_CLUSTER) or
4554 self.do_locking or self.use_locking)
# Only online, vm-capable nodes are asked for their OS list
4556 valid_nodes = [node.name
4557 for node in lu.cfg.GetAllNodesInfo().values()
4558 if not node.offline and node.vm_capable]
4559 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4560 cluster = lu.cfg.GetClusterInfo()
4564 for (os_name, os_data) in pol.items():
4565 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4566 hidden=(os_name in cluster.hidden_os),
4567 blacklisted=(os_name in cluster.blacklisted_os))
4571 api_versions = set()
# An OS is valid only if its first entry on every node reports success
4573 for idx, osl in enumerate(os_data.values()):
4574 info.valid = bool(info.valid and osl and osl[0][1])
4578 (node_variants, node_params, node_api) = osl[0][3:6]
# First node seeds the sets; later nodes intersect with them
4581 variants.update(node_variants)
4582 parameters.update(node_params)
4583 api_versions.update(node_api)
4585 # Filter out inconsistent values
4586 variants.intersection_update(node_variants)
4587 parameters.intersection_update(node_params)
4588 api_versions.intersection_update(node_api)
4590 info.variants = list(variants)
4591 info.parameters = list(parameters)
4592 info.api_versions = list(api_versions)
4594 data[os_name] = info
4596 # Prepare data in requested order
4597 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
class LUOsDiagnose(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  REQ_BGL = False

  @staticmethod
  def _BuildFilter(fields, names):
    """Builds a filter for querying OSes.

    """
    name_filter = qlang.MakeSimpleFilter("name", names)

    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
    # respective field is not requested
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
                     for fname in ["hidden", "blacklisted"]
                     if fname not in fields]
    if "valid" not in fields:
      status_filter.append([qlang.OP_TRUE, "valid"])

    if status_filter:
      status_filter.insert(0, qlang.OP_AND)
    else:
      status_filter = None

    # Combine the two partial filters if both are present
    if name_filter and status_filter:
      return [qlang.OP_AND, name_filter, status_filter]
    elif name_filter:
      return name_filter
    else:
      return status_filter

  def CheckArguments(self):
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
                       self.op.output_fields, False)

  def ExpandNames(self):
    self.oq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.oq.OldStyleQuery(self)
# NOTE(review): this listing has elided lines (numbering jumps); the code
# below is incomplete (e.g. the binding of the local `node` used in Exec is
# not visible). Restore from VCS before building.
4645 class LUNodeRemove(LogicalUnit):
4646 """Logical unit for removing a node.
4649 HPATH = "node-remove"
4650 HTYPE = constants.HTYPE_NODE
4652 def BuildHooksEnv(self):
4655 This doesn't run on the target node in the pre phase as a failed
4656 node would then be impossible to remove.
4660 "OP_TARGET": self.op.node_name,
4661 "NODE_NAME": self.op.node_name,
4664 def BuildHooksNodes(self):
4665 """Build hooks nodes.
# Hooks run on all nodes except the one being removed; a missing entry is
# only warned about, not fatal
4668 all_nodes = self.cfg.GetNodeList()
4670 all_nodes.remove(self.op.node_name)
4672 logging.warning("Node '%s', which is about to be removed, was not found"
4673 " in the list of all nodes", self.op.node_name)
4674 return (all_nodes, all_nodes)
4676 def CheckPrereq(self):
4677 """Check prerequisites.
4680 - the node exists in the configuration
4681 - it does not have primary or secondary instances
4682 - it's not the master
4684 Any errors are signaled by raising errors.OpPrereqError.
4687 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4688 node = self.cfg.GetNodeInfo(self.op.node_name)
4689 assert node is not None
4691 masternode = self.cfg.GetMasterNode()
4692 if node.name == masternode:
4693 raise errors.OpPrereqError("Node is the master node, failover to another"
4694 " node is required", errors.ECODE_INVAL)
# Refuse removal while any instance still lives (primary or secondary) here
4696 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4697 if node.name in instance.all_nodes:
4698 raise errors.OpPrereqError("Instance %s is still running on the node,"
4699 " please remove first" % instance_name,
4701 self.op.node_name = node.name
4704 def Exec(self, feedback_fn):
4705 """Removes the node from the cluster.
4709 logging.info("Stopping the node daemon and removing configs from node %s",
4712 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4714 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
4717 # Promote nodes to master candidate as needed
4718 _AdjustCandidatePool(self, exceptions=[node.name])
4719 self.context.RemoveNode(node.name)
4721 # Run post hooks on the node before it's removed
4722 _RunPostHook(self, node.name)
# Leaving the cluster is best-effort: failures are only warned about
4724 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4725 msg = result.fail_msg
4727 self.LogWarning("Errors encountered on the remote node while leaving"
4728 " the cluster: %s", msg)
4730 # Remove node from our /etc/hosts
4731 if self.cfg.GetClusterInfo().modify_etc_hosts:
4732 master_node = self.cfg.GetMasterNode()
4733 result = self.rpc.call_etc_hosts_modify(master_node,
4734 constants.ETC_HOSTS_REMOVE,
4736 result.Raise("Can't update hosts file with new host data")
4737 _RedistributeAncillaryFiles(self)
# NOTE(review): this listing has elided lines (numbering jumps); the code
# below is incomplete (e.g. the else-branches initializing live_data,
# oob_support and groups to defaults are not visible). Restore from VCS.
4740 class _NodeQuery(_QueryBase):
4741 FIELDS = query.NODE_FIELDS
4742 def ExpandNames(self, lu):
4744 lu.needed_locks = {}
4745 lu.share_locks = _ShareAll()
# Resolve the requested names, or fall back to all nodes
4748 self.wanted = _GetWantedNodes(lu, self.names)
4750 self.wanted = locking.ALL_SET
# Locking is only needed when live (RPC-collected) data was requested
4752 self.do_locking = (self.use_locking and
4753 query.NQ_LIVE in self.requested_data)
4756 # If any non-static field is requested we need to lock the nodes
4757 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4759 def DeclareLocks(self, lu, level):
4762 def _GetQueryData(self, lu):
4763 """Computes the list of nodes and their attributes.
4766 all_info = lu.cfg.GetAllNodesInfo()
4768 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4770 # Gather data as requested
4771 if query.NQ_LIVE in self.requested_data:
4772 # filter out non-vm_capable nodes
4773 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4775 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
4776 [lu.cfg.GetHypervisorType()])
# Failed or empty RPC results are simply left out of the live data map
4777 live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
4778 for (name, nresult) in node_data.items()
4779 if not nresult.fail_msg and nresult.payload)
# Build node -> instance-name maps for primary and secondary roles
4783 if query.NQ_INST in self.requested_data:
4784 node_to_primary = dict([(name, set()) for name in nodenames])
4785 node_to_secondary = dict([(name, set()) for name in nodenames])
4787 inst_data = lu.cfg.GetAllInstancesInfo()
4789 for inst in inst_data.values():
4790 if inst.primary_node in node_to_primary:
4791 node_to_primary[inst.primary_node].add(inst.name)
4792 for secnode in inst.secondary_nodes:
4793 if secnode in node_to_secondary:
4794 node_to_secondary[secnode].add(inst.name)
4796 node_to_primary = None
4797 node_to_secondary = None
4799 if query.NQ_OOB in self.requested_data:
4800 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4801 for name, node in all_info.iteritems())
4805 if query.NQ_GROUP in self.requested_data:
4806 groups = lu.cfg.GetAllNodeGroupsInfo()
4810 return query.NodeQueryData([all_info[name] for name in nodenames],
4811 live_data, lu.cfg.GetMasterNode(),
4812 node_to_primary, node_to_secondary, groups,
4813 oob_support, lu.cfg.GetClusterInfo())
class LUNodeQuery(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    # Delegate all the work to the generic node query implementation
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
                         self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.nq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.nq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.nq.OldStyleQuery(self)
class LUNodeQueryvols(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {}

    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    nodenames = self.owned_locks(locking.LEVEL_NODE)
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = self.cfg.GetAllInstancesInfo()
    vol2inst = _MapInstanceDisksToNodes(ilist.values())

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = sorted(nresult.payload,
                         key=operator.itemgetter("dev"))

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol["dev"]
          elif field == "vg":
            val = vol["vg"]
          elif field == "name":
            val = vol["name"]
          elif field == "size":
            val = int(float(vol["size"]))
          elif field == "instance":
            # Map the LV back to the instance owning it, if any
            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output
class LUNodeQueryStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  REQ_BGL = False

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {}

    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    self.nodes = self.owned_locks(locking.LEVEL_NODE)

    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields

    # Never ask for node or type as it's only known to the LU
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
      while extra in fields:
        fields.remove(extra)

    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]

        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(val)

        result.append(out)

    return result
# NOTE(review): this listing has elided lines (numbering jumps); the code
# below is incomplete (e.g. the per-node loop header, list/dict
# initializations and several else-branches are not visible). Restore from
# VCS before building.
4990 class _InstanceQuery(_QueryBase):
4991 FIELDS = query.INSTANCE_FIELDS
4993 def ExpandNames(self, lu):
4994 lu.needed_locks = {}
4995 lu.share_locks = _ShareAll()
# Resolve requested instance names, or fall back to all instances
4998 self.wanted = _GetWantedInstances(lu, self.names)
5000 self.wanted = locking.ALL_SET
5002 self.do_locking = (self.use_locking and
5003 query.IQ_LIVE in self.requested_data)
5005 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5006 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5007 lu.needed_locks[locking.LEVEL_NODE] = []
5008 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5010 self.do_grouplocks = (self.do_locking and
5011 query.IQ_NODES in self.requested_data)
5013 def DeclareLocks(self, lu, level):
5015 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5016 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5018 # Lock all groups used by instances optimistically; this requires going
5019 # via the node before it's locked, requiring verification later on
5020 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5022 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5023 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5024 elif level == locking.LEVEL_NODE:
5025 lu._LockInstancesNodes() # pylint: disable=W0212
# Staticmethod helper (decorator elided in this listing — confirm in VCS)
5028 def _CheckGroupLocks(lu):
5029 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5030 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5032 # Check if node groups for locked instances are still correct
5033 for instance_name in owned_instances:
5034 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5036 def _GetQueryData(self, lu):
5037 """Computes the list of instances and their attributes.
5040 if self.do_grouplocks:
5041 self._CheckGroupLocks(lu)
5043 cluster = lu.cfg.GetClusterInfo()
5044 all_info = lu.cfg.GetAllInstancesInfo()
5046 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5048 instance_list = [all_info[name] for name in instance_names]
5049 nodes = frozenset(itertools.chain(*(inst.all_nodes
5050 for inst in instance_list)))
5051 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5054 wrongnode_inst = set()
5056 # Gather data as requested
5057 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5059 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5061 result = node_data[name]
5063 # offline nodes will be in both lists
5064 assert result.fail_msg
5065 offline_nodes.append(name)
5067 bad_nodes.append(name)
5068 elif result.payload:
5069 for inst in result.payload:
5070 if inst in all_info:
5071 if all_info[inst].primary_node == name:
5072 live_data.update(result.payload)
# An instance reported by a node that is not its primary is tracked
5074 wrongnode_inst.add(inst)
5076 # orphan instance; we don't list it here as we don't
5077 # handle this case yet in the output of instance listing
5078 logging.warning("Orphan instance '%s' found on node %s",
5080 # else no instance is alive
5084 if query.IQ_DISKUSAGE in self.requested_data:
5085 disk_usage = dict((inst.name,
5086 _ComputeDiskSize(inst.disk_template,
5087 [{constants.IDISK_SIZE: disk.size}
5088 for disk in inst.disks]))
5089 for inst in instance_list)
5093 if query.IQ_CONSOLE in self.requested_data:
# Console info is only available for instances that are actually running
5095 for inst in instance_list:
5096 if inst.name in live_data:
5097 # Instance is running
5098 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5100 consinfo[inst.name] = None
5101 assert set(consinfo.keys()) == set(instance_names)
5105 if query.IQ_NODES in self.requested_data:
5106 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5108 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5109 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5110 for uuid in set(map(operator.attrgetter("group"),
5116 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5117 disk_usage, offline_nodes, bad_nodes,
5118 live_data, wrongnode_inst, consinfo,
class LUQuery(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    # Look up the query implementation for the requested resource kind
    qcls = _GetQueryImplementation(self.op.what)

    self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)

  def ExpandNames(self):
    self.impl.ExpandNames(self)

  def DeclareLocks(self, level):
    self.impl.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.impl.NewStyleQuery(self)
class LUQueryFields(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    # Only the field definitions of the implementation class are needed
    self.qcls = _GetQueryImplementation(self.op.what)

  def ExpandNames(self):
    # Listing fields requires no locks at all
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)
class LUNodeModifyStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    # Only a whitelisted subset of fields may be changed per storage type
    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(self.op.node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))
5203 class LUNodeAdd(LogicalUnit):
5204 """Logical unit for adding node to the cluster.
5208 HTYPE = constants.HTYPE_NODE
5209 _NFLAGS = ["master_capable", "vm_capable"]
5211 def CheckArguments(self):
5212 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5213 # validate/normalize the node name
5214 self.hostname = netutils.GetHostname(name=self.op.node_name,
5215 family=self.primary_ip_family)
5216 self.op.node_name = self.hostname.name
5218 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5219 raise errors.OpPrereqError("Cannot readd the master node",
5222 if self.op.readd and self.op.group:
5223 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5224 " being readded", errors.ECODE_INVAL)
5226 def BuildHooksEnv(self):
5229 This will run on all nodes before, and on all nodes + the new node after.
5233 "OP_TARGET": self.op.node_name,
5234 "NODE_NAME": self.op.node_name,
5235 "NODE_PIP": self.op.primary_ip,
5236 "NODE_SIP": self.op.secondary_ip,
5237 "MASTER_CAPABLE": str(self.op.master_capable),
5238 "VM_CAPABLE": str(self.op.vm_capable),
5241 def BuildHooksNodes(self):
5242 """Build hooks nodes.
5245 # Exclude added node
5246 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5247 post_nodes = pre_nodes + [self.op.node_name, ]
5249 return (pre_nodes, post_nodes)
# NOTE(review): this listing has elided lines (numbering jumps); the code
# below is incomplete (e.g. the binding of the local `cfg` used throughout,
# several else-branches and error-code arguments are not visible). Restore
# from VCS before building.
5251 def CheckPrereq(self):
5252 """Check prerequisites.
5255 - the new node is not already in the config
5257 - its parameters (single/dual homed) matches the cluster
5259 Any errors are signaled by raising errors.OpPrereqError.
5263 hostname = self.hostname
5264 node = hostname.name
5265 primary_ip = self.op.primary_ip = hostname.ip
# Default the secondary IP to the primary one; not possible for IPv6
# primaries since secondary IPs must be IPv4
5266 if self.op.secondary_ip is None:
5267 if self.primary_ip_family == netutils.IP6Address.family:
5268 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5269 " IPv4 address must be given as secondary",
5271 self.op.secondary_ip = primary_ip
5273 secondary_ip = self.op.secondary_ip
5274 if not netutils.IP4Address.IsValid(secondary_ip):
5275 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5276 " address" % secondary_ip, errors.ECODE_INVAL)
# Membership checks: adding requires absence, re-adding requires presence
5278 node_list = cfg.GetNodeList()
5279 if not self.op.readd and node in node_list:
5280 raise errors.OpPrereqError("Node %s is already in the configuration" %
5281 node, errors.ECODE_EXISTS)
5282 elif self.op.readd and node not in node_list:
5283 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5286 self.changed_primary_ip = False
# IP uniqueness: for a readd the node itself may keep (or change) its
# primary IP; any other collision with an existing node is fatal
5288 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5289 if self.op.readd and node == existing_node_name:
5290 if existing_node.secondary_ip != secondary_ip:
5291 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5292 " address configuration as before",
5294 if existing_node.primary_ip != primary_ip:
5295 self.changed_primary_ip = True
5299 if (existing_node.primary_ip == primary_ip or
5300 existing_node.secondary_ip == primary_ip or
5301 existing_node.primary_ip == secondary_ip or
5302 existing_node.secondary_ip == secondary_ip):
5303 raise errors.OpPrereqError("New node ip address(es) conflict with"
5304 " existing node %s" % existing_node.name,
5305 errors.ECODE_NOTUNIQUE)
5307 # After this 'if' block, None is no longer a valid value for the
5308 # _capable op attributes
# On readd, unspecified capability flags inherit the old node's values;
# otherwise they default to True
5310 old_node = self.cfg.GetNodeInfo(node)
5311 assert old_node is not None, "Can't retrieve locked node %s" % node
5312 for attr in self._NFLAGS:
5313 if getattr(self.op, attr) is None:
5314 setattr(self.op, attr, getattr(old_node, attr))
5316 for attr in self._NFLAGS:
5317 if getattr(self.op, attr) is None:
5318 setattr(self.op, attr, True)
5320 if self.op.readd and not self.op.vm_capable:
5321 pri, sec = cfg.GetNodeInstances(node)
5323 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5324 " flag set to false, but it already holds"
5325 " instances" % node,
5328 # check that the type of the node (single versus dual homed) is the
5329 # same as for the master
5330 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5331 master_singlehomed = myself.secondary_ip == myself.primary_ip
5332 newbie_singlehomed = secondary_ip == primary_ip
5333 if master_singlehomed != newbie_singlehomed:
5334 if master_singlehomed:
5335 raise errors.OpPrereqError("The master has no secondary ip but the"
5336 " new node has one",
5339 raise errors.OpPrereqError("The master has a secondary ip but the"
5340 " new node doesn't have one",
5343 # checks reachability
5344 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5345 raise errors.OpPrereqError("Node not reachable by ping",
5346 errors.ECODE_ENVIRON)
5348 if not newbie_singlehomed:
5349 # check reachability from my secondary ip to newbie's secondary ip
5350 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5351 source=myself.secondary_ip):
5352 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5353 " based ping to node daemon port",
5354 errors.ECODE_ENVIRON)
# Decide whether the node should immediately become a master candidate
5361 if self.op.master_capable:
5362 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5364 self.master_candidate = False
# Reuse the existing node object on readd, otherwise build a fresh one
5367 self.new_node = old_node
5369 node_group = cfg.LookupNodeGroup(self.op.group)
5370 self.new_node = objects.Node(name=node,
5371 primary_ip=primary_ip,
5372 secondary_ip=secondary_ip,
5373 master_candidate=self.master_candidate,
5374 offline=False, drained=False,
5377 if self.op.ndparams:
5378 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5380 def Exec(self, feedback_fn):
5381 """Adds the new node to the cluster.
5384 new_node = self.new_node
5385 node = new_node.name
5387 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5390 # We adding a new node so we assume it's powered
5391 new_node.powered = True
5393 # for re-adds, reset the offline/drained/master-candidate flags;
5394 # we need to reset here, otherwise offline would prevent RPC calls
5395 # later in the procedure; this also means that if the re-add
5396 # fails, we are left with a non-offlined, broken node
5398 new_node.drained = new_node.offline = False # pylint: disable=W0201
5399 self.LogInfo("Readding a node, the offline/drained flags were reset")
5400 # if we demote the node, we do cleanup later in the procedure
5401 new_node.master_candidate = self.master_candidate
5402 if self.changed_primary_ip:
5403 new_node.primary_ip = self.op.primary_ip
5405 # copy the master/vm_capable flags
5406 for attr in self._NFLAGS:
5407 setattr(new_node, attr, getattr(self.op, attr))
5409 # notify the user about any possible mc promotion
5410 if new_node.master_candidate:
5411 self.LogInfo("Node will be a master candidate")
5413 if self.op.ndparams:
5414 new_node.ndparams = self.op.ndparams
5416 new_node.ndparams = {}
5418 # check connectivity
5419 result = self.rpc.call_version([node])[node]
5420 result.Raise("Can't get version information from node %s" % node)
5421 if constants.PROTOCOL_VERSION == result.payload:
5422 logging.info("Communication to node %s fine, sw version %s match",
5423 node, result.payload)
5425 raise errors.OpExecError("Version mismatch master version %s,"
5426 " node version %s" %
5427 (constants.PROTOCOL_VERSION, result.payload))
5429 # Add node to our /etc/hosts, and add key to known_hosts
5430 if self.cfg.GetClusterInfo().modify_etc_hosts:
5431 master_node = self.cfg.GetMasterNode()
5432 result = self.rpc.call_etc_hosts_modify(master_node,
5433 constants.ETC_HOSTS_ADD,
5436 result.Raise("Can't update hosts file with new host data")
5438 if new_node.secondary_ip != new_node.primary_ip:
5439 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5442 node_verify_list = [self.cfg.GetMasterNode()]
5443 node_verify_param = {
5444 constants.NV_NODELIST: ([node], {}),
5445 # TODO: do a node-net-test as well?
5448 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5449 self.cfg.GetClusterName())
5450 for verifier in node_verify_list:
5451 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5452 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5454 for failed in nl_payload:
5455 feedback_fn("ssh/hostname verification failed"
5456 " (checking from %s): %s" %
5457 (verifier, nl_payload[failed]))
5458 raise errors.OpExecError("ssh/hostname verification failed")
5461 _RedistributeAncillaryFiles(self)
5462 self.context.ReaddNode(new_node)
5463 # make sure we redistribute the config
5464 self.cfg.Update(new_node, feedback_fn)
5465 # and make sure the new node will not have old files around
5466 if not new_node.master_candidate:
5467 result = self.rpc.call_node_demote_from_mc(new_node.name)
5468 msg = result.fail_msg
5470 self.LogWarning("Node failed to demote itself from master"
5471 " candidate status: %s" % msg)
5473 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5474 additional_vm=self.op.vm_capable)
5475 self.context.AddNode(new_node, self.proc.GetECId())
class LUNodeSetParams(LogicalUnit):
  """Modifies the parameters of a node.

  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
      to the node role (as _ROLE_*)
  @cvar _R2F: a dictionary from node role to tuples of flags
  @cvar _FLAGS: a list of attribute names corresponding to the flags

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
  _F2R = {
    (True, False, False): _ROLE_CANDIDATE,
    (False, True, False): _ROLE_DRAINED,
    (False, False, True): _ROLE_OFFLINE,
    (False, False, False): _ROLE_REGULAR,
    }
  _R2F = dict((v, k) for k, v in _F2R.items())
  _FLAGS = ["master_candidate", "drained", "offline"]

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
                self.op.master_capable, self.op.vm_capable,
                self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
                self.op.disk_state]
    if all_mods.count(None) == len(all_mods):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we might be demoting from MC
    self.might_demote = (self.op.master_candidate == False or
                         self.op.offline == True or
                         self.op.drained == True or
                         self.op.master_capable == False)

    if self.op.secondary_ip:
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                   " address" % self.op.secondary_ip,
                                   errors.ECODE_INVAL)

    self.lock_all = self.op.auto_promote and self.might_demote
    self.lock_instances = self.op.secondary_ip is not None

  def _InstanceFilter(self, instance):
    """Filter for getting affected instances.

    """
    return (instance.disk_template in constants.DTS_INT_MIRROR and
            self.op.node_name in instance.all_nodes)

  def ExpandNames(self):
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

    # Since modifying a node can have severe effects on currently running
    # operations the resource lock is at least acquired in shared mode
    self.needed_locks[locking.LEVEL_NODE_RES] = \
      self.needed_locks[locking.LEVEL_NODE]

    # Get node resource and instance locks in shared mode; they are not used
    # for anything but read-only access
    self.share_locks[locking.LEVEL_NODE_RES] = 1
    self.share_locks[locking.LEVEL_INSTANCE] = 1

    if self.lock_instances:
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if self.lock_instances:
      affected_instances = \
        self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)

      # Verify instance locks
      owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
      wanted_instances = frozenset(affected_instances.keys())
      if wanted_instances - owned_instances:
        raise errors.OpPrereqError("Instances affected by changing node %s's"
                                   " secondary IP address have changed since"
                                   " locks were acquired, wanted '%s', have"
                                   " '%s'; retry the operation" %
                                   (self.op.node_name,
                                    utils.CommaJoin(wanted_instances),
                                    utils.CommaJoin(owned_instances)),
                                   errors.ECODE_STATE)
    else:
      affected_instances = None

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via master-failover",
                                   errors.ECODE_INVAL)

    if self.op.master_candidate and not node.master_capable:
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
                                 " it a master candidate" % node.name,
                                 errors.ECODE_STATE)

    if self.op.vm_capable == False:
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
      if ipri or isec:
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
                                   " the vm_capable flag" % node.name,
                                   errors.ECODE_STATE)

    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto promote option to allow"
                                   " promotion", errors.ECODE_STATE)

    self.old_flags = old_flags = (node.master_candidate,
                                  node.drained, node.offline)
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
    self.old_role = old_role = self._F2R[old_flags]

    # Check for ineffective changes
    for attr in self._FLAGS:
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
        setattr(self.op, attr, None)

    # Past this point, any flag change to False means a transition
    # away from the respective state, as only real changes are kept

    # TODO: We might query the real power state if it supports OOB
    if _SupportsOob(self.cfg, node):
      if self.op.offline is False and not (node.powered or
                                           self.op.powered == True):
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
                                    " offline status can be reset") %
                                   self.op.node_name)
    elif self.op.powered is not None:
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
                                  " as it does not support out-of-band"
                                  " handling") % self.op.node_name)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.op.drained == False or self.op.offline == False or
        (self.op.master_capable and not node.master_capable)):
      if _DecideSelfPromotion(self):
        self.op.master_candidate = True
        self.LogInfo("Auto-promoting node to master candidate")

    # If we're no longer master capable, we'll demote ourselves from MC
    if self.op.master_capable == False and node.master_candidate:
      self.LogInfo("Demoting from master candidate")
      self.op.master_candidate = False

    # Compute new role
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
    if self.op.master_candidate:
      new_role = self._ROLE_CANDIDATE
    elif self.op.drained:
      new_role = self._ROLE_DRAINED
    elif self.op.offline:
      new_role = self._ROLE_OFFLINE
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
      # False is still in new flags, which means we're un-setting (the
      # offline/drained) flags
      new_role = self._ROLE_REGULAR
    else: # no new flags, nothing, keep old role
      new_role = old_role

    self.new_role = new_role

    if old_role == self._ROLE_OFFLINE and new_role != old_role:
      # Trying to transition out of offline status
      # TODO: Use standard RPC runner, but make sure it works when the node is
      # still marked offline
      result = rpc.BootstrapRunner().call_version([node.name])[node.name]
      if result.fail_msg:
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
                                   " to report its version: %s" %
                                   (node.name, result.fail_msg),
                                   errors.ECODE_STATE)
      else:
        self.LogWarning("Transitioning node from offline to online state"
                        " without using re-add. Please make sure the node"
                        " is healthy!")

    if self.op.secondary_ip:
      # Ok even without locking, because this can't be changed by any LU
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
      master_singlehomed = master.secondary_ip == master.primary_ip
      if master_singlehomed and self.op.secondary_ip:
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
                                   " homed cluster", errors.ECODE_INVAL)

      assert not (frozenset(affected_instances) -
                  self.owned_locks(locking.LEVEL_INSTANCE))

      if node.offline:
        if affected_instances:
          raise errors.OpPrereqError("Cannot change secondary IP address:"
                                     " offline node has instances (%s)"
                                     " configured to use it" %
                                     utils.CommaJoin(affected_instances.keys()))
      else:
        # On online nodes, check that no instances are running, and that
        # the node has the new ip and we can reach it.
        for instance in affected_instances.values():
          _CheckInstanceState(self, instance, INSTANCE_DOWN,
                              msg="cannot change secondary ip")

        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
        if master.name != node.name:
          # check reachability from master secondary ip to new secondary ip
          if not netutils.TcpPing(self.op.secondary_ip,
                                  constants.DEFAULT_NODED_PORT,
                                  source=master.secondary_ip):
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                       " based ping to node daemon port",
                                       errors.ECODE_ENVIRON)

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

    if self.op.hv_state:
      self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
                                                 self.node.hv_state_static)

    if self.op.disk_state:
      self.new_disk_state = \
        _MergeAndVerifyDiskState(self.op.disk_state,
                                 self.node.disk_state_static)

  def Exec(self, feedback_fn):
    """Modifies a node.

    """
    node = self.node
    old_role = self.old_role
    new_role = self.new_role

    result = []

    if self.op.ndparams:
      node.ndparams = self.new_ndparams

    if self.op.powered is not None:
      node.powered = self.op.powered

    if self.op.hv_state:
      node.hv_state_static = self.new_hv_state

    if self.op.disk_state:
      node.disk_state_static = self.new_disk_state

    for attr in ["master_capable", "vm_capable"]:
      val = getattr(self.op, attr)
      if val is not None:
        setattr(node, attr, val)
        result.append((attr, str(val)))

    if new_role != old_role:
      # Tell the node to demote itself, if no longer MC and not offline
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s", msg)

      new_flags = self._R2F[new_role]
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
        if of != nf:
          result.append((desc, str(nf)))
      (node.master_candidate, node.drained, node.offline) = new_flags

      # we locked all nodes, we adjust the CP before updating this node
      if self.lock_all:
        _AdjustCandidatePool(self, [node.name])

    if self.op.secondary_ip:
      node.secondary_ip = self.op.secondary_ip
      result.append(("secondary_ip", self.op.secondary_ip))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup if the mc
    # flag changed
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
      self.context.ReaddNode(node)

    return result
class LUNodePowercycle(NoHooksLU):
  """Powercycles a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    # powercycling the master is only allowed with the force flag
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Reboots a node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload
class LUClusterQuery(NoHooksLU):
  """Query cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    os_hvp = {}

    # Filter just for enabled hypervisors
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = {}
      for hv_name, hv_params in hv_dict.items():
        if hv_name in cluster.enabled_hypervisors:
          os_hvp[os_name][hv_name] = hv_params

    # Convert ip_family to ip_version
    primary_ip_version = constants.IP4_VERSION
    if cluster.primary_ip_family == netutils.IP6Address.family:
      primary_ip_version = constants.IP6_VERSION

    result = {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.primary_hypervisor,
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "ipolicy": cluster.ipolicy,
      "nicparams": cluster.nicparams,
      "ndparams": cluster.ndparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "master_netmask": cluster.master_netmask,
      "use_external_mip_script": cluster.use_external_mip_script,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      "reserved_lvs": cluster.reserved_lvs,
      "primary_ip_version": primary_ip_version,
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
      "hidden_os": cluster.hidden_os,
      "blacklisted_os": cluster.blacklisted_os,
      }

    return result
class LUClusterConfigQuery(NoHooksLU):
  """Return configuration values.

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause", "volume_group_name")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Dump a representation of the cluster config to the standard output.

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      elif field == "watcher_pause":
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
      elif field == "volume_group_name":
        entry = self.cfg.GetVGName()
      else:
        raise errors.ParameterError(field)
      values.append(entry)

    return values
class LUInstanceActivateDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
      _AssembleInstanceDisks(self, self.instance,
                             ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: False if the operation failed, otherwise a list of
      (host, instance_visible_name, node_visible_name)
      with the mapping from node devices to instance devices

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)

  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occured, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for idx, inst_disk in enumerate(disks):
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for idx, inst_disk in enumerate(disks):
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                       ignore_secondaries=force)
  if not disks_ok:
    # assembly failed: tear down whatever came up, then abort
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")
class LUInstanceDeactivateDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks

    """
    instance = self.instance
    # with --force, skip the is-instance-down safety check
    if self.op.force:
      _ShutdownInstanceDisks(self, instance)
    else:
      _SafeShutdownInstanceDisks(self, instance)
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance.

  This function checks if an instance is running, before calling
  _ShutdownInstanceDisks.

  """
  _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)
6147 def _ExpandCheckDisks(instance, disks):
6148 """Return the instance disks selected by the disks list
6150 @type disks: list of L{objects.Disk} or None
6151 @param disks: selected disks
6152 @rtype: list of L{objects.Disk}
6153 @return: selected instance disks to act on
6157 return instance.disks
6159 if not set(disks).issubset(instance.disks):
6160 raise errors.ProgrammerError("Can only act on disks belonging to the"
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If the ignore_primary is false, errors on the primary node are
  ignored.

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        # a failure is fatal on the primary (unless ignored) and on
        # secondaries that are not offline
        if ((node == instance.primary_node and not ignore_primary) or
            (node != instance.primary_node and not result.offline)):
          all_result = False

  return all_result
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function check if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raise an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  (_, _, (hv_info, )) = nodeinfo[node].payload

  free_mem = hv_info.get("memory_free", None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
  """Checks if nodes have enough free disk space in the all VGs.

  This function check if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raise an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type req_sizes: C{dict}
  @param req_sizes: the hash of vg and corresponding amount of disk in
      MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  for vg, req_size in req_sizes.items():
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
  """Checks if nodes have enough free disk space in the specified VG.

  This function check if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raise an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type vg: C{str}
  @param vg: the volume group to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    (_, (vg_info, ), _) = info.payload
    vg_free = vg_info.get("vg_free", None)
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node"
                                 " %s for vg %s, result was '%s'" %
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
    if requested > vg_free:
      raise errors.OpPrereqError("Not enough disk space on target node %s"
                                 " vg %s: required %d MiB, available %d MiB" %
                                 (node, vg, requested, vg_free),
                                 errors.ECODE_NORES)
def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
  """Checks if nodes have enough physical CPUs

  This function checks if all given nodes have the needed number of
  physical CPUs. In case any node has less CPUs or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type requested: C{int}
  @param requested: the minimum acceptable number of physical CPUs
  @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    (_, _, (hv_info, )) = info.payload
    num_cpus = hv_info.get("cpu_total", None)
    if not isinstance(num_cpus, int):
      raise errors.OpPrereqError("Can't compute the number of physical CPUs"
                                 " on node %s, result was '%s'" %
                                 (node, num_cpus), errors.ECODE_ENVIRON)
    if requested > num_cpus:
      raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
                                 "required" % (node, num_cpus, requested),
                                 errors.ECODE_NORES)
# Logical unit starting an instance (hooks path "instance-start").
# Lifecycle: ExpandNames locks the instance, CheckPrereq validates any
# hv/be parameter overrides and node state, Exec marks the instance up,
# activates its disks and issues the instance-start RPC.
# NOTE(review): numbered listing with gaps (e.g. 6334->6336, 6429->6432);
# several lines such as `else:` branches, `return` statements and closing
# parentheses are missing from this view — kept verbatim, comments only.
6326 class LUInstanceStartup(LogicalUnit):
6327 """Starts an instance.
6330 HPATH = "instance-start"
6331 HTYPE = constants.HTYPE_INSTANCE
# Normalise/validate any per-run backend-parameter overrides early.
6334 def CheckArguments(self):
6336 if self.op.beparams:
6337 # fill the beparams dict
6338 objects.UpgradeBeParams(self.op.beparams)
6339 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6341 def ExpandNames(self):
6342 self._ExpandAndLockInstance()
6344 def BuildHooksEnv(self):
6347 This runs on master, primary and secondary nodes of the instance.
6351 "FORCE": self.op.force,
6354 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6358 def BuildHooksNodes(self):
6359 """Build hooks nodes.
6362 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6365 def CheckPrereq(self):
6366 """Check prerequisites.
6368 This checks that the instance is in the cluster.
6371 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6372 assert self.instance is not None, \
6373 "Cannot retrieve locked instance %s" % self.op.instance_name
# Hypervisor parameter overrides are syntax-checked locally against the
# cluster-filled defaults before any remote validation.
6376 if self.op.hvparams:
6377 # check hypervisor parameter syntax (locally)
6378 cluster = self.cfg.GetClusterInfo()
6379 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6380 filled_hvp = cluster.FillHV(instance)
6381 filled_hvp.update(self.op.hvparams)
6382 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6383 hv_type.CheckParameterSyntax(filled_hvp)
6384 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6386 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6388 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
# NOTE(review): gap 6391->6393 — an else-branch apparently separates the
# offline-node path from the checks below; confirm against the full file.
6390 if self.primary_offline and self.op.ignore_offline_nodes:
6391 self.proc.LogWarning("Ignoring offline primary node")
6393 if self.op.hvparams or self.op.beparams:
6394 self.proc.LogWarning("Overridden parameters are ignored")
6396 _CheckNodeOnline(self, instance.primary_node)
6398 bep = self.cfg.GetClusterInfo().FillBE(instance)
6400 # check bridges existence
6401 _CheckInstanceBridgesExist(self, instance)
6403 remote_info = self.rpc.call_instance_info(instance.primary_node,
6405 instance.hypervisor)
6406 remote_info.Raise("Error checking node %s" % instance.primary_node,
6407 prereq=True, ecode=errors.ECODE_ENVIRON)
# Only verify free memory when the instance is not already running.
6408 if not remote_info.payload: # not running already
6409 _CheckNodeFreeMemory(self, instance.primary_node,
6410 "starting instance %s" % instance.name,
6411 bep[constants.BE_MAXMEM], instance.hypervisor)
6413 def Exec(self, feedback_fn):
6414 """Start the instance.
6417 instance = self.instance
6418 force = self.op.force
# Persist the "up" admin state unless the caller asked not to remember it.
6420 if not self.op.no_remember:
6421 self.cfg.MarkInstanceUp(instance.name)
6423 if self.primary_offline:
6424 assert self.op.ignore_offline_nodes
6425 self.proc.LogInfo("Primary node offline, marked instance as started")
6427 node_current = instance.primary_node
6429 _StartInstanceDisks(self, instance, force)
# On start failure the disks are shut down again before raising.
6432 self.rpc.call_instance_start(node_current,
6433 (instance, self.op.hvparams,
6435 self.op.startup_paused)
6436 msg = result.fail_msg
6438 _ShutdownInstanceDisks(self, instance)
6439 raise errors.OpExecError("Could not start instance: %s" % msg)
# Logical unit rebooting an instance (hooks path "instance-reboot").
# Soft/hard reboots of a running instance go through the reboot RPC;
# otherwise the instance is fully stopped and started again.
# NOTE(review): numbered listing with gaps (e.g. 6511->6513, 6520->6522);
# lines such as `else:` branches and a reboot-type argument are missing
# from this view — code kept verbatim, comments only.
6442 class LUInstanceReboot(LogicalUnit):
6443 """Reboot an instance.
6446 HPATH = "instance-reboot"
6447 HTYPE = constants.HTYPE_INSTANCE
6450 def ExpandNames(self):
6451 self._ExpandAndLockInstance()
6453 def BuildHooksEnv(self):
6456 This runs on master, primary and secondary nodes of the instance.
6460 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6461 "REBOOT_TYPE": self.op.reboot_type,
6462 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6465 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6469 def BuildHooksNodes(self):
6470 """Build hooks nodes.
6473 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6476 def CheckPrereq(self):
6477 """Check prerequisites.
6479 This checks that the instance is in the cluster.
6482 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6483 assert self.instance is not None, \
6484 "Cannot retrieve locked instance %s" % self.op.instance_name
6485 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6486 _CheckNodeOnline(self, instance.primary_node)
6488 # check bridges existence
6489 _CheckInstanceBridgesExist(self, instance)
6491 def Exec(self, feedback_fn):
6492 """Reboot the instance.
6495 instance = self.instance
6496 ignore_secondaries = self.op.ignore_secondaries
6497 reboot_type = self.op.reboot_type
# Query current run state first; it decides between in-place reboot and
# the stop-then-start path below.
6499 remote_info = self.rpc.call_instance_info(instance.primary_node,
6501 instance.hypervisor)
6502 remote_info.Raise("Error checking node %s" % instance.primary_node)
6503 instance_running = bool(remote_info.payload)
6505 node_current = instance.primary_node
# Running + soft/hard reboot: hand the reboot to the node daemon.
6507 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6508 constants.INSTANCE_REBOOT_HARD]:
6509 for disk in instance.disks:
6510 self.cfg.SetDiskID(disk, node_current)
6511 result = self.rpc.call_instance_reboot(node_current, instance,
6513 self.op.shutdown_timeout)
6514 result.Raise("Could not reboot instance")
# Fallback path: full shutdown (if running) then disk+instance restart.
6516 if instance_running:
6517 result = self.rpc.call_instance_shutdown(node_current, instance,
6518 self.op.shutdown_timeout)
6519 result.Raise("Could not shutdown instance for full reboot")
6520 _ShutdownInstanceDisks(self, instance)
6522 self.LogInfo("Instance %s was already stopped, starting now",
6524 _StartInstanceDisks(self, instance, ignore_secondaries)
6525 result = self.rpc.call_instance_start(node_current,
6526 (instance, None, None), False)
6527 msg = result.fail_msg
6529 _ShutdownInstanceDisks(self, instance)
6530 raise errors.OpExecError("Could not start instance for"
6531 " full reboot: %s" % msg)
6533 self.cfg.MarkInstanceUp(instance.name)
# Logical unit stopping an instance (hooks path "instance-stop").
# Exec marks the instance down (unless no_remember), issues the shutdown
# RPC and then shuts down the instance's disks.
# NOTE(review): numbered listing with gaps (e.g. 6580->6582, 6600->6602);
# intermediate lines (returns, else-branches) are missing from this view —
# code kept verbatim, comments only.
6536 class LUInstanceShutdown(LogicalUnit):
6537 """Shutdown an instance.
6540 HPATH = "instance-stop"
6541 HTYPE = constants.HTYPE_INSTANCE
6544 def ExpandNames(self):
6545 self._ExpandAndLockInstance()
6547 def BuildHooksEnv(self):
6550 This runs on master, primary and secondary nodes of the instance.
6553 env = _BuildInstanceHookEnvByObject(self, self.instance)
6554 env["TIMEOUT"] = self.op.timeout
6557 def BuildHooksNodes(self):
6558 """Build hooks nodes.
6561 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6564 def CheckPrereq(self):
6565 """Check prerequisites.
6567 This checks that the instance is in the cluster.
6570 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6571 assert self.instance is not None, \
6572 "Cannot retrieve locked instance %s" % self.op.instance_name
6574 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6576 self.primary_offline = \
6577 self.cfg.GetNodeInfo(self.instance.primary_node).offline
# An offline primary is tolerated only with ignore_offline_nodes set.
6579 if self.primary_offline and self.op.ignore_offline_nodes:
6580 self.proc.LogWarning("Ignoring offline primary node")
6582 _CheckNodeOnline(self, self.instance.primary_node)
6584 def Exec(self, feedback_fn):
6585 """Shutdown the instance.
6588 instance = self.instance
6589 node_current = instance.primary_node
6590 timeout = self.op.timeout
# Persist the "down" admin state unless the caller asked not to remember.
6592 if not self.op.no_remember:
6593 self.cfg.MarkInstanceDown(instance.name)
6595 if self.primary_offline:
6596 assert self.op.ignore_offline_nodes
6597 self.proc.LogInfo("Primary node offline, marked instance as stopped")
# A failed shutdown RPC is only warned about here; the disks are still
# shut down afterwards.
6599 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6600 msg = result.fail_msg
6602 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6604 _ShutdownInstanceDisks(self, instance)
# Logical unit reinstalling an instance's OS (hooks path
# "instance-reinstall"). Requires the instance to be down; optionally
# switches the OS type and applies OS-parameter overrides, then runs the
# OS create scripts with the disks activated.
# NOTE(review): numbered listing with gaps (e.g. 6654->6656, 6658->6660);
# an else-branch apparently sits between 6658 and 6660 — confirm against
# the full file. Code kept verbatim, comments only.
6607 class LUInstanceReinstall(LogicalUnit):
6608 """Reinstall an instance.
6611 HPATH = "instance-reinstall"
6612 HTYPE = constants.HTYPE_INSTANCE
6615 def ExpandNames(self):
6616 self._ExpandAndLockInstance()
6618 def BuildHooksEnv(self):
6621 This runs on master, primary and secondary nodes of the instance.
6624 return _BuildInstanceHookEnvByObject(self, self.instance)
6626 def BuildHooksNodes(self):
6627 """Build hooks nodes.
6630 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6633 def CheckPrereq(self):
6634 """Check prerequisites.
6636 This checks that the instance is in the cluster and is not running.
6639 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6640 assert instance is not None, \
6641 "Cannot retrieve locked instance %s" % self.op.instance_name
# All nodes of the instance must be reachable for a reinstall.
6642 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6643 " offline, cannot reinstall")
6644 for node in instance.secondary_nodes:
6645 _CheckNodeOnline(self, node, "Instance secondary node offline,"
6646 " cannot reinstall")
6648 if instance.disk_template == constants.DT_DISKLESS:
6649 raise errors.OpPrereqError("Instance '%s' has no disks" %
6650 self.op.instance_name,
6652 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
# When an OS change is requested, the new OS must exist on the primary.
6654 if self.op.os_type is not None:
6656 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6657 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6658 instance_os = self.op.os_type
6660 instance_os = instance.os
6662 nodelist = list(instance.all_nodes)
6664 if self.op.osparams:
6665 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6666 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6667 self.os_inst = i_osdict # the new dict (without defaults)
6671 self.instance = instance
6673 def Exec(self, feedback_fn):
6674 """Reinstall the instance.
6677 inst = self.instance
6679 if self.op.os_type is not None:
6680 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6681 inst.os = self.op.os_type
6682 # Write to configuration
6683 self.cfg.Update(inst, feedback_fn)
# Disks must be active for the OS create scripts; they are shut down
# again afterwards (line 6695).
6685 _StartInstanceDisks(self, inst, None)
6687 feedback_fn("Running the instance OS create scripts...")
6688 # FIXME: pass debug option from opcode to backend
6689 result = self.rpc.call_instance_os_add(inst.primary_node,
6690 (inst, self.os_inst), True,
6691 self.op.debug_level)
6692 result.Raise("Could not install OS for instance %s on node %s" %
6693 (inst.name, inst.primary_node))
6695 _ShutdownInstanceDisks(self, inst)
# Logical unit recreating an instance's (missing) disks, optionally on a
# new set of nodes (hooks path "instance-recreate-disks"). Partial disk
# recreation and node changes are mutually exclusive.
# NOTE(review): numbered listing with gaps (e.g. 6712->6714, 6765->6767,
# 6830->6832); several `if`/`else:` framing lines are missing from this
# view — code kept verbatim, comments only.
6698 class LUInstanceRecreateDisks(LogicalUnit):
6699 """Recreate an instance's missing disks.
6702 HPATH = "instance-recreate-disks"
6703 HTYPE = constants.HTYPE_INSTANCE
6706 def CheckArguments(self):
6707 # normalise the disk list
6708 self.op.disks = sorted(frozenset(self.op.disks))
6710 def ExpandNames(self):
6711 self._ExpandAndLockInstance()
6712 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
# NOTE(review): 6714/6715 vs 6717 look like the two arms of a missing
# `if self.op.nodes:` / `else:` — confirm against the full file.
6714 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6715 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6717 self.needed_locks[locking.LEVEL_NODE] = []
6719 def DeclareLocks(self, level):
6720 if level == locking.LEVEL_NODE:
6721 # if we replace the nodes, we only need to lock the old primary,
6722 # otherwise we need to lock all nodes for disk re-creation
6723 primary_only = bool(self.op.nodes)
6724 self._LockInstancesNodes(primary_only=primary_only)
6725 elif level == locking.LEVEL_NODE_RES:
# Resource locks mirror the node locks.
6727 self.needed_locks[locking.LEVEL_NODE_RES] = \
6728 self.needed_locks[locking.LEVEL_NODE][:]
6730 def BuildHooksEnv(self):
6733 This runs on master, primary and secondary nodes of the instance.
6736 return _BuildInstanceHookEnvByObject(self, self.instance)
6738 def BuildHooksNodes(self):
6739 """Build hooks nodes.
6742 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6745 def CheckPrereq(self):
6746 """Check prerequisites.
6748 This checks that the instance is in the cluster and is not running.
6751 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6752 assert instance is not None, \
6753 "Cannot retrieve locked instance %s" % self.op.instance_name
# When replacement nodes are given, their count must match exactly.
6755 if len(self.op.nodes) != len(instance.all_nodes):
6756 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6757 " %d replacement nodes were specified" %
6758 (instance.name, len(instance.all_nodes),
6759 len(self.op.nodes)),
6761 assert instance.disk_template != constants.DT_DRBD8 or \
6762 len(self.op.nodes) == 2
6763 assert instance.disk_template != constants.DT_PLAIN or \
6764 len(self.op.nodes) == 1
6765 primary_node = self.op.nodes[0]
6767 primary_node = instance.primary_node
6768 _CheckNodeOnline(self, primary_node)
6770 if instance.disk_template == constants.DT_DISKLESS:
6771 raise errors.OpPrereqError("Instance '%s' has no disks" %
6772 self.op.instance_name, errors.ECODE_INVAL)
6773 # if we replace nodes *and* the old primary is offline, we don't
6775 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
6776 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
6777 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6778 if not (self.op.nodes and old_pnode.offline):
6779 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
6780 msg="cannot recreate disks")
# Empty disk list means "all disks".
6782 if not self.op.disks:
6783 self.op.disks = range(len(instance.disks))
6785 for idx in self.op.disks:
6786 if idx >= len(instance.disks):
6787 raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6789 if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6790 raise errors.OpPrereqError("Can't recreate disks partially and"
6791 " change the nodes at the same time",
6793 self.instance = instance
6795 def Exec(self, feedback_fn):
6796 """Recreate the disks.
6799 instance = self.instance
6801 assert (self.owned_locks(locking.LEVEL_NODE) ==
6802 self.owned_locks(locking.LEVEL_NODE_RES))
6805 mods = [] # keeps track of needed logical_id changes
6807 for idx, disk in enumerate(instance.disks):
6808 if idx not in self.op.disks: # disk idx has not been passed in
6811 # update secondaries for disks, if needed
6813 if disk.dev_type == constants.LD_DRBD8:
6814 # need to update the nodes and minors
6815 assert len(self.op.nodes) == 2
6816 assert len(disk.logical_id) == 6 # otherwise disk internals
# DRBD logical_id: (nodeA, nodeB, port, minorA, minorB, secret);
# port and secret are preserved, nodes and minors are replaced.
6818 (_, _, old_port, _, _, old_secret) = disk.logical_id
6819 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6820 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6821 new_minors[0], new_minors[1], old_secret)
6822 assert len(disk.logical_id) == len(new_id)
6823 mods.append((idx, new_id))
6825 # now that we have passed all asserts above, we can apply the mods
6826 # in a single run (to avoid partial changes)
6827 for idx, new_id in mods:
6828 instance.disks[idx].logical_id = new_id
6830 # change primary node, if needed
6832 instance.primary_node = self.op.nodes[0]
6833 self.LogWarning("Changing the instance's nodes, you will have to"
6834 " remove any disks left on the older nodes manually")
6837 self.cfg.Update(instance, feedback_fn)
6839 _CreateDisks(self, instance, to_skip=to_skip)
# Logical unit renaming an instance (hooks path "instance-rename").
# Optionally resolves/validates the new name and checks its IP, renames
# the instance in the config and locks, renames file-based storage, and
# runs the OS rename script with disks activated.
# NOTE(review): numbered listing with gaps (e.g. 6894->6896, 6924->6926);
# closing arguments and else/try framing lines are missing from this view —
# code kept verbatim, comments only.
6842 class LUInstanceRename(LogicalUnit):
6843 """Rename an instance.
6846 HPATH = "instance-rename"
6847 HTYPE = constants.HTYPE_INSTANCE
6849 def CheckArguments(self):
# ip_check relies on name resolution, hence requires name_check too.
6853 if self.op.ip_check and not self.op.name_check:
6854 # TODO: make the ip check more flexible and not depend on the name check
6855 raise errors.OpPrereqError("IP address check requires a name check",
6858 def BuildHooksEnv(self):
6861 This runs on master, primary and secondary nodes of the instance.
6864 env = _BuildInstanceHookEnvByObject(self, self.instance)
6865 env["INSTANCE_NEW_NAME"] = self.op.new_name
6868 def BuildHooksNodes(self):
6869 """Build hooks nodes.
6872 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6875 def CheckPrereq(self):
6876 """Check prerequisites.
6878 This checks that the instance is in the cluster and is not running.
6881 self.op.instance_name = _ExpandInstanceName(self.cfg,
6882 self.op.instance_name)
6883 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6884 assert instance is not None
6885 _CheckNodeOnline(self, instance.primary_node)
6886 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
6887 msg="cannot rename")
6888 self.instance = instance
6890 new_name = self.op.new_name
# Resolve the requested name and make sure the resolution still matches
# what the user asked for.
6891 if self.op.name_check:
6892 hostname = netutils.GetHostname(name=new_name)
6893 if hostname.name != new_name:
6894 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6896 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6897 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6898 " same as given hostname '%s'") %
6899 (hostname.name, self.op.new_name),
6901 new_name = self.op.new_name = hostname.name
# The target IP must not answer pings, otherwise it is already in use.
6902 if (self.op.ip_check and
6903 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6904 raise errors.OpPrereqError("IP %s of instance %s already in use" %
6905 (hostname.ip, new_name),
6906 errors.ECODE_NOTUNIQUE)
6908 instance_list = self.cfg.GetInstanceList()
6909 if new_name in instance_list and new_name != instance.name:
6910 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6911 new_name, errors.ECODE_EXISTS)
6913 def Exec(self, feedback_fn):
6914 """Rename the instance.
6917 inst = self.instance
6918 old_name = inst.name
# File-based disks live under a directory named after the instance, so
# that directory must be renamed along with the instance.
6920 rename_file_storage = False
6921 if (inst.disk_template in constants.DTS_FILEBASED and
6922 self.op.new_name != inst.name):
6923 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6924 rename_file_storage = True
6926 self.cfg.RenameInstance(inst.name, self.op.new_name)
6927 # Change the instance lock. This is definitely safe while we hold the BGL.
6928 # Otherwise the new lock would have to be added in acquired mode.
6930 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6931 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6933 # re-read the instance from the configuration after rename
6934 inst = self.cfg.GetInstanceInfo(self.op.new_name)
6936 if rename_file_storage:
6937 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6938 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6939 old_file_storage_dir,
6940 new_file_storage_dir)
6941 result.Raise("Could not rename on node %s directory '%s' to '%s'"
6942 " (but the instance has been renamed in Ganeti)" %
6943 (inst.primary_node, old_file_storage_dir,
6944 new_file_storage_dir))
6946 _StartInstanceDisks(self, inst, None)
# A failing OS rename script only produces a warning: the config rename
# has already happened and is not rolled back.
6948 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6949 old_name, self.op.debug_level)
6950 msg = result.fail_msg
6952 msg = ("Could not run OS rename script for instance %s on node %s"
6953 " (but the instance has been renamed in Ganeti): %s" %
6954 (inst.name, inst.primary_node, msg))
6955 self.proc.LogWarning(msg)
6957 _ShutdownInstanceDisks(self, inst)
# Logical unit removing an instance (hooks path "instance-remove").
# Shuts the instance down (optionally ignoring failures) and delegates
# disk and config removal to _RemoveInstance.
# NOTE(review): numbered listing with gaps (e.g. 7025->7027, 7027->7029);
# else-branch and message-continuation lines are missing from this view —
# code kept verbatim, comments only.
6962 class LUInstanceRemove(LogicalUnit):
6963 """Remove an instance.
6966 HPATH = "instance-remove"
6967 HTYPE = constants.HTYPE_INSTANCE
6970 def ExpandNames(self):
6971 self._ExpandAndLockInstance()
6972 self.needed_locks[locking.LEVEL_NODE] = []
6973 self.needed_locks[locking.LEVEL_NODE_RES] = []
6974 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6976 def DeclareLocks(self, level):
6977 if level == locking.LEVEL_NODE:
6978 self._LockInstancesNodes()
6979 elif level == locking.LEVEL_NODE_RES:
# Resource locks mirror the node locks.
6981 self.needed_locks[locking.LEVEL_NODE_RES] = \
6982 self.needed_locks[locking.LEVEL_NODE][:]
6984 def BuildHooksEnv(self):
6987 This runs on master, primary and secondary nodes of the instance.
6990 env = _BuildInstanceHookEnvByObject(self, self.instance)
6991 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6994 def BuildHooksNodes(self):
6995 """Build hooks nodes.
# Pre-hooks run only on the master; post-hooks also on all of the
# instance's nodes.
6998 nl = [self.cfg.GetMasterNode()]
6999 nl_post = list(self.instance.all_nodes) + nl
7000 return (nl, nl_post)
7002 def CheckPrereq(self):
7003 """Check prerequisites.
7005 This checks that the instance is in the cluster.
7008 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7009 assert self.instance is not None, \
7010 "Cannot retrieve locked instance %s" % self.op.instance_name
7012 def Exec(self, feedback_fn):
7013 """Remove the instance.
7016 instance = self.instance
7017 logging.info("Shutting down instance %s on node %s",
7018 instance.name, instance.primary_node)
7020 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7021 self.op.shutdown_timeout)
7022 msg = result.fail_msg
# With ignore_failures a failed shutdown degrades to a warning.
7024 if self.op.ignore_failures:
7025 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7027 raise errors.OpExecError("Could not shutdown instance %s on"
7029 (instance.name, instance.primary_node, msg))
7031 assert (self.owned_locks(locking.LEVEL_NODE) ==
7032 self.owned_locks(locking.LEVEL_NODE_RES))
7033 assert not (set(instance.all_nodes) -
7034 self.owned_locks(locking.LEVEL_NODE)), \
7035 "Not owning correct locks"
7037 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
# Shared helper removing an instance's disks and its cluster-config
# entry, then scheduling removal of the instance lock. With
# ignore_failures, a disk-removal failure is reported via feedback_fn
# instead of raising.
# NOTE(review): numbered listing with gaps (7041->7044, 7049->7051);
# docstring/blank lines are missing from this view — code kept verbatim.
7040 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7041 """Utility function to remove an instance.
7044 logging.info("Removing block devices for instance %s", instance.name)
7046 if not _RemoveDisks(lu, instance):
7047 if not ignore_failures:
7048 raise errors.OpExecError("Can't remove instance's disks")
7049 feedback_fn("Warning: can't remove instance's disks")
7051 logging.info("Removing instance %s out of cluster config", instance.name)
7053 lu.cfg.RemoveInstance(instance.name)
7055 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7056 "Instance lock removal conflict"
7058 # Remove lock for the instance
7059 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
# Thin logical unit for instance queries: all work is delegated to an
# _InstanceQuery helper built from the opcode's names/fields/locking.
7062 class LUInstanceQuery(NoHooksLU):
7063 """Logical unit for querying instances.
7066 # pylint: disable=W0142
7069 def CheckArguments(self):
7070 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7071 self.op.output_fields, self.op.use_locking)
7073 def ExpandNames(self):
7074 self.iq.ExpandNames(self)
7076 def DeclareLocks(self, level):
7077 self.iq.DeclareLocks(self, level)
7079 def Exec(self, feedback_fn):
7080 return self.iq.OldStyleQuery(self)
# Logical unit failing over an instance (hooks path "instance-failover").
# The actual work is done by a TLMigrateInstance tasklet; this class only
# sets up locking and the hooks environment.
# NOTE(review): numbered listing with gaps (e.g. 7109->7112, 7121->7123);
# tasklet keyword arguments and else-branch lines are missing from this
# view — code kept verbatim, comments only.
7083 class LUInstanceFailover(LogicalUnit):
7084 """Failover an instance.
7087 HPATH = "instance-failover"
7088 HTYPE = constants.HTYPE_INSTANCE
7091 def CheckArguments(self):
7092 """Check the arguments.
7095 self.iallocator = getattr(self.op, "iallocator", None)
7096 self.target_node = getattr(self.op, "target_node", None)
7098 def ExpandNames(self):
7099 self._ExpandAndLockInstance()
7101 if self.op.target_node is not None:
7102 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7104 self.needed_locks[locking.LEVEL_NODE] = []
7105 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7107 ignore_consistency = self.op.ignore_consistency
7108 shutdown_timeout = self.op.shutdown_timeout
# The tasklet performs the actual failover in Exec.
7109 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7112 ignore_consistency=ignore_consistency,
7113 shutdown_timeout=shutdown_timeout)
7114 self.tasklets = [self._migrater]
7116 def DeclareLocks(self, level):
7117 if level == locking.LEVEL_NODE:
7118 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
# Externally-mirrored templates can move anywhere: lock all nodes when
# no target was given, otherwise just primary and target.
7119 if instance.disk_template in constants.DTS_EXT_MIRROR:
7120 if self.op.target_node is None:
7121 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7123 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7124 self.op.target_node]
7125 del self.recalculate_locks[locking.LEVEL_NODE]
7127 self._LockInstancesNodes()
7129 def BuildHooksEnv(self):
7132 This runs on master, primary and secondary nodes of the instance.
7135 instance = self._migrater.instance
7136 source_node = instance.primary_node
7137 target_node = self.op.target_node
7139 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7140 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7141 "OLD_PRIMARY": source_node,
7142 "NEW_PRIMARY": target_node,
# Secondary-related variables only make sense for internally-mirrored
# (e.g. DRBD) templates; otherwise they are set to empty strings.
7145 if instance.disk_template in constants.DTS_INT_MIRROR:
7146 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7147 env["NEW_SECONDARY"] = source_node
7149 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7151 env.update(_BuildInstanceHookEnvByObject(self, instance))
7155 def BuildHooksNodes(self):
7156 """Build hooks nodes.
7159 instance = self._migrater.instance
7160 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7161 return (nl, nl + [instance.primary_node])
# Logical unit live-migrating an instance (hooks path "instance-migrate").
# As with failover, a TLMigrateInstance tasklet does the actual work;
# locking mirrors LUInstanceFailover.DeclareLocks.
# NOTE(review): numbered listing with gaps (e.g. 7185->7187, 7195->7197);
# tasklet keyword arguments and else-branch lines are missing from this
# view — code kept verbatim, comments only.
7164 class LUInstanceMigrate(LogicalUnit):
7165 """Migrate an instance.
7167 This is migration without shutting down, compared to the failover,
7168 which is done with shutdown.
7171 HPATH = "instance-migrate"
7172 HTYPE = constants.HTYPE_INSTANCE
7175 def ExpandNames(self):
7176 self._ExpandAndLockInstance()
7178 if self.op.target_node is not None:
7179 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7181 self.needed_locks[locking.LEVEL_NODE] = []
7182 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
# The tasklet performs the migration; fallback allows degrading to a
# failover when migration is not possible.
7184 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7185 cleanup=self.op.cleanup,
7187 fallback=self.op.allow_failover)
7188 self.tasklets = [self._migrater]
7190 def DeclareLocks(self, level):
7191 if level == locking.LEVEL_NODE:
7192 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
# Same locking strategy as LUInstanceFailover: all nodes for externally
# mirrored templates without a target, primary+target otherwise.
7193 if instance.disk_template in constants.DTS_EXT_MIRROR:
7194 if self.op.target_node is None:
7195 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7197 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7198 self.op.target_node]
7199 del self.recalculate_locks[locking.LEVEL_NODE]
7201 self._LockInstancesNodes()
7203 def BuildHooksEnv(self):
7206 This runs on master, primary and secondary nodes of the instance.
7209 instance = self._migrater.instance
7210 source_node = instance.primary_node
7211 target_node = self.op.target_node
7212 env = _BuildInstanceHookEnvByObject(self, instance)
7214 "MIGRATE_LIVE": self._migrater.live,
7215 "MIGRATE_CLEANUP": self.op.cleanup,
7216 "OLD_PRIMARY": source_node,
7217 "NEW_PRIMARY": target_node,
# Secondary variables apply only to internally-mirrored templates; note
# they are None here, while LUInstanceFailover uses "" — per the listing.
7220 if instance.disk_template in constants.DTS_INT_MIRROR:
7221 env["OLD_SECONDARY"] = target_node
7222 env["NEW_SECONDARY"] = source_node
7224 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7228 def BuildHooksNodes(self):
7229 """Build hooks nodes.
7232 instance = self._migrater.instance
7233 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7234 return (nl, nl + [instance.primary_node])
# Logical unit moving an instance between nodes by full data copy (hooks
# path "instance-move"): shut down on the source, create disks on the
# target, export each disk's data across, switch the primary node, drop
# the old disks and restart if the instance was administratively up.
# NOTE(review): numbered listing with gaps (e.g. 7359->7361, 7392->7396);
# try/else framing, `errs = []` initialisation and raise continuations
# are missing from this view — code kept verbatim, comments only.
7237 class LUInstanceMove(LogicalUnit):
7238 """Move an instance by data-copying.
7241 HPATH = "instance-move"
7242 HTYPE = constants.HTYPE_INSTANCE
7245 def ExpandNames(self):
7246 self._ExpandAndLockInstance()
7247 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7248 self.op.target_node = target_node
7249 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7250 self.needed_locks[locking.LEVEL_NODE_RES] = []
7251 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7253 def DeclareLocks(self, level):
7254 if level == locking.LEVEL_NODE:
7255 self._LockInstancesNodes(primary_only=True)
7256 elif level == locking.LEVEL_NODE_RES:
# Resource locks mirror the node locks.
7258 self.needed_locks[locking.LEVEL_NODE_RES] = \
7259 self.needed_locks[locking.LEVEL_NODE][:]
7261 def BuildHooksEnv(self):
7264 This runs on master, primary and secondary nodes of the instance.
7268 "TARGET_NODE": self.op.target_node,
7269 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7271 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7274 def BuildHooksNodes(self):
7275 """Build hooks nodes.
# Hooks run on master, source primary and the move target.
7279 self.cfg.GetMasterNode(),
7280 self.instance.primary_node,
7281 self.op.target_node,
7285 def CheckPrereq(self):
7286 """Check prerequisites.
7288 This checks that the instance is in the cluster.
7291 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7292 assert self.instance is not None, \
7293 "Cannot retrieve locked instance %s" % self.op.instance_name
7295 node = self.cfg.GetNodeInfo(self.op.target_node)
7296 assert node is not None, \
7297 "Cannot retrieve locked node %s" % self.op.target_node
7299 self.target_node = target_node = node.name
7301 if target_node == instance.primary_node:
7302 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7303 (instance.name, target_node),
7306 bep = self.cfg.GetClusterInfo().FillBE(instance)
# Only plain LVs and file-based disks can be copied this way.
7308 for idx, dsk in enumerate(instance.disks):
7309 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7310 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7311 " cannot copy" % idx, errors.ECODE_STATE)
7313 _CheckNodeOnline(self, target_node)
7314 _CheckNodeNotDrained(self, target_node)
7315 _CheckNodeVmCapable(self, target_node)
# Memory is only checked when the instance will be restarted afterwards.
7317 if instance.admin_state == constants.ADMINST_UP:
7318 # check memory requirements on the secondary node
7319 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7320 instance.name, bep[constants.BE_MAXMEM],
7321 instance.hypervisor)
7323 self.LogInfo("Not checking memory on the secondary node as"
7324 " instance will not be started")
7326 # check bridge existance
7327 _CheckInstanceBridgesExist(self, instance, node=target_node)
7329 def Exec(self, feedback_fn):
7330 """Move an instance.
7332 The move is done by shutting it down on its present node, copying
7333 the data over (slow) and starting it on the new node.
7336 instance = self.instance
7338 source_node = instance.primary_node
7339 target_node = self.target_node
7341 self.LogInfo("Shutting down instance %s on source node %s",
7342 instance.name, source_node)
7344 assert (self.owned_locks(locking.LEVEL_NODE) ==
7345 self.owned_locks(locking.LEVEL_NODE_RES))
7347 result = self.rpc.call_instance_shutdown(source_node, instance,
7348 self.op.shutdown_timeout)
7349 msg = result.fail_msg
# ignore_consistency downgrades a failed source shutdown to a warning.
7351 if self.op.ignore_consistency:
7352 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7353 " Proceeding anyway. Please make sure node"
7354 " %s is down. Error details: %s",
7355 instance.name, source_node, source_node, msg)
7357 raise errors.OpExecError("Could not shutdown instance %s on"
7359 (instance.name, source_node, msg))
7361 # create the target disks
# On creation failure, the partially-created target disks and any
# reserved DRBD minors are released before re-raising.
7363 _CreateDisks(self, instance, target_node=target_node)
7364 except errors.OpExecError:
7365 self.LogWarning("Device creation failed, reverting...")
7367 _RemoveDisks(self, instance, target_node=target_node)
7369 self.cfg.ReleaseDRBDMinors(instance.name)
7372 cluster_name = self.cfg.GetClusterInfo().cluster_name
7375 # activate, get path, copy the data over
7376 for idx, disk in enumerate(instance.disks):
7377 self.LogInfo("Copying data for disk %d", idx)
7378 result = self.rpc.call_blockdev_assemble(target_node, disk,
7379 instance.name, True, idx)
7381 self.LogWarning("Can't assemble newly created disk %d: %s",
7382 idx, result.fail_msg)
7383 errs.append(result.fail_msg)
7385 dev_path = result.payload
7386 result = self.rpc.call_blockdev_export(source_node, disk,
7387 target_node, dev_path,
7390 self.LogWarning("Can't copy data over for disk %d: %s",
7391 idx, result.fail_msg)
7392 errs.append(result.fail_msg)
# Any accumulated per-disk errors abort the move: target disks are
# removed and DRBD minors released before raising.
7396 self.LogWarning("Some disks failed to copy, aborting")
7398 _RemoveDisks(self, instance, target_node=target_node)
7400 self.cfg.ReleaseDRBDMinors(instance.name)
7401 raise errors.OpExecError("Errors during disk copy: %s" %
# Point of no return: config now says the instance lives on the target.
7404 instance.primary_node = target_node
7405 self.cfg.Update(instance, feedback_fn)
7407 self.LogInfo("Removing the disks on the original node")
7408 _RemoveDisks(self, instance, target_node=source_node)
7410 # Only start the instance if it's marked as up
7411 if instance.admin_state == constants.ADMINST_UP:
7412 self.LogInfo("Starting instance %s on node %s",
7413 instance.name, target_node)
7415 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7416 ignore_secondaries=True)
7418 _ShutdownInstanceDisks(self, instance)
7419 raise errors.OpExecError("Can't activate the instance's disks")
7421 result = self.rpc.call_instance_start(target_node,
7422 (instance, None, None), False)
7423 msg = result.fail_msg
7425 _ShutdownInstanceDisks(self, instance)
7426 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7427 (instance.name, target_node, msg))
7430 class LUNodeMigrate(LogicalUnit):
7431 """Migrate all instances from a node.
# NOTE(review): the embedded numbering in this listing has gaps; some
# original lines (short statements, docstring terminators) are not visible.
7434 HPATH = "node-migrate"
7435 HTYPE = constants.HTYPE_NODE
7438 def CheckArguments(self):
7441 def ExpandNames(self):
7442 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
# Share all locks: this LU only enqueues per-instance migration jobs,
# it does not modify the node itself.
7444 self.share_locks = _ShareAll()
7445 self.needed_locks = {
7446 locking.LEVEL_NODE: [self.op.node_name],
7449 def BuildHooksEnv(self):
7452 This runs on the master, the primary and all the secondaries.
7456 "NODE_NAME": self.op.node_name,
7459 def BuildHooksNodes(self):
7460 """Build hooks nodes.
7463 nl = [self.cfg.GetMasterNode()]
7466 def CheckPrereq(self):
7469 def Exec(self, feedback_fn):
7470 # Prepare jobs for migration instances
# One OpInstanceMigrate opcode per instance whose primary is this node,
# all forwarding the iallocator/target_node options from our own opcode.
7472 [opcodes.OpInstanceMigrate(instance_name=inst.name,
7475 iallocator=self.op.iallocator,
7476 target_node=self.op.target_node)]
7477 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7480 # TODO: Run iallocator in this opcode and pass correct placement options to
7481 # OpInstanceMigrate. Since other jobs can modify the cluster between
7482 # running the iallocator and the actual migration, a good consistency model
7483 # will have to be found.
# Sanity check: at this point we must hold exactly the node lock for the
# node being evacuated, and nothing else at this level.
7485 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7486 frozenset([self.op.node_name]))
7488 return ResultWithJobs(jobs)
7491 class TLMigrateInstance(Tasklet):
7492 """Tasklet class for instance migration.
7495 @ivar live: whether the migration will be done live or non-live;
7496 this variable is initialized only after CheckPrereq has run
7497 @type cleanup: boolean
7498 @ivar cleanup: Whether we cleanup from a failed migration
7499 @type iallocator: string
7500 @ivar iallocator: The iallocator used to determine target_node
7501 @type target_node: string
7502 @ivar target_node: If given, the target_node to reallocate the instance to
7503 @type failover: boolean
7504 @ivar failover: Whether operation results in failover or migration
7505 @type fallback: boolean
7506 @ivar fallback: Whether fallback to failover is allowed if migration not
7508 @type ignore_consistency: boolean
7509 @ivar ignore_consistency: Whether we should ignore consistency between source
7511 @type shutdown_timeout: int
7512 @ivar shutdown_timeout: In case of failover timeout of the shutdown
# How often _ExecMigration polls the hypervisor for migration status, and
# how often (at most) it emits a progress feedback line while polling.
7517 _MIGRATION_POLL_INTERVAL = 1 # seconds
7518 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7520 def __init__(self, lu, instance_name, cleanup=False,
7521 failover=False, fallback=False,
7522 ignore_consistency=False,
7523 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
7524 """Initializes this class.
7527 Tasklet.__init__(self, lu)
# All parameters are stored verbatim on the tasklet; see the class
# docstring for their meaning.
7530 self.instance_name = instance_name
7531 self.cleanup = cleanup
7532 self.live = False # will be overridden later, in CheckPrereq
7533 self.failover = failover
7534 self.fallback = fallback
7535 self.ignore_consistency = ignore_consistency
7536 self.shutdown_timeout = shutdown_timeout
7538 def CheckPrereq(self):
7539 """Check prerequisites.
7541 This checks that the instance is in the cluster.
7544 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7545 instance = self.cfg.GetInstanceInfo(instance_name)
7546 assert instance is not None
7547 self.instance = instance
# A down/offline instance cannot be live-migrated; if the caller allowed
# fallback, silently switch to failover instead of erroring out.
7549 if (not self.cleanup and
7550 not instance.admin_state == constants.ADMINST_UP and
7551 not self.failover and self.fallback):
7552 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7553 " switching to failover")
7554 self.failover = True
7556 if instance.disk_template not in constants.DTS_MIRRORED:
7561 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7562 " %s" % (instance.disk_template, text),
# Externally-mirrored templates may go to an arbitrary node: pick it via
# the iallocator or use the explicitly requested target node.
7565 if instance.disk_template in constants.DTS_EXT_MIRROR:
7566 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7568 if self.lu.op.iallocator:
7569 self._RunAllocator()
7571 # We set self.target_node as it is required by
7573 self.target_node = self.lu.op.target_node
7575 # self.target_node is already populated, either directly or by the
7577 target_node = self.target_node
7578 if self.target_node == instance.primary_node:
7579 raise errors.OpPrereqError("Cannot migrate instance %s"
7580 " to its primary (%s)" %
7581 (instance.name, instance.primary_node))
7583 if len(self.lu.tasklets) == 1:
7584 # It is safe to release locks only when we're the only tasklet
7586 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7587 keep=[instance.primary_node, self.target_node])
# Internally-mirrored (e.g. DRBD) instances can only move to their
# configured secondary; any other target is rejected below.
7590 secondary_nodes = instance.secondary_nodes
7591 if not secondary_nodes:
7592 raise errors.ConfigurationError("No secondary node but using"
7593 " %s disk template" %
7594 instance.disk_template)
7595 target_node = secondary_nodes[0]
7596 if self.lu.op.iallocator or (self.lu.op.target_node and
7597 self.lu.op.target_node != target_node):
7599 text = "failed over"
7602 raise errors.OpPrereqError("Instances with disk template %s cannot"
7603 " be %s to arbitrary nodes"
7604 " (neither an iallocator nor a target"
7605 " node can be passed)" %
7606 (instance.disk_template, text),
7609 i_be = self.cfg.GetClusterInfo().FillBE(instance)
7611 # check memory requirements on the secondary node
7612 if not self.failover or instance.admin_state == constants.ADMINST_UP:
7613 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7614 instance.name, i_be[constants.BE_MAXMEM],
7615 instance.hypervisor)
7617 self.lu.LogInfo("Not checking memory on the secondary node as"
7618 " instance will not be started")
7620 # check if failover must be forced instead of migration
7621 if (not self.cleanup and not self.failover and
7622 i_be[constants.BE_ALWAYS_FAILOVER]):
7624 self.lu.LogInfo("Instance configured to always failover; fallback"
7626 self.failover = True
7628 raise errors.OpPrereqError("This instance has been configured to"
7629 " always failover, please allow failover",
7632 # check bridge existence
7633 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7635 if not self.cleanup:
7636 _CheckNodeNotDrained(self.lu, target_node)
7637 if not self.failover:
# Ask the primary node's hypervisor whether migration is possible; on
# failure, fall back to failover when allowed, otherwise re-raise.
7638 result = self.rpc.call_instance_migratable(instance.primary_node,
7640 if result.fail_msg and self.fallback:
7641 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7643 self.failover = True
7645 result.Raise("Can't migrate, please use failover",
7646 prereq=True, ecode=errors.ECODE_STATE)
7648 assert not (self.failover and self.cleanup)
7650 if not self.failover:
# 'live' (legacy boolean) and 'mode' are mutually exclusive; translate
# 'live' into the equivalent 'mode' value.
7651 if self.lu.op.live is not None and self.lu.op.mode is not None:
7652 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7653 " parameters are accepted",
7655 if self.lu.op.live is not None:
7657 self.lu.op.mode = constants.HT_MIGRATION_LIVE
7659 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7660 # reset the 'live' parameter to None so that repeated
7661 # invocations of CheckPrereq do not raise an exception
7662 self.lu.op.live = None
7663 elif self.lu.op.mode is None:
7664 # read the default value from the hypervisor
7665 i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7667 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7669 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7671 # Failover is never live
7674 def _RunAllocator(self):
7675 """Run the allocator based on input opcode.
# Runs the configured iallocator in relocation mode and stores the chosen
# node in self.target_node; raises OpPrereqError on allocator failure or
# an unexpected result size.
7678 ial = IAllocator(self.cfg, self.rpc,
7679 mode=constants.IALLOCATOR_MODE_RELOC,
7680 name=self.instance_name,
7681 # TODO See why hail breaks with a single node below
7682 relocate_from=[self.instance.primary_node,
7683 self.instance.primary_node],
7686 ial.Run(self.lu.op.iallocator)
7689 raise errors.OpPrereqError("Can't compute nodes using"
7690 " iallocator '%s': %s" %
7691 (self.lu.op.iallocator, ial.info),
7693 if len(ial.result) != ial.required_nodes:
7694 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7695 " of nodes (%s), required %s" %
7696 (self.lu.op.iallocator, len(ial.result),
7697 ial.required_nodes), errors.ECODE_FAULT)
7698 self.target_node = ial.result[0]
7699 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7700 self.instance_name, self.lu.op.iallocator,
7701 utils.CommaJoin(ial.result))
7703 def _WaitUntilSync(self):
7704 """Poll with custom rpc for disk sync.
7706 This uses our own step-based rpc call.
7709 self.feedback_fn("* wait until resync is done")
# NOTE(review): the loop header and the initialization of all_done /
# min_percent are on lines elided from this listing.
7713 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7715 self.instance.disks)
7717 for node, nres in result.items():
7718 nres.Raise("Cannot resync disks on node %s" % node)
# Each node reports (done, percent); we are done only when every node is,
# and we report the minimum (i.e. worst) progress across nodes.
7719 node_done, node_percent = nres.payload
7720 all_done = all_done and node_done
7721 if node_percent is not None:
7722 min_percent = min(min_percent, node_percent)
7724 if min_percent < 100:
7725 self.feedback_fn(" - progress: %.1f%%" % min_percent)
7728 def _EnsureSecondary(self, node):
7729 """Demote a node to secondary.
# Closes the instance's block devices on the given node, which demotes
# its DRBD devices from primary to secondary.
7732 self.feedback_fn("* switching node %s to secondary mode" % node)
7734 for dev in self.instance.disks:
7735 self.cfg.SetDiskID(dev, node)
7737 result = self.rpc.call_blockdev_close(node, self.instance.name,
7738 self.instance.disks)
7739 result.Raise("Cannot change disk to secondary on node %s" % node)
7741 def _GoStandalone(self):
7742 """Disconnect from the network.
# Puts the instance's DRBD devices on all involved nodes into standalone
# (disconnected) mode; any per-node failure is raised immediately.
7745 self.feedback_fn("* changing into standalone mode")
7746 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7747 self.instance.disks)
7748 for node, nres in result.items():
7749 nres.Raise("Cannot disconnect disks node %s" % node)
7751 def _GoReconnect(self, multimaster):
7752 """Reconnect to the network.
# @param multimaster: if True reconnect the disks in dual-primary
# ("multi-master") mode, otherwise in single-master mode; the branch
# assigning msg for the multimaster case is elided from this listing.
7758 msg = "single-master"
7759 self.feedback_fn("* changing disks into %s mode" % msg)
7760 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7761 self.instance.disks,
7762 self.instance.name, multimaster)
7763 for node, nres in result.items():
7764 nres.Raise("Cannot change disks config on node %s" % node)
7766 def _ExecCleanup(self):
7767 """Try to cleanup after a failed migration.
7769 The cleanup is done by:
7770 - check that the instance is running only on one node
7771 (and update the config if needed)
7772 - change disks on its secondary node to secondary
7773 - wait until disks are fully synchronized
7774 - disconnect from the network
7775 - change disks into single-master mode
7776 - wait again until disks are fully synchronized
7779 instance = self.instance
7780 target_node = self.target_node
7781 source_node = self.source_node
7783 # check running on only one node
7784 self.feedback_fn("* checking where the instance actually runs"
7785 " (if this hangs, the hypervisor might be in"
7787 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7788 for node, result in ins_l.items():
7789 result.Raise("Can't contact node %s" % node)
7791 runningon_source = instance.name in ins_l[source_node].payload
7792 runningon_target = instance.name in ins_l[target_node].payload
# Exactly one of source/target must report the instance running;
# both or neither require manual intervention.
7794 if runningon_source and runningon_target:
7795 raise errors.OpExecError("Instance seems to be running on two nodes,"
7796 " or the hypervisor is confused; you will have"
7797 " to ensure manually that it runs only on one"
7798 " and restart this operation")
7800 if not (runningon_source or runningon_target):
7801 raise errors.OpExecError("Instance does not seem to be running at all;"
7802 " in this case it's safer to repair by"
7803 " running 'gnt-instance stop' to ensure disk"
7804 " shutdown, and then restarting it")
7806 if runningon_target:
7807 # the migration has actually succeeded, we need to update the config
7808 self.feedback_fn("* instance running on secondary node (%s),"
7809 " updating config" % target_node)
7810 instance.primary_node = target_node
7811 self.cfg.Update(instance, self.feedback_fn)
7812 demoted_node = source_node
7814 self.feedback_fn("* instance confirmed to be running on its"
7815 " primary node (%s)" % source_node)
7816 demoted_node = target_node
# For internally-mirrored disks, demote the node the instance is NOT
# running on and bring the disks back to a clean single-master state.
7818 if instance.disk_template in constants.DTS_INT_MIRROR:
7819 self._EnsureSecondary(demoted_node)
7821 self._WaitUntilSync()
7822 except errors.OpExecError:
7823 # we ignore here errors, since if the device is standalone, it
7824 # won't be able to sync
7826 self._GoStandalone()
7827 self._GoReconnect(False)
7828 self._WaitUntilSync()
7830 self.feedback_fn("* done")
7832 def _RevertDiskStatus(self):
7833 """Try to revert the disk status after a failed migration.
# Best-effort: demote the target, go standalone, reconnect single-master
# and wait for sync; errors are only logged (the instance is left for
# manual recovery), never propagated.
7836 target_node = self.target_node
7837 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7841 self._EnsureSecondary(target_node)
7842 self._GoStandalone()
7843 self._GoReconnect(False)
7844 self._WaitUntilSync()
# NOTE(review): Python 2 "except E, err" syntax — needs rewriting for py3.
7845 except errors.OpExecError, err:
7846 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7847 " please try to recover the instance manually;"
7848 " error '%s'" % str(err))
7850 def _AbortMigration(self):
7851 """Call the hypervisor code to abort a started migration.
# Tells both ends of a started migration to abort: first the destination
# node, then the source node; failures are logged but never raised so
# that disk-status reversion can still be attempted afterwards.
7854 instance = self.instance
7855 target_node = self.target_node
7856 source_node = self.source_node
7857 migration_info = self.migration_info
7859 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
7863 abort_msg = abort_result.fail_msg
7865 logging.error("Aborting migration failed on target node %s: %s",
7866 target_node, abort_msg)
7867 # Don't raise an exception here, as we still have to try to revert the
7868 # disk status, even if this step failed.
7870 abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
7871 instance, False, self.live)
7872 abort_msg = abort_result.fail_msg
7874 logging.error("Aborting migration failed on source node %s: %s",
7875 source_node, abort_msg)
7877 def _ExecMigration(self):
7878 """Migrate an instance.
7880 The migrate is done by:
7881 - change the disks into dual-master mode
7882 - wait until disks are fully synchronized again
7883 - migrate the instance
7884 - change disks on the new secondary node (the old primary) to secondary
7885 - wait until disks are fully synchronized
7886 - change disks into single-master mode
7889 instance = self.instance
7890 target_node = self.target_node
7891 source_node = self.source_node
7893 # Check for hypervisor version mismatch and warn the user.
7894 nodeinfo = self.rpc.call_node_info([source_node, target_node],
7895 None, [self.instance.hypervisor])
7896 for ninfo in nodeinfo.values():
7897 ninfo.Raise("Unable to retrieve node information from node '%s'" %
7899 (_, _, (src_info, )) = nodeinfo[source_node].payload
7900 (_, _, (dst_info, )) = nodeinfo[target_node].payload
# A version mismatch is only a warning — migration may still work.
7902 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
7903 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
7904 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
7905 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
7906 if src_version != dst_version:
7907 self.feedback_fn("* warning: hypervisor version mismatch between"
7908 " source (%s) and target (%s) node" %
7909 (src_version, dst_version))
7911 self.feedback_fn("* checking disk consistency between source and target")
7912 for dev in instance.disks:
7913 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7914 raise errors.OpExecError("Disk %s is degraded or not fully"
7915 " synchronized on target node,"
7916 " aborting migration" % dev.iv_name)
7918 # First get the migration information from the remote node
7919 result = self.rpc.call_migration_info(source_node, instance)
7920 msg = result.fail_msg
7922 log_err = ("Failed fetching source migration information from %s: %s" %
7924 logging.error(log_err)
7925 raise errors.OpExecError(log_err)
7927 self.migration_info = migration_info = result.payload
7929 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7930 # Then switch the disks to master/master mode
7931 self._EnsureSecondary(target_node)
7932 self._GoStandalone()
7933 self._GoReconnect(True)
7934 self._WaitUntilSync()
# Any failure from here on aborts the migration on both nodes and tries
# to revert the disks to single-master before raising.
7936 self.feedback_fn("* preparing %s to accept the instance" % target_node)
7937 result = self.rpc.call_accept_instance(target_node,
7940 self.nodes_ip[target_node])
7942 msg = result.fail_msg
7944 logging.error("Instance pre-migration failed, trying to revert"
7945 " disk status: %s", msg)
7946 self.feedback_fn("Pre-migration failed, aborting")
7947 self._AbortMigration()
7948 self._RevertDiskStatus()
7949 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7950 (instance.name, msg))
7952 self.feedback_fn("* migrating instance to %s" % target_node)
7953 result = self.rpc.call_instance_migrate(source_node, instance,
7954 self.nodes_ip[target_node],
7956 msg = result.fail_msg
7958 logging.error("Instance migration failed, trying to revert"
7959 " disk status: %s", msg)
7960 self.feedback_fn("Migration failed, aborting")
7961 self._AbortMigration()
7962 self._RevertDiskStatus()
7963 raise errors.OpExecError("Could not migrate instance %s: %s" %
7964 (instance.name, msg))
# Poll the source hypervisor until the migration leaves the ACTIVE state,
# emitting a progress line at most every _MIGRATION_FEEDBACK_INTERVAL.
7966 self.feedback_fn("* starting memory transfer")
7967 last_feedback = time.time()
7969 result = self.rpc.call_instance_get_migration_status(source_node,
7971 msg = result.fail_msg
7972 ms = result.payload # MigrationStatus instance
7973 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
7974 logging.error("Instance migration failed, trying to revert"
7975 " disk status: %s", msg)
7976 self.feedback_fn("Migration failed, aborting")
7977 self._AbortMigration()
7978 self._RevertDiskStatus()
7979 raise errors.OpExecError("Could not migrate instance %s: %s" %
7980 (instance.name, msg))
7982 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
7983 self.feedback_fn("* memory transfer complete")
7986 if (utils.TimeoutExpired(last_feedback,
7987 self._MIGRATION_FEEDBACK_INTERVAL) and
7988 ms.transferred_ram is not None):
7989 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
7990 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
7991 last_feedback = time.time()
7993 time.sleep(self._MIGRATION_POLL_INTERVAL)
# Finalize on the source: a failure here is NOT reverted (the instance
# has already moved); we only raise so the admin knows about it.
7995 result = self.rpc.call_instance_finalize_migration_src(source_node,
7999 msg = result.fail_msg
8001 logging.error("Instance migration succeeded, but finalization failed"
8002 " on the source node: %s", msg)
8003 raise errors.OpExecError("Could not finalize instance migration: %s" %
8006 instance.primary_node = target_node
8008 # distribute new instance config to the other nodes
8009 self.cfg.Update(instance, self.feedback_fn)
8011 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8015 msg = result.fail_msg
8017 logging.error("Instance migration succeeded, but finalization failed"
8018 " on the target node: %s", msg)
8019 raise errors.OpExecError("Could not finalize instance migration: %s" %
# Bring internally-mirrored disks back to single-master, with the old
# primary now acting as secondary.
8022 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8023 self._EnsureSecondary(source_node)
8024 self._WaitUntilSync()
8025 self._GoStandalone()
8026 self._GoReconnect(False)
8027 self._WaitUntilSync()
8029 self.feedback_fn("* done")
8031 def _ExecFailover(self):
8032 """Failover an instance.
8034 The failover is done by shutting it down on its present node and
8035 starting it on the secondary.
8038 instance = self.instance
8039 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8041 source_node = instance.primary_node
8042 target_node = self.target_node
8044 if instance.admin_state == constants.ADMINST_UP:
8045 self.feedback_fn("* checking disk consistency between source and target")
8046 for dev in instance.disks:
8047 # for drbd, these are drbd over lvm
# A degraded disk only aborts the failover when the primary is online
# and the caller did not pass ignore_consistency.
8048 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
8049 if primary_node.offline:
8050 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8052 (primary_node.name, dev.iv_name, target_node))
8053 elif not self.ignore_consistency:
8054 raise errors.OpExecError("Disk %s is degraded on target node,"
8055 " aborting failover" % dev.iv_name)
8057 self.feedback_fn("* not checking disk consistency as instance is not"
8060 self.feedback_fn("* shutting down instance on source node")
8061 logging.info("Shutting down instance %s on node %s",
8062 instance.name, source_node)
8064 result = self.rpc.call_instance_shutdown(source_node, instance,
8065 self.shutdown_timeout)
8066 msg = result.fail_msg
# Shutdown failure is tolerated only with ignore_consistency or when the
# primary is already offline — otherwise it aborts the failover.
8068 if self.ignore_consistency or primary_node.offline:
8069 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8070 " proceeding anyway; please make sure node"
8071 " %s is down; error details: %s",
8072 instance.name, source_node, source_node, msg)
8074 raise errors.OpExecError("Could not shutdown instance %s on"
8076 (instance.name, source_node, msg))
8078 self.feedback_fn("* deactivating the instance's disks on source node")
8079 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8080 raise errors.OpExecError("Can't shut down the instance's disks")
8082 instance.primary_node = target_node
8083 # distribute new instance config to the other nodes
8084 self.cfg.Update(instance, self.feedback_fn)
8086 # Only start the instance if it's marked as up
8087 if instance.admin_state == constants.ADMINST_UP:
8088 self.feedback_fn("* activating the instance's disks on target node %s" %
8090 logging.info("Starting instance %s on node %s",
8091 instance.name, target_node)
8093 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8094 ignore_secondaries=True)
8096 _ShutdownInstanceDisks(self.lu, instance)
8097 raise errors.OpExecError("Can't activate the instance's disks")
8099 self.feedback_fn("* starting the instance on the target node %s" %
8101 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8103 msg = result.fail_msg
# On start failure the just-assembled disks are shut down again before
# raising, so we do not leave them active on the target.
8105 _ShutdownInstanceDisks(self.lu, instance)
8106 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8107 (instance.name, target_node, msg))
8109 def Exec(self, feedback_fn):
8110 """Perform the migration.
# Entry point: computes source/target/all_nodes and the secondary-IP map,
# then dispatches to failover, cleanup or migration (the branch keywords
# for the dispatch are on lines elided from this listing).
8113 self.feedback_fn = feedback_fn
8114 self.source_node = self.instance.primary_node
8116 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8117 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8118 self.target_node = self.instance.secondary_nodes[0]
8119 # Otherwise self.target_node has been populated either
8120 # directly, or through an iallocator.
8122 self.all_nodes = [self.source_node, self.target_node]
8123 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8124 in self.cfg.GetMultiNodeInfo(self.all_nodes))
8127 feedback_fn("Failover instance %s" % self.instance.name)
8128 self._ExecFailover()
8130 feedback_fn("Migrating instance %s" % self.instance.name)
8133 return self._ExecCleanup()
8135 return self._ExecMigration()
8138 def _CreateBlockDev(lu, node, instance, device, force_create,
8140 """Create a tree of block devices on a given node.
8142 If this device type has to be created on secondaries, create it and
8145 If not, just recurse to children keeping the same 'force' value.
8147 @param lu: the lu on whose behalf we execute
8148 @param node: the node on which to create the device
8149 @type instance: L{objects.Instance}
8150 @param instance: the instance which owns the device
8151 @type device: L{objects.Disk}
8152 @param device: the device to create
8153 @type force_create: boolean
8154 @param force_create: whether to force creation of this device; this
8155 will be changed to True whenever we find a device which has
8156 CreateOnSecondary() attribute
8157 @param info: the extra 'metadata' we should attach to the device
8158 (this will be represented as a LVM tag)
8159 @type force_open: boolean
8160 @param force_open: this parameter will be passed to the
8161 L{backend.BlockdevCreate} function where it specifies
8162 whether we run on primary or not, and it affects both
8163 the child assembly and the device own Open() execution
# Children are created first (depth-first), then the device itself via
# _CreateSingleBlockDev unless creation is not forced.
8166 if device.CreateOnSecondary():
8170 for child in device.children:
8171 _CreateBlockDev(lu, node, instance, child, force_create,
8174 if not force_create:
8177 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8180 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8181 """Create a single block device on a given node.
8183 This will not recurse over children of the device, so they must be
8186 @param lu: the lu on whose behalf we execute
8187 @param node: the node on which to create the device
8188 @type instance: L{objects.Instance}
8189 @param instance: the instance which owns the device
8190 @type device: L{objects.Disk}
8191 @param device: the device to create
8192 @param info: the extra 'metadata' we should attach to the device
8193 (this will be represented as a LVM tag)
8194 @type force_open: boolean
8195 @param force_open: this parameter will be passed to the
8196 L{backend.BlockdevCreate} function where it specifies
8197 whether we run on primary or not, and it affects both
8198 the child assembly and the device own Open() execution
8201 lu.cfg.SetDiskID(device, node)
8202 result = lu.rpc.call_blockdev_create(node, device, device.size,
8203 instance.name, force_open, info)
8204 result.Raise("Can't create block device %s on"
8205 " node %s for instance %s" % (device, node, instance.name))
# Record the physical id returned by the backend on first creation only.
8206 if device.physical_id is None:
8207 device.physical_id = result.payload
8210 def _GenerateUniqueNames(lu, exts):
8211 """Generate a suitable LV name.
8213 This will generate a logical volume name for the given instance.
# NOTE(review): the initialization of 'results', the loop over 'exts'
# binding 'val', and the return statement are elided from this listing;
# each name is a fresh unique ID with the extension appended.
8218 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8219 results.append("%s%s" % (new_id, val))
8223 def _ComputeLDParams(disk_template, disk_params):
8224 """Computes Logical Disk parameters from Disk Template parameters.
8226 @type disk_template: string
8227 @param disk_template: disk template, one of L{constants.DISK_TEMPLATES}
8228 @type disk_params: dict
8229 @param disk_params: disk template parameters; dict(template_name -> parameters
8231 @return: a list of dicts, one for each node of the disk hierarchy. Each dict
8232 contains the LD parameters of the node. The tree is flattened in-order.
8235 if disk_template not in constants.DISK_TEMPLATES:
8236 raise errors.ProgrammerError("Unknown disk template %s" % disk_template)
8239 dt_params = disk_params[disk_template]
# DRBD8 produces three entries: the DRBD device itself, then its data LV
# and its metadata LV (in-order flattening of the disk tree).
8240 if disk_template == constants.DT_DRBD8:
8242 constants.LDP_RESYNC_RATE: dt_params[constants.DRBD_RESYNC_RATE],
8243 constants.LDP_BARRIERS: dt_params[constants.DRBD_DISK_BARRIERS],
8244 constants.LDP_NO_META_FLUSH: dt_params[constants.DRBD_META_BARRIERS],
8245 constants.LDP_DEFAULT_METAVG: dt_params[constants.DRBD_DEFAULT_METAVG],
8246 constants.LDP_DISK_CUSTOM: dt_params[constants.DRBD_DISK_CUSTOM],
8247 constants.LDP_NET_CUSTOM: dt_params[constants.DRBD_NET_CUSTOM],
8251 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_DRBD8],
8254 result.append(drbd_params)
8258 constants.LDP_STRIPES: dt_params[constants.DRBD_DATA_STRIPES],
8261 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8263 result.append(data_params)
8267 constants.LDP_STRIPES: dt_params[constants.DRBD_META_STRIPES],
8270 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8272 result.append(meta_params)
# File-based templates take the plain LD_FILE defaults, unmodified.
8274 elif (disk_template == constants.DT_FILE or
8275 disk_template == constants.DT_SHARED_FILE):
8276 result.append(constants.DISK_LD_DEFAULTS[constants.LD_FILE])
8278 elif disk_template == constants.DT_PLAIN:
8280 constants.LDP_STRIPES: dt_params[constants.LV_STRIPES],
8283 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8285 result.append(params)
8287 elif disk_template == constants.DT_BLOCK:
8288 result.append(constants.DISK_LD_DEFAULTS[constants.LD_BLOCKDEV])
8293 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8294 iv_name, p_minor, s_minor, drbd_params, data_params,
8296 """Generate a drbd8 device complete with its children.
# Builds a DRBD8 Disk object backed by two LVs (data of the requested
# size, metadata of the fixed DRBD_META_SIZE), allocating a port and a
# shared secret from the config; the return line is elided here.
8299 assert len(vgnames) == len(names) == 2
8300 port = lu.cfg.AllocatePort()
8301 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8303 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8304 logical_id=(vgnames[0], names[0]),
8306 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8307 logical_id=(vgnames[1], names[1]),
8309 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8310 logical_id=(primary, secondary, port,
8313 children=[dev_data, dev_meta],
8314 iv_name=iv_name, params=drbd_params)
8318 def _GenerateDiskTemplate(lu, template_name,
8319 instance_name, primary_node,
8320 secondary_nodes, disk_info,
8321 file_storage_dir, file_driver,
8322 base_index, feedback_fn, disk_params):
8323 """Generate the entire disk layout for a given template type.
8326 #TODO: compute space requirements
8328 vgname = lu.cfg.GetVGName()
8329 disk_count = len(disk_info)
8331 ld_params = _ComputeLDParams(template_name, disk_params)
8332 if template_name == constants.DT_DISKLESS:
# Plain LVM: no secondaries allowed, one LV per requested disk.
8334 elif template_name == constants.DT_PLAIN:
8335 if len(secondary_nodes) != 0:
8336 raise errors.ProgrammerError("Wrong template configuration")
8338 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8339 for i in range(disk_count)])
8340 for idx, disk in enumerate(disk_info):
8341 disk_index = idx + base_index
8342 vg = disk.get(constants.IDISK_VG, vgname)
8343 feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
8344 disk_dev = objects.Disk(dev_type=constants.LD_LV,
8345 size=disk[constants.IDISK_SIZE],
8346 logical_id=(vg, names[idx]),
8347 iv_name="disk/%d" % disk_index,
8348 mode=disk[constants.IDISK_MODE],
8349 params=ld_params[0])
8350 disks.append(disk_dev)
# DRBD8: exactly one secondary; two minors (primary, secondary) and a
# _data/_meta LV name pair are allocated per disk.
8351 elif template_name == constants.DT_DRBD8:
8352 drbd_params, data_params, meta_params = ld_params
8353 if len(secondary_nodes) != 1:
8354 raise errors.ProgrammerError("Wrong template configuration")
8355 remote_node = secondary_nodes[0]
8356 minors = lu.cfg.AllocateDRBDMinor(
8357 [primary_node, remote_node] * len(disk_info), instance_name)
8360 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8361 for i in range(disk_count)]):
8362 names.append(lv_prefix + "_data")
8363 names.append(lv_prefix + "_meta")
8364 for idx, disk in enumerate(disk_info):
8365 disk_index = idx + base_index
8366 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8367 data_vg = disk.get(constants.IDISK_VG, vgname)
8368 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8369 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8370 disk[constants.IDISK_SIZE],
8372 names[idx * 2:idx * 2 + 2],
8373 "disk/%d" % disk_index,
8374 minors[idx * 2], minors[idx * 2 + 1],
8375 drbd_params, data_params, meta_params)
8376 disk_dev.mode = disk[constants.IDISK_MODE]
8377 disks.append(disk_dev)
8378 elif template_name == constants.DT_FILE:
8379 if len(secondary_nodes) != 0:
8380 raise errors.ProgrammerError("Wrong template configuration")
8382 opcodes.RequireFileStorage()
8384 for idx, disk in enumerate(disk_info):
8385 disk_index = idx + base_index
8386 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8387 size=disk[constants.IDISK_SIZE],
8388 iv_name="disk/%d" % disk_index,
8389 logical_id=(file_driver,
8390 "%s/disk%d" % (file_storage_dir,
8392 mode=disk[constants.IDISK_MODE],
8393 params=ld_params[0])
8394 disks.append(disk_dev)
# Shared-file mirrors the plain file branch except for the storage check.
8395 elif template_name == constants.DT_SHARED_FILE:
8396 if len(secondary_nodes) != 0:
8397 raise errors.ProgrammerError("Wrong template configuration")
8399 opcodes.RequireSharedFileStorage()
8401 for idx, disk in enumerate(disk_info):
8402 disk_index = idx + base_index
8403 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8404 size=disk[constants.IDISK_SIZE],
8405 iv_name="disk/%d" % disk_index,
8406 logical_id=(file_driver,
8407 "%s/disk%d" % (file_storage_dir,
8409 mode=disk[constants.IDISK_MODE],
8410 params=ld_params[0])
8411 disks.append(disk_dev)
# Block devices are adopted, not created: the path comes from IDISK_ADOPT.
8412 elif template_name == constants.DT_BLOCK:
8413 if len(secondary_nodes) != 0:
8414 raise errors.ProgrammerError("Wrong template configuration")
8416 for idx, disk in enumerate(disk_info):
8417 disk_index = idx + base_index
8418 disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
8419 size=disk[constants.IDISK_SIZE],
8420 logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
8421 disk[constants.IDISK_ADOPT]),
8422 iv_name="disk/%d" % disk_index,
8423 mode=disk[constants.IDISK_MODE],
8424 params=ld_params[0])
8425 disks.append(disk_dev)
8428 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
8432 def _GetInstanceInfoText(instance):
8433 """Compute that text that should be added to the disk's metadata.
8436 return "originstname+%s" % instance.name
8439 def _CalcEta(time_taken, written, total_size):
8440 """Calculates the ETA based on size written and total size.
8442 @param time_taken: The time taken so far
8443 @param written: amount written so far
8444 @param total_size: The total size of data to be written
8445 @return: The remaining time in seconds
8448 avg_time = time_taken / float(written)
8449 return (total_size - written) * avg_time
def _WipeDisks(lu, instance):
  """Wipes instance disks.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should wipe
  @return: the success of the wipe

  """
  node = instance.primary_node

  for device in instance.disks:
    lu.cfg.SetDiskID(device, node)

  logging.info("Pause sync of instance %s disks", instance.name)
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)

  for idx, success in enumerate(result.payload):
    if not success:
      logging.warn("pause-sync of instance %s for disks %d failed",
                   instance.name, idx)

  try:
    for idx, device in enumerate(instance.disks):
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
      # MAX_WIPE_CHUNK at max
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
                            constants.MIN_WIPE_CHUNK_PERCENT)
      # we _must_ make this an int, otherwise rounding errors will
      # occur
      wipe_chunk_size = int(wipe_chunk_size)

      lu.LogInfo("* Wiping disk %d", idx)
      logging.info("Wiping disk %d for instance %s, node %s using"
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)

      offset = 0
      size = device.size
      last_output = 0
      start_time = time.time()

      while offset < size:
        wipe_size = min(wipe_chunk_size, size - offset)
        logging.debug("Wiping disk %d, offset %s, chunk %s",
                      idx, offset, wipe_size)
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
                     (idx, offset, wipe_size))
        now = time.time()
        offset += wipe_size
        # rate-limit progress output to roughly once a minute
        if now - last_output >= 60:
          eta = _CalcEta(now - start_time, offset, size)
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
          last_output = now
  finally:
    # always resume syncing, even if the wipe itself failed half-way
    logging.info("Resume sync of instance %s disks", instance.name)

    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)

    for idx, success in enumerate(result.payload):
      if not success:
        lu.LogWarning("Resume sync of disk %d failed, please have a"
                      " look at the status and troubleshoot the issue", idx)
        logging.warn("resume-sync of instance %s for disks %d failed",
                     instance.name, idx)
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation
  @rtype: boolean
  @return: the success of the creation

  """
  info = _GetInstanceInfoText(instance)
  if target_node is None:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes
  else:
    pnode = target_node
    all_nodes = [pnode]

  if instance.disk_template in constants.DTS_FILEBASED:
    # file-based disks live under a per-instance directory which must
    # exist before the block devices can be created
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    result.Raise("Failed to create directory '%s' on"
                 " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUInstanceSetParams
  for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
      continue
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    # the same flag is used for force-create and force-open: both only
    # on the primary node
    for node in all_nodes:
      f_create = node == pnode
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        # best-effort: log and keep going so the other devices still get
        # cleaned up
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

    # if this is a DRBD disk, return its port to the pool
    if device.dev_type in constants.LDS_DRBD:
      tcp_port = device.logical_id[2]
      lu.cfg.AddTcpUdpPort(tcp_port)

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, instance.primary_node, result.fail_msg)
      all_result = False

  return all_result
def _ComputeDiskSizePerVG(disk_template, disks):
  """Compute disk size requirements in the volume group

  @type disk_template: string
  @param disk_template: one of the C{constants.DT_*} template names
  @type disks: list of dicts
  @param disks: disk definitions, each carrying at least
      C{constants.IDISK_SIZE} and C{constants.IDISK_VG}
  @rtype: dict
  @return: mapping of volume-group name to required size
  @raise errors.ProgrammerError: for an unknown disk template

  """
  def _compute(disks, payload):
    """Universal algorithm.

    Sums disk sizes per volume group, adding C{payload} of overhead
    for every disk.

    """
    vgs = {}
    for disk in disks:
      # accumulate under the disk's VG *name*; looking up the constant
      # itself would always return the default 0 and drop previously
      # accumulated sizes for the group
      vgs[disk[constants.IDISK_VG]] = \
        vgs.get(disk[constants.IDISK_VG], 0) + \
        disk[constants.IDISK_SIZE] + payload

    return vgs

  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: {},
    constants.DT_PLAIN: _compute(disks, 0),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
    constants.DT_FILE: {},
    constants.DT_SHARED_FILE: {},
    }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  @type disk_template: string
  @param disk_template: one of the C{constants.DT_*} template names
  @type disks: list of dicts
  @param disks: disk definitions carrying at least C{constants.IDISK_SIZE}
  @return: the required size, or C{None}/C{0} for templates that do not
      consume volume-group space
  @raise errors.ProgrammerError: for an unknown disk template

  """
  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8:
      sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
    constants.DT_FILE: None,
    constants.DT_SHARED_FILE: 0,
    constants.DT_BLOCK: 0,
    }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
8675 def _FilterVmNodes(lu, nodenames):
8676 """Filters out non-vm_capable nodes from a list.
8678 @type lu: L{LogicalUnit}
8679 @param lu: the logical unit for which we check
8680 @type nodenames: list
8681 @param nodenames: the list of nodes on which we should check
8683 @return: the list of vm-capable nodes
8686 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
8687 return [name for name in nodenames if name not in vm_nodes]
def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstract the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)

  cluster = lu.cfg.GetClusterInfo()
  # validate against the fully-filled dict (cluster defaults + overrides)
  hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)

  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
  for node in nodenames:
    info = hvinfo[node]
    if info.offline:
      # offline nodes cannot answer; skip instead of failing
      continue
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """OS parameters validation.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the hypervisor we should use
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)
  result = lu.rpc.call_os_validate(nodenames, required, osname,
                                   [constants.OS_VALIDATE_PARAMETERS],
                                   osparams)
  for node, nres in result.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    nres.Raise("OS Parameters validation failed on node %s" % node)
    if not nres.payload:
      lu.LogInfo("OS %s not found on node %s, validation skipped",
                 osname, node)
8750 class LUInstanceCreate(LogicalUnit):
8751 """Create an instance.
8754 HPATH = "instance-add"
8755 HTYPE = constants.HTYPE_INSTANCE
8758 def CheckArguments(self):
8762 # do not require name_check to ease forward/backward compatibility
8764 if self.op.no_install and self.op.start:
8765 self.LogInfo("No-installation mode selected, disabling startup")
8766 self.op.start = False
8767 # validate/normalize the instance name
8768 self.op.instance_name = \
8769 netutils.Hostname.GetNormalizedName(self.op.instance_name)
8771 if self.op.ip_check and not self.op.name_check:
8772 # TODO: make the ip check more flexible and not depend on the name check
8773 raise errors.OpPrereqError("Cannot do IP address check without a name"
8774 " check", errors.ECODE_INVAL)
8776 # check nics' parameter names
8777 for nic in self.op.nics:
8778 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8780 # check disks. parameter names and consistent adopt/no-adopt strategy
8781 has_adopt = has_no_adopt = False
8782 for disk in self.op.disks:
8783 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8784 if constants.IDISK_ADOPT in disk:
8788 if has_adopt and has_no_adopt:
8789 raise errors.OpPrereqError("Either all disks are adopted or none is",
8792 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8793 raise errors.OpPrereqError("Disk adoption is not supported for the"
8794 " '%s' disk template" %
8795 self.op.disk_template,
8797 if self.op.iallocator is not None:
8798 raise errors.OpPrereqError("Disk adoption not allowed with an"
8799 " iallocator script", errors.ECODE_INVAL)
8800 if self.op.mode == constants.INSTANCE_IMPORT:
8801 raise errors.OpPrereqError("Disk adoption not allowed for"
8802 " instance import", errors.ECODE_INVAL)
8804 if self.op.disk_template in constants.DTS_MUST_ADOPT:
8805 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8806 " but no 'adopt' parameter given" %
8807 self.op.disk_template,
8810 self.adopt_disks = has_adopt
8812 # instance name verification
8813 if self.op.name_check:
8814 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8815 self.op.instance_name = self.hostname1.name
8816 # used in CheckPrereq for ip ping check
8817 self.check_ip = self.hostname1.ip
8819 self.check_ip = None
8821 # file storage checks
8822 if (self.op.file_driver and
8823 not self.op.file_driver in constants.FILE_DRIVER):
8824 raise errors.OpPrereqError("Invalid file driver name '%s'" %
8825 self.op.file_driver, errors.ECODE_INVAL)
8827 if self.op.disk_template == constants.DT_FILE:
8828 opcodes.RequireFileStorage()
8829 elif self.op.disk_template == constants.DT_SHARED_FILE:
8830 opcodes.RequireSharedFileStorage()
8832 ### Node/iallocator related checks
8833 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8835 if self.op.pnode is not None:
8836 if self.op.disk_template in constants.DTS_INT_MIRROR:
8837 if self.op.snode is None:
8838 raise errors.OpPrereqError("The networked disk templates need"
8839 " a mirror node", errors.ECODE_INVAL)
8841 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8843 self.op.snode = None
8845 self._cds = _GetClusterDomainSecret()
8847 if self.op.mode == constants.INSTANCE_IMPORT:
8848 # On import force_variant must be True, because if we forced it at
8849 # initial install, our only chance when importing it back is that it
8851 self.op.force_variant = True
8853 if self.op.no_install:
8854 self.LogInfo("No-installation mode has no effect during import")
8856 elif self.op.mode == constants.INSTANCE_CREATE:
8857 if self.op.os_type is None:
8858 raise errors.OpPrereqError("No guest OS specified",
8860 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
8861 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
8862 " installation" % self.op.os_type,
8864 if self.op.disk_template is None:
8865 raise errors.OpPrereqError("No disk template specified",
8868 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8869 # Check handshake to ensure both clusters have the same domain secret
8870 src_handshake = self.op.source_handshake
8871 if not src_handshake:
8872 raise errors.OpPrereqError("Missing source handshake",
8875 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
8878 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
8881 # Load and check source CA
8882 self.source_x509_ca_pem = self.op.source_x509_ca
8883 if not self.source_x509_ca_pem:
8884 raise errors.OpPrereqError("Missing source X509 CA",
8888 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
8890 except OpenSSL.crypto.Error, err:
8891 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
8892 (err, ), errors.ECODE_INVAL)
8894 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
8895 if errcode is not None:
8896 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
8899 self.source_x509_ca = cert
8901 src_instance_name = self.op.source_instance_name
8902 if not src_instance_name:
8903 raise errors.OpPrereqError("Missing source instance name",
8906 self.source_instance_name = \
8907 netutils.GetHostname(name=src_instance_name).name
8910 raise errors.OpPrereqError("Invalid instance creation mode %r" %
8911 self.op.mode, errors.ECODE_INVAL)
8913 def ExpandNames(self):
8914 """ExpandNames for CreateInstance.
8916 Figure out the right locks for instance creation.
8919 self.needed_locks = {}
8921 instance_name = self.op.instance_name
8922 # this is just a preventive check, but someone might still add this
8923 # instance in the meantime, and creation will fail at lock-add time
8924 if instance_name in self.cfg.GetInstanceList():
8925 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8926 instance_name, errors.ECODE_EXISTS)
8928 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
8930 if self.op.iallocator:
8931 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
8932 # specifying a group on instance creation and then selecting nodes from
8934 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8935 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
8937 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8938 nodelist = [self.op.pnode]
8939 if self.op.snode is not None:
8940 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8941 nodelist.append(self.op.snode)
8942 self.needed_locks[locking.LEVEL_NODE] = nodelist
8943 # Lock resources of instance's primary and secondary nodes (copy to
8944 # prevent accidential modification)
8945 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
8947 # in case of import lock the source node too
8948 if self.op.mode == constants.INSTANCE_IMPORT:
8949 src_node = self.op.src_node
8950 src_path = self.op.src_path
8952 if src_path is None:
8953 self.op.src_path = src_path = self.op.instance_name
8955 if src_node is None:
8956 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8957 self.op.src_node = None
8958 if os.path.isabs(src_path):
8959 raise errors.OpPrereqError("Importing an instance from a path"
8960 " requires a source node option",
8963 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8964 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8965 self.needed_locks[locking.LEVEL_NODE].append(src_node)
8966 if not os.path.isabs(src_path):
8967 self.op.src_path = src_path = \
8968 utils.PathJoin(constants.EXPORT_DIR, src_path)
8970 def _RunAllocator(self):
8971 """Run the allocator based on input opcode.
8974 nics = [n.ToDict() for n in self.nics]
8975 ial = IAllocator(self.cfg, self.rpc,
8976 mode=constants.IALLOCATOR_MODE_ALLOC,
8977 name=self.op.instance_name,
8978 disk_template=self.op.disk_template,
8981 vcpus=self.be_full[constants.BE_VCPUS],
8982 memory=self.be_full[constants.BE_MAXMEM],
8985 hypervisor=self.op.hypervisor,
8988 ial.Run(self.op.iallocator)
8991 raise errors.OpPrereqError("Can't compute nodes using"
8992 " iallocator '%s': %s" %
8993 (self.op.iallocator, ial.info),
8995 if len(ial.result) != ial.required_nodes:
8996 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8997 " of nodes (%s), required %s" %
8998 (self.op.iallocator, len(ial.result),
8999 ial.required_nodes), errors.ECODE_FAULT)
9000 self.op.pnode = ial.result[0]
9001 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9002 self.op.instance_name, self.op.iallocator,
9003 utils.CommaJoin(ial.result))
9004 if ial.required_nodes == 2:
9005 self.op.snode = ial.result[1]
9007 def BuildHooksEnv(self):
9010 This runs on master, primary and secondary nodes of the instance.
9014 "ADD_MODE": self.op.mode,
9016 if self.op.mode == constants.INSTANCE_IMPORT:
9017 env["SRC_NODE"] = self.op.src_node
9018 env["SRC_PATH"] = self.op.src_path
9019 env["SRC_IMAGES"] = self.src_images
9021 env.update(_BuildInstanceHookEnv(
9022 name=self.op.instance_name,
9023 primary_node=self.op.pnode,
9024 secondary_nodes=self.secondaries,
9025 status=self.op.start,
9026 os_type=self.op.os_type,
9027 minmem=self.be_full[constants.BE_MINMEM],
9028 maxmem=self.be_full[constants.BE_MAXMEM],
9029 vcpus=self.be_full[constants.BE_VCPUS],
9030 nics=_NICListToTuple(self, self.nics),
9031 disk_template=self.op.disk_template,
9032 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9033 for d in self.disks],
9036 hypervisor_name=self.op.hypervisor,
9042 def BuildHooksNodes(self):
9043 """Build hooks nodes.
9046 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9049 def _ReadExportInfo(self):
9050 """Reads the export information from disk.
9052 It will override the opcode source node and path with the actual
9053 information, if these two were not specified before.
9055 @return: the export information
9058 assert self.op.mode == constants.INSTANCE_IMPORT
9060 src_node = self.op.src_node
9061 src_path = self.op.src_path
9063 if src_node is None:
9064 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9065 exp_list = self.rpc.call_export_list(locked_nodes)
9067 for node in exp_list:
9068 if exp_list[node].fail_msg:
9070 if src_path in exp_list[node].payload:
9072 self.op.src_node = src_node = node
9073 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
9077 raise errors.OpPrereqError("No export found for relative path %s" %
9078 src_path, errors.ECODE_INVAL)
9080 _CheckNodeOnline(self, src_node)
9081 result = self.rpc.call_export_info(src_node, src_path)
9082 result.Raise("No export or invalid export found in dir %s" % src_path)
9084 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9085 if not export_info.has_section(constants.INISECT_EXP):
9086 raise errors.ProgrammerError("Corrupted export config",
9087 errors.ECODE_ENVIRON)
9089 ei_version = export_info.get(constants.INISECT_EXP, "version")
9090 if (int(ei_version) != constants.EXPORT_VERSION):
9091 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9092 (ei_version, constants.EXPORT_VERSION),
9093 errors.ECODE_ENVIRON)
9096 def _ReadExportParams(self, einfo):
9097 """Use export parameters as defaults.
9099 In case the opcode doesn't specify (as in override) some instance
9100 parameters, then try to use them from the export information, if
9104 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9106 if self.op.disk_template is None:
9107 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9108 self.op.disk_template = einfo.get(constants.INISECT_INS,
9110 if self.op.disk_template not in constants.DISK_TEMPLATES:
9111 raise errors.OpPrereqError("Disk template specified in configuration"
9112 " file is not one of the allowed values:"
9113 " %s" % " ".join(constants.DISK_TEMPLATES))
9115 raise errors.OpPrereqError("No disk template specified and the export"
9116 " is missing the disk_template information",
9119 if not self.op.disks:
9121 # TODO: import the disk iv_name too
9122 for idx in range(constants.MAX_DISKS):
9123 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9124 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9125 disks.append({constants.IDISK_SIZE: disk_sz})
9126 self.op.disks = disks
9127 if not disks and self.op.disk_template != constants.DT_DISKLESS:
9128 raise errors.OpPrereqError("No disk info specified and the export"
9129 " is missing the disk information",
9132 if not self.op.nics:
9134 for idx in range(constants.MAX_NICS):
9135 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9137 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9138 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9145 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9146 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9148 if (self.op.hypervisor is None and
9149 einfo.has_option(constants.INISECT_INS, "hypervisor")):
9150 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9152 if einfo.has_section(constants.INISECT_HYP):
9153 # use the export parameters but do not override the ones
9154 # specified by the user
9155 for name, value in einfo.items(constants.INISECT_HYP):
9156 if name not in self.op.hvparams:
9157 self.op.hvparams[name] = value
9159 if einfo.has_section(constants.INISECT_BEP):
9160 # use the parameters, without overriding
9161 for name, value in einfo.items(constants.INISECT_BEP):
9162 if name not in self.op.beparams:
9163 self.op.beparams[name] = value
9164 # Compatibility for the old "memory" be param
9165 if name == constants.BE_MEMORY:
9166 if constants.BE_MAXMEM not in self.op.beparams:
9167 self.op.beparams[constants.BE_MAXMEM] = value
9168 if constants.BE_MINMEM not in self.op.beparams:
9169 self.op.beparams[constants.BE_MINMEM] = value
9171 # try to read the parameters old style, from the main section
9172 for name in constants.BES_PARAMETERS:
9173 if (name not in self.op.beparams and
9174 einfo.has_option(constants.INISECT_INS, name)):
9175 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9177 if einfo.has_section(constants.INISECT_OSP):
9178 # use the parameters, without overriding
9179 for name, value in einfo.items(constants.INISECT_OSP):
9180 if name not in self.op.osparams:
9181 self.op.osparams[name] = value
9183 def _RevertToDefaults(self, cluster):
9184 """Revert the instance parameters to the default values.
9188 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9189 for name in self.op.hvparams.keys():
9190 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9191 del self.op.hvparams[name]
9193 be_defs = cluster.SimpleFillBE({})
9194 for name in self.op.beparams.keys():
9195 if name in be_defs and be_defs[name] == self.op.beparams[name]:
9196 del self.op.beparams[name]
9198 nic_defs = cluster.SimpleFillNIC({})
9199 for nic in self.op.nics:
9200 for name in constants.NICS_PARAMETERS:
9201 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9204 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9205 for name in self.op.osparams.keys():
9206 if name in os_defs and os_defs[name] == self.op.osparams[name]:
9207 del self.op.osparams[name]
9209 def _CalculateFileStorageDir(self):
9210 """Calculate final instance file storage dir.
9213 # file storage dir calculation/check
9214 self.instance_file_storage_dir = None
9215 if self.op.disk_template in constants.DTS_FILEBASED:
9216 # build the full file storage dir path
9219 if self.op.disk_template == constants.DT_SHARED_FILE:
9220 get_fsd_fn = self.cfg.GetSharedFileStorageDir
9222 get_fsd_fn = self.cfg.GetFileStorageDir
9224 cfg_storagedir = get_fsd_fn()
9225 if not cfg_storagedir:
9226 raise errors.OpPrereqError("Cluster file storage dir not defined")
9227 joinargs.append(cfg_storagedir)
9229 if self.op.file_storage_dir is not None:
9230 joinargs.append(self.op.file_storage_dir)
9232 joinargs.append(self.op.instance_name)
9234 # pylint: disable=W0142
9235 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
9237 def CheckPrereq(self):
9238 """Check prerequisites.
9241 self._CalculateFileStorageDir()
9243 if self.op.mode == constants.INSTANCE_IMPORT:
9244 export_info = self._ReadExportInfo()
9245 self._ReadExportParams(export_info)
9247 if (not self.cfg.GetVGName() and
9248 self.op.disk_template not in constants.DTS_NOT_LVM):
9249 raise errors.OpPrereqError("Cluster does not support lvm-based"
9250 " instances", errors.ECODE_STATE)
9252 if (self.op.hypervisor is None or
9253 self.op.hypervisor == constants.VALUE_AUTO):
9254 self.op.hypervisor = self.cfg.GetHypervisorType()
9256 cluster = self.cfg.GetClusterInfo()
9257 enabled_hvs = cluster.enabled_hypervisors
9258 if self.op.hypervisor not in enabled_hvs:
9259 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9260 " cluster (%s)" % (self.op.hypervisor,
9261 ",".join(enabled_hvs)),
9264 # Check tag validity
9265 for tag in self.op.tags:
9266 objects.TaggableObject.ValidateTag(tag)
9268 # check hypervisor parameter syntax (locally)
9269 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9270 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9272 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9273 hv_type.CheckParameterSyntax(filled_hvp)
9274 self.hv_full = filled_hvp
9275 # check that we don't specify global parameters on an instance
9276 _CheckGlobalHvParams(self.op.hvparams)
9278 # fill and remember the beparams dict
9279 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9280 for param, value in self.op.beparams.iteritems():
9281 if value == constants.VALUE_AUTO:
9282 self.op.beparams[param] = default_beparams[param]
9283 objects.UpgradeBeParams(self.op.beparams)
9284 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9285 self.be_full = cluster.SimpleFillBE(self.op.beparams)
9287 # build os parameters
9288 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9290 # now that hvp/bep are in final format, let's reset to defaults,
9292 if self.op.identify_defaults:
9293 self._RevertToDefaults(cluster)
9297 for idx, nic in enumerate(self.op.nics):
9298 nic_mode_req = nic.get(constants.INIC_MODE, None)
9299 nic_mode = nic_mode_req
9300 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9301 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9303 # in routed mode, for the first nic, the default ip is 'auto'
9304 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9305 default_ip_mode = constants.VALUE_AUTO
9307 default_ip_mode = constants.VALUE_NONE
9309 # ip validity checks
9310 ip = nic.get(constants.INIC_IP, default_ip_mode)
9311 if ip is None or ip.lower() == constants.VALUE_NONE:
9313 elif ip.lower() == constants.VALUE_AUTO:
9314 if not self.op.name_check:
9315 raise errors.OpPrereqError("IP address set to auto but name checks"
9316 " have been skipped",
9318 nic_ip = self.hostname1.ip
9320 if not netutils.IPAddress.IsValid(ip):
9321 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9325 # TODO: check the ip address for uniqueness
9326 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9327 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9330 # MAC address verification
9331 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9332 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9333 mac = utils.NormalizeAndValidateMac(mac)
9336 self.cfg.ReserveMAC(mac, self.proc.GetECId())
9337 except errors.ReservationError:
9338 raise errors.OpPrereqError("MAC address %s already in use"
9339 " in cluster" % mac,
9340 errors.ECODE_NOTUNIQUE)
9342 # Build nic parameters
9343 link = nic.get(constants.INIC_LINK, None)
9344 if link == constants.VALUE_AUTO:
9345 link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9348 nicparams[constants.NIC_MODE] = nic_mode
9350 nicparams[constants.NIC_LINK] = link
9352 check_params = cluster.SimpleFillNIC(nicparams)
9353 objects.NIC.CheckParameterSyntax(check_params)
9354 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9356 # disk checks/pre-build
9357 default_vg = self.cfg.GetVGName()
9359 for disk in self.op.disks:
9360 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9361 if mode not in constants.DISK_ACCESS_SET:
9362 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9363 mode, errors.ECODE_INVAL)
9364 size = disk.get(constants.IDISK_SIZE, None)
9366 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9369 except (TypeError, ValueError):
9370 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9373 data_vg = disk.get(constants.IDISK_VG, default_vg)
9375 constants.IDISK_SIZE: size,
9376 constants.IDISK_MODE: mode,
9377 constants.IDISK_VG: data_vg,
9379 if constants.IDISK_METAVG in disk:
9380 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9381 if constants.IDISK_ADOPT in disk:
9382 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9383 self.disks.append(new_disk)
9385 if self.op.mode == constants.INSTANCE_IMPORT:
9387 for idx in range(len(self.disks)):
9388 option = "disk%d_dump" % idx
9389 if export_info.has_option(constants.INISECT_INS, option):
9390 # FIXME: are the old os-es, disk sizes, etc. useful?
9391 export_name = export_info.get(constants.INISECT_INS, option)
9392 image = utils.PathJoin(self.op.src_path, export_name)
9393 disk_images.append(image)
9395 disk_images.append(False)
9397 self.src_images = disk_images
9399 old_name = export_info.get(constants.INISECT_INS, "name")
9400 if self.op.instance_name == old_name:
9401 for idx, nic in enumerate(self.nics):
9402 if nic.mac == constants.VALUE_AUTO:
9403 nic_mac_ini = "nic%d_mac" % idx
9404 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9406 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9408 # ip ping checks (we use the same ip that was resolved in ExpandNames)
9409 if self.op.ip_check:
9410 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9411 raise errors.OpPrereqError("IP %s of instance %s already in use" %
9412 (self.check_ip, self.op.instance_name),
9413 errors.ECODE_NOTUNIQUE)
9415 #### mac address generation
9416 # By generating here the mac address both the allocator and the hooks get
9417 # the real final mac address rather than the 'auto' or 'generate' value.
9418 # There is a race condition between the generation and the instance object
9419 # creation, which means that we know the mac is valid now, but we're not
9420 # sure it will be when we actually add the instance. If things go bad
9421 # adding the instance will abort because of a duplicate mac, and the
9422 # creation job will fail.
9423 for nic in self.nics:
9424 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9425 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9429 if self.op.iallocator is not None:
9430 self._RunAllocator()
9432 # Release all unneeded node locks
9433 _ReleaseLocks(self, locking.LEVEL_NODE,
9434 keep=filter(None, [self.op.pnode, self.op.snode,
9437 #### node related checks
9439 # check primary node
9440 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9441 assert self.pnode is not None, \
9442 "Cannot retrieve locked node %s" % self.op.pnode
9444 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9445 pnode.name, errors.ECODE_STATE)
9447 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9448 pnode.name, errors.ECODE_STATE)
9449 if not pnode.vm_capable:
9450 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9451 " '%s'" % pnode.name, errors.ECODE_STATE)
9453 self.secondaries = []
9455 # mirror node verification
9456 if self.op.disk_template in constants.DTS_INT_MIRROR:
9457 if self.op.snode == pnode.name:
9458 raise errors.OpPrereqError("The secondary node cannot be the"
9459 " primary node", errors.ECODE_INVAL)
9460 _CheckNodeOnline(self, self.op.snode)
9461 _CheckNodeNotDrained(self, self.op.snode)
9462 _CheckNodeVmCapable(self, self.op.snode)
9463 self.secondaries.append(self.op.snode)
9465 snode = self.cfg.GetNodeInfo(self.op.snode)
9466 if pnode.group != snode.group:
9467 self.LogWarning("The primary and secondary nodes are in two"
9468 " different node groups; the disk parameters"
9469 " from the first disk's node group will be"
9472 nodenames = [pnode.name] + self.secondaries
9474 # disk parameters (not customizable at instance or node level)
9475 # just use the primary node parameters, ignoring the secondary.
9476 self.diskparams = self.cfg.GetNodeGroup(pnode.group).diskparams
9478 if not self.adopt_disks:
9479 # Check lv size requirements, if not adopting
9480 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9481 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9483 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9484 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9485 disk[constants.IDISK_ADOPT])
9486 for disk in self.disks])
9487 if len(all_lvs) != len(self.disks):
9488 raise errors.OpPrereqError("Duplicate volume names given for adoption",
9490 for lv_name in all_lvs:
9492 # FIXME: lv_name here is "vg/lv" need to ensure that other calls
9493 # to ReserveLV uses the same syntax
9494 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9495 except errors.ReservationError:
9496 raise errors.OpPrereqError("LV named %s used by another instance" %
9497 lv_name, errors.ECODE_NOTUNIQUE)
9499 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9500 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9502 node_lvs = self.rpc.call_lv_list([pnode.name],
9503 vg_names.payload.keys())[pnode.name]
9504 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9505 node_lvs = node_lvs.payload
9507 delta = all_lvs.difference(node_lvs.keys())
9509 raise errors.OpPrereqError("Missing logical volume(s): %s" %
9510 utils.CommaJoin(delta),
9512 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9514 raise errors.OpPrereqError("Online logical volumes found, cannot"
9515 " adopt: %s" % utils.CommaJoin(online_lvs),
9517 # update the size of disk based on what is found
9518 for dsk in self.disks:
9519 dsk[constants.IDISK_SIZE] = \
9520 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9521 dsk[constants.IDISK_ADOPT])][0]))
9523 elif self.op.disk_template == constants.DT_BLOCK:
9524 # Normalize and de-duplicate device paths
9525 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9526 for disk in self.disks])
9527 if len(all_disks) != len(self.disks):
9528 raise errors.OpPrereqError("Duplicate disk names given for adoption",
9530 baddisks = [d for d in all_disks
9531 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9533 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9534 " cannot be adopted" %
9535 (", ".join(baddisks),
9536 constants.ADOPTABLE_BLOCKDEV_ROOT),
9539 node_disks = self.rpc.call_bdev_sizes([pnode.name],
9540 list(all_disks))[pnode.name]
9541 node_disks.Raise("Cannot get block device information from node %s" %
9543 node_disks = node_disks.payload
9544 delta = all_disks.difference(node_disks.keys())
9546 raise errors.OpPrereqError("Missing block device(s): %s" %
9547 utils.CommaJoin(delta),
9549 for dsk in self.disks:
9550 dsk[constants.IDISK_SIZE] = \
9551 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
9553 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
9555 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
9556 # check OS parameters (remotely)
9557 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
9559 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
9561 # memory check on primary node
9562 #TODO(dynmem): use MINMEM for checking
9564 _CheckNodeFreeMemory(self, self.pnode.name,
9565 "creating instance %s" % self.op.instance_name,
9566 self.be_full[constants.BE_MAXMEM],
9569 self.dry_run_result = list(nodenames)
# NOTE(review): this listing elides many original source lines (see the gaps in
# the embedded line numbering, e.g. 9584->9588, 9628->9629); comments below
# describe only the statements that are visible here.
9571 def Exec(self, feedback_fn):
9572 """Create and add the instance to the cluster.
# Resolve the instance name and primary node chosen during CheckPrereq.
9575 instance = self.op.instance_name
9576 pnode_name = self.pnode.name
# Sanity: node-resource locks must be a subset of the node locks we hold.
9578 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
9579 self.owned_locks(locking.LEVEL_NODE)), \
9580 "Node locks differ from node resource locks"
# Some hypervisors need a network (console) port allocated from the cluster.
9582 ht_kind = self.op.hypervisor
9583 if ht_kind in constants.HTS_REQ_PORT:
9584 network_port = self.cfg.AllocatePort()
# Build the disk objects and the configuration-level Instance object.
9588 disks = _GenerateDiskTemplate(self,
9589 self.op.disk_template,
9590 instance, pnode_name,
9593 self.instance_file_storage_dir,
9594 self.op.file_driver,
# The instance is registered ADMINST_DOWN first; it is flipped to
# ADMINST_UP near the end (visible at original line 9783) before starting.
9599 iobj = objects.Instance(name=instance, os=self.op.os_type,
9600 primary_node=pnode_name,
9601 nics=self.nics, disks=disks,
9602 disk_template=self.op.disk_template,
9603 admin_state=constants.ADMINST_DOWN,
9604 network_port=network_port,
9605 beparams=self.op.beparams,
9606 hvparams=self.op.hvparams,
9607 hypervisor=self.op.hypervisor,
9608 osparams=self.op.osparams,
9612 for tag in self.op.tags:
# Disk adoption path: reuse pre-existing LVs instead of creating new storage.
9615 if self.adopt_disks:
9616 if self.op.disk_template == constants.DT_PLAIN:
9617 # rename LVs to the newly-generated names; we need to construct
9618 # 'fake' LV disks with the old data, plus the new unique_id
9619 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
9621 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
9622 rename_to.append(t_dsk.logical_id)
9623 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
9624 self.cfg.SetDiskID(t_dsk, pnode_name)
9625 result = self.rpc.call_blockdev_rename(pnode_name,
9626 zip(tmp_disks, rename_to))
9627 result.Raise("Failed to rename adoped LVs")
# Non-adoption path: create fresh disks, rolling back on failure.
9629 feedback_fn("* creating instance disks...")
9631 _CreateDisks(self, iobj)
9632 except errors.OpExecError:
9633 self.LogWarning("Device creation failed, reverting...")
9635 _RemoveDisks(self, iobj)
9637 self.cfg.ReleaseDRBDMinors(instance)
9640 feedback_fn("adding instance %s to cluster config" % instance)
9642 self.cfg.AddInstance(iobj, self.proc.GetECId())
9644 # Declare that we don't want to remove the instance lock anymore, as we've
9645 # added the instance to the config
9646 del self.remove_locks[locking.LEVEL_INSTANCE]
9648 if self.op.mode == constants.INSTANCE_IMPORT:
9649 # Release unused nodes
9650 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
9653 _ReleaseLocks(self, locking.LEVEL_NODE)
# Optionally wipe the new disks; a wipe failure triggers cleanup and abort.
9656 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
9657 feedback_fn("* wiping instance disks...")
9659 _WipeDisks(self, iobj)
9660 except errors.OpExecError, err:
9661 logging.exception("Wiping disks failed")
9662 self.LogWarning("Wiping instance disks failed (%s)", err)
9666 # Something is already wrong with the disks, don't do anything else
9668 elif self.op.wait_for_sync:
9669 disk_abort = not _WaitForSync(self, iobj)
9670 elif iobj.disk_template in constants.DTS_INT_MIRROR:
9671 # make sure the disks are not degraded (still sync-ing is ok)
9672 feedback_fn("* checking mirrors status")
9673 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
# disk_abort handling: remove disks, drop the instance from the config and
# re-arm instance-lock removal before raising.
9678 _RemoveDisks(self, iobj)
9679 self.cfg.RemoveInstance(iobj.name)
9680 # Make sure the instance lock gets removed
9681 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
9682 raise errors.OpExecError("There are some degraded disks for"
9685 # Release all node resource locks
9686 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
# OS installation / data import phase, dispatched on the creation mode.
9688 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
9689 if self.op.mode == constants.INSTANCE_CREATE:
9690 if not self.op.no_install:
# Pause DRBD sync during OS install when we did not wait for full sync,
# so the install does not compete with the resynchronization.
9691 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
9692 not self.op.wait_for_sync)
9694 feedback_fn("* pausing disk sync to install instance OS")
9695 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9697 for idx, success in enumerate(result.payload):
9699 logging.warn("pause-sync of instance %s for disk %d failed",
9702 feedback_fn("* running the instance OS create scripts...")
9703 # FIXME: pass debug option from opcode to backend
9705 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
9706 self.op.debug_level)
9708 feedback_fn("* resuming disk sync")
9709 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9711 for idx, success in enumerate(result.payload):
9713 logging.warn("resume-sync of instance %s for disk %d failed",
9716 os_add_result.Raise("Could not add os for instance %s"
9717 " on node %s" % (instance, pnode_name))
9719 elif self.op.mode == constants.INSTANCE_IMPORT:
9720 feedback_fn("* running the instance OS import scripts...")
# Build one DiskTransfer per exported disk image and run them together.
9724 for idx, image in enumerate(self.src_images):
9728 # FIXME: pass debug option from opcode to backend
9729 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
9730 constants.IEIO_FILE, (image, ),
9731 constants.IEIO_SCRIPT,
9732 (iobj.disks[idx], idx),
9734 transfers.append(dt)
9737 masterd.instance.TransferInstanceData(self, feedback_fn,
9738 self.op.src_node, pnode_name,
9739 self.pnode.secondary_ip,
# Partial import failures are reported as warnings, not hard errors.
9741 if not compat.all(import_result):
9742 self.LogWarning("Some disks for instance %s on node %s were not"
9743 " imported successfully" % (instance, pnode_name))
9745 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9746 feedback_fn("* preparing remote import...")
9747 # The source cluster will stop the instance before attempting to make a
9748 # connection. In some cases stopping an instance can take a long time,
9749 # hence the shutdown timeout is added to the connection timeout.
9750 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
9751 self.op.source_shutdown_timeout)
9752 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9754 assert iobj.primary_node == self.pnode.name
9756 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
9757 self.source_x509_ca,
9758 self._cds, timeouts)
9759 if not compat.all(disk_results):
9760 # TODO: Should the instance still be started, even if some disks
9761 # failed to import (valid for local imports, too)?
9762 self.LogWarning("Some disks for instance %s on node %s were not"
9763 " imported successfully" % (instance, pnode_name))
9765 # Run rename script on newly imported instance
9766 assert iobj.name == instance
9767 feedback_fn("Running rename script for %s" % instance)
9768 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
9769 self.source_instance_name,
9770 self.op.debug_level)
9772 self.LogWarning("Failed to run rename script for %s on node"
9773 " %s: %s" % (instance, pnode_name, result.fail_msg))
9776 # also checked in the prereq part
9777 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
9780 assert not self.owned_locks(locking.LEVEL_NODE_RES)
# Finally mark the instance administratively up, persist, and start it.
9783 iobj.admin_state = constants.ADMINST_UP
9784 self.cfg.Update(iobj, feedback_fn)
9785 logging.info("Starting instance %s on node %s", instance, pnode_name)
9786 feedback_fn("* starting instance...")
9787 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
9789 result.Raise("Could not start instance")
# Returns the list of all nodes the new instance lives on.
9791 return list(iobj.all_nodes)
# NoHooksLU subclass: console access runs no cluster hooks.
9794 class LUInstanceConsole(NoHooksLU):
9795 """Connect to an instance's console.
9797 This is somewhat special in that it returns the command line that
9798 you need to run on the master node in order to connect to the
# All locks are taken shared: reading console info does not modify state.
9804 def ExpandNames(self):
9805 self.share_locks = _ShareAll()
9806 self._ExpandAndLockInstance()
9808 def CheckPrereq(self):
9809 """Check prerequisites.
9811 This checks that the instance is in the cluster.
9814 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9815 assert self.instance is not None, \
9816 "Cannot retrieve locked instance %s" % self.op.instance_name
# The console lives on the primary node, so that node must be online.
9817 _CheckNodeOnline(self, self.instance.primary_node)
9819 def Exec(self, feedback_fn):
9820 """Connect to the console of an instance
9823 instance = self.instance
9824 node = instance.primary_node
# Ask the primary node which instances it is actually running.
9826 node_insts = self.rpc.call_instance_list([node],
9827 [instance.hypervisor])[node]
9828 node_insts.Raise("Can't get node information from %s" % node)
# If the instance is not running, report a state derived from admin_state.
9830 if instance.name not in node_insts.payload:
9831 if instance.admin_state == constants.ADMINST_UP:
9832 state = constants.INSTST_ERRORDOWN
9833 elif instance.admin_state == constants.ADMINST_DOWN:
9834 state = constants.INSTST_ADMINDOWN
9836 state = constants.INSTST_ADMINOFFLINE
9837 raise errors.OpExecError("Instance %s is not running (state %s)" %
9838 (instance.name, state))
9840 logging.debug("Connecting to console of %s on %s", instance.name, node)
9842 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
9845 def _GetInstanceConsole(cluster, instance):
9846 """Returns console information for an instance.
9848 @type cluster: L{objects.Cluster}
9849 @type instance: L{objects.Instance}
# Delegate to the hypervisor-specific console implementation.
9853 hyper = hypervisor.GetHypervisor(instance.hypervisor)
9854 # beparams and hvparams are passed separately, to avoid editing the
9855 # instance and then saving the defaults in the instance itself.
9856 hvparams = cluster.FillHV(instance)
9857 beparams = cluster.FillBE(instance)
9858 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
# Sanity-check the console object before serializing it for the caller.
9860 assert console.instance == instance.name
9861 assert console.Validate()
# Returned as a plain dict so it can cross the RPC/LUXI boundary.
9863 return console.ToDict()
# Thin LU wrapper: argument/lock handling lives here, the real work is done
# by the TLReplaceDisks tasklet created in ExpandNames.
9866 class LUInstanceReplaceDisks(LogicalUnit):
9867 """Replace the disks of an instance.
9870 HPATH = "mirrors-replace"
9871 HTYPE = constants.HTYPE_INSTANCE
9874 def CheckArguments(self):
9875 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
9878 def ExpandNames(self):
9879 self._ExpandAndLockInstance()
# These lock levels must not have been populated yet; they are set up below.
9881 assert locking.LEVEL_NODE not in self.needed_locks
9882 assert locking.LEVEL_NODE_RES not in self.needed_locks
9883 assert locking.LEVEL_NODEGROUP not in self.needed_locks
9885 assert self.op.iallocator is None or self.op.remote_node is None, \
9886 "Conflicting options"
9888 if self.op.remote_node is not None:
9889 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9891 # Warning: do not remove the locking of the new secondary here
9892 # unless DRBD8.AddChildren is changed to work in parallel;
9893 # currently it doesn't since parallel invocations of
9894 # FindUnusedMinor will conflict
9895 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
9896 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
9898 self.needed_locks[locking.LEVEL_NODE] = []
9899 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9901 if self.op.iallocator is not None:
9902 # iallocator will select a new node in the same group
9903 self.needed_locks[locking.LEVEL_NODEGROUP] = []
# Node-resource locks are filled in later (DeclareLocks) from the node locks.
9905 self.needed_locks[locking.LEVEL_NODE_RES] = []
9907 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
9908 self.op.iallocator, self.op.remote_node,
9909 self.op.disks, False, self.op.early_release)
9911 self.tasklets = [self.replacer]
9913 def DeclareLocks(self, level):
9914 if level == locking.LEVEL_NODEGROUP:
9915 assert self.op.remote_node is None
9916 assert self.op.iallocator is not None
9917 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
9919 self.share_locks[locking.LEVEL_NODEGROUP] = 1
9920 # Lock all groups used by instance optimistically; this requires going
9921 # via the node before it's locked, requiring verification later on
9922 self.needed_locks[locking.LEVEL_NODEGROUP] = \
9923 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9925 elif level == locking.LEVEL_NODE:
9926 if self.op.iallocator is not None:
9927 assert self.op.remote_node is None
9928 assert not self.needed_locks[locking.LEVEL_NODE]
9930 # Lock member nodes of all locked groups
9931 self.needed_locks[locking.LEVEL_NODE] = [node_name
9932 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
9933 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
9935 self._LockInstancesNodes()
9936 elif level == locking.LEVEL_NODE_RES:
# Resource locks mirror the node locks computed above.
9938 self.needed_locks[locking.LEVEL_NODE_RES] = \
9939 self.needed_locks[locking.LEVEL_NODE]
9941 def BuildHooksEnv(self):
9944 This runs on the master, the primary and all the secondaries.
9947 instance = self.replacer.instance
9949 "MODE": self.op.mode,
9950 "NEW_SECONDARY": self.op.remote_node,
9951 "OLD_SECONDARY": instance.secondary_nodes[0],
9953 env.update(_BuildInstanceHookEnvByObject(self, instance))
9956 def BuildHooksNodes(self):
9957 """Build hooks nodes.
9960 instance = self.replacer.instance
9962 self.cfg.GetMasterNode(),
9963 instance.primary_node,
9965 if self.op.remote_node is not None:
9966 nl.append(self.op.remote_node)
9969 def CheckPrereq(self):
9970 """Check prerequisites.
# Node group locks are only held when an iallocator selects the new node.
9973 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
9974 self.op.iallocator is None)
9976 # Verify if node group locks are still correct
9977 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
9979 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
# Delegates the per-tasklet prerequisite checks to the base class.
9981 return LogicalUnit.CheckPrereq(self)
9984 class TLReplaceDisks(Tasklet):
9985 """Replaces disks for an instance.
9987 Note: Locking is not within the scope of this class.
9990 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9991 disks, delay_iallocator, early_release):
9992 """Initializes this class.
9995 Tasklet.__init__(self, lu)
# Parameters supplied by the owning LU.
9998 self.instance_name = instance_name
10000 self.iallocator_name = iallocator_name
10001 self.remote_node = remote_node
10003 self.delay_iallocator = delay_iallocator
10004 self.early_release = early_release
# Runtime state, filled in by CheckPrereq/_CheckPrereq2.
10007 self.instance = None
10008 self.new_node = None
10009 self.target_node = None
10010 self.other_node = None
10011 self.remote_node_info = None
10012 self.node_secondary_ip = None
# Static validation of the mode/remote_node/iallocator combination; raises
# OpPrereqError on any invalid pairing, returns nothing on success.
10015 def CheckArguments(mode, remote_node, iallocator):
10016 """Helper function for users of this class.
10019 # check for valid parameter combination
10020 if mode == constants.REPLACE_DISK_CHG:
# Changing the secondary requires exactly one of: a new node, an iallocator.
10021 if remote_node is None and iallocator is None:
10022 raise errors.OpPrereqError("When changing the secondary either an"
10023 " iallocator script must be used or the"
10024 " new node given", errors.ECODE_INVAL)
10026 if remote_node is not None and iallocator is not None:
10027 raise errors.OpPrereqError("Give either the iallocator or the new"
10028 " secondary, not both", errors.ECODE_INVAL)
10030 elif remote_node is not None or iallocator is not None:
10031 # Not replacing the secondary
10032 raise errors.OpPrereqError("The iallocator and new node options can"
10033 " only be used when changing the"
10034 " secondary node", errors.ECODE_INVAL)
10037 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10038 """Compute a new secondary node using an IAllocator.
# Run the named allocator in relocation mode for this instance.
10041 ial = IAllocator(lu.cfg, lu.rpc,
10042 mode=constants.IALLOCATOR_MODE_RELOC,
10043 name=instance_name,
10044 relocate_from=list(relocate_from))
10046 ial.Run(iallocator_name)
10048 if not ial.success:
10049 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10050 " %s" % (iallocator_name, ial.info),
10051 errors.ECODE_NORES)
# The allocator must return exactly the number of nodes it was asked for.
10053 if len(ial.result) != ial.required_nodes:
10054 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10055 " of nodes (%s), required %s" %
10057 len(ial.result), ial.required_nodes),
10058 errors.ECODE_FAULT)
# Only the first result is used as the new secondary node name.
10060 remote_node_name = ial.result[0]
10062 lu.LogInfo("Selected new secondary for instance '%s': %s",
10063 instance_name, remote_node_name)
10065 return remote_node_name
# Convenience wrapper binding this tasklet's cfg/rpc/instance to the
# module-level faulty-disk scan for a single node.
10067 def _FindFaultyDisks(self, node_name):
10068 """Wrapper for L{_FindFaultyInstanceDisks}.
10071 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
# NOTE(review): some lines of this method are elided in the listing
# (numbering gaps around 10082, 10089-10091); comments cover visible code.
10074 def _CheckDisksActivated(self, instance):
10075 """Checks if the instance disks are activated.
10077 @param instance: The instance to check disks
10078 @return: True if they are activated, False otherwise
10081 nodes = instance.all_nodes
# Probe every disk on every node of the instance via blockdev_find.
10083 for idx, dev in enumerate(instance.disks):
10085 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10086 self.cfg.SetDiskID(dev, node)
10088 result = self.rpc.call_blockdev_find(node, dev)
# A failed RPC or missing payload means the disk is not activated.
10092 elif result.fail_msg or not result.payload:
10097 def CheckPrereq(self):
10098 """Check prerequisites.
10100 This checks that the instance is in the cluster.
10103 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10104 assert instance is not None, \
10105 "Cannot retrieve locked instance %s" % self.instance_name
# Disk replacement only makes sense for DRBD8-mirrored instances.
10107 if instance.disk_template != constants.DT_DRBD8:
10108 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10109 " instances", errors.ECODE_INVAL)
10111 if len(instance.secondary_nodes) != 1:
10112 raise errors.OpPrereqError("The instance has a strange layout,"
10113 " expected one secondary but found %d" %
10114 len(instance.secondary_nodes),
10115 errors.ECODE_FAULT)
# With delay_iallocator the second half of the checks is deferred to Exec.
10117 if not self.delay_iallocator:
10118 self._CheckPrereq2()
10120 def _CheckPrereq2(self):
10121 """Check prerequisites, second part.
10123 This function should always be part of CheckPrereq. It was separated and is
10124 now called from Exec because during node evacuation iallocator was only
10125 called with an unmodified cluster model, not taking planned changes into
10129 instance = self.instance
10130 secondary_node = instance.secondary_nodes[0]
# Determine the replacement node: explicit remote_node or iallocator result.
10132 if self.iallocator_name is None:
10133 remote_node = self.remote_node
10135 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10136 instance.name, instance.secondary_nodes)
10138 if remote_node is None:
10139 self.remote_node_info = None
10141 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10142 "Remote node '%s' is not locked" % remote_node
10144 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10145 assert self.remote_node_info is not None, \
10146 "Cannot retrieve locked node %s" % remote_node
# The new secondary must differ from both current nodes of the instance.
10148 if remote_node == self.instance.primary_node:
10149 raise errors.OpPrereqError("The specified node is the primary node of"
10150 " the instance", errors.ECODE_INVAL)
10152 if remote_node == secondary_node:
10153 raise errors.OpPrereqError("The specified node is already the"
10154 " secondary node of the instance",
10155 errors.ECODE_INVAL)
# Explicit disk lists are only valid for same-node replacement modes.
10157 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10158 constants.REPLACE_DISK_CHG):
10159 raise errors.OpPrereqError("Cannot specify disks to be replaced",
10160 errors.ECODE_INVAL)
# AUTO mode: detect which side has faulty disks and repair only that side.
10162 if self.mode == constants.REPLACE_DISK_AUTO:
10163 if not self._CheckDisksActivated(instance):
10164 raise errors.OpPrereqError("Please run activate-disks on instance %s"
10165 " first" % self.instance_name,
10166 errors.ECODE_STATE)
10167 faulty_primary = self._FindFaultyDisks(instance.primary_node)
10168 faulty_secondary = self._FindFaultyDisks(secondary_node)
10170 if faulty_primary and faulty_secondary:
10171 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10172 " one node and can not be repaired"
10173 " automatically" % self.instance_name,
10174 errors.ECODE_STATE)
10177 self.disks = faulty_primary
10178 self.target_node = instance.primary_node
10179 self.other_node = secondary_node
10180 check_nodes = [self.target_node, self.other_node]
10181 elif faulty_secondary:
10182 self.disks = faulty_secondary
10183 self.target_node = secondary_node
10184 self.other_node = instance.primary_node
10185 check_nodes = [self.target_node, self.other_node]
10191 # Non-automatic modes
# PRI/SEC replace in place; CHG moves the secondary to new_node.
10192 if self.mode == constants.REPLACE_DISK_PRI:
10193 self.target_node = instance.primary_node
10194 self.other_node = secondary_node
10195 check_nodes = [self.target_node, self.other_node]
10197 elif self.mode == constants.REPLACE_DISK_SEC:
10198 self.target_node = secondary_node
10199 self.other_node = instance.primary_node
10200 check_nodes = [self.target_node, self.other_node]
10202 elif self.mode == constants.REPLACE_DISK_CHG:
10203 self.new_node = remote_node
10204 self.other_node = instance.primary_node
10205 self.target_node = secondary_node
10206 check_nodes = [self.new_node, self.other_node]
10208 _CheckNodeNotDrained(self.lu, remote_node)
10209 _CheckNodeVmCapable(self.lu, remote_node)
10211 old_node_info = self.cfg.GetNodeInfo(secondary_node)
10212 assert old_node_info is not None
10213 if old_node_info.offline and not self.early_release:
10214 # doesn't make sense to delay the release
10215 self.early_release = True
10216 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10217 " early-release mode", secondary_node)
10220 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10223 # If not specified all disks should be replaced
10225 self.disks = range(len(self.instance.disks))
10227 # TODO: compute disk parameters
10228 primary_node_info = self.cfg.GetNodeInfo(instance.primary_node)
10229 secondary_node_info = self.cfg.GetNodeInfo(secondary_node)
10230 if primary_node_info.group != secondary_node_info.group:
10231 self.lu.LogInfo("The instance primary and secondary nodes are in two"
10232 " different node groups; the disk parameters of the"
10233 " primary node's group will be applied.")
10235 self.diskparams = self.cfg.GetNodeGroup(primary_node_info.group).diskparams
10237 for node in check_nodes:
10238 _CheckNodeOnline(self.lu, node)
# Keep locks only on nodes actually involved in the operation.
10240 touched_nodes = frozenset(node_name for node_name in [self.new_node,
10243 if node_name is not None)
10245 # Release unneeded node and node resource locks
10246 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10247 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10249 # Release any owned node group
10250 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10251 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10253 # Check whether disks are valid
10254 for disk_idx in self.disks:
10255 instance.FindDisk(disk_idx)
10257 # Get secondary node IP addresses
10258 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10259 in self.cfg.GetMultiNodeInfo(touched_nodes))
10261 def Exec(self, feedback_fn):
10262 """Execute disk replacement.
10264 This dispatches the disk replacement to the appropriate handler.
# Deferred prerequisite checks (see _CheckPrereq2 docstring).
10267 if self.delay_iallocator:
10268 self._CheckPrereq2()
10271 # Verify owned locks before starting operation
10272 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10273 assert set(owned_nodes) == set(self.node_secondary_ip), \
10274 ("Incorrect node locks, owning %s, expected %s" %
10275 (owned_nodes, self.node_secondary_ip.keys()))
10276 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10277 self.lu.owned_locks(locking.LEVEL_NODE_RES))
10279 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10280 assert list(owned_instances) == [self.instance_name], \
10281 "Instance '%s' not locked" % self.instance_name
10283 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10284 "Should not own any node group lock at this point"
10287 feedback_fn("No disks need replacement")
10290 feedback_fn("Replacing disk(s) %s for %s" %
10291 (utils.CommaJoin(self.disks), self.instance.name))
# If the instance is administratively down, its disks must be activated
# for the duration of the replacement (and shut down again afterwards).
10293 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10295 # Activate the instance disks if we're replacing them on a down instance
10297 _StartInstanceDisks(self.lu, self.instance, True)
10300 # Should we replace the secondary node?
10301 if self.new_node is not None:
10302 fn = self._ExecDrbd8Secondary
10304 fn = self._ExecDrbd8DiskOnly
10306 result = fn(feedback_fn)
10308 # Deactivate the instance disks if we're replacing them on a
10311 _SafeShutdownInstanceDisks(self.lu, self.instance)
10313 assert not self.lu.owned_locks(locking.LEVEL_NODE)
10316 # Verify owned locks
10317 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10318 nodes = frozenset(self.node_secondary_ip)
# With early_release all resource locks are gone; otherwise only locks on
# the involved nodes may remain.
10319 assert ((self.early_release and not owned_nodes) or
10320 (not self.early_release and not (set(owned_nodes) - nodes))), \
10321 ("Not owning the correct locks, early_release=%s, owned=%r,"
10322 " nodes=%r" % (self.early_release, owned_nodes, nodes))
# Verifies the cluster's volume group exists on every given node; raises
# OpExecError otherwise.
10326 def _CheckVolumeGroup(self, nodes):
10327 self.lu.LogInfo("Checking volume groups")
10329 vgname = self.cfg.GetVGName()
10331 # Make sure volume group exists on all involved nodes
10332 results = self.rpc.call_vg_list(nodes)
10334 raise errors.OpExecError("Can't list volume groups on the nodes")
10337 res = results[node]
10338 res.Raise("Error checking node %s" % node)
10339 if vgname not in res.payload:
10340 raise errors.OpExecError("Volume group '%s' not found on node %s" %
# Verifies that every disk selected for replacement is visible on the given
# nodes; raises OpExecError for any missing device.
10343 def _CheckDisksExistence(self, nodes):
10344 # Check disk existence
10345 for idx, dev in enumerate(self.instance.disks):
# Only disks selected for replacement are checked.
10346 if idx not in self.disks:
10350 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10351 self.cfg.SetDiskID(dev, node)
10353 result = self.rpc.call_blockdev_find(node, dev)
10355 msg = result.fail_msg
10356 if msg or not result.payload:
10358 msg = "disk not found"
10359 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
# Aborts the replacement if any selected disk on node_name is degraded,
# since replacing disks on top of degraded storage would risk data loss.
10362 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10363 for idx, dev in enumerate(self.instance.disks):
10364 if idx not in self.disks:
10367 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10370 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
10372 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10373 " replace disks for instance %s" %
10374 (node_name, self.instance.name))
10376 def _CreateNewStorage(self, node_name):
10377 """Create new storage on the primary or secondary node.
10379 This is only used for same-node replaces, not for changing the
10380 secondary node, hence we don't want to modify the existing disk.
10385 for idx, dev in enumerate(self.instance.disks):
10386 if idx not in self.disks:
10389 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10391 self.cfg.SetDiskID(dev, node_name)
# Fresh, unique LV names for the replacement data and metadata volumes.
10393 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10394 names = _GenerateUniqueNames(self.lu, lv_names)
10396 _, data_p, meta_p = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
# New LVs are created in the same VGs as the DRBD device's current children
# (children[0] = data, children[1] = metadata).
10398 vg_data = dev.children[0].logical_id[0]
10399 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10400 logical_id=(vg_data, names[0]), params=data_p)
10401 vg_meta = dev.children[1].logical_id[0]
10402 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10403 logical_id=(vg_meta, names[1]), params=meta_p)
10405 new_lvs = [lv_data, lv_meta]
10406 old_lvs = [child.Copy() for child in dev.children]
# iv_names maps each DRBD device name to its (device, old LVs, new LVs).
10407 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10409 # we pass force_create=True to force the LVM creation
10410 for new_lv in new_lvs:
10411 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
10412 _GetInstanceInfoText(self.instance), False)
  def _CheckDevices(self, node_name, iv_names):
    """Verify the reconfigured DRBD devices are present and healthy.

    @param iv_names: mapping as built by L{_CreateNewStorage}

    @raise errors.OpExecError: if a DRBD device is missing or degraded

    """
    for name, (dev, _, _) in iv_names.iteritems():
      self.cfg.SetDiskID(dev, node_name)

      result = self.rpc.call_blockdev_find(node_name, dev)

      msg = result.fail_msg
      if msg or not result.payload:
          # no payload means the device was not found at all
          msg = "disk not found"
        raise errors.OpExecError("Can't find DRBD device %s: %s" %

      if result.payload.is_degraded:
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
  def _RemoveOldStorage(self, node_name, iv_names):
    """Remove the old (replaced) LVs on C{node_name}.

    Removal failures are only warned about (with a manual-cleanup hint),
    not fatal.

    @param iv_names: mapping as built by L{_CreateNewStorage}

    """
    for name, (_, old_lvs, _) in iv_names.iteritems():
      self.lu.LogInfo("Remove logical volumes for %s" % name)

        self.cfg.SetDiskID(lv, node_name)

        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
                             hint="remove unused LVs manually")
  def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
    """Replace a disk on the primary or secondary for DRBD 8.

    The algorithm for replace is quite complicated:

      1. for each disk to be replaced:

        1. create new LVs on the target node with unique names
        1. detach old LVs from the drbd device
        1. rename old LVs to name_replaced.<time_t>
        1. rename new LVs to old LVs
        1. attach the new LVs (with the old names now) to the drbd device

      1. wait for sync across all devices

      1. for each modified disk:

        1. remove old LVs (which have the name name_replaces.<time_t>)

    Failures are not very well handled.

    """
    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.other_node, self.target_node])
    self._CheckVolumeGroup([self.target_node, self.other_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.other_node,
                                self.other_node == self.instance.primary_node,

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    iv_names = self._CreateNewStorage(self.target_node)

    # Step: for each lv, detach+rename*2+attach
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    for dev, old_lvs, new_lvs in iv_names.itervalues():
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)

      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
      result.Raise("Can't detach drbd from local storage on node"
                   " %s for device %s" % (self.target_node, dev.iv_name))
      #cfg.Update(instance)

      # ok, we created the new LVs, so now we know we have the needed
      # storage; as such, we proceed on the target node to rename
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
      # using the assumption that logical_id == physical_id (which in
      # turn is the unique_id on that node)

      # FIXME(iustin): use a better name for the replaced LVs
      temp_suffix = int(time.time())
      ren_fn = lambda d, suff: (d.physical_id[0],
                                d.physical_id[1] + "_replaced-%s" % suff)

      # Build the rename list based on what LVs exist on the node
      rename_old_to_new = []
      for to_ren in old_lvs:
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
        if not result.fail_msg and result.payload:
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))

      self.lu.LogInfo("Renaming the old LVs on the target node")
      result = self.rpc.call_blockdev_rename(self.target_node,
      result.Raise("Can't rename old LVs on node %s" % self.target_node)

      # Now we rename the new LVs to the old LVs
      self.lu.LogInfo("Renaming the new LVs on the target node")
      rename_new_to_old = [(new, old.physical_id)
                           for old, new in zip(old_lvs, new_lvs)]
      result = self.rpc.call_blockdev_rename(self.target_node,
      result.Raise("Can't rename new LVs on node %s" % self.target_node)

      # Intermediate steps of in memory modifications
      for old, new in zip(old_lvs, new_lvs):
        new.logical_id = old.logical_id
        self.cfg.SetDiskID(new, self.target_node)

      # We need to modify old_lvs so that removal later removes the
      # right LVs, not the newly added ones; note that old_lvs is a
      for disk in old_lvs:
        disk.logical_id = ren_fn(disk, temp_suffix)
        self.cfg.SetDiskID(disk, self.target_node)

      # Now that the new lvs have the old name, we can add them to the device
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
      msg = result.fail_msg
        # attach failed: roll back by removing the just-created LVs
        for new_lv in new_lvs:
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("cleanup manually the unused logical"
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)

    # remaining step numbers depend on early_release, hence the counter
    cstep = itertools.count(5)

    if self.early_release:
      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)
      # TODO: Check if releasing locks early still makes sense
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
      # Release all resource locks except those used by the instance
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
                    keep=self.node_secondary_ip.keys())

    # Release all node locks while waiting for sync
    _ReleaseLocks(self.lu, locking.LEVEL_NODE)

    # TODO: Can the instance lock be downgraded here? Take the optional disk
    # shutdown in the caller into consideration.

    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)
  def _ExecDrbd8Secondary(self, feedback_fn):
    """Replace the secondary node for DRBD 8.

    The algorithm for replace is quite complicated:
      - for all disks of the instance:
        - create new LVs on the new node with same names
        - shutdown the drbd device on the old secondary
        - disconnect the drbd network on the primary
        - create the drbd device on the new secondary
        - network attach the drbd on the primary, using an artifice:
          the drbd code for Attach() will connect to the network if it
          finds a device which is connected to the good local disks but
          not network enabled
      - wait for sync across all devices
      - remove all disks from the old secondary

    Failures are not very well handled.

    """
    pnode = self.instance.primary_node

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.instance.primary_node])
    self._CheckVolumeGroup([self.instance.primary_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.instance.primary_node, True, True)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
                      (self.new_node, idx))
      # we pass force_create=True to force LVM creation
      for new_lv in dev.children:
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    # Step 4: dbrd minors and drbd setups changes
    # after this, we must manually remove the drbd minors on both the
    # error and the success paths
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    minors = self.cfg.AllocateDRBDMinor([self.new_node
                                         for dev in self.instance.disks],
                                        self.instance.name)
    logging.debug("Allocated minors %r", minors)

    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
                      (self.new_node, idx))
      # create new devices on new_node; note that we create two IDs:
      # one without port, so the drbd will be activated without
      # networking information on the new node at this stage, and one
      # with network, for the latter activation in step 4
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
      if self.instance.primary_node == o_node1:
        assert self.instance.primary_node == o_node2, "Three-node instance?"

      new_alone_id = (self.instance.primary_node, self.new_node, None,
                      p_minor, new_minor, o_secret)
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
                    p_minor, new_minor, o_secret)

      iv_names[idx] = (dev, dev.children, new_net_id)
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
      drbd_params, _, _ = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                              logical_id=new_alone_id,
                              children=dev.children,
                              params=drbd_params)
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
                              _GetInstanceInfoText(self.instance), False)
      except errors.GenericError:
        # creation failed: give back the allocated minors
        self.cfg.ReleaseDRBDMinors(self.instance.name)

    # We have new devices, shutdown the drbd on the old secondary
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
      self.cfg.SetDiskID(dev, self.target_node)
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
        # NOTE(review): string concatenation yields "...on oldnode: ..." --
        # a space seems to be missing between "old" and "node"
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           "node: %s" % (idx, msg),
                           hint=("Please cleanup this device manually as"
                                 " soon as possible"))

    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
    result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
                                               self.instance.disks)[pnode]

    msg = result.fail_msg
      # detaches didn't succeed (unlikely)
      self.cfg.ReleaseDRBDMinors(self.instance.name)
      raise errors.OpExecError("Can't detach the disks from the network on"
                               " old node: %s" % (msg,))

    # if we managed to detach at least one, we update all the disks of
    # the instance to point to the new secondary
    self.lu.LogInfo("Updating instance configuration")
    for dev, _, new_logical_id in iv_names.itervalues():
      dev.logical_id = new_logical_id
      self.cfg.SetDiskID(dev, self.instance.primary_node)

    self.cfg.Update(self.instance, feedback_fn)

    # Release all node locks (the configuration has been updated)
    _ReleaseLocks(self.lu, locking.LEVEL_NODE)

    # and now perform the drbd attach
    self.lu.LogInfo("Attaching primary drbds to new secondary"
                    " (standalone => connected)")
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
                                           self.node_secondary_ip,
                                           self.instance.disks,
                                           self.instance.name,
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
        # attach failures are non-fatal; the admin is pointed at the disks
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
                           hint=("please do a gnt-instance info to see the"
                                 " status of disks"))

    # remaining step numbers depend on early_release, hence the counter
    cstep = itertools.count(5)

    if self.early_release:
      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)
      # TODO: Check if releasing locks early still makes sense
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
      # Release all resource locks except those used by the instance
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
                    keep=self.node_secondary_ip.keys())

    # TODO: Can the instance lock be downgraded here? Take the optional disk
    # shutdown in the caller into consideration.

    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)
class LURepairNodeStorage(NoHooksLU):
  """Repairs the volume group on a node.

  """
  def CheckArguments(self):
    """Verify the node name and that the storage type supports repair."""
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    if (constants.SO_FIX_CONSISTENCY not in
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " repaired" % storage_type,
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    # only the target node needs to be locked
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],

  def _CheckFaultyDisks(self, instance, node_name):
    """Ensure faulty disks abort the opcode or at least warn."""
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
                                   " node '%s'" % (instance.name, node_name),
                                   errors.ECODE_STATE)
    except errors.OpPrereqError, err:
      # with ignore_consistency the error is downgraded to a warning
      if self.op.ignore_consistency:
        self.proc.LogWarning(str(err.args[0]))

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # Check whether any instance on this node has faulty disks
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
      if inst.admin_state != constants.ADMINST_UP:
      # check the disks on all other nodes this instance lives on
      check_nodes = set(inst.all_nodes)
      check_nodes.discard(self.op.node_name)
      for inst_node_name in check_nodes:
        self._CheckFaultyDisks(inst, inst_node_name)

  def Exec(self, feedback_fn):
    """Run the storage repair operation on the target node."""
    feedback_fn("Repairing storage unit '%s' on %s ..." %
                (self.op.name, self.op.node_name))
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_execute(self.op.node_name,
                                           self.op.storage_type, st_args,
                                           constants.SO_FIX_CONSISTENCY)
    result.Raise("Failed to repair storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))
class LUNodeEvacuate(NoHooksLU):
  """Evacuates instances off a list of nodes.

  """
  # Maps opcode evacuation mode to the corresponding iallocator request mode
  _MODE2IALLOCATOR = {
    constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
    constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
    constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
  assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
  assert (frozenset(_MODE2IALLOCATOR.values()) ==
          constants.IALLOCATOR_NEVAC_MODES)

  def CheckArguments(self):
    # exactly one of iallocator/remote_node must be specified
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      assert self.op.remote_node

      if self.op.remote_node == self.op.node_name:
        raise errors.OpPrereqError("Can not use evacuated node as a new"
                                   " secondary node", errors.ECODE_INVAL)

      if self.op.mode != constants.NODE_EVAC_SEC:
        raise errors.OpPrereqError("Without the use of an iallocator only"
                                   " secondary instances can be evacuated",
                                   errors.ECODE_INVAL)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],

    # Determine nodes (via group) optimistically, needs verification once locks
    # have been acquired
    self.lock_nodes = self._DetermineNodes()

  def _DetermineNodes(self):
    """Gets the list of nodes to operate on.

    """
    if self.op.remote_node is None:
      # Iallocator will choose any node(s) in the same group
      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
      group_nodes = frozenset([self.op.remote_node])

    # Determine nodes to be locked
    return set([self.op.node_name]) | group_nodes

  def _DetermineInstances(self):
    """Builds list of instances to operate on.

    """
    assert self.op.mode in constants.NODE_EVAC_MODES

    if self.op.mode == constants.NODE_EVAC_PRI:
      # Primary instances only
      inst_fn = _GetNodePrimaryInstances
      assert self.op.remote_node is None, \
        "Evacuating primary instances requires iallocator"
    elif self.op.mode == constants.NODE_EVAC_SEC:
      # Secondary instances only
      inst_fn = _GetNodeSecondaryInstances
      assert self.op.mode == constants.NODE_EVAC_ALL
      inst_fn = _GetNodeInstances
      # TODO: In 2.6, change the iallocator interface to take an evacuation mode
      raise errors.OpPrereqError("Due to an issue with the iallocator"
                                 " interface it is not possible to evacuate"
                                 " all instances at once; specify explicitly"
                                 " whether to evacuate primary or secondary"
                                 errors.ECODE_INVAL)

    return inst_fn(self.cfg, self.op.node_name)

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        set(i.name for i in self._DetermineInstances())

    elif level == locking.LEVEL_NODEGROUP:
      # Lock node groups for all potential target nodes optimistically, needs
      # verification once nodes have been acquired
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)

    elif level == locking.LEVEL_NODE:
      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes

  def CheckPrereq(self):
    """Re-check that nodes/groups/instances still match the optimistic locks.

    """
    owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
    owned_nodes = self.owned_locks(locking.LEVEL_NODE)
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)

    need_nodes = self._DetermineNodes()

    if not owned_nodes.issuperset(need_nodes):
      raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
                                 " locks were acquired, current nodes are"
                                 " are '%s', used to be '%s'; retry the"
                                 (self.op.node_name,
                                  utils.CommaJoin(need_nodes),
                                  utils.CommaJoin(owned_nodes)),
                                 errors.ECODE_STATE)

    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
    if owned_groups != wanted_groups:
      raise errors.OpExecError("Node groups changed since locks were acquired,"
                               " current groups are '%s', used to be '%s';"
                               " retry the operation" %
                               (utils.CommaJoin(wanted_groups),
                                utils.CommaJoin(owned_groups)))

    # Determine affected instances
    self.instances = self._DetermineInstances()
    self.instance_names = [i.name for i in self.instances]

    if set(self.instance_names) != owned_instances:
      raise errors.OpExecError("Instances on node '%s' changed since locks"
                               " were acquired, current instances are '%s',"
                               " used to be '%s'; retry the operation" %
                               (self.op.node_name,
                                utils.CommaJoin(self.instance_names),
                                utils.CommaJoin(owned_instances)))

    if self.instance_names:
      self.LogInfo("Evacuating instances from node '%s': %s",
                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
      self.LogInfo("No instances to evacuate from node '%s'",

    if self.op.remote_node is not None:
      # the chosen new secondary must not be the primary of any instance
      for i in self.instances:
        if i.primary_node == self.op.remote_node:
          raise errors.OpPrereqError("Node %s is the primary node of"
                                     " instance %s, cannot use it as"
                                     (self.op.remote_node, i.name),
                                     errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)

    if not self.instance_names:
      # No instances to evacuate
    elif self.op.iallocator is not None:
      # TODO: Implement relocation to other group
      ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
                       evac_mode=self._MODE2IALLOCATOR[self.op.mode],
                       instances=list(self.instance_names))

      ial.Run(self.op.iallocator)

      if not ial.success:
        raise errors.OpPrereqError("Can't compute node evacuation using"
                                   " iallocator '%s': %s" %
                                   (self.op.iallocator, ial.info),
                                   errors.ECODE_NORES)

      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)

    elif self.op.remote_node is not None:
      assert self.op.mode == constants.NODE_EVAC_SEC
      # one replace-disks job per instance, moving the secondary
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
                                        remote_node=self.op.remote_node,
                                        mode=constants.REPLACE_DISK_CHG,
                                        early_release=self.op.early_release)]
        for instance_name in self.instance_names

      raise errors.ProgrammerError("No iallocator or remote node")

    return ResultWithJobs(jobs)
def _SetOpEarlyRelease(early_release, op):
  """Sets C{early_release} flag on opcodes if available.

  """
    op.early_release = early_release
  except AttributeError:
    # the opcode type has no early_release attribute; this is only
    # expected for opcodes other than OpInstanceReplaceDisks
    assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
def _NodeEvacDest(use_nodes, group, nodes):
  """Returns group or nodes depending on caller's choice.

  @type use_nodes: bool
  @param use_nodes: whether to return the comma-joined node names
      instead of the group

  """
    return utils.CommaJoin(nodes)
def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
  """Unpacks the result of change-group and node-evacuate iallocator requests.

  Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
  L{constants.IALLOCATOR_MODE_CHG_GROUP}.

  @type lu: L{LogicalUnit}
  @param lu: Logical unit instance
  @type alloc_result: tuple/list
  @param alloc_result: Result from iallocator
  @type early_release: bool
  @param early_release: Whether to release locks early if possible
  @type use_nodes: bool
  @param use_nodes: Whether to display node names instead of groups

  """
  (moved, failed, jobs) = alloc_result

    # any failed instance makes the whole evacuation fail
    failreason = utils.CommaJoin("%s (%s)" % (name, reason)
                                 for (name, reason) in failed)
    lu.LogWarning("Unable to evacuate instances %s", failreason)
    raise errors.OpExecError("Unable to evacuate instances %s" % failreason)

    lu.LogInfo("Instances to be moved: %s",
               utils.CommaJoin("%s (to %s)" %
                               (name, _NodeEvacDest(use_nodes, group, nodes))
                               for (name, group, nodes) in moved))

  # load the serialized opcodes of each job and set the early_release
  # flag on those that support it
  return [map(compat.partial(_SetOpEarlyRelease, early_release),
              map(opcodes.OpCode.LoadOpCode, ops))
class LUInstanceGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
    elif level == locking.LEVEL_NODE_RES:
      # resource locks mirror the node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE][:]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
      "DISK": self.op.disk,
      "AMOUNT": self.op.amount,
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    nodenames = list(instance.all_nodes)
    for node in nodenames:
      _CheckNodeOnline(self, node)

    self.instance = instance

    if instance.disk_template not in constants.DTS_GROWABLE:
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing", errors.ECODE_INVAL)

    self.disk = instance.FindDisk(self.op.disk)

    if instance.disk_template not in (constants.DT_FILE,
                                      constants.DT_SHARED_FILE):
      # TODO: check the free disk space for file, when that feature will be
      _CheckNodesFreeDiskPerVG(self, nodenames,
                               self.disk.ComputeGrowth(self.op.amount))

  def Exec(self, feedback_fn):
    """Execute disk grow.

    """
    instance = self.instance

    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
      raise errors.OpExecError("Cannot activate block device to grow")

    feedback_fn("Growing disk %s of instance '%s' by %s" %
                (self.op.disk, instance.name,
                 utils.FormatUnit(self.op.amount, "h")))

    # First run all grow ops in dry-run mode
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
      result.Raise("Grow request failed to node %s" % node)

    # We know that (as far as we can test) operations across different
    # nodes will succeed, time to run it for real
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
      result.Raise("Grow request failed to node %s" % node)

    # TODO: Rewrite code to work properly
    # DRBD goes into sync mode for a short amount of time after executing the
    # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
    # calling "resize" in sync mode fails. Sleeping for a short amount of
    # time is a work-around.

    disk.RecordGrow(self.op.amount)
    self.cfg.Update(instance, feedback_fn)

    # Changes have been recorded, release node lock
    _ReleaseLocks(self, locking.LEVEL_NODE)

    # Downgrade lock while waiting for sync
    self.glm.downgrade(locking.LEVEL_INSTANCE)

    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
        self.proc.LogWarning("Disk sync-ing has not returned a good"
                             " status; please check the instance")
        if instance.admin_state != constants.ADMINST_UP:
          _SafeShutdownInstanceDisks(self, instance, disks=[disk])
    elif instance.admin_state != constants.ADMINST_UP:
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
                           " not supposed to be running because no wait for"
                           " sync mode was requested")

    assert self.owned_locks(locking.LEVEL_NODE_RES)
    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
class LUInstanceQueryData(NoHooksLU):
  """Query runtime instance data.

  """
  def ExpandNames(self):
    self.needed_locks = {}

    # Use locking if requested or when non-static information is wanted
    if not (self.op.static or self.op.use_locking):
      self.LogWarning("Non-static data requested, locks need to be acquired")
      self.op.use_locking = True

    if self.op.instances or not self.op.use_locking:
      # Expand instance names right here
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
      # Will use acquired locks
      self.wanted_names = None

    if self.op.use_locking:
      self.share_locks = _ShareAll()

      if self.wanted_names is None:
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names

      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if self.op.use_locking and level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      assert self.op.use_locking, "Locking was not used"
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)

    self.wanted_instances = \
      map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))

  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Returns the status of a block device

    """
    if self.op.static or not node:

    self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_find(node, dev)

    result.Raise("Can't compute disk status for %s" % instance_name)

    status = result.payload

    return (status.dev_path, status.major, status.minor,
            status.sync_percent, status.estimated_time,
            status.is_degraded, status.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
        snode = dev.logical_id[0]

    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
                                              instance.name, dev)
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)

    # recurse into child devices
    dev_children = map(compat.partial(self._ComputeDiskStatus,
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,

  def Exec(self, feedback_fn):
    """Gather and return data"""
    cluster = self.cfg.GetClusterInfo()

    pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
                                          for i in self.wanted_instances)
    for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
      if self.op.static or pnode.offline:
        remote_state = None
          self.LogWarning("Primary node %s is marked offline, returning static"
                          " information only for instance %s" %
                          (pnode.name, instance.name))
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        remote_info = remote_info.payload
        if remote_info and "state" in remote_info:
          remote_state = "up"
          if instance.admin_state == constants.ADMINST_UP:
            remote_state = "down"
            remote_state = instance.admin_state

      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),

      result[instance.name] = {
        "name": instance.name,
        "config_state": instance.admin_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "snodes": instance.secondary_nodes,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disk_template": instance.disk_template,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "os_instance": instance.osparams,
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
class LUInstanceSetParams(LogicalUnit):
  """Modifies an instance's parameters.

  """
  HPATH = "instance-modify"
  HTYPE = constants.HTYPE_INSTANCE

  def CheckArguments(self):
    # At least one kind of change must have been requested
    if not (self.op.nics or self.op.disks or self.op.disk_template or
            self.op.hvparams or self.op.beparams or self.op.os_name or
            self.op.online_inst or self.op.offline_inst):
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)

    if self.op.hvparams:
      # Globally-set hypervisor parameters may not be overridden per instance
      _CheckGlobalHvParams(self.op.hvparams)

    # Disk validation
    for disk_op, disk_dict in self.op.disks:
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
      if disk_op == constants.DDM_REMOVE:
        disk_addremove += 1
      elif disk_op == constants.DDM_ADD:
        disk_addremove += 1
        if not isinstance(disk_op, int):
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
        if not isinstance(disk_dict, dict):
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

      if disk_op == constants.DDM_ADD:
        # a new disk needs a valid access mode and an integer size
        mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
        if mode not in constants.DISK_ACCESS_SET:
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
                                     errors.ECODE_INVAL)
        size = disk_dict.get(constants.IDISK_SIZE, None)
          raise errors.OpPrereqError("Required disk parameter size missing",
                                     errors.ECODE_INVAL)
        except (TypeError, ValueError), err:
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
                                     str(err), errors.ECODE_INVAL)
        disk_dict[constants.IDISK_SIZE] = size
        # modification of disk
        if constants.IDISK_SIZE in disk_dict:
          raise errors.OpPrereqError("Disk size change not possible, use"
                                     " grow-disk", errors.ECODE_INVAL)

    if disk_addremove > 1:
      raise errors.OpPrereqError("Only one disk add or remove operation"
                                 " supported at a time", errors.ECODE_INVAL)

    if self.op.disks and self.op.disk_template is not None:
      raise errors.OpPrereqError("Disk template conversion and other disk"
                                 " changes not supported at the same time",
                                 errors.ECODE_INVAL)

    if (self.op.disk_template and
        self.op.disk_template in constants.DTS_INT_MIRROR and
        self.op.remote_node is None):
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
                                 " one requires specifying a secondary node",
                                 errors.ECODE_INVAL)

    # NIC validation
    for nic_op, nic_dict in self.op.nics:
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
      if nic_op == constants.DDM_REMOVE:
      elif nic_op == constants.DDM_ADD:
        if not isinstance(nic_op, int):
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
        if not isinstance(nic_dict, dict):
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

      # nic_dict should be a dict
      nic_ip = nic_dict.get(constants.INIC_IP, None)
      if nic_ip is not None:
        # the special value "none" clears the IP
        if nic_ip.lower() == constants.VALUE_NONE:
          nic_dict[constants.INIC_IP] = None
          if not netutils.IPAddress.IsValid(nic_ip):
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
                                       errors.ECODE_INVAL)

      # "bridge" is a legacy alias for the "link" nic parameter
      nic_bridge = nic_dict.get("bridge", None)
      nic_link = nic_dict.get(constants.INIC_LINK, None)
      if nic_bridge and nic_link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time", errors.ECODE_INVAL)
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
        nic_dict["bridge"] = None
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
        nic_dict[constants.INIC_LINK] = None

      if nic_op == constants.DDM_ADD:
        # a new nic without an explicit MAC defaults to auto-generation
        nic_mac = nic_dict.get(constants.INIC_MAC, None)
        if nic_mac is None:
          nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO

      if constants.INIC_MAC in nic_dict:
        nic_mac = nic_dict[constants.INIC_MAC]
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)

        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
                                     " modifying an existing nic",
                                     errors.ECODE_INVAL)

    if nic_addremove > 1:
      raise errors.OpPrereqError("Only one NIC add or remove operation"
                                 " supported at a time", errors.ECODE_INVAL)
11497 def ExpandNames(self):
11498 self._ExpandAndLockInstance()
11499 # Can't even acquire node locks in shared mode as upcoming changes in
11500 # Ganeti 2.6 will start to modify the node object on disk conversion
11501 self.needed_locks[locking.LEVEL_NODE] = []
11502 self.needed_locks[locking.LEVEL_NODE_RES] = []
11503 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
      if self.op.disk_template and self.op.remote_node:
        # a disk template conversion to a mirrored template needs the new
        # secondary node locked as well
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
    elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
      # Copy node locks to the resource level
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE][:]
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, primary and secondaries.

    """
    # export the *new* backend parameter values when they are being changed
    if constants.BE_MINMEM in self.be_new:
      args["minmem"] = self.be_new[constants.BE_MINMEM]
    if constants.BE_MAXMEM in self.be_new:
      args["maxmem"] = self.be_new[constants.BE_MAXMEM]
    if constants.BE_VCPUS in self.be_new:
      args["vcpus"] = self.be_new[constants.BE_VCPUS]
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
    # information at all.

      # build the nic list seen by hooks: the current nics with any
      # requested per-index overrides applied on top
      nic_override = dict(self.op.nics)
      for idx, nic in enumerate(self.instance.nics):
        if idx in nic_override:
          this_nic_override = nic_override[idx]
          this_nic_override = {}
        if constants.INIC_IP in this_nic_override:
          ip = this_nic_override[constants.INIC_IP]
        if constants.INIC_MAC in this_nic_override:
          mac = this_nic_override[constants.INIC_MAC]
        if idx in self.nic_pnew:
          nicparams = self.nic_pnew[idx]
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args["nics"].append((ip, mac, mode, link))
      if constants.DDM_ADD in nic_override:
        ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
        mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
        nicparams = self.nic_pnew[constants.DDM_ADD]
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args["nics"].append((ip, mac, mode, link))
      elif constants.DDM_REMOVE in nic_override:
        # the hook list must reflect the removal of the last nic
        del args["nics"][-1]

    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
    if self.op.disk_template:
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
  def BuildHooksNodes(self):
    """Build hooks nodes.

    Hooks run on the master node plus all of the instance's nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    # checking the new params on the primary/secondary nodes
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    cluster = self.cluster = self.cfg.GetClusterInfo()
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    pnode = instance.primary_node
    nodelist = list(instance.all_nodes)
    pnode_info = self.cfg.GetNodeInfo(pnode)
    # disk parameters come from the primary node's node group
    self.diskparams = self.cfg.GetNodeGroup(pnode_info.group).diskparams

    # OS change
    if self.op.os_name and not self.op.force:
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
                      self.op.force_variant)
      instance_os = self.op.os_name
      instance_os = instance.os

    if self.op.disk_template:
      if instance.disk_template == self.op.disk_template:
        raise errors.OpPrereqError("Instance already has disk template %s" %
                                   instance.disk_template, errors.ECODE_INVAL)
      if (instance.disk_template,
          self.op.disk_template) not in self._DISK_CONVERSIONS:
        raise errors.OpPrereqError("Unsupported disk template conversion from"
                                   " %s to %s" % (instance.disk_template,
                                                  self.op.disk_template),
                                   errors.ECODE_INVAL)
      _CheckInstanceState(self, instance, INSTANCE_DOWN,
                          msg="cannot change disk template")
      if self.op.disk_template in constants.DTS_INT_MIRROR:
        if self.op.remote_node == pnode:
          raise errors.OpPrereqError("Given new secondary node %s is the same"
                                     " as the primary node of the instance" %
                                     self.op.remote_node, errors.ECODE_STATE)
        _CheckNodeOnline(self, self.op.remote_node)
        _CheckNodeNotDrained(self, self.op.remote_node)
        # FIXME: here we assume that the old instance type is DT_PLAIN
        assert instance.disk_template == constants.DT_PLAIN
        disks = [{constants.IDISK_SIZE: d.size,
                  constants.IDISK_VG: d.logical_id[0]}
                 for d in instance.disks]
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)

        snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
        if pnode_info.group != snode_info.group:
          self.LogWarning("The primary and secondary nodes are in two"
                          " different node groups; the disk parameters"
                          " from the first disk's node group will be"

    # hvparams processing
    if self.op.hvparams:
      hv_type = instance.hypervisor
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)

      # local check
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
      self.hv_proposed = self.hv_new = hv_new # the new actual values
      self.hv_inst = i_hvdict # the new dict (without defaults)
      self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
      self.hv_new = self.hv_inst = {}

    # beparams processing
    if self.op.beparams:
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
      objects.UpgradeBeParams(i_bedict)
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
      be_new = cluster.SimpleFillBE(i_bedict)
      self.be_proposed = self.be_new = be_new # the new actual values
      self.be_inst = i_bedict # the new dict (without defaults)
      self.be_new = self.be_inst = {}
      self.be_proposed = cluster.SimpleFillBE(instance.beparams)
    be_old = cluster.FillBE(instance)

    # CPU param validation -- checking every time a parameter is
    # changed to cover all cases where either CPU mask or vcpus have
    # been changed
    if (constants.BE_VCPUS in self.be_proposed and
        constants.HV_CPU_MASK in self.hv_proposed):
        utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
      # Verify mask is consistent with number of vCPUs. Can skip this
      # test if only 1 entry in the CPU mask, which means same mask
      # is applied to all vCPUs.
      if (len(cpu_list) > 1 and
          len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
        raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
                                   (self.be_proposed[constants.BE_VCPUS],
                                    self.hv_proposed[constants.HV_CPU_MASK]),
                                   errors.ECODE_INVAL)

      # Only perform this test if a new CPU mask is given
      if constants.HV_CPU_MASK in self.hv_new:
        # Calculate the largest CPU number requested
        max_requested_cpu = max(map(max, cpu_list))
        # Check that all of the instance's nodes have enough physical CPUs to
        # satisfy the requested CPU mask
        _CheckNodesPhysicalCPUs(self, instance.all_nodes,
                                max_requested_cpu + 1, instance.hypervisor)

    # osparams processing
    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)

    # memory checks: raising maxmem must fit the primary node (and the
    # secondaries if auto_balance is set), unless --force is given
    #TODO(dynmem): do the appropriate check involving MINMEM
    if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
        be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
      mem_check_list = [pnode]
      if be_new[constants.BE_AUTO_BALANCE]:
        # either we changed auto_balance to yes or it was from before
        mem_check_list.extend(instance.secondary_nodes)
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
                                                  instance.hypervisor)
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
                                         [instance.hypervisor])
      pninfo = nodeinfo[pnode]
      msg = pninfo.fail_msg
        # Assume the primary node is unreachable and go ahead
        self.warn.append("Can't get info from primary node %s: %s" %
        (_, _, (pnhvinfo, )) = pninfo.payload
        if not isinstance(pnhvinfo.get("memory_free", None), int):
          self.warn.append("Node data from primary node %s doesn't contain"
                           " free memory information" % pnode)
        elif instance_info.fail_msg:
          self.warn.append("Can't get instance runtime information: %s" %
                           instance_info.fail_msg)
          if instance_info.payload:
            current_mem = int(instance_info.payload["memory"])
            # Assume instance not running
            # (there is a slight race condition here, but it's not very
            # probable, and we have no other way to check)
            # TODO: Describe race condition
          #TODO(dynmem): do the appropriate check involving MINMEM
          miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
                      pnhvinfo["memory_free"])
            raise errors.OpPrereqError("This change will prevent the instance"
                                       " from starting, due to %d MB of memory"
                                       " missing on its primary node" %
                                       errors.ECODE_NORES)

      if be_new[constants.BE_AUTO_BALANCE]:
        for node, nres in nodeinfo.items():
          if node not in instance.secondary_nodes:
          nres.Raise("Can't get info from secondary node %s" % node,
                     prereq=True, ecode=errors.ECODE_STATE)
          (_, _, (nhvinfo, )) = nres.payload
          if not isinstance(nhvinfo.get("memory_free", None), int):
            raise errors.OpPrereqError("Secondary node %s didn't return free"
                                       " memory information" % node,
                                       errors.ECODE_STATE)
          #TODO(dynmem): do the appropriate check involving MINMEM
          elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
            raise errors.OpPrereqError("This change will prevent the instance"
                                       " from failover to its secondary node"
                                       " %s, due to not enough memory" % node,
                                       errors.ECODE_STATE)

    # NIC processing
    self.nic_pinst = {}
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        if not instance.nics:
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
                                     errors.ECODE_INVAL)
      if nic_op != constants.DDM_ADD:
        # an existing nic
        if not instance.nics:
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
                                     " no NICs" % nic_op,
                                     errors.ECODE_INVAL)
        if nic_op < 0 or nic_op >= len(instance.nics):
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
                                     (nic_op, len(instance.nics) - 1),
                                     errors.ECODE_INVAL)
        old_nic_params = instance.nics[nic_op].nicparams
        old_nic_ip = instance.nics[nic_op].ip
        old_nic_params = {}

      # only keep the parameters the nic object actually knows about
      update_params_dict = dict([(key, nic_dict[key])
                                 for key in constants.NICS_PARAMETERS
                                 if key in nic_dict])

      # "bridge" is still accepted as an alias for the link parameter
      if "bridge" in nic_dict:
        update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]

      new_nic_params = _GetUpdatedParams(old_nic_params,
                                         update_params_dict)
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
      self.nic_pinst[nic_op] = new_nic_params
      self.nic_pnew[nic_op] = new_filled_nic_params
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]

      if new_nic_mode == constants.NIC_MODE_BRIDGED:
        # the target bridge must exist on the primary node
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
            self.warn.append(msg)
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
      if new_nic_mode == constants.NIC_MODE_ROUTED:
        # routed nics require an IP address
        if constants.INIC_IP in nic_dict:
          nic_ip = nic_dict[constants.INIC_IP]
          nic_ip = old_nic_ip
          raise errors.OpPrereqError("Cannot set the nic ip to None"
                                     " on a routed nic", errors.ECODE_INVAL)
      if constants.INIC_MAC in nic_dict:
        nic_mac = nic_dict[constants.INIC_MAC]
        if nic_mac is None:
          raise errors.OpPrereqError("Cannot set the nic mac to None",
                                     errors.ECODE_INVAL)
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          # otherwise generate the mac
          nic_dict[constants.INIC_MAC] = \
            self.cfg.GenerateMAC(self.proc.GetECId())
          # or validate/reserve the current one
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
          except errors.ReservationError:
            raise errors.OpPrereqError("MAC address %s already in use"
                                       " in cluster" % nic_mac,
                                       errors.ECODE_NOTUNIQUE)

    # DISK processing
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Disk operations not supported for"
                                 " diskless instances",
                                 errors.ECODE_INVAL)
    for disk_op, _ in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        if len(instance.disks) == 1:
          raise errors.OpPrereqError("Cannot remove the last disk of"
                                     " an instance", errors.ECODE_INVAL)
        _CheckInstanceState(self, instance, INSTANCE_DOWN,
                            msg="cannot remove disks")

      if (disk_op == constants.DDM_ADD and
          len(instance.disks) >= constants.MAX_DISKS):
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
                                   " add more" % constants.MAX_DISKS,
                                   errors.ECODE_STATE)
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
        # an existing disk
        if disk_op < 0 or disk_op >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
                                     (disk_op, len(instance.disks)),
                                     errors.ECODE_INVAL)

    # disabling the instance
    if self.op.offline_inst:
      _CheckInstanceState(self, instance, INSTANCE_DOWN,
                          msg="cannot change instance state to offline")

    # enabling the instance
    if self.op.online_inst:
      _CheckInstanceState(self, instance, INSTANCE_OFFLINE,
                          msg="cannot make instance go online")
11880 def _ConvertPlainToDrbd(self, feedback_fn):
11881 """Converts an instance from plain to drbd.
11884 feedback_fn("Converting template to drbd")
11885 instance = self.instance
11886 pnode = instance.primary_node
11887 snode = self.op.remote_node
11889 assert instance.disk_template == constants.DT_PLAIN
11891 # create a fake disk info for _GenerateDiskTemplate
11892 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
11893 constants.IDISK_VG: d.logical_id[0]}
11894 for d in instance.disks]
11895 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
11896 instance.name, pnode, [snode],
11897 disk_info, None, None, 0, feedback_fn,
11899 info = _GetInstanceInfoText(instance)
11900 feedback_fn("Creating aditional volumes...")
11901 # first, create the missing data and meta devices
11902 for disk in new_disks:
11903 # unfortunately this is... not too nice
11904 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
11906 for child in disk.children:
11907 _CreateSingleBlockDev(self, snode, instance, child, info, True)
11908 # at this stage, all new LVs have been created, we can rename the
11910 feedback_fn("Renaming original volumes...")
11911 rename_list = [(o, n.children[0].logical_id)
11912 for (o, n) in zip(instance.disks, new_disks)]
11913 result = self.rpc.call_blockdev_rename(pnode, rename_list)
11914 result.Raise("Failed to rename original LVs")
11916 feedback_fn("Initializing DRBD devices...")
11917 # all child devices are in place, we can now create the DRBD devices
11918 for disk in new_disks:
11919 for node in [pnode, snode]:
11920 f_create = node == pnode
11921 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
11923 # at this point, the instance has been modified
11924 instance.disk_template = constants.DT_DRBD8
11925 instance.disks = new_disks
11926 self.cfg.Update(instance, feedback_fn)
11928 # Release node locks while waiting for sync
11929 _ReleaseLocks(self, locking.LEVEL_NODE)
11931 # disks are created, waiting for sync
11932 disk_abort = not _WaitForSync(self, instance,
11933 oneshot=not self.op.wait_for_sync)
11935 raise errors.OpExecError("There are some degraded disks for"
11936 " this instance, please cleanup manually")
11938 # Node resource locks will be released by caller
  def _ConvertDrbdToPlain(self, feedback_fn):
    """Converts an instance from drbd to plain.

    The DRBD data children become the instance's plain disks; the DRBD
    and meta volumes are removed afterwards on a best-effort basis.

    """
    instance = self.instance

    assert len(instance.secondary_nodes) == 1
    assert instance.disk_template == constants.DT_DRBD8

    pnode = instance.primary_node
    snode = instance.secondary_nodes[0]
    feedback_fn("Converting template to plain")

    # each DRBD disk's first child is its data LV
    old_disks = instance.disks
    new_disks = [d.children[0] for d in old_disks]

    # copy over size and mode
    for parent, child in zip(old_disks, new_disks):
      child.size = parent.size
      child.mode = parent.mode

    # update instance structure
    instance.disks = new_disks
    instance.disk_template = constants.DT_PLAIN
    self.cfg.Update(instance, feedback_fn)

    # Release locks in case removing disks takes a while
    _ReleaseLocks(self, locking.LEVEL_NODE)

    feedback_fn("Removing volumes on the secondary node...")
    for disk in old_disks:
      self.cfg.SetDiskID(disk, snode)
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
        self.LogWarning("Could not remove block device %s on node %s,"
                        " continuing anyway: %s", disk.iv_name, snode, msg)

    feedback_fn("Removing unneeded volumes on the primary node...")
    for idx, disk in enumerate(old_disks):
      # the second child is the meta volume, no longer needed
      meta = disk.children[1]
      self.cfg.SetDiskID(meta, pnode)
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
                        " continuing anyway: %s", idx, pnode, msg)

    # this is a DRBD disk, return its port to the pool
    for disk in old_disks:
      tcp_port = disk.logical_id[2]
      self.cfg.AddTcpUdpPort(tcp_port)

    # Node resource locks will be released by caller
  def Exec(self, feedback_fn):
    """Modifies an instance.

    All parameters take effect only at the next restart of the instance.

    """
    # Process here the warnings from CheckPrereq, as we don't have a
    # feedback_fn there.
    for warn in self.warn:
      feedback_fn("WARNING: %s" % warn)

    assert ((self.op.disk_template is None) ^
            bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
      "Not owning any node resource locks"

    instance = self.instance

    # disk changes
    for disk_op, disk_dict in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        # remove the last disk
        device = instance.disks.pop()
        device_idx = len(instance.disks)
        for node, disk in device.ComputeNodeTree(instance.primary_node):
          self.cfg.SetDiskID(disk, node)
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
                            " continuing anyway", device_idx, node, msg)
        result.append(("disk/%d" % device_idx, "remove"))

        # if this is a DRBD disk, return its port to the pool
        if device.dev_type in constants.LDS_DRBD:
          tcp_port = device.logical_id[2]
          self.cfg.AddTcpUdpPort(tcp_port)
      elif disk_op == constants.DDM_ADD:
        # add a new disk
        if instance.disk_template in (constants.DT_FILE,
                                      constants.DT_SHARED_FILE):
          # file-based disks live in the same directory as the first disk
          file_driver, file_path = instance.disks[0].logical_id
          file_path = os.path.dirname(file_path)
          file_driver = file_path = None
        disk_idx_base = len(instance.disks)
        new_disk = _GenerateDiskTemplate(self,
                                         instance.disk_template,
                                         instance.name, instance.primary_node,
                                         instance.secondary_nodes,
                                         self.diskparams)[0]
        instance.disks.append(new_disk)
        info = _GetInstanceInfoText(instance)

        logging.info("Creating volume %s for instance %s",
                     new_disk.iv_name, instance.name)
        # Note: this needs to be kept in sync with _CreateDisks
        for node in instance.all_nodes:
          f_create = node == instance.primary_node
            _CreateBlockDev(self, node, instance, new_disk,
                            f_create, info, f_create)
          except errors.OpExecError, err:
            # creation failures are reported but do not abort the operation
            self.LogWarning("Failed to create volume %s (%s) on"
                            new_disk.iv_name, new_disk, node, err)
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
                       (new_disk.size, new_disk.mode)))
        # change a given disk
        instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
        result.append(("disk.mode/%d" % disk_op,
                       disk_dict[constants.IDISK_MODE]))

    if self.op.disk_template:
        check_nodes = set(instance.all_nodes)
        if self.op.remote_node:
          check_nodes.add(self.op.remote_node)
        for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
          owned = self.owned_locks(level)
          assert not (check_nodes - owned), \
            ("Not owning the correct locks, owning %r, expected at least %r" %
             (owned, check_nodes))

      r_shut = _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
                                 " proceed with disk template conversion")
      mode = (instance.disk_template, self.op.disk_template)
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
        # on failure, give back the DRBD minors reserved for the new disks
        self.cfg.ReleaseDRBDMinors(instance.name)
      result.append(("disk_template", self.op.disk_template))

      assert instance.disk_template == self.op.disk_template, \
        ("Expected disk template '%s', found '%s'" %
         (self.op.disk_template, instance.disk_template))

    # Release node and resource locks if there are any (they might already have
    # been released during disk conversion)
    _ReleaseLocks(self, locking.LEVEL_NODE)
    _ReleaseLocks(self, locking.LEVEL_NODE_RES)

    # NIC changes
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        # remove the last nic
        del instance.nics[-1]
        result.append(("nic.%d" % len(instance.nics), "remove"))
      elif nic_op == constants.DDM_ADD:
        # mac and bridge should be set, by now
        mac = nic_dict[constants.INIC_MAC]
        ip = nic_dict.get(constants.INIC_IP, None)
        nicparams = self.nic_pinst[constants.DDM_ADD]
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
        instance.nics.append(new_nic)
        result.append(("nic.%d" % (len(instance.nics) - 1),
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
                       (new_nic.mac, new_nic.ip,
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
        # change an existing nic in place
        for key in (constants.INIC_MAC, constants.INIC_IP):
          if key in nic_dict:
            setattr(instance.nics[nic_op], key, nic_dict[key])
        if nic_op in self.nic_pinst:
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
        for key, val in nic_dict.iteritems():
          result.append(("nic.%s/%d" % (key, nic_op), val))

    # hvparams changes
    if self.op.hvparams:
      instance.hvparams = self.hv_inst
      for key, val in self.op.hvparams.iteritems():
        result.append(("hv/%s" % key, val))

    # beparams changes
    if self.op.beparams:
      instance.beparams = self.be_inst
      for key, val in self.op.beparams.iteritems():
        result.append(("be/%s" % key, val))

    # OS change
    if self.op.os_name:
      instance.os = self.op.os_name

    # osparams changes
    if self.op.osparams:
      instance.osparams = self.os_inst
      for key, val in self.op.osparams.iteritems():
        result.append(("os/%s" % key, val))

    # online/offline instance
    if self.op.online_inst:
      # leaving the offline state: instance becomes admin-down, not started
      self.cfg.MarkInstanceDown(instance.name)
      result.append(("admin_state", constants.ADMINST_DOWN))
    if self.op.offline_inst:
      self.cfg.MarkInstanceOffline(instance.name)
      result.append(("admin_state", constants.ADMINST_OFFLINE))

    self.cfg.Update(instance, feedback_fn)

    assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
                self.owned_locks(locking.LEVEL_NODE)), \
      "All node locks should have been released by now"
  # Dispatch table mapping (old template, new template) pairs to the
  # conversion worker methods; consulted by CheckPrereq and Exec
  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
12175 class LUInstanceChangeGroup(LogicalUnit):
12176 HPATH = "instance-change-group"
12177 HTYPE = constants.HTYPE_INSTANCE
  def ExpandNames(self):
    # all locks are shared: this LU only computes a new group placement
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],

    self._ExpandAndLockInstance()

    if self.op.target_groups:
      # resolve the group names/UUIDs given by the user
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                                  self.op.target_groups)
      self.req_target_uuids = None

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if self.req_target_uuids:
        lock_groups = set(self.req_target_uuids)

        # Lock all groups used by instance optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
        lock_groups.update(instance_groups)
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET

      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups

    elif level == locking.LEVEL_NODE:
      if self.req_target_uuids:
        # Lock all nodes used by instances
        self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
        self._LockInstancesNodes()

        # Lock all nodes in all potential target groups
        lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
                       self.cfg.GetInstanceNodeGroups(self.op.instance_name))
        member_nodes = [node_name
                        for group in lock_groups
                        for node_name in self.cfg.GetNodeGroup(group).members]
        self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
        # Lock all nodes as all groups are potential targets
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert (self.req_target_uuids is None or
            owned_groups.issuperset(self.req_target_uuids))
    assert owned_instances == set([self.op.instance_name])

    # Get instance information
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)

    # Check if node groups for locked instance are still correct
    assert owned_nodes.issuperset(self.instance.all_nodes), \
      ("Instance %s's nodes changed while we kept the lock" %
       self.op.instance_name)

    inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,

    if self.req_target_uuids:
      # User requested specific target groups
      self.target_uuids = self.req_target_uuids
      # All groups except those used by the instance are potential targets
      self.target_uuids = owned_groups - inst_groups

    # a target group the instance already uses is not a valid target
    conflicting_groups = self.target_uuids & inst_groups
    if conflicting_groups:
      raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
                                 " used by the instance '%s'" %
                                 (utils.CommaJoin(conflicting_groups),
                                  self.op.instance_name),
                                 errors.ECODE_INVAL)

    if not self.target_uuids:
      raise errors.OpPrereqError("There are no possible target groups",
                                 errors.ECODE_INVAL)
12270 def BuildHooksEnv(self):
12271 """Build hooks env.
12274 assert self.target_uuids
12277 "TARGET_GROUPS": " ".join(self.target_uuids),
12280 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12284 def BuildHooksNodes(self):
12285 """Build hooks nodes.
12288 mn = self.cfg.GetMasterNode()
12289 return ([mn], [mn])
12291 def Exec(self, feedback_fn):
12292 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12294 assert instances == [self.op.instance_name], "Instance not locked"
12296 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12297 instances=instances, target_groups=list(self.target_uuids))
12299 ial.Run(self.op.iallocator)
12301 if not ial.success:
12302 raise errors.OpPrereqError("Can't compute solution for changing group of"
12303 " instance '%s' using iallocator '%s': %s" %
12304 (self.op.instance_name, self.op.iallocator,
12306 errors.ECODE_NORES)
12308 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12310 self.LogInfo("Iallocator returned %s job(s) for changing group of"
12311 " instance '%s'", len(jobs), self.op.instance_name)
12313 return ResultWithJobs(jobs)
class LUBackupQuery(NoHooksLU):
  """Query the exports list

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      # No node list given: query every node
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node; nodes whose RPC failed map to C{False}

    """
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
    rpcresult = self.rpc.call_export_list(self.nodes)
    result = {}
    for node in rpcresult:
      if rpcresult[node].fail_msg:
        # Unreachable/failed nodes are marked, not dropped
        result[node] = False
      else:
        result[node] = rpcresult[node].payload

    return result
class LUBackupPrepare(NoHooksLU):
  """Prepares an instance for an export and returns useful information.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self._cds = _GetClusterDomainSecret()

  def Exec(self, feedback_fn):
    """Prepares an instance for an export.

    For remote exports, generates an X509 key/certificate pair on the
    primary node and returns handshake and signing information; local
    exports need no preparation and yield C{None}.

    """
    instance = self.instance

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      salt = utils.GenerateSecret(8)

      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
      result = self.rpc.call_x509_cert_create(instance.primary_node,
                                              constants.RIE_CERT_VALIDITY)
      result.Raise("Can't create X509 key and certificate on %s" % result.node)

      (name, cert_pem) = result.payload

      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                             cert_pem)

      return {
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
                          salt),
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
        }

    return None
class LUBackupExport(LogicalUnit):
  """Export an instance to an image in the cluster.

  Supports two modes: a local export (snapshot copied to another node in
  the cluster) and a remote export (encrypted transfer to another
  cluster, authenticated via the cluster domain secret and X509).

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    Remote exports additionally require an X509 key name and a
    destination CA; reject the opcode early if they are missing.

    """
    self.x509_key_name = self.op.x509_key_name
    self.dest_x509_ca_pem = self.op.destination_x509_ca

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      if not self.x509_key_name:
        raise errors.OpPrereqError("Missing X509 key name for encryption",
                                   errors.ECODE_INVAL)

      if not self.dest_x509_ca_pem:
        raise errors.OpPrereqError("Missing destination X509 CA",
                                   errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    # Lock all nodes for local exports
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      # FIXME: lock only instance primary and destination node
      #
      # Sad but true, for now we have do lock all nodes, as we don't know where
      # the previous export might be, and in this LU we search for it and
      # remove it from its current node. In the future we could fix this by:
      #  - making a tasklet to search (share-lock all), then create the
      #    new one, then one to remove, after
      #  - removing the removal operation altogether
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Last minute lock declaration."""
    # All nodes are locked anyway, so nothing to do here.

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on the master, primary node and target node.

    """
    env = {
      "EXPORT_MODE": self.op.mode,
      "EXPORT_NODE": self.op.target_node,
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      # TODO: Generic function for boolean env variables
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      nl.append(self.op.target_node)

    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance and node names are valid.  For remote
    exports it also verifies the HMAC-authenticated X509 key name, the
    signed destination CA and the per-disk target information.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    if (self.op.remove_instance and
        self.instance.admin_state == constants.ADMINST_UP and
        not self.op.shutdown):
      # FIX: pass an error code like every other OpPrereqError in this LU
      raise errors.OpPrereqError("Can not remove instance without shutting it"
                                 " down before", errors.ECODE_STATE)

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
      assert self.dst_node is not None

      _CheckNodeOnline(self, self.dst_node.name)
      _CheckNodeNotDrained(self, self.dst_node.name)

      # Remote-export-only attributes stay unset for local mode
      self._cds = None
      self.dest_disk_info = None
      self.dest_x509_ca = None

    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
      self.dst_node = None

      if len(self.op.target_node) != len(self.instance.disks):
        raise errors.OpPrereqError(("Received destination information for %s"
                                    " disks, but instance %s has %s disks") %
                                   (len(self.op.target_node), instance_name,
                                    len(self.instance.disks)),
                                   errors.ECODE_INVAL)

      cds = _GetClusterDomainSecret()

      # Check X509 key name
      try:
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
      except (TypeError, ValueError) as err:
        # FIX: was missing the error code argument
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
                                   errors.ECODE_INVAL)

      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
                                   errors.ECODE_INVAL)

      # Load and verify CA
      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
      except OpenSSL.crypto.Error as err:
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
                                   (msg, ), errors.ECODE_INVAL)

      self.dest_x509_ca = cert

      # Verify target information
      disk_info = []
      for idx, disk_data in enumerate(self.op.target_node):
        try:
          (host, port, magic) = \
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
        except errors.GenericError as err:
          raise errors.OpPrereqError("Target info for disk %s: %s" %
                                     (idx, err), errors.ECODE_INVAL)

        disk_info.append((host, port, magic))

      assert len(disk_info) == len(self.op.target_node)
      self.dest_disk_info = disk_info

    else:
      raise errors.ProgrammerError("Unhandled export mode %r" %
                                   self.op.mode)

    # instance disk type verification
    # TODO: Implement export support for file-based disks
    for disk in self.instance.disks:
      if disk.dev_type == constants.LD_FILE:
        raise errors.OpPrereqError("Export not supported for instances with"
                                   " file-based disks", errors.ECODE_INVAL)

  def _CleanupExports(self, feedback_fn):
    """Removes exports of current instance from all other nodes.

    If an instance in a cluster with nodes A..D was exported to node C, its
    exports will be removed from the nodes A, B and D.

    """
    assert self.op.mode != constants.EXPORT_MODE_REMOTE

    nodelist = self.cfg.GetNodeList()
    nodelist.remove(self.dst_node.name)

    # on one-node clusters nodelist will be empty after the removal
    # if we proceed the backup would be removed because OpBackupQuery
    # substitutes an empty list with the full cluster node list.
    iname = self.instance.name
    if nodelist:
      feedback_fn("Removing old exports for instance %s" % iname)
      exportlist = self.rpc.call_export_list(nodelist)
      for node in exportlist:
        if exportlist[node].fail_msg:
          # best-effort: skip nodes we could not query
          continue
        if iname in exportlist[node].payload:
          msg = self.rpc.call_export_remove(node, iname).fail_msg
          if msg:
            self.LogWarning("Could not remove older export for instance %s"
                            " on node %s: %s", iname, node, msg)

  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    """
    assert self.op.mode in constants.EXPORT_MODES

    instance = self.instance
    src_node = instance.primary_node

    if self.op.shutdown:
      # shutdown the instance, but not the disks
      feedback_fn("Shutting down instance %s" % instance.name)
      result = self.rpc.call_instance_shutdown(src_node, instance,
                                               self.op.shutdown_timeout)
      # TODO: Maybe ignore failures if ignore_remove_failures is set
      result.Raise("Could not shutdown instance %s on"
                   " node %s" % (instance.name, src_node))

    # set the disks ID correctly since call_instance_start needs the
    # correct drbd minor to create the symlinks
    for disk in instance.disks:
      self.cfg.SetDiskID(disk, src_node)

    activate_disks = (instance.admin_state != constants.ADMINST_UP)

    if activate_disks:
      # Activate the instance disks if we'exporting a stopped instance
      feedback_fn("Activating disks for %s" % instance.name)
      _StartInstanceDisks(self, instance, None)

    try:
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
                                                     instance)

      helper.CreateSnapshots()
      try:
        if (self.op.shutdown and
            instance.admin_state == constants.ADMINST_UP and
            not self.op.remove_instance):
          assert not activate_disks
          feedback_fn("Starting instance %s" % instance.name)
          result = self.rpc.call_instance_start(src_node,
                                                (instance, None, None), False)
          msg = result.fail_msg
          if msg:
            feedback_fn("Failed to start instance: %s" % msg)
            _ShutdownInstanceDisks(self, instance)
            raise errors.OpExecError("Could not start instance: %s" % msg)

        if self.op.mode == constants.EXPORT_MODE_LOCAL:
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

          (key_name, _, _) = self.x509_key_name

          dest_ca_pem = \
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                            self.dest_x509_ca)

          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
                                                     key_name, dest_ca_pem,
                                                     timeouts)
      finally:
        helper.Cleanup()

      # Check for backwards compatibility
      assert len(dresults) == len(instance.disks)
      assert compat.all(isinstance(i, bool) for i in dresults), \
             "Not all results are boolean: %r" % dresults

    finally:
      if activate_disks:
        feedback_fn("Deactivating disks for %s" % instance.name)
        _ShutdownInstanceDisks(self, instance)

    if not (compat.all(dresults) and fin_resu):
      failures = []
      if not fin_resu:
        failures.append("export finalization")
      if not compat.all(dresults):
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
                               if not dsk)
        failures.append("disk export: disk(s) %s" % fdsk)

      raise errors.OpExecError("Export failed, errors in %s" %
                               utils.CommaJoin(failures))

    # At this point, the export was successful, we can cleanup/finish

    # Remove instance if requested
    if self.op.remove_instance:
      feedback_fn("Removing instance %s" % instance.name)
      _RemoveInstance(self, feedback_fn, instance,
                      self.op.ignore_remove_failures)

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self._CleanupExports(feedback_fn)

    return fin_resu, dresults
class LUBackupRemove(NoHooksLU):
  """Remove exports related to the named instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Remove any export.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = False
    if not instance_name:
      fqdn_warn = True
      instance_name = self.op.instance_name

    locked_nodes = self.owned_locks(locking.LEVEL_NODE)
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node in exportlist:
      msg = exportlist[node].fail_msg
      if msg:
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
        continue
      if instance_name in exportlist[node].payload:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")
class LUGroupAdd(LogicalUnit):
  """Logical unit for creating node groups.

  """
  HPATH = "group-add"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # We need the new group's UUID here so that we can create and acquire the
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
    # that it should not check whether the UUID exists in the configuration.
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
    self.needed_locks = {}
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name is not an existing node group
    already.

    """
    try:
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    except errors.OpPrereqError:
      # Name lookup failing is exactly what we want here
      pass
    else:
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
                                 " node group (UUID: %s)" %
                                 (self.op.group_name, existing_uuid),
                                 errors.ECODE_EXISTS)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

    if self.op.diskparams:
      # Make sure every disk template has an (at least empty) entry and
      # type-check the supplied values
      for templ in constants.DISK_TEMPLATES:
        if templ not in self.op.diskparams:
          self.op.diskparams[templ] = {}
        utils.ForceDictType(self.op.diskparams[templ], constants.DISK_DT_TYPES)
    else:
      self.op.diskparams = self.cfg.GetClusterInfo().diskparams

    if self.op.ipolicy:
      cluster = self.cfg.GetClusterInfo()
      full_ipolicy = cluster.SimpleFillIpolicy(self.op.ipolicy)
      objects.InstancePolicy.CheckParameterSyntax(full_ipolicy)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    master = self.cfg.GetMasterNode()
    return ([master], [master])

  def Exec(self, feedback_fn):
    """Add the node group to the cluster.

    """
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
                                  uuid=self.group_uuid,
                                  alloc_policy=self.op.alloc_policy,
                                  ndparams=self.op.ndparams,
                                  diskparams=self.op.diskparams,
                                  ipolicy=self.op.ipolicy)

    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
    del self.remove_locks[locking.LEVEL_NODEGROUP]
class LUGroupAssignNodes(NoHooksLU):
  """Logical unit for assigning nodes to groups.

  """
  REQ_BGL = False

  def ExpandNames(self):
    # These raise errors.OpPrereqError on their own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)

    # We want to lock all the affected nodes and groups. We have readily
    # available the list of nodes, and the *destination* group. To gather the
    # list of "source" groups, we need to fetch node information later on.
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: set([self.group_uuid]),
      locking.LEVEL_NODE: self.op.nodes,
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1

      # Try to get all affected nodes' groups without having the group or node
      # lock yet. Needs verification later in the code flow.
      groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)

      self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
            frozenset(self.op.nodes))

    expected_locks = (set([self.group_uuid]) |
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
    actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
    if actual_locks != expected_locks:
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
                               " current groups are '%s', used to be '%s'" %
                               (utils.CommaJoin(expected_locks),
                                utils.CommaJoin(actual_locks)))

    self.node_data = self.cfg.GetAllNodesInfo()
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    instance_data = self.cfg.GetAllInstancesInfo()

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    (new_splits, previous_splits) = \
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
                                             for node in self.op.nodes],
                                            self.node_data, instance_data)

    if new_splits:
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))

      if not self.op.force:
        raise errors.OpExecError("The following instances get split by this"
                                 " change and --force was not given: %s" %
                                 fmt_new_splits)
      else:
        self.LogWarning("This operation will split the following instances: %s",
                        fmt_new_splits)

        if previous_splits:
          self.LogWarning("In addition, these already-split instances continue"
                          " to be split across groups: %s",
                          utils.CommaJoin(utils.NiceSort(previous_splits)))

  def Exec(self, feedback_fn):
    """Assign nodes to a new group.

    """
    mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]

    self.cfg.AssignGroupNodes(mods)

  @staticmethod
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
    """Check for split instances after a node assignment.

    This method considers a series of node assignments as an atomic operation,
    and returns information about split instances after applying the set of
    changes.

    In particular, it returns information about newly split instances, and
    instances that were already split, and remain so after the change.

    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
    considered.

    @type changes: list of (node_name, new_group_uuid) pairs.
    @param changes: list of node assignments to consider.
    @param node_data: a dict with data for all nodes
    @param instance_data: a dict with all instances to consider
    @rtype: a two-tuple
    @return: a list of instances that were previously okay and result split as a
      consequence of this change, and a list of instances that were previously
      split and this change does not fix.

    """
    changed_nodes = dict((node, group) for node, group in changes
                         if node_data[node].group != group)

    all_split_instances = set()
    previously_split_instances = set()

    def InstanceNodes(instance):
      return [instance.primary_node] + list(instance.secondary_nodes)

    for inst in instance_data.values():
      if inst.disk_template not in constants.DTS_INT_MIRROR:
        # Non-mirrored templates cannot be "split"
        continue

      instance_nodes = InstanceNodes(inst)

      if len(set(node_data[node].group for node in instance_nodes)) > 1:
        previously_split_instances.add(inst.name)

      if len(set(changed_nodes.get(node, node_data[node].group)
                 for node in instance_nodes)) > 1:
        all_split_instances.add(inst.name)

    return (list(all_split_instances - previously_split_instances),
            list(previously_split_instances & all_split_instances))
class _GroupQuery(_QueryBase):
  FIELDS = query.GROUP_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}

    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
    self._cluster = lu.cfg.GetClusterInfo()
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())

    if not self.names:
      # No names given: return every group, sorted by name
      self.wanted = [name_to_uuid[name]
                     for name in utils.NiceSort(name_to_uuid.keys())]
    else:
      # Accept names to be either names or UUIDs.
      missing = []
      self.wanted = []
      all_uuid = frozenset(self._all_groups.keys())

      for name in self.names:
        if name in all_uuid:
          self.wanted.append(name)
        elif name in name_to_uuid:
          self.wanted.append(name_to_uuid[name])
        else:
          missing.append(name)

      if missing:
        raise errors.OpPrereqError("Some groups do not exist: %s" %
                                   utils.CommaJoin(missing),
                                   errors.ECODE_NOENT)

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of node groups and their attributes.

    """
    do_nodes = query.GQ_NODE in self.requested_data
    do_instances = query.GQ_INST in self.requested_data

    group_to_nodes = None
    group_to_instances = None

    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
    # latter GetAllInstancesInfo() is not enough, for we have to go through
    # instance->node. Hence, we will need to process nodes even if we only need
    # instance information.
    if do_nodes or do_instances:
      all_nodes = lu.cfg.GetAllNodesInfo()
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
      node_to_group = {}

      for node in all_nodes.values():
        if node.group in group_to_nodes:
          group_to_nodes[node.group].append(node.name)
          node_to_group[node.name] = node.group

      if do_instances:
        all_instances = lu.cfg.GetAllInstancesInfo()
        group_to_instances = dict((uuid, []) for uuid in self.wanted)

        for instance in all_instances.values():
          node = instance.primary_node
          if node in node_to_group:
            group_to_instances[node_to_group[node]].append(instance.name)

        if not do_nodes:
          # Do not pass on node information if it was not requested.
          group_to_nodes = None

    return query.GroupQueryData(self._cluster,
                                [self._all_groups[uuid]
                                 for uuid in self.wanted],
                                group_to_nodes, group_to_instances)
class LUGroupQuery(NoHooksLU):
  """Logical unit for querying node groups.

  """
  REQ_BGL = False

  def CheckArguments(self):
    # Delegate everything to the _GroupQuery helper
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
                          self.op.output_fields, False)

  def ExpandNames(self):
    self.gq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.gq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.gq.OldStyleQuery(self)
class LUGroupSetParams(LogicalUnit):
  """Modifies the parameters of a node group.

  """
  HPATH = "group-modify"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def CheckArguments(self):
    all_changes = [
      self.op.ndparams,
      self.op.diskparams,
      self.op.alloc_policy,
      self.op.hv_state,
      self.op.disk_state,
      self.op.ipolicy,
      ]

    if all_changes.count(None) == len(all_changes):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self.group = self.cfg.GetNodeGroup(self.group_uuid)

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

    if self.op.diskparams:
      self.new_diskparams = dict()
      for templ in constants.DISK_TEMPLATES:
        if templ not in self.op.diskparams:
          self.op.diskparams[templ] = {}
        new_templ_params = _GetUpdatedParams(self.group.diskparams[templ],
                                             self.op.diskparams[templ])
        utils.ForceDictType(new_templ_params, constants.DISK_DT_TYPES)
        self.new_diskparams[templ] = new_templ_params

    if self.op.hv_state:
      self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
                                                 self.group.hv_state_static)

    if self.op.disk_state:
      self.new_disk_state = \
        _MergeAndVerifyDiskState(self.op.disk_state,
                                 self.group.disk_state_static)

    if self.op.ipolicy:
      g_ipolicy = {}
      for key, value in self.op.ipolicy.iteritems():
        g_ipolicy[key] = _GetUpdatedParams(self.group.ipolicy.get(key, {}),
                                           value,
                                           use_none=True)
        utils.ForceDictType(g_ipolicy[key], constants.ISPECS_PARAMETER_TYPES)
      self.new_ipolicy = g_ipolicy
      objects.InstancePolicy.CheckParameterSyntax(self.new_ipolicy)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    master = self.cfg.GetMasterNode()
    return ([master], [master])

  def Exec(self, feedback_fn):
    """Modifies the node group.

    """
    result = []

    if self.op.ndparams:
      self.group.ndparams = self.new_ndparams
      result.append(("ndparams", str(self.group.ndparams)))

    if self.op.diskparams:
      self.group.diskparams = self.new_diskparams
      result.append(("diskparams", str(self.group.diskparams)))

    if self.op.alloc_policy:
      self.group.alloc_policy = self.op.alloc_policy

    if self.op.hv_state:
      self.group.hv_state_static = self.new_hv_state

    if self.op.disk_state:
      self.group.disk_state_static = self.new_disk_state

    if self.op.ipolicy:
      self.group.ipolicy = self.new_ipolicy

    self.cfg.Update(self.group, feedback_fn)
    return result
class LUGroupRemove(LogicalUnit):
  HPATH = "group-remove"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This will raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name exists as a node group, that is
    empty (i.e., contains no nodes), and that is not the last group of the
    cluster.

    """
    # Verify that the group is empty.
    group_nodes = [node.name
                   for node in self.cfg.GetAllNodesInfo().values()
                   if node.group == self.group_uuid]

    if group_nodes:
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
                                 " nodes: %s" %
                                 (self.op.group_name,
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
                                 errors.ECODE_STATE)

    # Verify the cluster would not be left group-less.
    if len(self.cfg.GetNodeGroupList()) == 1:
      raise errors.OpPrereqError("Group '%s' is the only group,"
                                 " cannot be removed" %
                                 self.op.group_name,
                                 errors.ECODE_STATE)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    master = self.cfg.GetMasterNode()
    return ([master], [master])

  def Exec(self, feedback_fn):
    """Remove the node group.

    """
    try:
      self.cfg.RemoveNodeGroup(self.group_uuid)
    except errors.ConfigurationError:
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
                               (self.op.group_name, self.group_uuid))

    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
class LUGroupRename(LogicalUnit):
  HPATH = "group-rename"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    Ensures requested new name is not yet used.

    """
    try:
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
    except errors.OpPrereqError:
      # The new name is free: the lookup failing is the good case
      pass
    else:
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
                                 " node group (UUID: %s)" %
                                 (self.op.new_name, new_name_uuid),
                                 errors.ECODE_EXISTS)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OLD_NAME": self.op.group_name,
      "NEW_NAME": self.op.new_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()

    all_nodes = self.cfg.GetAllNodesInfo()
    all_nodes.pop(mn, None)

    run_nodes = [mn]
    run_nodes.extend(node.name for node in all_nodes.values()
                     if node.group == self.group_uuid)

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    """Rename the node group.

    """
    group = self.cfg.GetNodeGroup(self.group_uuid)

    if group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    group.name = self.op.new_name
    self.cfg.Update(group, feedback_fn)

    return self.op.new_name
class LUGroupEvacuate(LogicalUnit):
  """Evacuate all instances from a node group to other groups.

  """
  HPATH = "group-evacuate"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    if self.op.target_groups:
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                                  self.op.target_groups)
    else:
      self.req_target_uuids = []

    if self.group_uuid in self.req_target_uuids:
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
                                 " as a target group (targets are %s)" %
                                 (self.group_uuid,
                                  utils.CommaJoin(self.req_target_uuids)),
                                 errors.ECODE_INVAL)

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if self.req_target_uuids:
        lock_groups = set([self.group_uuid] + self.req_target_uuids)

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lock_groups.update(group_uuid
                           for instance_name in
                             self.owned_locks(locking.LEVEL_INSTANCE)
                           for group_uuid in
                             self.cfg.GetInstanceNodeGroups(instance_name))
      else:
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET

      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be evacuated which
      # contain actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be evacuated and target groups
      owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
      assert self.group_uuid in owned_groups
      member_nodes = [node_name
                      for group in owned_groups
                      for node_name in self.cfg.GetNodeGroup(group).members]
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)

  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert owned_groups.issuperset(self.req_target_uuids)
    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))

    # Check if node groups for locked instances are still correct
    for instance_name in owned_instances:
      inst = self.instances[instance_name]
      assert owned_nodes.issuperset(inst.all_nodes), \
        "Instance %s's nodes changed while we kept the lock" % instance_name

      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
                                             owned_groups)

      assert self.group_uuid in inst_groups, \
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)

    if self.req_target_uuids:
      # User requested specific target groups
      self.target_uuids = self.req_target_uuids
    else:
      # All groups except the one to be evacuated are potential targets
      self.target_uuids = [group_uuid for group_uuid in owned_groups
                           if group_uuid != self.group_uuid]

      if not self.target_uuids:
        raise errors.OpPrereqError("There are no possible target groups",
                                   errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()

    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)

    run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert self.group_uuid not in self.target_uuids

    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
                     instances=instances, target_groups=self.target_uuids)

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute group evacuation using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)

    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
                 len(jobs), self.op.group_name)

    return ResultWithJobs(jobs)
class TagsLU(NoHooksLU): # pylint: disable=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """
  def ExpandNames(self):
    self.group_uuid = None
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)

    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
    # not possible to acquire the BGL based on opcode parameters)

  def CheckPrereq(self):
    """Check prerequisites.

    Resolves the tag target (cluster, node, instance or node group object)
    into L{self.target} for use by the child LUs.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)
class LUTagsGet(TagsLU):
  """Returns the tags of a given object.

  """
  REQ_BGL = False

  def ExpandNames(self):
    TagsLU.ExpandNames(self)

    # Share locks as this is only a read operation
    self.share_locks = _ShareAll()

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())
13526 class LUTagsSearch(NoHooksLU):
13527 """Searches the tags for a given pattern.
13532 def ExpandNames(self):
13533 self.needed_locks = {}
13535 def CheckPrereq(self):
13536 """Check prerequisites.
13538 This checks the pattern passed for validity by compiling it.
13542 self.re = re.compile(self.op.pattern)
13543 except re.error, err:
13544 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
13545 (self.op.pattern, err), errors.ECODE_INVAL)
13547 def Exec(self, feedback_fn):
13548 """Returns the tag list.
13552 tgts = [("/cluster", cfg.GetClusterInfo())]
13553 ilist = cfg.GetAllInstancesInfo().values()
13554 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
13555 nlist = cfg.GetAllNodesInfo().values()
13556 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
13557 tgts.extend(("/nodegroup/%s" % n.name, n)
13558 for n in cfg.GetAllNodeGroupsInfo().values())
13560 for path, target in tgts:
13561 for tag in target.GetTags():
13562 if self.re.search(tag):
13563 results.append((path, tag))
13567 class LUTagsSet(TagsLU):
13568 """Sets a tag on a given object.
13573 def CheckPrereq(self):
13574 """Check prerequisites.
13576 This checks the type and length of the tag name and value.
13579 TagsLU.CheckPrereq(self)
13580 for tag in self.op.tags:
13581 objects.TaggableObject.ValidateTag(tag)
13583 def Exec(self, feedback_fn):
13588 for tag in self.op.tags:
13589 self.target.AddTag(tag)
13590 except errors.TagError, err:
13591 raise errors.OpExecError("Error while setting tag: %s" % str(err))
13592 self.cfg.Update(self.target, feedback_fn)
class LUTagsDel(TagsLU):
  """Delete a list of tags from a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()

    diff_tags = del_tags - cur_tags
    if diff_tags:
      # Refuse the whole operation if any requested tag is missing
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (utils.CommaJoin(diff_names), ),
                                 errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)
class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()
13677 class LUTestJqueue(NoHooksLU):
13678 """Utility LU to test some aspects of the job queue.
13683 # Must be lower than default timeout for WaitForJobChange to see whether it
13684 # notices changed jobs
13685 _CLIENT_CONNECT_TIMEOUT = 20.0
13686 _CLIENT_CONFIRM_TIMEOUT = 60.0
13689 def _NotifyUsingSocket(cls, cb, errcls):
13690 """Opens a Unix socket and waits for another program to connect.
13693 @param cb: Callback to send socket name to client
13694 @type errcls: class
13695 @param errcls: Exception class to use for errors
13698 # Using a temporary directory as there's no easy way to create temporary
13699 # sockets without writing a custom loop around tempfile.mktemp and
13701 tmpdir = tempfile.mkdtemp()
13703 tmpsock = utils.PathJoin(tmpdir, "sock")
13705 logging.debug("Creating temporary socket at %s", tmpsock)
13706 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
13711 # Send details to client
13714 # Wait for client to connect before continuing
13715 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
13717 (conn, _) = sock.accept()
13718 except socket.error, err:
13719 raise errcls("Client didn't connect in time (%s)" % err)
13723 # Remove as soon as client is connected
13724 shutil.rmtree(tmpdir)
13726 # Wait for client to close
13729 # pylint: disable=E1101
13730 # Instance of '_socketobject' has no ... member
13731 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
13733 except socket.error, err:
13734 raise errcls("Client failed to confirm notification (%s)" % err)
13738 def _SendNotification(self, test, arg, sockname):
13739 """Sends a notification to the client.
13742 @param test: Test name
13743 @param arg: Test argument (depends on test)
13744 @type sockname: string
13745 @param sockname: Socket path
13748 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
13750 def _Notify(self, prereq, test, arg):
13751 """Notifies the client of a test.
13754 @param prereq: Whether this is a prereq-phase test
13756 @param test: Test name
13757 @param arg: Test argument (depends on test)
13761 errcls = errors.OpPrereqError
13763 errcls = errors.OpExecError
13765 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
13769 def CheckArguments(self):
13770 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
13771 self.expandnames_calls = 0
13773 def ExpandNames(self):
13774 checkargs_calls = getattr(self, "checkargs_calls", 0)
13775 if checkargs_calls < 1:
13776 raise errors.ProgrammerError("CheckArguments was not called")
13778 self.expandnames_calls += 1
13780 if self.op.notify_waitlock:
13781 self._Notify(True, constants.JQT_EXPANDNAMES, None)
13783 self.LogInfo("Expanding names")
13785 # Get lock on master node (just to get a lock, not for a particular reason)
13786 self.needed_locks = {
13787 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
13790 def Exec(self, feedback_fn):
13791 if self.expandnames_calls < 1:
13792 raise errors.ProgrammerError("ExpandNames was not called")
13794 if self.op.notify_exec:
13795 self._Notify(False, constants.JQT_EXEC, None)
13797 self.LogInfo("Executing")
13799 if self.op.log_messages:
13800 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
13801 for idx, msg in enumerate(self.op.log_messages):
13802 self.LogInfo("Sending log message %s", idx + 1)
13803 feedback_fn(constants.JQT_MSGPREFIX + msg)
13804 # Report how many test messages have been sent
13805 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
13808 raise errors.OpExecError("Opcode failure was requested")
13813 class IAllocator(object):
13814 """IAllocator framework.
13816 An IAllocator instance has three sets of attributes:
13817 - cfg that is needed to query the cluster
13818 - input data (all members of the _KEYS class attribute are required)
13819 - four buffer attributes (in|out_data|text), that represent the
13820 input (to the external script) in text and data structure format,
13821 and the output from it, again in two formats
13822 - the result variables from the script (success, info, nodes) for
13826 # pylint: disable=R0902
13827 # lots of instance attributes
13829 def __init__(self, cfg, rpc_runner, mode, **kwargs):
13831 self.rpc = rpc_runner
13832 # init buffer variables
13833 self.in_text = self.out_text = self.in_data = self.out_data = None
13834 # init all input fields so that pylint is happy
13836 self.memory = self.disks = self.disk_template = None
13837 self.os = self.tags = self.nics = self.vcpus = None
13838 self.hypervisor = None
13839 self.relocate_from = None
13841 self.instances = None
13842 self.evac_mode = None
13843 self.target_groups = []
13845 self.required_nodes = None
13846 # init result fields
13847 self.success = self.info = self.result = None
13850 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
13852 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
13853 " IAllocator" % self.mode)
13855 keyset = [n for (n, _) in keydata]
13858 if key not in keyset:
13859 raise errors.ProgrammerError("Invalid input parameter '%s' to"
13860 " IAllocator" % key)
13861 setattr(self, key, kwargs[key])
13864 if key not in kwargs:
13865 raise errors.ProgrammerError("Missing input parameter '%s' to"
13866 " IAllocator" % key)
13867 self._BuildInputData(compat.partial(fn, self), keydata)
13869 def _ComputeClusterData(self):
13870 """Compute the generic allocator input data.
13872 This is the data that is independent of the actual operation.
13876 cluster_info = cfg.GetClusterInfo()
13879 "version": constants.IALLOCATOR_VERSION,
13880 "cluster_name": cfg.GetClusterName(),
13881 "cluster_tags": list(cluster_info.GetTags()),
13882 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
13883 # we don't have job IDs
13885 ninfo = cfg.GetAllNodesInfo()
13886 iinfo = cfg.GetAllInstancesInfo().values()
13887 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
13890 node_list = [n.name for n in ninfo.values() if n.vm_capable]
13892 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
13893 hypervisor_name = self.hypervisor
13894 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
13895 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
13897 hypervisor_name = cluster_info.primary_hypervisor
13899 node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
13902 self.rpc.call_all_instances_info(node_list,
13903 cluster_info.enabled_hypervisors)
13905 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
13907 config_ndata = self._ComputeBasicNodeData(ninfo)
13908 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
13909 i_list, config_ndata)
13910 assert len(data["nodes"]) == len(ninfo), \
13911 "Incomplete node data computed"
13913 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
13915 self.in_data = data
13918 def _ComputeNodeGroupData(cfg):
13919 """Compute node groups data.
13922 ng = dict((guuid, {
13923 "name": gdata.name,
13924 "alloc_policy": gdata.alloc_policy,
13926 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
13931 def _ComputeBasicNodeData(node_cfg):
13932 """Compute global node data.
13935 @returns: a dict of name: (node dict, node config)
13938 # fill in static (config-based) values
13939 node_results = dict((ninfo.name, {
13940 "tags": list(ninfo.GetTags()),
13941 "primary_ip": ninfo.primary_ip,
13942 "secondary_ip": ninfo.secondary_ip,
13943 "offline": ninfo.offline,
13944 "drained": ninfo.drained,
13945 "master_candidate": ninfo.master_candidate,
13946 "group": ninfo.group,
13947 "master_capable": ninfo.master_capable,
13948 "vm_capable": ninfo.vm_capable,
13950 for ninfo in node_cfg.values())
13952 return node_results
13955 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
13957 """Compute global node data.
13959 @param node_results: the basic node structures as filled from the config
13962 #TODO(dynmem): compute the right data on MAX and MIN memory
13963 # make a copy of the current dict
13964 node_results = dict(node_results)
13965 for nname, nresult in node_data.items():
13966 assert nname in node_results, "Missing basic data for node %s" % nname
13967 ninfo = node_cfg[nname]
13969 if not (ninfo.offline or ninfo.drained):
13970 nresult.Raise("Can't get data for node %s" % nname)
13971 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
13973 remote_info = _MakeLegacyNodeInfo(nresult.payload)
13975 for attr in ["memory_total", "memory_free", "memory_dom0",
13976 "vg_size", "vg_free", "cpu_total"]:
13977 if attr not in remote_info:
13978 raise errors.OpExecError("Node '%s' didn't return attribute"
13979 " '%s'" % (nname, attr))
13980 if not isinstance(remote_info[attr], int):
13981 raise errors.OpExecError("Node '%s' returned invalid value"
13983 (nname, attr, remote_info[attr]))
13984 # compute memory used by primary instances
13985 i_p_mem = i_p_up_mem = 0
13986 for iinfo, beinfo in i_list:
13987 if iinfo.primary_node == nname:
13988 i_p_mem += beinfo[constants.BE_MAXMEM]
13989 if iinfo.name not in node_iinfo[nname].payload:
13992 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
13993 i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
13994 remote_info["memory_free"] -= max(0, i_mem_diff)
13996 if iinfo.admin_state == constants.ADMINST_UP:
13997 i_p_up_mem += beinfo[constants.BE_MAXMEM]
13999 # compute memory used by instances
14001 "total_memory": remote_info["memory_total"],
14002 "reserved_memory": remote_info["memory_dom0"],
14003 "free_memory": remote_info["memory_free"],
14004 "total_disk": remote_info["vg_size"],
14005 "free_disk": remote_info["vg_free"],
14006 "total_cpus": remote_info["cpu_total"],
14007 "i_pri_memory": i_p_mem,
14008 "i_pri_up_memory": i_p_up_mem,
14010 pnr_dyn.update(node_results[nname])
14011 node_results[nname] = pnr_dyn
14013 return node_results
14016 def _ComputeInstanceData(cluster_info, i_list):
14017 """Compute global instance data.
14021 for iinfo, beinfo in i_list:
14023 for nic in iinfo.nics:
14024 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
14028 "mode": filled_params[constants.NIC_MODE],
14029 "link": filled_params[constants.NIC_LINK],
14031 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
14032 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
14033 nic_data.append(nic_dict)
14035 "tags": list(iinfo.GetTags()),
14036 "admin_state": iinfo.admin_state,
14037 "vcpus": beinfo[constants.BE_VCPUS],
14038 "memory": beinfo[constants.BE_MAXMEM],
14040 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
14042 "disks": [{constants.IDISK_SIZE: dsk.size,
14043 constants.IDISK_MODE: dsk.mode}
14044 for dsk in iinfo.disks],
14045 "disk_template": iinfo.disk_template,
14046 "hypervisor": iinfo.hypervisor,
14048 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
14050 instance_data[iinfo.name] = pir
14052 return instance_data
14054 def _AddNewInstance(self):
14055 """Add new instance data to allocator structure.
14057 This in combination with _AllocatorGetClusterData will create the
14058 correct structure needed as input for the allocator.
14060 The checks for the completeness of the opcode must have already been
14064 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
14066 if self.disk_template in constants.DTS_INT_MIRROR:
14067 self.required_nodes = 2
14069 self.required_nodes = 1
14073 "disk_template": self.disk_template,
14076 "vcpus": self.vcpus,
14077 "memory": self.memory,
14078 "disks": self.disks,
14079 "disk_space_total": disk_space,
14081 "required_nodes": self.required_nodes,
14082 "hypervisor": self.hypervisor,
14087 def _AddRelocateInstance(self):
14088 """Add relocate instance data to allocator structure.
14090 This in combination with _IAllocatorGetClusterData will create the
14091 correct structure needed as input for the allocator.
14093 The checks for the completeness of the opcode must have already been
14097 instance = self.cfg.GetInstanceInfo(self.name)
14098 if instance is None:
14099 raise errors.ProgrammerError("Unknown instance '%s' passed to"
14100 " IAllocator" % self.name)
14102 if instance.disk_template not in constants.DTS_MIRRORED:
14103 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
14104 errors.ECODE_INVAL)
14106 if instance.disk_template in constants.DTS_INT_MIRROR and \
14107 len(instance.secondary_nodes) != 1:
14108 raise errors.OpPrereqError("Instance has not exactly one secondary node",
14109 errors.ECODE_STATE)
14111 self.required_nodes = 1
14112 disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
14113 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
14117 "disk_space_total": disk_space,
14118 "required_nodes": self.required_nodes,
14119 "relocate_from": self.relocate_from,
14123 def _AddNodeEvacuate(self):
14124 """Get data for node-evacuate requests.
14128 "instances": self.instances,
14129 "evac_mode": self.evac_mode,
14132 def _AddChangeGroup(self):
14133 """Get data for node-evacuate requests.
14137 "instances": self.instances,
14138 "target_groups": self.target_groups,
14141 def _BuildInputData(self, fn, keydata):
14142 """Build input data structures.
14145 self._ComputeClusterData()
14148 request["type"] = self.mode
14149 for keyname, keytype in keydata:
14150 if keyname not in request:
14151 raise errors.ProgrammerError("Request parameter %s is missing" %
14153 val = request[keyname]
14154 if not keytype(val):
14155 raise errors.ProgrammerError("Request parameter %s doesn't pass"
14156 " validation, value %s, expected"
14157 " type %s" % (keyname, val, keytype))
14158 self.in_data["request"] = request
14160 self.in_text = serializer.Dump(self.in_data)
14162 _STRING_LIST = ht.TListOf(ht.TString)
14163 _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
14164 # pylint: disable=E1101
14165 # Class '...' has no 'OP_ID' member
14166 "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
14167 opcodes.OpInstanceMigrate.OP_ID,
14168 opcodes.OpInstanceReplaceDisks.OP_ID])
14172 ht.TListOf(ht.TAnd(ht.TIsLength(3),
14173 ht.TItems([ht.TNonEmptyString,
14174 ht.TNonEmptyString,
14175 ht.TListOf(ht.TNonEmptyString),
14178 ht.TListOf(ht.TAnd(ht.TIsLength(2),
14179 ht.TItems([ht.TNonEmptyString,
14182 _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
14183 ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
14186 constants.IALLOCATOR_MODE_ALLOC:
14189 ("name", ht.TString),
14190 ("memory", ht.TInt),
14191 ("disks", ht.TListOf(ht.TDict)),
14192 ("disk_template", ht.TString),
14193 ("os", ht.TString),
14194 ("tags", _STRING_LIST),
14195 ("nics", ht.TListOf(ht.TDict)),
14196 ("vcpus", ht.TInt),
14197 ("hypervisor", ht.TString),
14199 constants.IALLOCATOR_MODE_RELOC:
14200 (_AddRelocateInstance,
14201 [("name", ht.TString), ("relocate_from", _STRING_LIST)],
14203 constants.IALLOCATOR_MODE_NODE_EVAC:
14204 (_AddNodeEvacuate, [
14205 ("instances", _STRING_LIST),
14206 ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
14208 constants.IALLOCATOR_MODE_CHG_GROUP:
14209 (_AddChangeGroup, [
14210 ("instances", _STRING_LIST),
14211 ("target_groups", _STRING_LIST),
14215 def Run(self, name, validate=True, call_fn=None):
14216 """Run an instance allocator and return the results.
14219 if call_fn is None:
14220 call_fn = self.rpc.call_iallocator_runner
14222 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
14223 result.Raise("Failure while running the iallocator script")
14225 self.out_text = result.payload
14227 self._ValidateResult()
14229 def _ValidateResult(self):
14230 """Process the allocator results.
14232 This will process and if successful save the result in
14233 self.out_data and the other parameters.
14237 rdict = serializer.Load(self.out_text)
14238 except Exception, err:
14239 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
14241 if not isinstance(rdict, dict):
14242 raise errors.OpExecError("Can't parse iallocator results: not a dict")
14244 # TODO: remove backwards compatiblity in later versions
14245 if "nodes" in rdict and "result" not in rdict:
14246 rdict["result"] = rdict["nodes"]
14249 for key in "success", "info", "result":
14250 if key not in rdict:
14251 raise errors.OpExecError("Can't parse iallocator results:"
14252 " missing key '%s'" % key)
14253 setattr(self, key, rdict[key])
14255 if not self._result_check(self.result):
14256 raise errors.OpExecError("Iallocator returned invalid result,"
14257 " expected %s, got %s" %
14258 (self._result_check, self.result),
14259 errors.ECODE_INVAL)
14261 if self.mode == constants.IALLOCATOR_MODE_RELOC:
14262 assert self.relocate_from is not None
14263 assert self.required_nodes == 1
14265 node2group = dict((name, ndata["group"])
14266 for (name, ndata) in self.in_data["nodes"].items())
14268 fn = compat.partial(self._NodesToGroups, node2group,
14269 self.in_data["nodegroups"])
14271 instance = self.cfg.GetInstanceInfo(self.name)
14272 request_groups = fn(self.relocate_from + [instance.primary_node])
14273 result_groups = fn(rdict["result"] + [instance.primary_node])
14275 if self.success and not set(result_groups).issubset(request_groups):
14276 raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
14277 " differ from original groups (%s)" %
14278 (utils.CommaJoin(result_groups),
14279 utils.CommaJoin(request_groups)))
14281 elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
14282 assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
14284 self.out_data = rdict
14287 def _NodesToGroups(node2group, groups, nodes):
14288 """Returns a list of unique group names for a list of nodes.
14290 @type node2group: dict
14291 @param node2group: Map from node name to group UUID
14293 @param groups: Group information
14295 @param nodes: Node names
14302 group_uuid = node2group[node]
14304 # Ignore unknown node
14308 group = groups[group_uuid]
14310 # Can't find group, let's use UUID
14311 group_name = group_uuid
14313 group_name = group["name"]
14315 result.add(group_name)
14317 return sorted(result)
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the director and mode test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
        list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      raise errors.ProgrammerError("Uncatched mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result
#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)