# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module

# Standard-library and third-party imports needed by the code below
import copy
import itertools
import logging
import re
import OpenSSL

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import opcodes

from ganeti import rpc

import ganeti.masterd.instance # pylint: disable=W0611


#: Size of DRBD meta block device
DRBD_META_SIZE = 128

INSTANCE_UP = [constants.ADMINST_UP]
INSTANCE_DOWN = [constants.ADMINST_DOWN]
INSTANCE_OFFLINE = [constants.ADMINST_OFFLINE]
INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
79 """Data container for LU results with jobs.
81 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
82 by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
83 contained in the C{jobs} attribute and include the job IDs in the opcode
87 def __init__(self, jobs, **kwargs):
88 """Initializes this class.
90 Additional return values can be specified as keyword arguments.
92 @type jobs: list of lists of L{opcode.OpCode}
93 @param jobs: A list of lists of opcode objects
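

# Illustrative sketch (not part of the original source): an LU whose Exec
# wants follow-up jobs submitted on its behalf returns a ResultWithJobs; the
# opcode list and the "comment" keyword below are hypothetical.
#
#   def Exec(self, feedback_fn):
#     jobs = [[opcodes.OpClusterVerifyConfig()]]  # one job with one opcode
#     # keyword arguments become additional return values next to the job IDs
#     return ResultWithJobs(jobs, comment="config verification submitted")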


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc_runner):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.glm = context.glm
    # readability alias
    self.owned_locks = context.glm.list_owned
    self.context = context
    self.rpc = rpc_runner
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    # logging
    self.Log = processor.Log # pylint: disable=C0103
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
    self.LogStep = processor.LogStep # pylint: disable=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    self.CheckArguments()

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensure
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.

    """

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    (or even locking.ALL_SET) as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.

    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level.

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      pass

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    @rtype: dict
    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"; that prefix is added by the hooks runner. The hooks
      runner will extend the environment with additional variables. If no
      environment should be defined, an empty dictionary should be returned
      (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.

    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. No nodes should be returned as an
      empty list (and not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the unused argument and the
    # "could be a function" warnings
    # pylint: disable=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False,
                          level=locking.LEVEL_NODE):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances
    @param level: Which lock level to use for locking nodes

    """
    assert level in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
      self.needed_locks[level] = wanted_nodes
    elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
      self.needed_locks[level].extend(wanted_nodes)
    else:
      raise errors.ProgrammerError("Unknown recalculation mode")

    del self.recalculate_locks[level]
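

# Illustrative sketch (not part of the original source): the usual pairing of
# ExpandNames and DeclareLocks around _LockInstancesNodes. Setting
# recalculate_locks to constants.LOCKS_REPLACE is what satisfies the assert at
# the top of _LockInstancesNodes.
#
#   def ExpandNames(self):
#     self._ExpandAndLockInstance()
#     self.needed_locks[locking.LEVEL_NODE] = []
#     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
#
#   def DeclareLocks(self, level):
#     if level == locking.LEVEL_NODE:
#       self._LockInstancesNodes()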


class NoHooksLU(LogicalUnit): # pylint: disable=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLu.

    This just raises an error.

    """
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")

  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.

    """
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
435 """Tasklet base class.
437 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
438 they can mix legacy code with tasklets. Locking needs to be done in the LU,
439 tasklets know nothing about locks.
441 Subclasses must follow these rules:
442 - Implement CheckPrereq
446 def __init__(self, lu):
453 def CheckPrereq(self):
454 """Check prerequisites for this tasklets.
456 This method should check whether the prerequisites for the execution of
457 this tasklet are fulfilled. It can do internode communication, but it
458 should be idempotent - no cluster or system changes are allowed.
460 The method should raise errors.OpPrereqError in case something is not
461 fulfilled. Its return value is ignored.
463 This method should also update all parameters to their canonical form if it
464 hasn't been done before.
469 def Exec(self, feedback_fn):
470 """Execute the tasklet.
472 This method should implement the actual work. It should raise
473 errors.OpExecError for failures that are somewhat dealt with in code, or
477 raise NotImplementedError
481 """Base for query utility classes.
484 #: Attribute holding field definitions
487 def __init__(self, qfilter, fields, use_locking):
488 """Initializes this class.
491 self.use_locking = use_locking
493 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
495 self.requested_data = self.query.RequestedData()
496 self.names = self.query.RequestedNames()
498 # Sort only if no names were requested
499 self.sort_by_name = not self.names
501 self.do_locking = None

  def _GetNames(self, lu, all_names, lock_level):
    """Helper function to determine names asked for in the query.

    """
    if self.do_locking:
      names = lu.owned_locks(lock_level)
    else:
      names = all_names

    if self.wanted == locking.ALL_SET:
      assert not self.names
      # caller didn't specify names, so ordering is not important
      return utils.NiceSort(names)

    # caller specified names and we must keep the same order
    assert self.names
    assert not self.do_locking or lu.glm.is_owned(lock_level)

    missing = set(self.wanted).difference(names)
    if missing:
      raise errors.OpExecError("Some items were removed before retrieving"
                               " their data: %s" % missing)

    # Return expanded names
    return self.wanted

  def ExpandNames(self, lu):
    """Expand names for this query.

    See L{LogicalUnit.ExpandNames}.

    """
    raise NotImplementedError()

  def DeclareLocks(self, lu, level):
    """Declare locks for this query.

    See L{LogicalUnit.DeclareLocks}.

    """
    raise NotImplementedError()

  def _GetQueryData(self, lu):
    """Collects all data for this query.

    @return: Query data object

    """
    raise NotImplementedError()

  def NewStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
                                  sort_by_name=self.sort_by_name)

  def OldStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return self.query.OldStyleQuery(self._GetQueryData(lu),
                                    sort_by_name=self.sort_by_name)
570 """Returns a dict declaring all lock levels shared.
573 return dict.fromkeys(locking.LEVELS, 1)


def _MakeLegacyNodeInfo(data):
  """Formats the data returned by L{rpc.RpcRunner.call_node_info}.

  Converts the data into a single dictionary. This is fine for most use cases,
  but some require information from more than one volume group or hypervisor.

  """
  (bootid, (vg_info, ), (hv_info, )) = data

  return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
    "bootid": bootid,
    })
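

# Example of the conversion (hypothetical values): given
#   data = ("12345", ({"vg_size": 102400, "vg_free": 51200},),
#           ({"memory_free": 4096},))
# _MakeLegacyNodeInfo(data) returns a single flat dict:
#   {"vg_size": 102400, "vg_free": 51200, "memory_free": 4096,
#    "bootid": "12345"}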


def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
  """Checks if the owned node groups are still correct for an instance.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type instance_name: string
  @param instance_name: Instance name
  @type owned_groups: set or frozenset
  @param owned_groups: List of currently owned node groups

  """
  inst_groups = cfg.GetInstanceNodeGroups(instance_name)

  if not owned_groups.issuperset(inst_groups):
    raise errors.OpPrereqError("Instance %s's node groups changed since"
                               " locks were acquired, current groups"
                               " are '%s', owning groups '%s'; retry the"
                               " operation" %
                               (instance_name,
                                utils.CommaJoin(inst_groups),
                                utils.CommaJoin(owned_groups)),
                               errors.ECODE_STATE)

  return inst_groups


def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
  """Checks if the instances in a node group are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type group_uuid: string
  @param group_uuid: Node group UUID
  @type owned_instances: set or frozenset
  @param owned_instances: List of currently owned instances

  """
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
  if owned_instances != wanted_instances:
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
                               " locks were acquired, wanted '%s', have '%s';"
                               " retry the operation" %
                               (group_uuid,
                                utils.CommaJoin(wanted_instances),
                                utils.CommaJoin(owned_instances)),
                               errors.ECODE_STATE)

  return wanted_instances


def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise

  """
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy
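

# Example (hypothetical values): VALUE_DEFAULT entries delete keys, other
# entries overwrite or extend, and old_params is never modified in place.
#
#   >>> old = {"mem": 512, "vcpus": 2}
#   >>> _GetUpdatedParams(old, {"mem": constants.VALUE_DEFAULT, "disk": 1})
#   {'vcpus': 2, 'disk': 1}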


def _UpdateAndVerifySubDict(base, updates, type_check):
  """Updates and verifies a dict with sub dicts of the same type.

  @param base: The dict with the old data
  @param updates: The dict with the new data
  @param type_check: Dict suitable to ForceDictType to verify correct types
  @returns: A new dict with updated and verified values

  """
  def fn(old, value):
    new = _GetUpdatedParams(old, value)
    utils.ForceDictType(new, type_check)
    return new

  ret = copy.deepcopy(base)
  ret.update(dict((key, fn(base.get(key, {}), value))
                  for key, value in updates.items()))
  return ret
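

# Example (hypothetical keys and values): each value of "updates" is merged
# with _GetUpdatedParams into the matching sub-dict of "base", then checked
# with utils.ForceDictType against "type_check".
#
#   >>> base = {"xen-pvm": {"mem_total": 1024}}
#   >>> upd = {"xen-pvm": {"mem_total": 2048}, "kvm": {"mem_total": 512}}
#   >>> _UpdateAndVerifySubDict(base, upd, type_check)
#   {'xen-pvm': {'mem_total': 2048}, 'kvm': {'mem_total': 512}}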


def _MergeAndVerifyHvState(op_input, obj_input):
  """Combines the hv state from an opcode with the one of the object.

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict

  """
  if op_input:
    invalid_hvs = set(op_input) - constants.HYPER_TYPES
    if invalid_hvs:
      raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
                                 " %s" % utils.CommaJoin(invalid_hvs),
                                 errors.ECODE_INVAL)
    if obj_input is None:
      obj_input = {}
    type_check = constants.HVSTS_PARAMETER_TYPES
    return _UpdateAndVerifySubDict(obj_input, op_input, type_check)

  return None


def _MergeAndVerifyDiskState(op_input, obj_input):
  """Combines the disk state from an opcode with the one of the object.

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict

  """
  if op_input:
    invalid_dst = set(op_input) - constants.DS_VALID_TYPES
    if invalid_dst:
      raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
                                 utils.CommaJoin(invalid_dst),
                                 errors.ECODE_INVAL)
    type_check = constants.DSS_PARAMETER_TYPES
    if obj_input is None:
      obj_input = {}
    return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
                                              type_check))
                for key, value in op_input.items())

  return None


def _ReleaseLocks(lu, level, names=None, keep=None):
  """Releases locks owned by an LU.

  @type lu: L{LogicalUnit}
  @param level: Lock level
  @type names: list or None
  @param names: Names of locks to release
  @type keep: list or None
  @param keep: Names of locks to retain

  """
  assert not (keep is not None and names is not None), \
    "Only one of the 'names' and the 'keep' parameters can be given"

  if names is not None:
    should_release = names.__contains__
  elif keep:
    should_release = lambda name: name not in keep
  else:
    should_release = None

  owned = lu.owned_locks(level)
  if not owned:
    # Not owning any lock at this level, do nothing
    pass

  elif should_release:
    retain = []
    release = []

    # Determine which locks to release
    for name in owned:
      if should_release(name):
        release.append(name)
      else:
        retain.append(name)

    assert len(lu.owned_locks(level)) == (len(retain) + len(release))

    # Release just some locks
    lu.glm.release(level, names=release)

    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
  else:
    # Release everything
    lu.glm.release(level)

    assert not lu.glm.is_owned(level), "No locks should be owned"
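

# Illustrative usage (not part of the original source): after narrowing work
# to a subset of nodes, an LU can drop the node locks it no longer needs while
# retaining the rest; "primary" and "secondary" below are hypothetical names.
#
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=[primary, secondary])
#   _ReleaseLocks(self, locking.LEVEL_INSTANCE)  # release all instance locks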


def _MapInstanceDisksToNodes(instances):
  """Creates a map from (node, volume) to instance name.

  @type instances: list of L{objects.Instance}
  @rtype: dict; tuple of (node name, volume name) as key, instance name as
      value

  """
  return dict(((node, vol), inst.name)
              for inst in instances
              for (node, vols) in inst.MapLVsByNode().items()
              for vol in vols)
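

# Example of the resulting shape (hypothetical values): for an instance
# "inst1" with one LV on each of two nodes, the map would look like
#   {("node1", "xenvg/disk0"): "inst1", ("node2", "xenvg/disk0"): "inst1"}
# which allows a reverse lookup from an observed (node, volume) pair.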


def _RunPostHook(lu, node_name):
  """Runs the post-hook for an opcode on a single node.

  """
  hm = lu.proc.BuildHooksManager(lu)
  try:
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
  except:
    # pylint: disable=W0702
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceState(lu, instance, req_states, msg=None):
  """Ensure that an instance is in one of the required states.

  @param lu: the LU on behalf of which we make the check
  @param instance: the instance to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the instance is not in the required state

  """
  if msg is None:
    msg = "can't use instance from outside %s states" % ", ".join(req_states)
  if instance.admin_state not in req_states:
    raise errors.OpPrereqError("Instance %s is marked to be %s, %s" %
                               (instance, instance.admin_state, msg),
                               errors.ECODE_STATE)

  if constants.ADMINST_UP not in req_states:
    pnode = instance.primary_node
    ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
    ins_l.Raise("Can't contact node %s for instance information" % pnode,
                prereq=True, ecode=errors.ECODE_ENVIRON)

    if instance.name in ins_l.payload:
      raise errors.OpPrereqError("Instance %s is running, %s" %
                                 (instance.name, msg), errors.ECODE_STATE)


def _CheckMinMaxSpecs(name, ipolicy, value):
  """Checks if value is in the desired range.

  @param name: name of the parameter for which we perform the check
  @param ipolicy: dictionary containing min, max and std values
  @param value: actual value that we want to use
  @return: None or element not meeting the criteria

  """
  if value in [None, constants.VALUE_AUTO]:
    return None
  max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
  min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
  if value > max_v or min_v > value:
    return ("%s value %s is not in range [%s, %s]" %
            (name, value, min_v, max_v))
  return None
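

# Example (hypothetical ipolicy and parameter name): with
#   ipolicy = {constants.ISPECS_MIN: {"mem-size": 128},
#              constants.ISPECS_MAX: {"mem-size": 4096}}
# _CheckMinMaxSpecs("mem-size", ipolicy, 8192) returns
#   "mem-size value 8192 is not in range [128, 4096]"
# while a value of 512 (or None/VALUE_AUTO) returns None.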


def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
                                 nic_count, disk_sizes,
                                 _check_spec_fn=_CheckMinMaxSpecs):
  """Verifies ipolicy against provided specs.

  @type ipolicy: dict
  @param ipolicy: The ipolicy
  @type mem_size: int
  @param mem_size: The memory size
  @type cpu_count: int
  @param cpu_count: Used cpu cores
  @type disk_count: int
  @param disk_count: Number of disks used
  @type nic_count: int
  @param nic_count: Number of nics used
  @type disk_sizes: list of ints
  @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
  @param _check_spec_fn: The checking function (unittest only)
  @return: A list of violations, or an empty list if no violations are found

  """
  assert disk_count == len(disk_sizes)

  test_settings = [
    (constants.ISPEC_MEM_SIZE, mem_size),
    (constants.ISPEC_CPU_COUNT, cpu_count),
    (constants.ISPEC_DISK_COUNT, disk_count),
    (constants.ISPEC_NIC_COUNT, nic_count),
    ] + map((lambda d: (constants.ISPEC_DISK_SIZE, d)), disk_sizes)

  return filter(None,
                (_check_spec_fn(name, ipolicy, value)
                 for (name, value) in test_settings))


def _ComputeIPolicyInstanceViolation(ipolicy, instance,
                                     _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance meets the specs of ipolicy.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance: L{objects.Instance}
  @param instance: The instance to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
  cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
  disk_count = len(instance.disks)
  disk_sizes = [disk.size for disk in instance.disks]
  nic_count = len(instance.nics)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes)


def _ComputeIPolicyInstanceSpecViolation(ipolicy, instance_spec,
                                         _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance specs meets the specs of ipolicy.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance_spec: dict
  @param instance_spec: The instance spec to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
  cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
  disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
  disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
  nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes)


def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
                                 target_group,
                                 _compute_fn=_ComputeIPolicyInstanceViolation):
  """Compute if instance meets the specs of the new target group.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param current_group: The current group of the instance
  @param target_group: The new group of the instance
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  if current_group == target_group:
    return []
  else:
    return _compute_fn(ipolicy, instance)


def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
                            _compute_fn=_ComputeIPolicyNodeViolation):
  """Checks that the target node is correct in terms of instance policy.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param node: The new node to relocate
  @param ignore: Ignore violations of the ipolicy
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
  res = _compute_fn(ipolicy, instance, primary_node.group, node.group)

  if res:
    msg = ("Instance does not meet target node group's (%s) instance"
           " policy: %s") % (node.group, utils.CommaJoin(res))
    if ignore:
      lu.LogWarning(msg)
    else:
      raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
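

# Typical use (sketch): LUs canonicalize user-supplied names early, e.g.
#   self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
# so that locking, hooks and logging always see fully-expanded names.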


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          minmem, maxmem, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name, tags):
  """Builds instance related env variables for hooks.

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: string
  @param status: the desired status of the instance
  @type minmem: string
  @param minmem: the minimum memory size of the instance
  @type maxmem: string
  @param maxmem: the maximum memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @type tags: list
  @param tags: list of instance tags as strings
  @rtype: dict
  @return: the hook environment for this instance

  """
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": status,
    "INSTANCE_MINMEM": minmem,
    "INSTANCE_MAXMEM": maxmem,
    # TODO(2.7) remove deprecated "memory" value
    "INSTANCE_MEMORY": maxmem,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }
  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  if not tags:
    tags = []

  env["INSTANCE_TAGS"] = " ".join(tags)

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    "name": instance.name,
    "primary_node": instance.primary_node,
    "secondary_nodes": instance.secondary_nodes,
    "os_type": instance.os,
    "status": instance.admin_state,
    "maxmem": bep[constants.BE_MAXMEM],
    "minmem": bep[constants.BE_MINMEM],
    "vcpus": bep[constants.BE_VCPUS],
    "nics": _NICListToTuple(lu, instance.nics),
    "disk_template": instance.disk_template,
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
    "bep": bep,
    "hvp": hvp,
    "hypervisor_name": instance.hypervisor,
    "tags": instance.tags,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max by one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should
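

# Worked example (hypothetical numbers): with candidate_pool_size = 10,
# mc_now = 4 and mc_should = 4, adding this node gives
# mc_should = min(4 + 1, 10) = 5, and since 4 < 5 the node promotes itself.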


def _CalculateGroupIPolicy(cluster, group):
  """Calculate instance policy for group.

  """
  return cluster.SimpleFillIPolicy(group.ipolicy)


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  variant = objects.OS.GetVariant(name)
  if not os_obj.supported_variants:
    if variant:
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                                 " passed)" % (os_obj.name, variant),
                                 errors.ECODE_INVAL)
    return
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
                               errors.ECODE_INVAL)
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found;"
                                 " please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator", errors.ECODE_INVAL)


def _GetDefaultIAllocator(cfg, iallocator):
  """Decides on which iallocator to use.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration object
  @type iallocator: string or None
  @param iallocator: Iallocator specified in opcode
  @rtype: string
  @return: Iallocator name

  """
  if not iallocator:
    # Use default iallocator
    iallocator = cfg.GetDefaultIAllocator()

  if not iallocator:
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
                               " opcode nor as a cluster-wide default",
                               errors.ECODE_INVAL)

  return iallocator


class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master_params = self.cfg.GetMasterNetworkParameters()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master_params.name)

    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Error disabling the master IP address: %s", msg)

    return master_params.name


def _VerifyCertificate(filename):
  """Verifies a certificate for L{LUClusterVerifyConfig}.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable=W0703
    return (LUClusterVerifyConfig.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)


def _GetAllHypervisorParameters(cluster, instances):
  """Compute the set of all hypervisor parameters.

  @type cluster: L{objects.Cluster}
  @param cluster: the cluster object
  @type instances: list of L{objects.Instance}
  @param instances: additional instances from which to obtain parameters
  @rtype: list of (origin, hypervisor, parameters)
  @return: a list with all parameters found, indicating the hypervisor they
       apply to, and the origin (can be "cluster", "os X", or "instance Y")

  """
  hvp_data = []

  for hv_name in cluster.enabled_hypervisors:
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))

  for os_name, os_hvp in cluster.os_hvp.items():
    for hv_name, hv_params in os_hvp.items():
      if hv_params:
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
        hvp_data.append(("os %s" % os_name, hv_name, full_params))

  # TODO: collapse identical parameter values in a single one
  for instance in instances:
    if instance.hvparams:
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
                       cluster.FillHV(instance)))

  return hvp_data


class _VerifyErrors(object):
  """Mix-in for cluster/group verify LUs.

  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
  self.op and self._feedback_fn to be available.)

  """

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt, _ = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101

  def _ErrorIf(self, cond, ecode, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = (bool(cond)
            or self.op.debug_simulate_errors) # pylint: disable=E1101

    # If the error code is in the list of ignored errors, demote the error to a
    # warning
    (_, etxt, _) = ecode
    if etxt in self.op.ignore_errors: # pylint: disable=E1101
      kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING

    if cond:
      self._Error(ecode, *args, **kwargs)

    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond
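

# Illustrative sketch (not part of the original source): verify LUs mix this
# class in and call _ErrorIf with a condition, a CV_* error code, the item
# concerned and a format message, e.g.:
#
#   self._ErrorIf(test, constants.CV_ENODERPC, node,
#                 "unable to verify node: no data returned")
#
# self.bad accumulates only real errors; demoted warnings don't fail the op.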


class LUClusterVerify(NoHooksLU):
  """Submits all jobs necessary to verify the cluster.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    jobs = []

    if self.op.group_name:
      groups = [self.op.group_name]
      depends_fn = lambda: None
    else:
      groups = self.cfg.GetNodeGroupList()

      # Verify global configuration
      jobs.append([
        opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors),
        ])

      # Always depend on global verification
      depends_fn = lambda: [(-len(jobs), [])]

    jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
                                              ignore_errors=self.op.ignore_errors,
                                              depends=depends_fn())]
                for group in groups)

    # Fix up all parameters
    for op in itertools.chain(*jobs): # pylint: disable=W0142
      op.debug_simulate_errors = self.op.debug_simulate_errors
      op.verbose = self.op.verbose
      op.error_codes = self.op.error_codes
      try:
        op.skip_checks = self.op.skip_checks
      except AttributeError:
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)

    return ResultWithJobs(jobs)
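

# Note on the depends entries (sketch): [(-len(jobs), [])] is a relative job
# dependency, i.e. "wait for the job that many positions back, whatever its
# result". Since jobs grows while the per-group generator is consumed, for two
# groups the submitted structure would look like:
#   jobs = [[OpClusterVerifyConfig(...)],
#           [OpClusterVerifyGroup(group_name="group1", depends=[(-1, [])])],
#           [OpClusterVerifyGroup(group_name="group2", depends=[(-2, [])])]]
# so every group verification waits for the global config verification.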


class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
  """Verifies the cluster config.

  """
  REQ_BGL = False

  def _VerifyHVP(self, hvp_data):
    """Verifies locally the syntax of the hypervisor parameters.

    """
    for item, hv_name, hv_params in hvp_data:
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
             (hv_name, item))
      try:
        hv_class = hypervisor.GetHypervisor(hv_name)
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
        hv_class.CheckParameterSyntax(hv_params)
      except errors.GenericError, err:
        self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))

  def ExpandNames(self):
    # Information can be safely retrieved as the BGL is acquired in exclusive
    # mode
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    """
    self.bad = False
    self._feedback_fn = feedback_fn

    feedback_fn("* Verifying cluster config")

    for msg in self.cfg.VerifyConfig():
      self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)

    feedback_fn("* Verifying cluster certificate files")

    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)

    feedback_fn("* Verifying hypervisor parameters")

    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
                                                self.all_inst_info.values()))

    feedback_fn("* Verifying all nodes belong to an existing group")

    # We do this verification here because, should this bogus circumstance
    # occur, it would never be caught by VerifyGroup, which only acts on
    # nodes/instances reachable from existing node groups.

    dangling_nodes = set(node.name for node in self.all_node_info.values()
                         if node.group not in self.all_group_info)

    dangling_instances = {}
    no_node_instances = []

    for inst in self.all_inst_info.values():
      if inst.primary_node in dangling_nodes:
        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
      elif inst.primary_node not in self.all_node_info:
        no_node_instances.append(inst.name)

    pretty_dangling = [
        "%s (%s)" % (node.name,
                     utils.CommaJoin(dangling_instances.get(node.name,
                                                            ["no instances"])))
        for node in dangling_nodes]

    self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
                  None,
                  "the following nodes (and their instances) belong to a non"
                  " existing group: %s", utils.CommaJoin(pretty_dangling))

    self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
                  None,
                  "the following instances have a non-existing primary-node:"
                  " %s", utils.CommaJoin(no_node_instances))

    return not self.bad


class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
  """Verifies the status of a node group.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  _HOOKS_INDENT_RE = re.compile("^", re.M)

  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @type name: string
    @ivar name: the node name to which this object refers
    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dictionary of {primary-node: list of instances} for all
        instances for which this node is secondary (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)
    @type os_fail: boolean
    @ivar os_fail: whether the RPC call didn't return valid OS data
    @type oslist: list
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
    @type vm_capable: boolean
    @ivar vm_capable: whether the node can host instances

    """
    def __init__(self, offline=False, name=None, vm_capable=True):
      self.name = name
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.vm_capable = vm_capable
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
      self.os_fail = False
      self.oslist = {}

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    # Get instances in node group; this is unsafe and needs verification later
    inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: inst_names,
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      locking.LEVEL_NODE: [],
      }

    self.share_locks = _ShareAll()
1962 def DeclareLocks(self, level):
1963 if level == locking.LEVEL_NODE:
1964 # Get members of node group; this is unsafe and needs verification later
1965 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1967 all_inst_info = self.cfg.GetAllInstancesInfo()
1969 # In Exec(), we warn about mirrored instances that have primary and
1970 # secondary living in separate node groups. To fully verify that
1971 # volumes for these instances are healthy, we will need to do an
      # extra call to their secondaries. We ensure here those nodes will
      # be locked.
1974 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
1975 # Important: access only the instances whose lock is owned
1976 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1977 nodes.update(all_inst_info[inst].secondary_nodes)
1979 self.needed_locks[locking.LEVEL_NODE] = nodes
1981 def CheckPrereq(self):
1982 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
1983 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
1985 group_nodes = set(self.group_info.members)
1986 group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
    unlocked_nodes = \
      group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1991 unlocked_instances = \
1992 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
    if unlocked_nodes:
      raise errors.OpPrereqError("Missing lock for nodes: %s" %
                                 utils.CommaJoin(unlocked_nodes))
1998 if unlocked_instances:
1999 raise errors.OpPrereqError("Missing lock for instances: %s" %
2000 utils.CommaJoin(unlocked_instances))
2002 self.all_node_info = self.cfg.GetAllNodesInfo()
2003 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2005 self.my_node_names = utils.NiceSort(group_nodes)
2006 self.my_inst_names = utils.NiceSort(group_instances)
2008 self.my_node_info = dict((name, self.all_node_info[name])
2009 for name in self.my_node_names)
2011 self.my_inst_info = dict((name, self.all_inst_info[name])
2012 for name in self.my_inst_names)
2014 # We detect here the nodes that will need the extra RPC calls for verifying
2015 # split LV volumes; they should be locked.
2016 extra_lv_nodes = set()
2018 for inst in self.my_inst_info.values():
2019 if inst.disk_template in constants.DTS_INT_MIRROR:
2020 group = self.my_node_info[inst.primary_node].group
2021 for nname in inst.secondary_nodes:
2022 if self.all_node_info[nname].group != group:
2023 extra_lv_nodes.add(nname)
2025 unlocked_lv_nodes = \
2026 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2028 if unlocked_lv_nodes:
      raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2030 utils.CommaJoin(unlocked_lv_nodes))
2031 self.extra_lv_nodes = list(extra_lv_nodes)
2033 def _VerifyNode(self, ninfo, nresult):
2034 """Perform some basic validation on data returned from a node.
    - check the result data structure is well formed and has all the
      expected keys
    - check ganeti version
2040 @type ninfo: L{objects.Node}
2041 @param ninfo: the node to check
2042 @param nresult: the results from the node
2044 @return: whether overall this call was successful (and we can expect
        reasonable values in the response)
2049 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2051 # main result, nresult should be a non-empty dict
2052 test = not nresult or not isinstance(nresult, dict)
2053 _ErrorIf(test, constants.CV_ENODERPC, node,
2054 "unable to verify node: no data returned")
2058 # compares ganeti version
2059 local_version = constants.PROTOCOL_VERSION
2060 remote_version = nresult.get("version", None)
2061 test = not (remote_version and
2062 isinstance(remote_version, (list, tuple)) and
2063 len(remote_version) == 2)
2064 _ErrorIf(test, constants.CV_ENODERPC, node,
2065 "connection to node returned invalid data")
2069 test = local_version != remote_version[0]
2070 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2071 "incompatible protocol versions: master %s,"
2072 " node %s", local_version, remote_version[0])
2076 # node seems compatible, we can actually try to look into its results
2078 # full package version
2079 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2080 constants.CV_ENODEVERSION, node,
2081 "software version mismatch: master %s, node %s",
2082 constants.RELEASE_VERSION, remote_version[1],
2083 code=self.ETYPE_WARNING)
2085 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2086 if ninfo.vm_capable and isinstance(hyp_result, dict):
2087 for hv_name, hv_result in hyp_result.iteritems():
2088 test = hv_result is not None
2089 _ErrorIf(test, constants.CV_ENODEHV, node,
2090 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2092 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2093 if ninfo.vm_capable and isinstance(hvp_result, list):
2094 for item, hv_name, hv_result in hvp_result:
2095 _ErrorIf(True, constants.CV_ENODEHV, node,
2096 "hypervisor %s parameter verify failure (source %s): %s",
2097 hv_name, item, hv_result)
2099 test = nresult.get(constants.NV_NODESETUP,
2100 ["Missing NODESETUP results"])
    _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True
2106 def _VerifyNodeTime(self, ninfo, nresult,
2107 nvinfo_starttime, nvinfo_endtime):
2108 """Check the node time.
2110 @type ninfo: L{objects.Node}
2111 @param ninfo: the node to check
2112 @param nresult: the remote results for the node
2113 @param nvinfo_starttime: the start time of the RPC call
2114 @param nvinfo_endtime: the end time of the RPC call
2118 _ErrorIf = self._ErrorIf # pylint: disable=C0103
    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
2123 except (ValueError, TypeError):
      _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
      return
2127 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2128 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2129 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None
    _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)
2138 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2139 """Check the node LVM results.
2141 @type ninfo: L{objects.Node}
2142 @param ninfo: the node to check
2143 @param nresult: the remote results for the node
2144 @param vg_name: the configured VG name
2151 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2153 # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = (vglist is None)
    _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2163 pvlist = nresult.get(constants.NV_PVLIST, None)
2164 test = pvlist is None
2165 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2167 # check that ':' is not present in PV names, since it's a
2168 # special character for lvcreate (denotes the range of PEs to
2170 for _, pvname, owner_vg in pvlist:
2171 test = ":" in pvname
2172 _ErrorIf(test, constants.CV_ENODELVM, node,
2173 "Invalid character ':' in PV '%s' of VG '%s'",
2176 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2177 """Check the node bridges.
2179 @type ninfo: L{objects.Node}
2180 @param ninfo: the node to check
2181 @param nresult: the remote results for the node
2182 @param bridges: the expected list of bridges
2189 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2191 missing = nresult.get(constants.NV_BRIDGES, None)
2192 test = not isinstance(missing, list)
2193 _ErrorIf(test, constants.CV_ENODENET, node,
2194 "did not return valid bridge information")
    if not test:
      _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2197 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2199 def _VerifyNodeUserScripts(self, ninfo, nresult):
2200 """Check the results of user scripts presence and executability on the node
2202 @type ninfo: L{objects.Node}
2203 @param ninfo: the node to check
2204 @param nresult: the remote results for the node
    test = constants.NV_USERSCRIPTS not in nresult
2210 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2211 "did not return user scripts information")
2213 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2215 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2216 "user scripts not present or not executable: %s" %
2217 utils.CommaJoin(sorted(broken_scripts)))
2219 def _VerifyNodeNetwork(self, ninfo, nresult):
2220 """Check the node network connectivity results.
2222 @type ninfo: L{objects.Node}
2223 @param ninfo: the node to check
2224 @param nresult: the remote results for the node
2228 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2230 test = constants.NV_NODELIST not in nresult
2231 _ErrorIf(test, constants.CV_ENODESSH, node,
2232 "node hasn't returned node ssh connectivity data")
2234 if nresult[constants.NV_NODELIST]:
2235 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2236 _ErrorIf(True, constants.CV_ENODESSH, node,
2237 "ssh communication with node '%s': %s", a_node, a_msg)
2239 test = constants.NV_NODENETTEST not in nresult
2240 _ErrorIf(test, constants.CV_ENODENET, node,
2241 "node hasn't returned node tcp connectivity data")
2243 if nresult[constants.NV_NODENETTEST]:
      nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
      for anode in nlist:
        _ErrorIf(True, constants.CV_ENODENET, node,
2247 "tcp communication with node '%s': %s",
2248 anode, nresult[constants.NV_NODENETTEST][anode])
2250 test = constants.NV_MASTERIP not in nresult
2251 _ErrorIf(test, constants.CV_ENODENET, node,
2252 "node hasn't returned node master IP reachability data")
2254 if not nresult[constants.NV_MASTERIP]:
2255 if node == self.master_node:
2256 msg = "the master node cannot reach the master IP (not configured?)"
        else:
          msg = "cannot reach the master IP"
2259 _ErrorIf(True, constants.CV_ENODENET, node, msg)
  def _VerifyInstance(self, instance, instanceconfig, node_image,
                      diskstatus):
2263 """Verify an instance.
2265 This function checks to see if the required block devices are
2266 available on the instance's node.
2269 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2270 node_current = instanceconfig.primary_node
2272 node_vol_should = {}
2273 instanceconfig.MapLVsByNode(node_vol_should)
2275 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), self.group_info)
2276 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2277 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, err)
2279 for node in node_vol_should:
2280 n_img = node_image[node]
2281 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
2284 for volume in node_vol_should[node]:
2285 test = volume not in n_img.volumes
2286 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2287 "volume %s missing on node %s", volume, node)
2289 if instanceconfig.admin_state == constants.ADMINST_UP:
2290 pri_img = node_image[node_current]
2291 test = instance not in pri_img.instances and not pri_img.offline
2292 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2293 "instance not running on its primary node %s",
2296 diskdata = [(nname, success, status, idx)
2297 for (nname, disks) in diskstatus.items()
2298 for idx, (success, status) in enumerate(disks)]
2300 for nname, success, bdev_status, idx in diskdata:
      # the 'ghost node' construction in Exec() ensures that we have a
      # non-None snode here
2303 snode = node_image[nname]
2304 bad_snode = snode.ghost or snode.offline
2305 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2306 not success and not bad_snode,
2307 constants.CV_EINSTANCEFAULTYDISK, instance,
2308 "couldn't retrieve status for disk/%s on %s: %s",
2309 idx, nname, bdev_status)
2310 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2311 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2312 constants.CV_EINSTANCEFAULTYDISK, instance,
2313 "disk/%s on %s is faulty", idx, nname)
2315 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2316 """Verify if there are any unknown volumes in the cluster.
2318 The .os, .swap and backup volumes are ignored. All other volumes are
2319 reported as unknown.
2321 @type reserved: L{ganeti.utils.FieldSet}
2322 @param reserved: a FieldSet of reserved volume names
2325 for node, n_img in node_image.items():
2326 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # skip non-healthy nodes
        continue
2329 for volume in n_img.volumes:
2330 test = ((node not in node_vol_should or
2331 volume not in node_vol_should[node]) and
2332 not reserved.Matches(volume))
2333 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2334 "volume %s is unknown", volume)
2336 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2337 """Verify N+1 Memory Resilience.
2339 Check that if one single node dies we can still start all the
2340 instances it was primary for.
2343 cluster_info = self.cfg.GetClusterInfo()
2344 for node, n_img in node_image.items():
2345 # This code checks that every node which is now listed as
      # secondary has enough memory to host all instances it is
      # supposed to host, should a single other node in the cluster fail.
2348 # FIXME: not ready for failover to an arbitrary node
2349 # FIXME: does not support file-backed instances
2350 # WARNING: we currently take into account down instances as well
2351 # as up ones, considering that even if they're down someone
2352 # might want to start them even in the event of a node failure.
2354 # we're skipping offline nodes from the N+1 warning, since
      # most likely we don't have good memory information from them;
      # we already list instances living on such nodes, and that's
      # warning enough
2359 #TODO(dynmem): use MINMEM for checking
2360 #TODO(dynmem): also consider ballooning out other instances
2361 for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
2364 bep = cluster_info.FillBE(instance_cfg[instance])
2365 if bep[constants.BE_AUTO_BALANCE]:
2366 needed_mem += bep[constants.BE_MAXMEM]
2367 test = n_img.mfree < needed_mem
2368 self._ErrorIf(test, constants.CV_ENODEN1, node,
2369 "not enough memory to accomodate instance failovers"
2370 " should node %s fail (%dMiB needed, %dMiB available)",
2371 prinode, needed_mem, n_img.mfree)
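    # Illustrative computation (instance names and sizes are made up): if
    # this node is secondary for two auto-balanced instances whose primary
    # is "nodeA", with BE_MAXMEM values of 2048 and 4096 MiB, then under
    # n_img.sbp == {"nodeA": ["inst1", "inst2"]} the check requires
    # n_img.mfree >= 2048 + 4096 MiB before nodeA's failure can be absorbed.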
  @classmethod
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2375 (files_all, files_opt, files_mc, files_vm)):
2376 """Verifies file checksums collected from all nodes.
2378 @param errorif: Callback for reporting errors
2379 @param nodeinfo: List of L{objects.Node} objects
2380 @param master_node: Name of master node
2381 @param all_nvinfo: RPC results
    # Define functions determining which nodes to consider for a file
    files2nodefn = [
      (files_all, None),
      (files_mc, lambda node: (node.master_candidate or
                               node.name == master_node)),
      (files_vm, lambda node: node.vm_capable),
      ]
    # Build mapping from filename to list of nodes which should have the file
    nodefiles = {}
    for (files, fn) in files2nodefn:
      if fn is None:
        filenodes = nodeinfo
      else:
        filenodes = filter(fn, nodeinfo)
2399 nodefiles.update((filename,
2400 frozenset(map(operator.attrgetter("name"), filenodes)))
2401 for filename in files)
2403 assert set(nodefiles) == (files_all | files_mc | files_vm)
2405 fileinfo = dict((filename, {}) for filename in nodefiles)
2406 ignore_nodes = set()
    for node in nodeinfo:
      if node.offline:
        ignore_nodes.add(node.name)
        continue
2413 nresult = all_nvinfo[node.name]
      if nresult.fail_msg or not nresult.payload:
        node_files = None
      else:
        node_files = nresult.payload.get(constants.NV_FILELIST, None)
2420 test = not (node_files and isinstance(node_files, dict))
2421 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2422 "Node did not return file checksum data")
      if test:
        ignore_nodes.add(node.name)
        continue
2427 # Build per-checksum mapping from filename to nodes having it
2428 for (filename, checksum) in node_files.items():
2429 assert filename in nodefiles
2430 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2432 for (filename, checksums) in fileinfo.items():
2433 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2435 # Nodes having the file
2436 with_file = frozenset(node_name
2437 for nodes in fileinfo[filename].values()
2438 for node_name in nodes) - ignore_nodes
2440 expected_nodes = nodefiles[filename] - ignore_nodes
2442 # Nodes missing file
2443 missing_file = expected_nodes - with_file
2445 if filename in files_opt:
2447 errorif(missing_file and missing_file != expected_nodes,
2448 constants.CV_ECLUSTERFILECHECK, None,
2449 "File %s is optional, but it must exist on all or no"
2450 " nodes (not found on %s)",
2451 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
      else:
        errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
                "File %s is missing from node(s) %s", filename,
                utils.CommaJoin(utils.NiceSort(missing_file)))
2457 # Warn if a node has a file it shouldn't
2458 unexpected = with_file - expected_nodes
      errorif(unexpected,
              constants.CV_ECLUSTERFILECHECK, None,
2461 "File %s should not exist on node(s) %s",
2462 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2464 # See if there are multiple versions of the file
2465 test = len(checksums) > 1
      if test:
        variants = ["variant %s on %s" %
                    (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
                    for (idx, (checksum, nodes)) in
                    enumerate(sorted(checksums.items()))]
      else:
        variants = []
2474 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2475 "File %s found with %s different checksums (%s)",
2476 filename, len(checksums), "; ".join(variants))
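    # Illustrative data shape (checksums and node names are hypothetical):
    # after the collection loop above, fileinfo can look like
    #
    #   fileinfo = {
    #     "/var/lib/ganeti/example.conf": {
    #       "0123abcdef...": set(["node1"]),
    #       "89efcdab01...": set(["node2", "node3"]),
    #       },
    #     }
    #
    # Two checksum keys under a single filename is exactly the
    # "%s different checksums" condition reported just above.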
2478 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2480 """Verifies and the node DRBD status.
2482 @type ninfo: L{objects.Node}
2483 @param ninfo: the node to check
2484 @param nresult: the remote results for the node
2485 @param instanceinfo: the dict of instances
2486 @param drbd_helper: the configured DRBD usermode helper
2487 @param drbd_map: the DRBD map as returned by
2488 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2492 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2495 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
    test = (helper_result is None)
2497 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2498 "no drbd usermode helper returned")
    if helper_result:
      status, payload = helper_result
      test = not status
      _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2503 "drbd usermode helper check unsuccessful: %s", payload)
2504 test = status and (payload != drbd_helper)
2505 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2506 "wrong drbd usermode helper: %s", payload)
    # compute the DRBD minors
    node_drbd = {}
2510 for minor, instance in drbd_map[node].items():
2511 test = instance not in instanceinfo
2512 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2513 "ghost instance '%s' in temporary DRBD map", instance)
2514 # ghost instance should not be running, but otherwise we
2515 # don't give double warnings (both ghost instance and
        # unallocated minor in use)
        if test:
          node_drbd[minor] = (instance, False)
        else:
          instance = instanceinfo[instance]
2521 node_drbd[minor] = (instance.name,
2522 instance.admin_state == constants.ADMINST_UP)
2524 # and now check them
2525 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2526 test = not isinstance(used_minors, (tuple, list))
2527 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2528 "cannot parse drbd status file: %s", str(used_minors))
    if test:
      # we cannot check drbd status
      return
2533 for minor, (iname, must_exist) in node_drbd.items():
2534 test = minor not in used_minors and must_exist
2535 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2536 "drbd minor %d of instance %s is not active", minor, iname)
2537 for minor in used_minors:
2538 test = minor not in node_drbd
2539 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2540 "unallocated drbd minor %d is in use", minor)
2542 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2543 """Builds the node OS structures.
2545 @type ninfo: L{objects.Node}
2546 @param ninfo: the node to check
2547 @param nresult: the remote results for the node
2548 @param nimg: the node image object
2552 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2554 remote_os = nresult.get(constants.NV_OSLIST, None)
2555 test = (not isinstance(remote_os, list) or
2556 not compat.all(isinstance(v, list) and len(v) == 7
2557 for v in remote_os))
2559 _ErrorIf(test, constants.CV_ENODEOS, node,
2560 "node hasn't returned valid OS data")
2569 for (name, os_path, status, diagnose,
2570 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
      if name not in os_dict:
        os_dict[name] = []
2575 # parameters is a list of lists instead of list of tuples due to
2576 # JSON lacking a real tuple type, fix it:
2577 parameters = [tuple(v) for v in parameters]
2578 os_dict[name].append((os_path, status, diagnose,
2579 set(variants), set(parameters), set(api_ver)))
2581 nimg.oslist = os_dict
2583 def _VerifyNodeOS(self, ninfo, nimg, base):
2584 """Verifies the node OS list.
2586 @type ninfo: L{objects.Node}
2587 @param ninfo: the node to check
2588 @param nimg: the node image object
2589 @param base: the 'template' node we match against (e.g. from the master)
2593 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2595 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2597 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2598 for os_name, os_data in nimg.oslist.items():
2599 assert os_data, "Empty OS status for OS %s?!" % os_name
2600 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2601 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2602 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2603 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2604 "OS '%s' has multiple entries (first one shadows the rest): %s",
2605 os_name, utils.CommaJoin([v[0] for v in os_data]))
2606 # comparisons with the 'base' image
2607 test = os_name not in base.oslist
2608 _ErrorIf(test, constants.CV_ENODEOS, node,
2609 "Extra OS %s not present on reference node (%s)",
2613 assert base.oslist[os_name], "Base node has empty OS status?"
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
      if not b_status:
        # base OS is invalid, skipping
        continue
2618 for kind, a, b in [("API version", f_api, b_api),
2619 ("variants list", f_var, b_var),
2620 ("parameters", beautify_params(f_param),
2621 beautify_params(b_param))]:
2622 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2623 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2624 kind, os_name, base.name,
2625 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2627 # check any missing OSes
2628 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2629 _ErrorIf(missing, constants.CV_ENODEOS, node,
2630 "OSes present on reference node %s but missing on this node: %s",
2631 base.name, utils.CommaJoin(missing))
2633 def _VerifyOob(self, ninfo, nresult):
2634 """Verifies out of band functionality of a node.
2636 @type ninfo: L{objects.Node}
2637 @param ninfo: the node to check
2638 @param nresult: the remote results for the node
2642 # We just have to verify the paths on master and/or master candidates
2643 # as the oob helper is invoked on the master
2644 if ((ninfo.master_candidate or ninfo.master_capable) and
2645 constants.NV_OOB_PATHS in nresult):
2646 for path_result in nresult[constants.NV_OOB_PATHS]:
2647 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2649 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2650 """Verifies and updates the node volume data.
2652 This function will update a L{NodeImage}'s internal structures
2653 with data from the remote call.
2655 @type ninfo: L{objects.Node}
2656 @param ninfo: the node to check
2657 @param nresult: the remote results for the node
2658 @param nimg: the node image object
2659 @param vg_name: the configured VG name
2663 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2665 nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    if vg_name is None:
      pass
    elif isinstance(lvdata, basestring):
2670 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2671 utils.SafeEncode(lvdata))
2672 elif not isinstance(lvdata, dict):
2673 _ErrorIf(True, constants.CV_ENODELVM, node,
2674 "rpc call to node failed (lvlist)")
    else:
      nimg.volumes = lvdata
      nimg.lvm_fail = False
2679 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2680 """Verifies and updates the node instance list.
2682 If the listing was successful, then updates this node's instance
    list. Otherwise, it marks the RPC call as failed for the instance
    list.
2686 @type ninfo: L{objects.Node}
2687 @param ninfo: the node to check
2688 @param nresult: the remote results for the node
2689 @param nimg: the node image object
2692 idata = nresult.get(constants.NV_INSTANCELIST, None)
2693 test = not isinstance(idata, list)
2694 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2695 "rpc call to node failed (instancelist): %s",
2696 utils.SafeEncode(str(idata)))
    if test:
      nimg.hyp_fail = True
    else:
      nimg.instances = idata
2702 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2703 """Verifies and computes a node information map
2705 @type ninfo: L{objects.Node}
2706 @param ninfo: the node to check
2707 @param nresult: the remote results for the node
2708 @param nimg: the node image object
2709 @param vg_name: the configured VG name
2713 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2715 # try to read free memory (from the hypervisor)
2716 hv_info = nresult.get(constants.NV_HVINFO, None)
2717 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2718 _ErrorIf(test, constants.CV_ENODEHV, node,
2719 "rpc call to node failed (hvinfo)")
    if not test:
      try:
        nimg.mfree = int(hv_info["memory_free"])
2723 except (ValueError, TypeError):
2724 _ErrorIf(True, constants.CV_ENODERPC, node,
2725 "node returned invalid nodeinfo, check hypervisor")
2727 # FIXME: devise a free space model for file based instances as well
2728 if vg_name is not None:
2729 test = (constants.NV_VGLIST not in nresult or
2730 vg_name not in nresult[constants.NV_VGLIST])
2731 _ErrorIf(test, constants.CV_ENODELVM, node,
2732 "node didn't return data for the volume group '%s'"
2733 " - it is either missing or broken", vg_name)
      if not test:
        try:
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2737 except (ValueError, TypeError):
2738 _ErrorIf(True, constants.CV_ENODERPC, node,
2739 "node returned invalid LVM info, check LVM status")
2741 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2742 """Gets per-disk status information for all instances.
2744 @type nodelist: list of strings
2745 @param nodelist: Node names
2746 @type node_image: dict of (name, L{objects.Node})
2747 @param node_image: Node objects
2748 @type instanceinfo: dict of (name, L{objects.Instance})
2749 @param instanceinfo: Instance objects
    @rtype: {instance: {node: [(success, payload)]}}
2751 @return: a dictionary of per-instance dictionaries with nodes as
2752 keys and disk information as values; the disk information is a
2753 list of tuples (success, payload)
2756 _ErrorIf = self._ErrorIf # pylint: disable=C0103
    node_disks = {}
    node_disks_devonly = {}
2760 diskless_instances = set()
2761 diskless = constants.DT_DISKLESS
2763 for nname in nodelist:
2764 node_instances = list(itertools.chain(node_image[nname].pinst,
2765 node_image[nname].sinst))
2766 diskless_instances.update(inst for inst in node_instances
2767 if instanceinfo[inst].disk_template == diskless)
2768 disks = [(inst, disk)
2769 for inst in node_instances
2770 for disk in instanceinfo[inst].disks]
      if not disks:
        # No need to collect data
        continue
2776 node_disks[nname] = disks
2778 # Creating copies as SetDiskID below will modify the objects and that can
2779 # lead to incorrect data returned from nodes
2780 devonly = [dev.Copy() for (_, dev) in disks]
      for dev in devonly:
        self.cfg.SetDiskID(dev, nname)
2785 node_disks_devonly[nname] = devonly
2787 assert len(node_disks) == len(node_disks_devonly)
2789 # Collect data from all nodes with disks
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
                                                          node_disks_devonly)
2793 assert len(result) == len(node_disks)
    instdisk = {}

    for (nname, nres) in result.items():
2798 disks = node_disks[nname]
      if nres.offline:
        # No data from this node
        data = len(disks) * [(False, "node offline")]
      else:
        msg = nres.fail_msg
        _ErrorIf(msg, constants.CV_ENODERPC, nname,
2806 "while getting disk information: %s", msg)
        if msg:
          # No data from this node
          data = len(disks) * [(False, msg)]
        else:
          data = []
          for idx, i in enumerate(nres.payload):
            if isinstance(i, (tuple, list)) and len(i) == 2:
              data.append(i)
            else:
              logging.warning("Invalid result from node %s, entry %d: %s",
                              nname, idx, i)
              data.append((False, "Invalid result from the remote node"))
2820 for ((inst, _), status) in zip(disks, data):
2821 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2823 # Add empty entries for diskless instances.
2824 for inst in diskless_instances:
      assert inst not in instdisk
      instdisk[inst] = {}
2828 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2829 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2830 compat.all(isinstance(s, (tuple, list)) and
2831 len(s) == 2 for s in statuses)
2832 for inst, nnames in instdisk.items()
2833 for nname, statuses in nnames.items())
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"

    return instdisk
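  # Illustrative return shape (instance and node names are made up):
  #
  #   {"inst1": {"node1": [(True, status_disk0), (True, status_disk1)],
  #              "node2": [(False, "node offline")]},
  #    "diskless-inst": {}}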
2839 def _SshNodeSelector(group_uuid, all_nodes):
2840 """Create endless iterators for all potential SSH check hosts.
2843 nodes = [node for node in all_nodes
             if (node.group != group_uuid and
                 not node.offline)]
2846 keyfunc = operator.attrgetter("group")
2848 return map(itertools.cycle,
               [sorted(map(operator.attrgetter("name"), names))
                for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
                                                  keyfunc)])
  @classmethod
  def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2855 """Choose which nodes should talk to which other nodes.
    We will make nodes contact all nodes in their group, and one node from
    every other group.
2860 @warning: This algorithm has a known issue if one node group is much
2861 smaller than others (e.g. just one node). In such a case all other
2862 nodes will talk to the single node.
2865 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2866 sel = cls._SshNodeSelector(group_uuid, all_nodes)
2868 return (online_nodes,
2869 dict((name, sorted([i.next() for i in sel]))
2870 for name in online_nodes))
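  # A sketch of the selection logic using only standard-library pieces (the
  # node objects are hypothetical): nodes outside the verified group are
  # grouped by node group, each group's sorted name list becomes an endless
  # cycle, and every online node draws one name from each cycle:
  #
  #   keyfunc = operator.attrgetter("group")
  #   others = sorted((n for n in all_nodes if n.group != group_uuid),
  #                   key=keyfunc)
  #   cycles = [itertools.cycle(sorted(n.name for n in grp))
  #             for _, grp in itertools.groupby(others, keyfunc)]
  #   targets = dict((name, sorted(c.next() for c in cycles))
  #                  for name in online_nodes)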
2872 def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks run only in the post phase; their failure is
    logged in the verify output and makes the verification fail.

    """
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags()),
      }
2883 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
               for node in self.my_node_info.values())

    return env
2888 def BuildHooksNodes(self):
2889 """Build hooks nodes.
2892 return ([], self.my_node_names)
2894 def Exec(self, feedback_fn):
2895 """Verify integrity of the node group, performing various test on nodes.
2898 # This method has too many local variables. pylint: disable=R0914
2899 feedback_fn("* Verifying group '%s'" % self.group_info.name)
2901 if not self.my_node_names:
      feedback_fn("* Empty node group, skipping verification")
      return True
2907 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2908 verbose = self.op.verbose
2909 self._feedback_fn = feedback_fn
2911 vg_name = self.cfg.GetVGName()
2912 drbd_helper = self.cfg.GetDRBDHelper()
2913 cluster = self.cfg.GetClusterInfo()
2914 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2915 hypervisors = cluster.enabled_hypervisors
2916 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2918 i_non_redundant = [] # Non redundant instances
2919 i_non_a_balanced = [] # Non auto-balanced instances
2920 i_offline = 0 # Count of offline instances
2921 n_offline = 0 # Count of offline nodes
2922 n_drained = 0 # Count of nodes being drained
2923 node_vol_should = {}
2925 # FIXME: verify OS list
2928 filemap = _ComputeAncillaryFiles(cluster, False)
2930 # do local checksums
2931 master_node = self.master_node = self.cfg.GetMasterNode()
2932 master_ip = self.cfg.GetMasterIP()
2934 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
    user_scripts = []
    if self.cfg.GetUseExternalMipScript():
2938 user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
2940 node_verify_param = {
2941 constants.NV_FILELIST:
2942 utils.UniqueSequence(filename
2943 for files in filemap
2944 for filename in files),
2945 constants.NV_NODELIST:
2946 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
2947 self.all_node_info.values()),
2948 constants.NV_HYPERVISOR: hypervisors,
2949 constants.NV_HVPARAMS:
2950 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2951 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2952 for node in node_data_list
2953 if not node.offline],
2954 constants.NV_INSTANCELIST: hypervisors,
2955 constants.NV_VERSION: None,
2956 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2957 constants.NV_NODESETUP: None,
2958 constants.NV_TIME: None,
2959 constants.NV_MASTERIP: (master_node, master_ip),
2960 constants.NV_OSLIST: None,
2961 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2962 constants.NV_USERSCRIPTS: user_scripts,
2965 if vg_name is not None:
2966 node_verify_param[constants.NV_VGLIST] = None
2967 node_verify_param[constants.NV_LVLIST] = vg_name
2968 node_verify_param[constants.NV_PVLIST] = [vg_name]
2969 node_verify_param[constants.NV_DRBDLIST] = None
      if drbd_helper:
        node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
    # FIXME: this needs to be changed per node-group, not cluster-wide
    bridges = set()
2977 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2978 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2979 bridges.add(default_nicpp[constants.NIC_LINK])
2980 for instance in self.my_inst_info.values():
2981 for nic in instance.nics:
2982 full_nic = cluster.SimpleFillNIC(nic.nicparams)
2983 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2984 bridges.add(full_nic[constants.NIC_LINK])
    if bridges:
      node_verify_param[constants.NV_BRIDGES] = list(bridges)
2989 # Build our expected cluster state
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
                                                 name=node.name,
                                                 vm_capable=node.vm_capable))
2993 for node in node_data_list)
    oob_paths = []
    for node in self.all_node_info.values():
2998 path = _SupportsOob(self.cfg, node)
2999 if path and path not in oob_paths:
3000 oob_paths.append(path)
    if oob_paths:
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3005 for instance in self.my_inst_names:
3006 inst_config = self.my_inst_info[instance]
3008 for nname in inst_config.all_nodes:
3009 if nname not in node_image:
3010 gnode = self.NodeImage(name=nname)
3011 gnode.ghost = (nname not in self.all_node_info)
3012 node_image[nname] = gnode
3014 inst_config.MapLVsByNode(node_vol_should)
3016 pnode = inst_config.primary_node
3017 node_image[pnode].pinst.append(instance)
3019 for snode in inst_config.secondary_nodes:
3020 nimg = node_image[snode]
3021 nimg.sinst.append(instance)
3022 if pnode not in nimg.sbp:
3023 nimg.sbp[pnode] = []
3024 nimg.sbp[pnode].append(instance)
3026 # At this point, we have the in-memory data structures complete,
3027 # except for the runtime information, which we'll gather next
3029 # Due to the way our RPC system works, exact response times cannot be
3030 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
3033 nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
                                           node_verify_param,
                                           self.cfg.GetClusterName())
3037 nvinfo_endtime = time.time()
    if self.extra_lv_nodes and vg_name is not None:
      extra_lv_nvinfo = \
          self.rpc.call_node_verify(self.extra_lv_nodes,
                                    {constants.NV_LVLIST: vg_name},
                                    self.cfg.GetClusterName())
    else:
      extra_lv_nvinfo = {}
3047 all_drbd_map = self.cfg.ComputeDRBDMap()
3049 feedback_fn("* Gathering disk information (%s nodes)" %
3050 len(self.my_node_names))
    instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
                                     self.my_inst_info)
3054 feedback_fn("* Verifying configuration file consistency")
3056 # If not all nodes are being checked, we need to make sure the master node
3057 # and a non-checked vm_capable node are in the list.
3058 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
    if absent_nodes:
      vf_nvinfo = all_nvinfo.copy()
3061 vf_node_info = list(self.my_node_info.values())
3062 additional_nodes = []
3063 if master_node not in self.my_node_info:
3064 additional_nodes.append(master_node)
3065 vf_node_info.append(self.all_node_info[master_node])
3066 # Add the first vm_capable node we find which is not included
3067 for node in absent_nodes:
3068 nodeinfo = self.all_node_info[node]
3069 if nodeinfo.vm_capable and not nodeinfo.offline:
3070 additional_nodes.append(node)
          vf_node_info.append(self.all_node_info[node])
          break
3073 key = constants.NV_FILELIST
3074 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3075 {key: node_verify_param[key]},
3076 self.cfg.GetClusterName()))
    else:
      vf_nvinfo = all_nvinfo
      vf_node_info = self.my_node_info.values()
3081 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3083 feedback_fn("* Verifying node status")
    refos_img = None
    for node_i in node_data_list:
      node = node_i.name
      nimg = node_image[node]
      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline += 1
        continue
      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained += 1
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3109 msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
               msg)
      if msg:
        nimg.rpc_fail = True
        continue
3116 nresult = all_nvinfo[node].payload
3118 nimg.call_ok = self._VerifyNode(node_i, nresult)
3119 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3120 self._VerifyNodeNetwork(node_i, nresult)
3121 self._VerifyNodeUserScripts(node_i, nresult)
3122 self._VerifyOob(node_i, nresult)
      if node_i.vm_capable:
        self._VerifyNodeLVM(node_i, nresult, vg_name)
        self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
                             all_drbd_map)
3129 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3130 self._UpdateNodeInstances(node_i, nresult, nimg)
3131 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3132 self._UpdateNodeOS(node_i, nresult, nimg)
3134 if not nimg.os_fail:
          if refos_img is None:
            refos_img = nimg
3137 self._VerifyNodeOS(node_i, nimg, refos_img)
3138 self._VerifyNodeBridges(node_i, nresult, bridges)
      # Check whether all running instances are primary for the node. (This
3141 # can no longer be done from _VerifyInstance below, since some of the
3142 # wrong instances could be from other node groups.)
3143 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3145 for inst in non_primary_inst:
3146 # FIXME: investigate best way to handle offline insts
3147 if inst.admin_state == constants.ADMINST_OFFLINE:
3149 feedback_fn("* Skipping offline instance %s" % inst.name)
3152 test = inst in self.all_inst_info
3153 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3154 "instance should not run on node %s", node_i.name)
3155 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3156 "node is running unknown instance %s", inst)
3158 for node, result in extra_lv_nvinfo.items():
3159 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3160 node_image[node], vg_name)
3162 feedback_fn("* Verifying instance status")
3163 for instance in self.my_inst_names:
      if verbose:
        feedback_fn("* Verifying instance %s" % instance)
3166 inst_config = self.my_inst_info[instance]
      self._VerifyInstance(instance, inst_config, node_image,
                           instdisk[instance])
3169 inst_nodes_offline = []
3171 pnode = inst_config.primary_node
3172 pnode_img = node_image[pnode]
3173 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3174 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3175 " primary node failed", instance)
      _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
               pnode_img.offline,
3179 constants.CV_EINSTANCEBADNODE, instance,
3180 "instance is marked as running and lives on offline node %s",
3181 inst_config.primary_node)
3183 # If the instance is non-redundant we cannot survive losing its primary
3184 # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary, so that situation is not well
      # supported either.
3187 # FIXME: does not support file-backed instances
3188 if not inst_config.secondary_nodes:
3189 i_non_redundant.append(instance)
3191 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3192 constants.CV_EINSTANCELAYOUT,
3193 instance, "instance has multiple secondary nodes: %s",
3194 utils.CommaJoin(inst_config.secondary_nodes),
3195 code=self.ETYPE_WARNING)
3197 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3198 pnode = inst_config.primary_node
3199 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3200 instance_groups = {}
3202 for node in instance_nodes:
          instance_groups.setdefault(self.all_node_info[node].group,
                                     []).append(node)

        pretty_list = [
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3208 # Sort so that we always list the primary node first.
3209 for group, nodes in sorted(instance_groups.items(),
                                     key=lambda (_, nodes): pnode in nodes,
                                     reverse=True)]
3213 self._ErrorIf(len(instance_groups) > 1,
3214 constants.CV_EINSTANCESPLITGROUPS,
3215 instance, "instance has primary and secondary nodes in"
3216 " different groups: %s", utils.CommaJoin(pretty_list),
3217 code=self.ETYPE_WARNING)
3219 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3220 i_non_a_balanced.append(instance)
3222 for snode in inst_config.secondary_nodes:
3223 s_img = node_image[snode]
3224 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
                 snode, "instance %s, connection to secondary node failed",
                 instance)

        if s_img.offline:
          inst_nodes_offline.append(snode)
3231 # warn that the instance lives on offline nodes
3232 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3233 "instance has offline secondary node(s) %s",
3234 utils.CommaJoin(inst_nodes_offline))
3235 # ... or ghost/non-vm_capable nodes
3236 for node in inst_config.all_nodes:
3237 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3238 instance, "instance lives on ghost node %s", node)
3239 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3240 instance, "instance lives on non-vm_capable node %s", node)
3242 feedback_fn("* Verifying orphan volumes")
3243 reserved = utils.FieldSet(*cluster.reserved_lvs)
3245 # We will get spurious "unknown volume" warnings if any node of this group
3246 # is secondary for an instance whose primary is in another group. To avoid
3247 # them, we find these instances and add their volumes to node_vol_should.
3248 for inst in self.all_inst_info.values():
3249 for secondary in inst.secondary_nodes:
3250 if (secondary in self.my_node_info
3251 and inst.name not in self.my_inst_info):
          inst.MapLVsByNode(node_vol_should)
          break
3255 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3257 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3258 feedback_fn("* Verifying N+1 Memory redundancy")
3259 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3261 feedback_fn("* Other Notes")
    if i_non_redundant:
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
                  % len(i_non_redundant))
3266 if i_non_a_balanced:
3267 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3268 % len(i_non_a_balanced))
    if i_offline:
      feedback_fn("  - NOTICE: %d offline instance(s) found." % i_offline)
    if n_offline:
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
    if n_drained:
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)

    return not self.bad
3281 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3282 """Analyze the post-hooks' result
3284 This method analyses the hook result, handles it, and sends some
3285 nicely-formatted feedback back to the user.
3287 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3288 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3289 @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
3291 @param lu_result: previous Exec result
3292 @return: the new Exec result, based on the previous result
3296 # We only really run POST phase hooks, only for non-empty groups,
3297 # and are only interested in their results
    if not self.my_node_names:
      # empty node group
      pass
    elif phase == constants.HOOKS_PHASE_POST:
3302 # Used to change hooks' output to proper indentation
3303 feedback_fn("* Hooks Results")
3304 assert hooks_results, "invalid result from hooks"
3306 for node_name in hooks_results:
3307 res = hooks_results[node_name]
        msg = res.fail_msg
        test = msg and not res.offline
3310 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3311 "Communication failure in hooks execution: %s", msg)
3312 if res.offline or msg:
          # No need to investigate payload if node is offline or gave an
          # error
          continue
3316 for script, hkr, output in res.payload:
3317 test = hkr == constants.HKR_FAIL
3318 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3319 "Script %s failed, output:", script)
          if test:
            output = self._HOOKS_INDENT_RE.sub("      ", output)
            feedback_fn("%s" % output)
            lu_result = False

    return lu_result
3328 class LUClusterVerifyDisks(NoHooksLU):
3329 """Verifies the cluster disks status.
3334 def ExpandNames(self):
3335 self.share_locks = _ShareAll()
3336 self.needed_locks = {
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
      }
3340 def Exec(self, feedback_fn):
3341 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3343 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3344 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3345 for group in group_names])
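    # Illustrative result shape (group names are made up): with two node
    # groups this returns
    #
    #   ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name="default")],
    #                   [opcodes.OpGroupVerifyDisks(group_name="storage")]])
    #
    # i.e. one single-opcode job per node group.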
3348 class LUGroupVerifyDisks(NoHooksLU):
3349 """Verifies the status of all disks in a node group.
3354 def ExpandNames(self):
3355 # Raises errors.OpPrereqError on its own if group can't be found
3356 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3358 self.share_locks = _ShareAll()
3359 self.needed_locks = {
3360 locking.LEVEL_INSTANCE: [],
3361 locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }
3365 def DeclareLocks(self, level):
3366 if level == locking.LEVEL_INSTANCE:
3367 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3369 # Lock instances optimistically, needs verification once node and group
3370 # locks have been acquired
3371 self.needed_locks[locking.LEVEL_INSTANCE] = \
3372 self.cfg.GetNodeGroupInstances(self.group_uuid)
3374 elif level == locking.LEVEL_NODEGROUP:
3375 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3377 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3378 set([self.group_uuid] +
3379 # Lock all groups used by instances optimistically; this requires
3380 # going via the node before it's locked, requiring verification
3383 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3384 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3386 elif level == locking.LEVEL_NODE:
3387 # This will only lock the nodes in the group to be verified which contain
3389 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3390 self._LockInstancesNodes()
3392 # Lock all nodes in group to be verified
3393 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3394 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3395 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3397 def CheckPrereq(self):
3398 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3399 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3400 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3402 assert self.group_uuid in owned_groups
3404 # Check if locked instances are still correct
3405 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3407 # Get instance information
3408 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3410 # Check if node groups for locked instances are still correct
3411 for (instance_name, inst) in self.instances.items():
3412 assert owned_nodes.issuperset(inst.all_nodes), \
3413 "Instance %s's nodes changed while we kept the lock" % instance_name
3415 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3418 assert self.group_uuid in inst_groups, \
3419 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3421 def Exec(self, feedback_fn):
3422 """Verify integrity of cluster disks.
3424 @rtype: tuple of three items
3425 @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes
3431 res_instances = set()
3434 nv_dict = _MapInstanceDisksToNodes([inst
3435 for inst in self.instances.values()
3436 if inst.admin_state == constants.ADMINST_UP])
3439 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3440 set(self.cfg.GetVmCapableNodeList()))
3442 node_lvs = self.rpc.call_lv_list(nodes, [])
3444 for (node, node_res) in node_lvs.items():
3445 if node_res.offline:
3448 msg = node_res.fail_msg
3450 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3451 res_nodes[node] = msg
3454 for lv_name, (_, _, lv_online) in node_res.payload.items():
3455 inst = nv_dict.pop((node, lv_name), None)
3456 if not (lv_online or inst is None):
3457 res_instances.add(inst)
    # any leftover items in nv_dict are missing LVs, let's arrange the data
    # better
3461 for key, inst in nv_dict.iteritems():
3462 res_missing.setdefault(inst, []).append(list(key))
3464 return (res_nodes, list(res_instances), res_missing)
3467 class LUClusterRepairDiskSizes(NoHooksLU):
3468 """Verifies the cluster disks sizes.
3473 def ExpandNames(self):
3474 if self.op.instances:
3475 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3476 self.needed_locks = {
3477 locking.LEVEL_NODE_RES: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
3480 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3482 self.wanted_names = None
3483 self.needed_locks = {
3484 locking.LEVEL_NODE_RES: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
3487 self.share_locks = {
3488 locking.LEVEL_NODE_RES: 1,
      locking.LEVEL_INSTANCE: 0,
      }
3492 def DeclareLocks(self, level):
3493 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3494 self._LockInstancesNodes(primary_only=True, level=level)
3496 def CheckPrereq(self):
3497 """Check prerequisites.
3499 This only checks the optional instance list against the existing names.
3502 if self.wanted_names is None:
3503 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3505 self.wanted_instances = \
3506 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3508 def _EnsureChildSizes(self, disk):
3509 """Ensure children of the disk have the needed disk size.
3511 This is valid mainly for DRBD8 and fixes an issue where the
3512 children have smaller disk size.
3514 @param disk: an L{ganeti.objects.Disk} object
3517 if disk.dev_type == constants.LD_DRBD8:
3518 assert disk.children, "Empty children for DRBD8?"
3519 fchild = disk.children[0]
3520 mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
3523 fchild.size, disk.size)
3524 fchild.size = disk.size
3526 # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False
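  # Illustrative case (sizes are made up): a DRBD8 disk of size 1024 whose
  # first child (the data volume) is recorded at 1000 gets that child bumped
  # to 1024 and the method returns True, telling the caller the
  # configuration changed; disks whose children already match return False.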
3531 def Exec(self, feedback_fn):
3532 """Verify the size of cluster disks.
3535 # TODO: check child disks too
3536 # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
3539 pnode = instance.primary_node
3540 if pnode not in per_node_disks:
3541 per_node_disks[pnode] = []
3542 for idx, disk in enumerate(instance.disks):
3543 per_node_disks[pnode].append((instance, idx, disk))
3545 assert not (frozenset(per_node_disks.keys()) -
3546 self.owned_locks(locking.LEVEL_NODE_RES)), \
3547 "Not owning correct locks"
3548 assert not self.owned_locks(locking.LEVEL_NODE)
    changed = []
    for node, dskl in per_node_disks.items():
3552 newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
3555 result = self.rpc.call_blockdev_getsize(node, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsize call to node"
                        " %s, ignoring", node)
        continue
3560 if len(result.payload) != len(dskl):
        logging.warning("Invalid result from node %s: len(dskl)=%d,"
3562 " result.payload=%s", node, len(dskl), result.payload)
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
3566 for ((instance, idx, disk), size) in zip(dskl, result.payload):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
3571 if not isinstance(size, (int, long)):
3572 self.LogWarning("Disk %d of instance %s did not return valid"
3573 " size information, ignoring", idx, instance.name)
3576 if size != disk.size:
3577 self.LogInfo("Disk %d of instance %s has mismatched size,"
3578 " correcting: recorded %d, actual %d", idx,
                      instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
3582 changed.append((instance.name, idx, size))
3583 if self._EnsureChildSizes(disk):
3584 self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))

    return changed
3589 class LUClusterRename(LogicalUnit):
3590 """Rename the cluster.
3593 HPATH = "cluster-rename"
3594 HTYPE = constants.HTYPE_CLUSTER
  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }
3605 def BuildHooksNodes(self):
3606 """Build hooks nodes.
3609 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3611 def CheckPrereq(self):
3612 """Verify that the passed name is a valid one.
3615 hostname = netutils.GetHostname(name=self.op.name,
3616 family=self.cfg.GetPrimaryIPFamily())
3618 new_name = hostname.name
3619 self.ip = new_ip = hostname.ip
3620 old_name = self.cfg.GetClusterName()
3621 old_ip = self.cfg.GetMasterIP()
3622 if new_name == old_name and new_ip == old_ip:
3623 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3624 " cluster has changed",
3626 if new_ip != old_ip:
3627 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3628 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3629 " reachable on the network" %
3630 new_ip, errors.ECODE_NOTUNIQUE)
3632 self.op.name = new_name
3634 def Exec(self, feedback_fn):
3635 """Rename the cluster.
    clustername = self.op.name
    new_ip = self.ip
3641 # shutdown the master IP
3642 master_params = self.cfg.GetMasterNetworkParameters()
3643 ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
3646 result.Raise("Could not disable the master role")
    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = clustername
      cluster.master_ip = new_ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetOnlineNodeList()
      try:
        node_list.remove(master_params.name)
      except ValueError:
        pass
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
    finally:
      master_params.ip = new_ip
      result = self.rpc.call_node_activate_master_ip(master_params.name,
                                                     master_params, ems)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)

    return clustername
3674 def _ValidateNetmask(cfg, netmask):
3675 """Checks if a netmask is valid.
3677 @type cfg: L{config.ConfigWriter}
3678 @param cfg: The cluster configuration
3680 @param netmask: the netmask to be verified
3681 @raise errors.OpPrereqError: if the validation fails
3684 ip_family = cfg.GetPrimaryIPFamily()
  try:
    ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3687 except errors.ProgrammerError:
    raise errors.OpPrereqError("Invalid primary ip family: %s." %
                               ip_family, errors.ECODE_INVAL)
3690 if not ipcls.ValidateNetmask(netmask):
    raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
                               netmask, errors.ECODE_INVAL)
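# Illustrative usage (the prefix values are examples): on an IPv4 cluster,
# _ValidateNetmask(cfg, 24) passes, while _ValidateNetmask(cfg, 33) raises
# errors.OpPrereqError, because the address class selected for the cluster's
# primary IP family rejects CIDR prefixes outside its valid range.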
3695 class LUClusterSetParams(LogicalUnit):
3696 """Change the parameters of the cluster.
3699 HPATH = "cluster-modify"
3700 HTYPE = constants.HTYPE_CLUSTER
3703 def CheckArguments(self):
3707 if self.op.uid_pool:
3708 uidpool.CheckUidPool(self.op.uid_pool)
3710 if self.op.add_uids:
3711 uidpool.CheckUidPool(self.op.add_uids)
3713 if self.op.remove_uids:
3714 uidpool.CheckUidPool(self.op.remove_uids)
3716 if self.op.master_netmask is not None:
3717 _ValidateNetmask(self.cfg, self.op.master_netmask)
3719 if self.op.diskparams:
3720 for dt_params in self.op.diskparams.values():
3721 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3723 def ExpandNames(self):
3724 # FIXME: in the future maybe other cluster params won't require checking on
3725 # all nodes to be modified.
3726 self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      }
3729 self.share_locks[locking.LEVEL_NODE] = 1
  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_VG_NAME": self.op.vg_name,
      }
3740 def BuildHooksNodes(self):
3741 """Build hooks nodes.
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])
3747 def CheckPrereq(self):
3748 """Check prerequisites.
    This checks that the given parameters don't conflict and
    that the given volume group is valid.
3754 if self.op.vg_name is not None and not self.op.vg_name:
3755 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3756 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3757 " instances exist", errors.ECODE_INVAL)
3759 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3760 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3761 raise errors.OpPrereqError("Cannot disable drbd helper while"
3762 " drbd-based instances exist",
3765 node_list = self.owned_locks(locking.LEVEL_NODE)
3767 # if vg_name is not None, check the given volume group on all nodes
3768 if self.op.vg_name:
3769 vglist = self.rpc.call_vg_list(node_list)
3770 for node in node_list:
3771 msg = vglist[node].fail_msg
3772 if msg:
3773 # ignoring down node
3774 self.LogWarning("Error while gathering data on node %s"
3775 " (ignoring node): %s", node, msg)
3776 continue
3777 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3778 self.op.vg_name,
3779 constants.MIN_VG_SIZE)
3780 if vgstatus:
3781 raise errors.OpPrereqError("Error on node '%s': %s" %
3782 (node, vgstatus), errors.ECODE_ENVIRON)
3784 if self.op.drbd_helper:
3785 # checks given drbd helper on all nodes
3786 helpers = self.rpc.call_drbd_helper(node_list)
3787 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3788 if ninfo.offline:
3789 self.LogInfo("Not checking drbd helper on offline node %s", node)
3790 continue
3791 msg = helpers[node].fail_msg
3792 if msg:
3793 raise errors.OpPrereqError("Error checking drbd helper on node"
3794 " '%s': %s" % (node, msg),
3795 errors.ECODE_ENVIRON)
3796 node_helper = helpers[node].payload
3797 if node_helper != self.op.drbd_helper:
3798 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3799 (node, node_helper), errors.ECODE_ENVIRON)
3801 self.cluster = cluster = self.cfg.GetClusterInfo()
3802 # validate params changes
3803 if self.op.beparams:
3804 objects.UpgradeBeParams(self.op.beparams)
3805 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3806 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3808 if self.op.ndparams:
3809 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3810 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3812 # TODO: we need a more general way to handle resetting
3813 # cluster-level parameters to default values
3814 if self.new_ndparams["oob_program"] == "":
3815 self.new_ndparams["oob_program"] = \
3816 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3818 if self.op.hv_state:
3819 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3820 self.cluster.hv_state_static)
3821 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3822 for hv, values in new_hv_state.items())
3824 if self.op.disk_state:
3825 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3826 self.cluster.disk_state_static)
3827 self.new_disk_state = \
3828 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3829 for name, values in svalues.items()))
3830 for storage, svalues in new_disk_state.items())
3832 if self.op.ipolicy:
3833 ipolicy = {}
3834 for key, value in self.op.ipolicy.items():
3835 utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
3836 ipolicy[key] = _GetUpdatedParams(cluster.ipolicy.get(key, {}),
3837 value)
3838 objects.InstancePolicy.CheckParameterSyntax(ipolicy)
3839 self.new_ipolicy = ipolicy
3841 if self.op.nicparams:
3842 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3843 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3844 objects.NIC.CheckParameterSyntax(self.new_nicparams)
3845 nic_errors = []
3847 # check all instances for consistency
3848 for instance in self.cfg.GetAllInstancesInfo().values():
3849 for nic_idx, nic in enumerate(instance.nics):
3850 params_copy = copy.deepcopy(nic.nicparams)
3851 params_filled = objects.FillDict(self.new_nicparams, params_copy)
3853 # check parameter syntax
3854 try:
3855 objects.NIC.CheckParameterSyntax(params_filled)
3856 except errors.ConfigurationError, err:
3857 nic_errors.append("Instance %s, nic/%d: %s" %
3858 (instance.name, nic_idx, err))
3860 # if we're moving instances to routed, check that they have an ip
3861 target_mode = params_filled[constants.NIC_MODE]
3862 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3863 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3864 " address" % (instance.name, nic_idx))
3865 if nic_errors:
3866 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3867 "\n".join(nic_errors))
3869 # hypervisor list/parameters
3870 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3871 if self.op.hvparams:
3872 for hv_name, hv_dict in self.op.hvparams.items():
3873 if hv_name not in self.new_hvparams:
3874 self.new_hvparams[hv_name] = hv_dict
3875 else:
3876 self.new_hvparams[hv_name].update(hv_dict)
3878 # disk template parameters
3879 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
3880 if self.op.diskparams:
3881 for dt_name, dt_params in self.op.diskparams.items():
3882 if dt_name not in self.new_diskparams:
3883 self.new_diskparams[dt_name] = dt_params
3884 else:
3885 self.new_diskparams[dt_name].update(dt_params)
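# Example (illustrative values): diskparams such as
# {constants.DT_DRBD8: {"metavg": "xenvg"}} override only the given keys
# for that template; all other templates keep the cluster defaults.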
3887 # os hypervisor parameters
3888 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3889 if self.op.os_hvp:
3890 for os_name, hvs in self.op.os_hvp.items():
3891 if os_name not in self.new_os_hvp:
3892 self.new_os_hvp[os_name] = hvs
3893 else:
3894 for hv_name, hv_dict in hvs.items():
3895 if hv_name not in self.new_os_hvp[os_name]:
3896 self.new_os_hvp[os_name][hv_name] = hv_dict
3897 else:
3898 self.new_os_hvp[os_name][hv_name].update(hv_dict)
3900 # os parameters
3901 self.new_osp = objects.FillDict(cluster.osparams, {})
3902 if self.op.osparams:
3903 for os_name, osp in self.op.osparams.items():
3904 if os_name not in self.new_osp:
3905 self.new_osp[os_name] = {}
3907 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3908 use_none=True)
3910 if not self.new_osp[os_name]:
3911 # we removed all parameters
3912 del self.new_osp[os_name]
3913 else:
3914 # check the parameter validity (remote check)
3915 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3916 os_name, self.new_osp[os_name])
3918 # changes to the hypervisor list
3919 if self.op.enabled_hypervisors is not None:
3920 self.hv_list = self.op.enabled_hypervisors
3921 for hv in self.hv_list:
3922 # if the hypervisor doesn't already exist in the cluster
3923 # hvparams, we initialize it to empty, and then (in both
3924 # cases) we make sure to fill the defaults, as we might not
3925 # have a complete defaults list if the hypervisor wasn't
3926 # enabled before
3927 if hv not in new_hvp:
3928 new_hvp[hv] = {}
3929 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3930 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3931 else:
3932 self.hv_list = cluster.enabled_hypervisors
3934 if self.op.hvparams or self.op.enabled_hypervisors is not None:
3935 # either the enabled list has changed, or the parameters have, validate
3936 for hv_name, hv_params in self.new_hvparams.items():
3937 if ((self.op.hvparams and hv_name in self.op.hvparams) or
3938 (self.op.enabled_hypervisors and
3939 hv_name in self.op.enabled_hypervisors)):
3940 # either this is a new hypervisor, or its parameters have changed
3941 hv_class = hypervisor.GetHypervisor(hv_name)
3942 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3943 hv_class.CheckParameterSyntax(hv_params)
3944 _CheckHVParams(self, node_list, hv_name, hv_params)
3946 if self.op.os_hvp:
3947 # no need to check any newly-enabled hypervisors, since the
3948 # defaults have already been checked in the above code-block
3949 for os_name, os_hvp in self.new_os_hvp.items():
3950 for hv_name, hv_params in os_hvp.items():
3951 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3952 # we need to fill in the new os_hvp on top of the actual hv_p
3953 cluster_defaults = self.new_hvparams.get(hv_name, {})
3954 new_osp = objects.FillDict(cluster_defaults, hv_params)
3955 hv_class = hypervisor.GetHypervisor(hv_name)
3956 hv_class.CheckParameterSyntax(new_osp)
3957 _CheckHVParams(self, node_list, hv_name, new_osp)
3959 if self.op.default_iallocator:
3960 alloc_script = utils.FindFile(self.op.default_iallocator,
3961 constants.IALLOCATOR_SEARCH_PATH,
3962 os.path.isfile)
3963 if alloc_script is None:
3964 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3965 " specified" % self.op.default_iallocator,
3966 errors.ECODE_INVAL)
3968 def Exec(self, feedback_fn):
3969 """Change the parameters of the cluster.
3972 if self.op.vg_name is not None:
3973 new_volume = self.op.vg_name
3976 if new_volume != self.cfg.GetVGName():
3977 self.cfg.SetVGName(new_volume)
3979 feedback_fn("Cluster LVM configuration already in desired"
3980 " state, not changing")
3981 if self.op.drbd_helper is not None:
3982 new_helper = self.op.drbd_helper
3983 if not new_helper:
3984 new_helper = None
3985 if new_helper != self.cfg.GetDRBDHelper():
3986 self.cfg.SetDRBDHelper(new_helper)
3987 else:
3988 feedback_fn("Cluster DRBD helper already in desired state,"
3989 " not changing")
3990 if self.op.hvparams:
3991 self.cluster.hvparams = self.new_hvparams
3992 if self.op.os_hvp:
3993 self.cluster.os_hvp = self.new_os_hvp
3994 if self.op.enabled_hypervisors is not None:
3995 self.cluster.hvparams = self.new_hvparams
3996 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3997 if self.op.beparams:
3998 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3999 if self.op.nicparams:
4000 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4001 if self.op.ipolicy:
4002 self.cluster.ipolicy = self.new_ipolicy
4003 if self.op.osparams:
4004 self.cluster.osparams = self.new_osp
4005 if self.op.ndparams:
4006 self.cluster.ndparams = self.new_ndparams
4007 if self.op.diskparams:
4008 self.cluster.diskparams = self.new_diskparams
4009 if self.op.hv_state:
4010 self.cluster.hv_state_static = self.new_hv_state
4011 if self.op.disk_state:
4012 self.cluster.disk_state_static = self.new_disk_state
4014 if self.op.candidate_pool_size is not None:
4015 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4016 # we need to update the pool size here, otherwise the save will fail
4017 _AdjustCandidatePool(self, [])
4019 if self.op.maintain_node_health is not None:
4020 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4021 feedback_fn("Note: CONFD was disabled at build time, node health"
4022 " maintenance is not useful (still enabling it)")
4023 self.cluster.maintain_node_health = self.op.maintain_node_health
4025 if self.op.prealloc_wipe_disks is not None:
4026 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4028 if self.op.add_uids is not None:
4029 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4031 if self.op.remove_uids is not None:
4032 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4034 if self.op.uid_pool is not None:
4035 self.cluster.uid_pool = self.op.uid_pool
4037 if self.op.default_iallocator is not None:
4038 self.cluster.default_iallocator = self.op.default_iallocator
4040 if self.op.reserved_lvs is not None:
4041 self.cluster.reserved_lvs = self.op.reserved_lvs
4043 if self.op.use_external_mip_script is not None:
4044 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4046 def helper_os(aname, mods, desc):
4047 desc += " OS list"
4048 lst = getattr(self.cluster, aname)
4049 for key, val in mods:
4050 if key == constants.DDM_ADD:
4051 if val in lst:
4052 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4053 else:
4054 lst.append(val)
4055 elif key == constants.DDM_REMOVE:
4056 if val in lst:
4057 lst.remove(val)
4058 else:
4059 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4060 else:
4061 raise errors.ProgrammerError("Invalid modification '%s'" % key)
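# Example (illustrative): each modification pairs a DDM action with an
# OS name, e.g.
#   helper_os("hidden_os", [(constants.DDM_ADD, "debian-image")], "hidden")
# appends "debian-image" to cluster.hidden_os unless already present.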
4063 if self.op.hidden_os:
4064 helper_os("hidden_os", self.op.hidden_os, "hidden")
4066 if self.op.blacklisted_os:
4067 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4069 if self.op.master_netdev:
4070 master_params = self.cfg.GetMasterNetworkParameters()
4071 ems = self.cfg.GetUseExternalMipScript()
4072 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4073 self.cluster.master_netdev)
4074 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4075 master_params, ems)
4076 result.Raise("Could not disable the master ip")
4077 feedback_fn("Changing master_netdev from %s to %s" %
4078 (master_params.netdev, self.op.master_netdev))
4079 self.cluster.master_netdev = self.op.master_netdev
4081 if self.op.master_netmask:
4082 master_params = self.cfg.GetMasterNetworkParameters()
4083 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4084 result = self.rpc.call_node_change_master_netmask(master_params.name,
4085 master_params.netmask,
4086 self.op.master_netmask,
4087 master_params.ip,
4088 master_params.netdev)
4089 if result.fail_msg:
4090 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4091 feedback_fn(msg)
4093 self.cluster.master_netmask = self.op.master_netmask
4095 self.cfg.Update(self.cluster, feedback_fn)
4097 if self.op.master_netdev:
4098 master_params = self.cfg.GetMasterNetworkParameters()
4099 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4100 self.op.master_netdev)
4101 ems = self.cfg.GetUseExternalMipScript()
4102 result = self.rpc.call_node_activate_master_ip(master_params.name,
4103 master_params, ems)
4104 if result.fail_msg:
4105 self.LogWarning("Could not re-enable the master ip on"
4106 " the master, please restart manually: %s",
4107 result.fail_msg)
4110 def _UploadHelper(lu, nodes, fname):
4111 """Helper for uploading a file and showing warnings.
4114 if os.path.exists(fname):
4115 result = lu.rpc.call_upload_file(nodes, fname)
4116 for to_node, to_result in result.items():
4117 msg = to_result.fail_msg
4118 if msg:
4119 msg = ("Copy of file %s to node %s failed: %s" %
4120 (fname, to_node, msg))
4121 lu.proc.LogWarning(msg)
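# Usage sketch (node name hypothetical): push one file to a set of nodes,
# logging failures as warnings instead of aborting:
#   _UploadHelper(lu, ["node1.example.com"], constants.ETC_HOSTS)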
4124 def _ComputeAncillaryFiles(cluster, redist):
4125 """Compute files external to Ganeti which need to be consistent.
4127 @type redist: boolean
4128 @param redist: Whether to include files which need to be redistributed
4131 # Compute files for all nodes
4133 constants.SSH_KNOWN_HOSTS_FILE,
4134 constants.CONFD_HMAC_KEY,
4135 constants.CLUSTER_DOMAIN_SECRET_FILE,
4136 constants.SPICE_CERT_FILE,
4137 constants.SPICE_CACERT_FILE,
4138 constants.RAPI_USERS_FILE,
4139 ])
4141 if not redist:
4142 files_all.update(constants.ALL_CERT_FILES)
4143 files_all.update(ssconf.SimpleStore().GetFileList())
4144 else:
4145 # we need to ship at least the RAPI certificate
4146 files_all.add(constants.RAPI_CERT_FILE)
4148 if cluster.modify_etc_hosts:
4149 files_all.add(constants.ETC_HOSTS)
4151 # Files which are optional, these must:
4152 # - be present in one other category as well
4153 # - either exist or not exist on all nodes of that category (mc, vm all)
4154 files_opt = set([
4155 constants.RAPI_USERS_FILE,
4156 ])
4158 # Files which should only be on master candidates
4159 files_mc = set()
4161 if not redist:
4162 files_mc.add(constants.CLUSTER_CONF_FILE)
4164 # FIXME: this should also be replicated but Ganeti doesn't support files_mc
4165 # replication
4166 files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)
4168 # Files which should only be on VM-capable nodes
4169 files_vm = set(filename
4170 for hv_name in cluster.enabled_hypervisors
4171 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4173 files_opt |= set(filename
4174 for hv_name in cluster.enabled_hypervisors
4175 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4177 # Filenames in each category must be unique
4178 all_files_set = files_all | files_mc | files_vm
4179 assert (len(all_files_set) ==
4180 sum(map(len, [files_all, files_mc, files_vm]))), \
4181 "Found file listed in more than one file list"
4183 # Optional files must be present in one other category
4184 assert all_files_set.issuperset(files_opt), \
4185 "Optional file not in a different required list"
4187 return (files_all, files_opt, files_mc, files_vm)
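# Example: callers unpack the four categories and usually ignore the
# optional set when redistributing, as _RedistributeAncillaryFiles below
# does:
#   (files_all, _, files_mc, files_vm) = _ComputeAncillaryFiles(cluster, True)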
4190 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4191 """Distribute additional files which are part of the cluster configuration.
4193 ConfigWriter takes care of distributing the config and ssconf files, but
4194 there are more files which should be distributed to all nodes. This function
4195 makes sure those are copied.
4197 @param lu: calling logical unit
4198 @param additional_nodes: list of nodes not in the config to distribute to
4199 @type additional_vm: boolean
4200 @param additional_vm: whether the additional nodes are vm-capable or not
4202 """
4203 # Gather target nodes
4204 cluster = lu.cfg.GetClusterInfo()
4205 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4207 online_nodes = lu.cfg.GetOnlineNodeList()
4208 vm_nodes = lu.cfg.GetVmCapableNodeList()
4210 if additional_nodes is not None:
4211 online_nodes.extend(additional_nodes)
4212 if additional_vm:
4213 vm_nodes.extend(additional_nodes)
4215 # Never distribute to master node
4216 for nodelist in [online_nodes, vm_nodes]:
4217 if master_info.name in nodelist:
4218 nodelist.remove(master_info.name)
4221 (files_all, _, files_mc, files_vm) = \
4222 _ComputeAncillaryFiles(cluster, True)
4224 # Never re-distribute configuration file from here
4225 assert not (constants.CLUSTER_CONF_FILE in files_all or
4226 constants.CLUSTER_CONF_FILE in files_vm)
4227 assert not files_mc, "Master candidates not handled in this function"
4229 filemap = [
4230 (online_nodes, files_all),
4231 (vm_nodes, files_vm),
4232 ]
4234 # Upload the files
4235 for (node_list, files) in filemap:
4236 for fname in files:
4237 _UploadHelper(lu, node_list, fname)
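# Usage sketch: LUs call this after configuration changes; a node not yet
# in the configuration can be included explicitly, as LUNodeAdd does:
#   _RedistributeAncillaryFiles(self, additional_nodes=[node],
#                               additional_vm=self.op.vm_capable)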
4240 class LUClusterRedistConf(NoHooksLU):
4241 """Force the redistribution of cluster configuration.
4243 This is a very simple LU.
4245 """
4246 REQ_BGL = False
4248 def ExpandNames(self):
4249 self.needed_locks = {
4250 locking.LEVEL_NODE: locking.ALL_SET,
4251 }
4252 self.share_locks[locking.LEVEL_NODE] = 1
4254 def Exec(self, feedback_fn):
4255 """Redistribute the configuration.
4258 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4259 _RedistributeAncillaryFiles(self)
4262 class LUClusterActivateMasterIp(NoHooksLU):
4263 """Activate the master IP on the master node.
4266 def Exec(self, feedback_fn):
4267 """Activate the master IP.
4270 master_params = self.cfg.GetMasterNetworkParameters()
4271 ems = self.cfg.GetUseExternalMipScript()
4272 result = self.rpc.call_node_activate_master_ip(master_params.name,
4273 master_params, ems)
4274 result.Raise("Could not activate the master IP")
4277 class LUClusterDeactivateMasterIp(NoHooksLU):
4278 """Deactivate the master IP on the master node.
4281 def Exec(self, feedback_fn):
4282 """Deactivate the master IP.
4285 master_params = self.cfg.GetMasterNetworkParameters()
4286 ems = self.cfg.GetUseExternalMipScript()
4287 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4288 master_params, ems)
4289 result.Raise("Could not deactivate the master IP")
4292 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4293 """Sleep and poll for an instance's disk to sync.
4296 if not instance.disks or disks is not None and not disks:
4299 disks = _ExpandCheckDisks(instance, disks)
4301 if not oneshot:
4302 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4304 node = instance.primary_node
4306 for dev in disks:
4307 lu.cfg.SetDiskID(dev, node)
4309 # TODO: Convert to utils.Retry
4311 retries = 0
4312 degr_retries = 10 # in seconds, as we sleep 1 second each time
4313 while True:
4314 max_time = 0
4315 done = True
4316 cumul_degraded = False
4317 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
4318 msg = rstats.fail_msg
4319 if msg:
4320 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4321 retries += 1
4322 if retries >= 10:
4323 raise errors.RemoteError("Can't contact node %s for mirror data,"
4324 " aborting." % node)
4325 time.sleep(6)
4326 continue
4327 rstats = rstats.payload
4328 retries = 0
4329 for i, mstat in enumerate(rstats):
4330 if mstat is None:
4331 lu.LogWarning("Can't compute data for node %s/%s",
4332 node, disks[i].iv_name)
4333 continue
4335 cumul_degraded = (cumul_degraded or
4336 (mstat.is_degraded and mstat.sync_percent is None))
4337 if mstat.sync_percent is not None:
4338 done = False
4339 if mstat.estimated_time is not None:
4340 rem_time = ("%s remaining (estimated)" %
4341 utils.FormatSeconds(mstat.estimated_time))
4342 max_time = mstat.estimated_time
4343 else:
4344 rem_time = "no time estimate"
4345 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4346 (disks[i].iv_name, mstat.sync_percent, rem_time))
4348 # if we're done but degraded, let's do a few small retries, to
4349 # make sure we see a stable and not transient situation; therefore
4350 # we force restart of the loop
4351 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4352 logging.info("Degraded disks found, %d retries left", degr_retries)
4353 degr_retries -= 1
4354 time.sleep(1)
4355 continue
4357 if done or oneshot:
4358 break
4360 time.sleep(min(60, max_time))
4362 if done:
4363 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4364 return not cumul_degraded
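# Usage sketch: an LU's Exec can block until the mirrors of a newly
# created instance have caught up, treating a degraded result as fatal:
#   if not _WaitForSync(self, instance):
#     raise errors.OpExecError("Disks are degraded after sync")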
4367 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
4368 """Check that mirrors are not degraded.
4370 The ldisk parameter, if True, will change the test from the
4371 is_degraded attribute (which represents overall non-ok status for
4372 the device(s)) to the ldisk (representing the local storage status).
4375 lu.cfg.SetDiskID(dev, node)
4379 if on_primary or dev.AssembleOnSecondary():
4380 rstats = lu.rpc.call_blockdev_find(node, dev)
4381 msg = rstats.fail_msg
4382 if msg:
4383 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4384 result = False
4385 elif not rstats.payload:
4386 lu.LogWarning("Can't find disk on node %s", node)
4387 result = False
4388 else:
4389 if ldisk:
4390 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4391 else:
4392 result = result and not rstats.payload.is_degraded
4394 if dev.children:
4395 for child in dev.children:
4396 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
4398 return result
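# Usage sketch: callers that only care about local storage health (for
# example before failing over a DRBD-based instance) pass ldisk=True:
#   ok = _CheckDiskConsistency(self, dev, instance.primary_node,
#                              on_primary=True, ldisk=True)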
4401 class LUOobCommand(NoHooksLU):
4402 """Logical unit for OOB handling.
4406 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4408 def ExpandNames(self):
4409 """Gather locks we need.
4412 if self.op.node_names:
4413 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4414 lock_names = self.op.node_names
4415 else:
4416 lock_names = locking.ALL_SET
4418 self.needed_locks = {
4419 locking.LEVEL_NODE: lock_names,
4420 }
4422 def CheckPrereq(self):
4423 """Check prerequisites.
4425 This checks:
4426 - the node exists in the configuration
4427 - OOB is supported
4429 Any errors are signaled by raising errors.OpPrereqError.
4431 """
4432 self.nodes = []
4433 self.master_node = self.cfg.GetMasterNode()
4435 assert self.op.power_delay >= 0.0
4437 if self.op.node_names:
4438 if (self.op.command in self._SKIP_MASTER and
4439 self.master_node in self.op.node_names):
4440 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4441 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4443 if master_oob_handler:
4444 additional_text = ("run '%s %s %s' if you want to operate on the"
4445 " master regardless") % (master_oob_handler,
4449 additional_text = "it does not support out-of-band operations"
4451 raise errors.OpPrereqError(("Operating on the master node %s is not"
4452 " allowed for %s; %s") %
4453 (self.master_node, self.op.command,
4454 additional_text), errors.ECODE_INVAL)
4455 else:
4456 self.op.node_names = self.cfg.GetNodeList()
4457 if self.op.command in self._SKIP_MASTER:
4458 self.op.node_names.remove(self.master_node)
4460 if self.op.command in self._SKIP_MASTER:
4461 assert self.master_node not in self.op.node_names
4463 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4464 if node is None:
4465 raise errors.OpPrereqError("Node %s not found" % node_name,
4466 errors.ECODE_NOENT)
4467 else:
4468 self.nodes.append(node)
4470 if (not self.op.ignore_status and
4471 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4472 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4473 " not marked offline") % node_name,
4476 def Exec(self, feedback_fn):
4477 """Execute OOB and return result if we expect any.
4480 master_node = self.master_node
4483 for idx, node in enumerate(utils.NiceSort(self.nodes,
4484 key=lambda node: node.name)):
4485 node_entry = [(constants.RS_NORMAL, node.name)]
4486 ret.append(node_entry)
4488 oob_program = _SupportsOob(self.cfg, node)
4490 if not oob_program:
4491 node_entry.append((constants.RS_UNAVAIL, None))
4492 continue
4494 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4495 self.op.command, oob_program, node.name)
4496 result = self.rpc.call_run_oob(master_node, oob_program,
4497 self.op.command, node.name,
4498 self.op.timeout)
4500 if result.fail_msg:
4501 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4502 node.name, result.fail_msg)
4503 node_entry.append((constants.RS_NODATA, None))
4504 else:
4505 try:
4506 self._CheckPayload(result)
4507 except errors.OpExecError, err:
4508 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4509 node.name, err)
4510 node_entry.append((constants.RS_NODATA, None))
4511 else:
4512 if self.op.command == constants.OOB_HEALTH:
4513 # For health we should log important events
4514 for item, status in result.payload:
4515 if status in [constants.OOB_STATUS_WARNING,
4516 constants.OOB_STATUS_CRITICAL]:
4517 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4518 item, node.name, status)
4520 if self.op.command == constants.OOB_POWER_ON:
4521 node.powered = True
4522 elif self.op.command == constants.OOB_POWER_OFF:
4523 node.powered = False
4524 elif self.op.command == constants.OOB_POWER_STATUS:
4525 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4526 if powered != node.powered:
4527 logging.warning(("Recorded power state (%s) of node '%s' does not"
4528 " match actual power state (%s)"), node.powered,
4531 # For configuration changing commands we should update the node
4532 if self.op.command in (constants.OOB_POWER_ON,
4533 constants.OOB_POWER_OFF):
4534 self.cfg.Update(node, feedback_fn)
4536 node_entry.append((constants.RS_NORMAL, result.payload))
4538 if (self.op.command == constants.OOB_POWER_ON and
4539 idx < len(self.nodes) - 1):
4540 time.sleep(self.op.power_delay)
4542 return ret
4544 def _CheckPayload(self, result):
4545 """Checks if the payload is valid.
4547 @param result: RPC result
4548 @raises errors.OpExecError: If payload is not valid
4550 """
4551 errs = []
4552 if self.op.command == constants.OOB_HEALTH:
4553 if not isinstance(result.payload, list):
4554 errs.append("command 'health' is expected to return a list but got %s" %
4555 type(result.payload))
4556 else:
4557 for item, status in result.payload:
4558 if status not in constants.OOB_STATUSES:
4559 errs.append("health item '%s' has invalid status '%s'" %
4560 (item, status))
4562 if self.op.command == constants.OOB_POWER_STATUS:
4563 if not isinstance(result.payload, dict):
4564 errs.append("power-status is expected to return a dict but got %s" %
4565 type(result.payload))
4567 if self.op.command in [
4568 constants.OOB_POWER_ON,
4569 constants.OOB_POWER_OFF,
4570 constants.OOB_POWER_CYCLE,
4571 ]:
4572 if result.payload is not None:
4573 errs.append("%s is expected to not return payload but got '%s'" %
4574 (self.op.command, result.payload))
4576 if errs:
4577 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4578 utils.CommaJoin(errs))
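# Expected payload shapes (summarizing the checks above): "health"
# returns a list of (item, status) pairs with status in
# constants.OOB_STATUSES, "power-status" returns a dict such as
# {constants.OOB_POWER_STATUS_POWERED: True}, and the power-on/off/cycle
# commands must return no payload at all.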
4581 class _OsQuery(_QueryBase):
4582 FIELDS = query.OS_FIELDS
4584 def ExpandNames(self, lu):
4585 # Lock all nodes in shared mode
4586 # Temporary removal of locks, should be reverted later
4587 # TODO: reintroduce locks when they are lighter-weight
4588 lu.needed_locks = {}
4589 #self.share_locks[locking.LEVEL_NODE] = 1
4590 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4592 # The following variables interact with _QueryBase._GetNames
4593 if self.names:
4594 self.wanted = self.names
4595 else:
4596 self.wanted = locking.ALL_SET
4598 self.do_locking = self.use_locking
4600 def DeclareLocks(self, lu, level):
4601 pass
4603 @staticmethod
4604 def _DiagnoseByOS(rlist):
4605 """Remaps a per-node return list into an a per-os per-node dictionary
4607 @param rlist: a map with node names as keys and OS objects as values
4610 @return: a dictionary with osnames as keys and as value another
4611 map, with nodes as keys and tuples of (path, status, diagnose,
4612 variants, parameters, api_versions) as values, eg::
4614 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4615 (/srv/..., False, "invalid api")],
4616 "node2": [(/srv/..., True, "", [], [])]}
4621 # we build here the list of nodes that didn't fail the RPC (at RPC
4622 # level), so that nodes with a non-responding node daemon don't
4623 # make all OSes invalid
4624 good_nodes = [node_name for node_name in rlist
4625 if not rlist[node_name].fail_msg]
4626 for node_name, nr in rlist.items():
4627 if nr.fail_msg or not nr.payload:
4628 continue
4629 for (name, path, status, diagnose, variants,
4630 params, api_versions) in nr.payload:
4631 if name not in all_os:
4632 # build a list of nodes for this os containing empty lists
4633 # for each node in node_list
4634 all_os[name] = {}
4635 for nname in good_nodes:
4636 all_os[name][nname] = []
4637 # convert params from [name, help] to (name, help)
4638 params = [tuple(v) for v in params]
4639 all_os[name][node_name].append((path, status, diagnose,
4640 variants, params, api_versions))
4642 return all_os
4643 def _GetQueryData(self, lu):
4644 """Computes the list of nodes and their attributes.
4647 # Locking is not used
4648 assert not (compat.any(lu.glm.is_owned(level)
4649 for level in locking.LEVELS
4650 if level != locking.LEVEL_CLUSTER) or
4651 self.do_locking or self.use_locking)
4653 valid_nodes = [node.name
4654 for node in lu.cfg.GetAllNodesInfo().values()
4655 if not node.offline and node.vm_capable]
4656 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4657 cluster = lu.cfg.GetClusterInfo()
4659 data = {}
4661 for (os_name, os_data) in pol.items():
4662 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4663 hidden=(os_name in cluster.hidden_os),
4664 blacklisted=(os_name in cluster.blacklisted_os))
4666 variants = set()
4667 parameters = set()
4668 api_versions = set()
4670 for idx, osl in enumerate(os_data.values()):
4671 info.valid = bool(info.valid and osl and osl[0][1])
4672 if not info.valid:
4673 break
4675 (node_variants, node_params, node_api) = osl[0][3:6]
4676 if idx == 0:
4677 # First entry
4678 variants.update(node_variants)
4679 parameters.update(node_params)
4680 api_versions.update(node_api)
4681 else:
4682 # Filter out inconsistent values
4683 variants.intersection_update(node_variants)
4684 parameters.intersection_update(node_params)
4685 api_versions.intersection_update(node_api)
4687 info.variants = list(variants)
4688 info.parameters = list(parameters)
4689 info.api_versions = list(api_versions)
4691 data[os_name] = info
4693 # Prepare data in requested order
4694 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4695 if name in data]
4698 class LUOsDiagnose(NoHooksLU):
4699 """Logical unit for OS diagnose/query.
4705 def _BuildFilter(fields, names):
4706 """Builds a filter for querying OSes.
4709 name_filter = qlang.MakeSimpleFilter("name", names)
4711 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4712 # respective field is not requested
4713 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4714 for fname in ["hidden", "blacklisted"]
4715 if fname not in fields]
4716 if "valid" not in fields:
4717 status_filter.append([qlang.OP_TRUE, "valid"])
4719 if status_filter:
4720 status_filter.insert(0, qlang.OP_AND)
4721 else:
4722 status_filter = None
4724 if name_filter and status_filter:
4725 return [qlang.OP_AND, name_filter, status_filter]
4726 elif name_filter:
4727 return name_filter
4728 else:
4729 return status_filter
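# Example (illustrative): for fields=["name"] and names=["lenny-image"]
# all three status fields are hidden, so the combined filter is
#   [qlang.OP_AND,
#    [qlang.OP_OR, [qlang.OP_EQUAL, "name", "lenny-image"]],
#    [qlang.OP_AND,
#     [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
#     [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
#     [qlang.OP_TRUE, "valid"]]]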
4731 def CheckArguments(self):
4732 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4733 self.op.output_fields, False)
4735 def ExpandNames(self):
4736 self.oq.ExpandNames(self)
4738 def Exec(self, feedback_fn):
4739 return self.oq.OldStyleQuery(self)
4742 class LUNodeRemove(LogicalUnit):
4743 """Logical unit for removing a node.
4746 HPATH = "node-remove"
4747 HTYPE = constants.HTYPE_NODE
4749 def BuildHooksEnv(self):
4750 """Build hooks env.
4752 This doesn't run on the target node in the pre phase as a failed
4753 node would then be impossible to remove.
4755 """
4756 return {
4757 "OP_TARGET": self.op.node_name,
4758 "NODE_NAME": self.op.node_name,
4759 }
4761 def BuildHooksNodes(self):
4762 """Build hooks nodes.
4765 all_nodes = self.cfg.GetNodeList()
4767 all_nodes.remove(self.op.node_name)
4769 logging.warning("Node '%s', which is about to be removed, was not found"
4770 " in the list of all nodes", self.op.node_name)
4771 return (all_nodes, all_nodes)
4773 def CheckPrereq(self):
4774 """Check prerequisites.
4777 - the node exists in the configuration
4778 - it does not have primary or secondary instances
4779 - it's not the master
4781 Any errors are signaled by raising errors.OpPrereqError.
4784 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4785 node = self.cfg.GetNodeInfo(self.op.node_name)
4786 assert node is not None
4788 masternode = self.cfg.GetMasterNode()
4789 if node.name == masternode:
4790 raise errors.OpPrereqError("Node is the master node, failover to another"
4791 " node is required", errors.ECODE_INVAL)
4793 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4794 if node.name in instance.all_nodes:
4795 raise errors.OpPrereqError("Instance %s is still running on the node,"
4796 " please remove first" % instance_name,
4798 self.op.node_name = node.name
4801 def Exec(self, feedback_fn):
4802 """Removes the node from the cluster.
4806 logging.info("Stopping the node daemon and removing configs from node %s",
4809 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4811 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
4812 "Not owning BGL"
4814 # Promote nodes to master candidate as needed
4815 _AdjustCandidatePool(self, exceptions=[node.name])
4816 self.context.RemoveNode(node.name)
4818 # Run post hooks on the node before it's removed
4819 _RunPostHook(self, node.name)
4821 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4822 msg = result.fail_msg
4823 if msg:
4824 self.LogWarning("Errors encountered on the remote node while leaving"
4825 " the cluster: %s", msg)
4827 # Remove node from our /etc/hosts
4828 if self.cfg.GetClusterInfo().modify_etc_hosts:
4829 master_node = self.cfg.GetMasterNode()
4830 result = self.rpc.call_etc_hosts_modify(master_node,
4831 constants.ETC_HOSTS_REMOVE,
4832 node.name, None)
4833 result.Raise("Can't update hosts file with new host data")
4834 _RedistributeAncillaryFiles(self)
4837 class _NodeQuery(_QueryBase):
4838 FIELDS = query.NODE_FIELDS
4840 def ExpandNames(self, lu):
4841 lu.needed_locks = {}
4842 lu.share_locks = _ShareAll()
4844 if self.names:
4845 self.wanted = _GetWantedNodes(lu, self.names)
4846 else:
4847 self.wanted = locking.ALL_SET
4849 self.do_locking = (self.use_locking and
4850 query.NQ_LIVE in self.requested_data)
4852 if self.do_locking:
4853 # If any non-static field is requested we need to lock the nodes
4854 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4856 def DeclareLocks(self, lu, level):
4857 pass
4859 def _GetQueryData(self, lu):
4860 """Computes the list of nodes and their attributes.
4863 all_info = lu.cfg.GetAllNodesInfo()
4865 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4867 # Gather data as requested
4868 if query.NQ_LIVE in self.requested_data:
4869 # filter out non-vm_capable nodes
4870 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4872 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
4873 [lu.cfg.GetHypervisorType()])
4874 live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
4875 for (name, nresult) in node_data.items()
4876 if not nresult.fail_msg and nresult.payload)
4877 else:
4878 live_data = None
4880 if query.NQ_INST in self.requested_data:
4881 node_to_primary = dict([(name, set()) for name in nodenames])
4882 node_to_secondary = dict([(name, set()) for name in nodenames])
4884 inst_data = lu.cfg.GetAllInstancesInfo()
4886 for inst in inst_data.values():
4887 if inst.primary_node in node_to_primary:
4888 node_to_primary[inst.primary_node].add(inst.name)
4889 for secnode in inst.secondary_nodes:
4890 if secnode in node_to_secondary:
4891 node_to_secondary[secnode].add(inst.name)
4892 else:
4893 node_to_primary = None
4894 node_to_secondary = None
4896 if query.NQ_OOB in self.requested_data:
4897 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4898 for name, node in all_info.iteritems())
4899 else:
4900 oob_support = None
4902 if query.NQ_GROUP in self.requested_data:
4903 groups = lu.cfg.GetAllNodeGroupsInfo()
4904 else:
4905 groups = {}
4907 return query.NodeQueryData([all_info[name] for name in nodenames],
4908 live_data, lu.cfg.GetMasterNode(),
4909 node_to_primary, node_to_secondary, groups,
4910 oob_support, lu.cfg.GetClusterInfo())
4913 class LUNodeQuery(NoHooksLU):
4914 """Logical unit for querying nodes.
4917 # pylint: disable=W0142
4920 def CheckArguments(self):
4921 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4922 self.op.output_fields, self.op.use_locking)
4924 def ExpandNames(self):
4925 self.nq.ExpandNames(self)
4927 def DeclareLocks(self, level):
4928 self.nq.DeclareLocks(self, level)
4930 def Exec(self, feedback_fn):
4931 return self.nq.OldStyleQuery(self)
4934 class LUNodeQueryvols(NoHooksLU):
4935 """Logical unit for getting volumes on node(s).
4939 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4940 _FIELDS_STATIC = utils.FieldSet("node")
4942 def CheckArguments(self):
4943 _CheckOutputFields(static=self._FIELDS_STATIC,
4944 dynamic=self._FIELDS_DYNAMIC,
4945 selected=self.op.output_fields)
4947 def ExpandNames(self):
4948 self.share_locks = _ShareAll()
4949 self.needed_locks = {}
4951 if not self.op.nodes:
4952 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4953 else:
4954 self.needed_locks[locking.LEVEL_NODE] = \
4955 _GetWantedNodes(self, self.op.nodes)
4957 def Exec(self, feedback_fn):
4958 """Computes the list of nodes and their attributes.
4961 nodenames = self.owned_locks(locking.LEVEL_NODE)
4962 volumes = self.rpc.call_node_volumes(nodenames)
4964 ilist = self.cfg.GetAllInstancesInfo()
4965 vol2inst = _MapInstanceDisksToNodes(ilist.values())
4967 output = []
4968 for node in nodenames:
4969 nresult = volumes[node]
4970 if nresult.offline:
4971 continue
4972 msg = nresult.fail_msg
4973 if msg:
4974 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
4975 continue
4977 node_vols = sorted(nresult.payload,
4978 key=operator.itemgetter("dev"))
4980 for vol in node_vols:
4981 node_output = []
4982 for field in self.op.output_fields:
4983 if field == "node":
4984 val = node
4985 elif field == "phys":
4986 val = vol["dev"]
4987 elif field == "vg":
4988 val = vol["vg"]
4989 elif field == "name":
4990 val = vol["name"]
4991 elif field == "size":
4992 val = int(float(vol["size"]))
4993 elif field == "instance":
4994 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
4995 else:
4996 raise errors.ParameterError(field)
4997 node_output.append(str(val))
4999 output.append(node_output)
5001 return output
5004 class LUNodeQueryStorage(NoHooksLU):
5005 """Logical unit for getting information on storage units on node(s).
5008 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5011 def CheckArguments(self):
5012 _CheckOutputFields(static=self._FIELDS_STATIC,
5013 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5014 selected=self.op.output_fields)
5016 def ExpandNames(self):
5017 self.share_locks = _ShareAll()
5018 self.needed_locks = {}
5020 if self.op.nodes:
5021 self.needed_locks[locking.LEVEL_NODE] = \
5022 _GetWantedNodes(self, self.op.nodes)
5023 else:
5024 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5026 def Exec(self, feedback_fn):
5027 """Computes the list of nodes and their attributes.
5030 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5032 # Always get name to sort by
5033 if constants.SF_NAME in self.op.output_fields:
5034 fields = self.op.output_fields[:]
5035 else:
5036 fields = [constants.SF_NAME] + self.op.output_fields
5038 # Never ask for node or type as it's only known to the LU
5039 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5040 while extra in fields:
5041 fields.remove(extra)
5043 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5044 name_idx = field_idx[constants.SF_NAME]
5046 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5047 data = self.rpc.call_storage_list(self.nodes,
5048 self.op.storage_type, st_args,
5049 self.op.name, fields)
5051 result = []
5053 for node in utils.NiceSort(self.nodes):
5054 nresult = data[node]
5055 if nresult.offline:
5056 continue
5058 msg = nresult.fail_msg
5059 if msg:
5060 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5061 continue
5063 rows = dict([(row[name_idx], row) for row in nresult.payload])
5065 for name in utils.NiceSort(rows.keys()):
5066 row = rows[name]
5068 out = []
5070 for field in self.op.output_fields:
5071 if field == constants.SF_NODE:
5072 val = node
5073 elif field == constants.SF_TYPE:
5074 val = self.op.storage_type
5075 elif field in field_idx:
5076 val = row[field_idx[field]]
5077 else:
5078 raise errors.ParameterError(field)
5080 out.append(val)
5082 result.append(out)
5084 return result
5087 class _InstanceQuery(_QueryBase):
5088 FIELDS = query.INSTANCE_FIELDS
5090 def ExpandNames(self, lu):
5091 lu.needed_locks = {}
5092 lu.share_locks = _ShareAll()
5094 if self.names:
5095 self.wanted = _GetWantedInstances(lu, self.names)
5096 else:
5097 self.wanted = locking.ALL_SET
5099 self.do_locking = (self.use_locking and
5100 query.IQ_LIVE in self.requested_data)
5101 if self.do_locking:
5102 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5103 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5104 lu.needed_locks[locking.LEVEL_NODE] = []
5105 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5107 self.do_grouplocks = (self.do_locking and
5108 query.IQ_NODES in self.requested_data)
5110 def DeclareLocks(self, lu, level):
5111 if self.do_locking:
5112 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5113 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5115 # Lock all groups used by instances optimistically; this requires going
5116 # via the node before it's locked, requiring verification later on
5117 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5118 set(group_uuid
5119 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5120 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5121 elif level == locking.LEVEL_NODE:
5122 lu._LockInstancesNodes() # pylint: disable=W0212
5124 @staticmethod
5125 def _CheckGroupLocks(lu):
5126 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5127 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5129 # Check if node groups for locked instances are still correct
5130 for instance_name in owned_instances:
5131 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5133 def _GetQueryData(self, lu):
5134 """Computes the list of instances and their attributes.
5137 if self.do_grouplocks:
5138 self._CheckGroupLocks(lu)
5140 cluster = lu.cfg.GetClusterInfo()
5141 all_info = lu.cfg.GetAllInstancesInfo()
5143 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5145 instance_list = [all_info[name] for name in instance_names]
5146 nodes = frozenset(itertools.chain(*(inst.all_nodes
5147 for inst in instance_list)))
5148 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5149 bad_nodes = []
5150 offline_nodes = []
5151 wrongnode_inst = set()
5153 # Gather data as requested
5154 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5155 live_data = {}
5156 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5157 for name in nodes:
5158 result = node_data[name]
5159 if result.offline:
5160 # offline nodes will be in both lists
5161 assert result.fail_msg
5162 offline_nodes.append(name)
5163 if result.fail_msg:
5164 bad_nodes.append(name)
5165 elif result.payload:
5166 for inst in result.payload:
5167 if inst in all_info:
5168 if all_info[inst].primary_node == name:
5169 live_data.update(result.payload)
5170 else:
5171 wrongnode_inst.add(inst)
5172 else:
5173 # orphan instance; we don't list it here as we don't
5174 # handle this case yet in the output of instance listing
5175 logging.warning("Orphan instance '%s' found on node %s",
5176 inst, name)
5177 # else no instance is alive
5178 else:
5179 live_data = {}
5181 if query.IQ_DISKUSAGE in self.requested_data:
5182 disk_usage = dict((inst.name,
5183 _ComputeDiskSize(inst.disk_template,
5184 [{constants.IDISK_SIZE: disk.size}
5185 for disk in inst.disks]))
5186 for inst in instance_list)
5187 else:
5188 disk_usage = None
5190 if query.IQ_CONSOLE in self.requested_data:
5191 consinfo = {}
5192 for inst in instance_list:
5193 if inst.name in live_data:
5194 # Instance is running
5195 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5196 else:
5197 consinfo[inst.name] = None
5198 assert set(consinfo.keys()) == set(instance_names)
5199 else:
5200 consinfo = None
5202 if query.IQ_NODES in self.requested_data:
5203 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5204 instance_list)))
5205 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5206 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5207 for uuid in set(map(operator.attrgetter("group"),
5208 nodes.values())))
5209 else:
5210 nodes = None
5211 groups = None
5213 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5214 disk_usage, offline_nodes, bad_nodes,
5215 live_data, wrongnode_inst, consinfo,
5216 nodes, groups)
5219 class LUQuery(NoHooksLU):
5220 """Query for resources/items of a certain kind.
5223 # pylint: disable=W0142
5226 def CheckArguments(self):
5227 qcls = _GetQueryImplementation(self.op.what)
5229 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5231 def ExpandNames(self):
5232 self.impl.ExpandNames(self)
5234 def DeclareLocks(self, level):
5235 self.impl.DeclareLocks(self, level)
5237 def Exec(self, feedback_fn):
5238 return self.impl.NewStyleQuery(self)
5241 class LUQueryFields(NoHooksLU):
5242 """Query for resources/items of a certain kind.
5245 # pylint: disable=W0142
5248 def CheckArguments(self):
5249 self.qcls = _GetQueryImplementation(self.op.what)
5251 def ExpandNames(self):
5252 self.needed_locks = {}
5254 def Exec(self, feedback_fn):
5255 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5258 class LUNodeModifyStorage(NoHooksLU):
5259 """Logical unit for modifying a storage volume on a node.
5264 def CheckArguments(self):
5265 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5267 storage_type = self.op.storage_type
5269 try:
5270 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5271 except KeyError:
5272 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5273 " modified" % storage_type,
5274 errors.ECODE_INVAL)
5276 diff = set(self.op.changes.keys()) - modifiable
5277 if diff:
5278 raise errors.OpPrereqError("The following fields can not be modified for"
5279 " storage units of type '%s': %r" %
5280 (storage_type, list(diff)),
5281 errors.ECODE_INVAL)
5283 def ExpandNames(self):
5284 self.needed_locks = {
5285 locking.LEVEL_NODE: self.op.node_name,
5286 }
5288 def Exec(self, feedback_fn):
5289 """Computes the list of nodes and their attributes.
5292 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5293 result = self.rpc.call_storage_modify(self.op.node_name,
5294 self.op.storage_type, st_args,
5295 self.op.name, self.op.changes)
5296 result.Raise("Failed to modify storage unit '%s' on %s" %
5297 (self.op.name, self.op.node_name))
5300 class LUNodeAdd(LogicalUnit):
5301 """Logical unit for adding node to the cluster.
5305 HTYPE = constants.HTYPE_NODE
5306 _NFLAGS = ["master_capable", "vm_capable"]
5308 def CheckArguments(self):
5309 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5310 # validate/normalize the node name
5311 self.hostname = netutils.GetHostname(name=self.op.node_name,
5312 family=self.primary_ip_family)
5313 self.op.node_name = self.hostname.name
5315 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5316 raise errors.OpPrereqError("Cannot readd the master node",
5317 errors.ECODE_STATE)
5319 if self.op.readd and self.op.group:
5320 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5321 " being readded", errors.ECODE_INVAL)
5323 def BuildHooksEnv(self):
5324 """Build hooks env.
5326 This will run on all nodes before, and on all nodes + the new node after.
5328 """
5329 return {
5330 "OP_TARGET": self.op.node_name,
5331 "NODE_NAME": self.op.node_name,
5332 "NODE_PIP": self.op.primary_ip,
5333 "NODE_SIP": self.op.secondary_ip,
5334 "MASTER_CAPABLE": str(self.op.master_capable),
5335 "VM_CAPABLE": str(self.op.vm_capable),
5338 def BuildHooksNodes(self):
5339 """Build hooks nodes.
5342 # Exclude added node
5343 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5344 post_nodes = pre_nodes + [self.op.node_name, ]
5346 return (pre_nodes, post_nodes)
5348 def CheckPrereq(self):
5349 """Check prerequisites.
5352 - the new node is not already in the config
5354 - its parameters (single/dual homed) matches the cluster
5356 Any errors are signaled by raising errors.OpPrereqError.
5360 hostname = self.hostname
5361 node = hostname.name
5362 primary_ip = self.op.primary_ip = hostname.ip
5363 if self.op.secondary_ip is None:
5364 if self.primary_ip_family == netutils.IP6Address.family:
5365 raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
5366 " IPv4 address must be given as secondary",
5367 errors.ECODE_INVAL)
5368 self.op.secondary_ip = primary_ip
5370 secondary_ip = self.op.secondary_ip
5371 if not netutils.IP4Address.IsValid(secondary_ip):
5372 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5373 " address" % secondary_ip, errors.ECODE_INVAL)
5375 node_list = cfg.GetNodeList()
5376 if not self.op.readd and node in node_list:
5377 raise errors.OpPrereqError("Node %s is already in the configuration" %
5378 node, errors.ECODE_EXISTS)
5379 elif self.op.readd and node not in node_list:
5380 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5381 errors.ECODE_NOENT)
5383 self.changed_primary_ip = False
5385 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5386 if self.op.readd and node == existing_node_name:
5387 if existing_node.secondary_ip != secondary_ip:
5388 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5389 " address configuration as before",
5391 if existing_node.primary_ip != primary_ip:
5392 self.changed_primary_ip = True
5396 if (existing_node.primary_ip == primary_ip or
5397 existing_node.secondary_ip == primary_ip or
5398 existing_node.primary_ip == secondary_ip or
5399 existing_node.secondary_ip == secondary_ip):
5400 raise errors.OpPrereqError("New node ip address(es) conflict with"
5401 " existing node %s" % existing_node.name,
5402 errors.ECODE_NOTUNIQUE)
5404 # After this 'if' block, None is no longer a valid value for the
5405 # _capable op attributes
5406 if self.op.readd:
5407 old_node = self.cfg.GetNodeInfo(node)
5408 assert old_node is not None, "Can't retrieve locked node %s" % node
5409 for attr in self._NFLAGS:
5410 if getattr(self.op, attr) is None:
5411 setattr(self.op, attr, getattr(old_node, attr))
5412 else:
5413 for attr in self._NFLAGS:
5414 if getattr(self.op, attr) is None:
5415 setattr(self.op, attr, True)
5417 if self.op.readd and not self.op.vm_capable:
5418 pri, sec = cfg.GetNodeInstances(node)
5419 if pri or sec:
5420 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5421 " flag set to false, but it already holds"
5422 " instances" % node,
5423 errors.ECODE_STATE)
5425 # check that the type of the node (single versus dual homed) is the
5426 # same as for the master
5427 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5428 master_singlehomed = myself.secondary_ip == myself.primary_ip
5429 newbie_singlehomed = secondary_ip == primary_ip
5430 if master_singlehomed != newbie_singlehomed:
5431 if master_singlehomed:
5432 raise errors.OpPrereqError("The master has no secondary ip but the"
5433 " new node has one",
5436 raise errors.OpPrereqError("The master has a secondary ip but the"
5437 " new node doesn't have one",
5440 # checks reachability
5441 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5442 raise errors.OpPrereqError("Node not reachable by ping",
5443 errors.ECODE_ENVIRON)
5445 if not newbie_singlehomed:
5446 # check reachability from my secondary ip to newbie's secondary ip
5447 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5448 source=myself.secondary_ip):
5449 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5450 " based ping to node daemon port",
5451 errors.ECODE_ENVIRON)
5453 if self.op.readd:
5454 exceptions = [node]
5455 else:
5456 exceptions = []
5458 if self.op.master_capable:
5459 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5460 else:
5461 self.master_candidate = False
5463 if self.op.readd:
5464 self.new_node = old_node
5465 else:
5466 node_group = cfg.LookupNodeGroup(self.op.group)
5467 self.new_node = objects.Node(name=node,
5468 primary_ip=primary_ip,
5469 secondary_ip=secondary_ip,
5470 master_candidate=self.master_candidate,
5471 offline=False, drained=False,
5472 group=node_group)
5474 if self.op.ndparams:
5475 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5477 if self.op.hv_state:
5478 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5480 if self.op.disk_state:
5481 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5483 def Exec(self, feedback_fn):
5484 """Adds the new node to the cluster.
5487 new_node = self.new_node
5488 node = new_node.name
5490 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5491 "Not owning BGL"
5493 # We are adding a new node, so we assume it is powered
5494 new_node.powered = True
5496 # for re-adds, reset the offline/drained/master-candidate flags;
5497 # we need to reset here, otherwise offline would prevent RPC calls
5498 # later in the procedure; this also means that if the re-add
5499 # fails, we are left with a non-offlined, broken node
5500 if self.op.readd:
5501 new_node.drained = new_node.offline = False # pylint: disable=W0201
5502 self.LogInfo("Readding a node, the offline/drained flags were reset")
5503 # if we demote the node, we do cleanup later in the procedure
5504 new_node.master_candidate = self.master_candidate
5505 if self.changed_primary_ip:
5506 new_node.primary_ip = self.op.primary_ip
5508 # copy the master/vm_capable flags
5509 for attr in self._NFLAGS:
5510 setattr(new_node, attr, getattr(self.op, attr))
5512 # notify the user about any possible mc promotion
5513 if new_node.master_candidate:
5514 self.LogInfo("Node will be a master candidate")
5516 if self.op.ndparams:
5517 new_node.ndparams = self.op.ndparams
5518 else:
5519 new_node.ndparams = {}
5521 if self.op.hv_state:
5522 new_node.hv_state_static = self.new_hv_state
5524 if self.op.disk_state:
5525 new_node.disk_state_static = self.new_disk_state
5527 # check connectivity
5528 result = self.rpc.call_version([node])[node]
5529 result.Raise("Can't get version information from node %s" % node)
5530 if constants.PROTOCOL_VERSION == result.payload:
5531 logging.info("Communication to node %s fine, sw version %s match",
5532 node, result.payload)
5533 else:
5534 raise errors.OpExecError("Version mismatch master version %s,"
5535 " node version %s" %
5536 (constants.PROTOCOL_VERSION, result.payload))
5538 # Add node to our /etc/hosts, and add key to known_hosts
5539 if self.cfg.GetClusterInfo().modify_etc_hosts:
5540 master_node = self.cfg.GetMasterNode()
5541 result = self.rpc.call_etc_hosts_modify(master_node,
5542 constants.ETC_HOSTS_ADD,
5543 self.hostname.name,
5544 self.hostname.ip)
5545 result.Raise("Can't update hosts file with new host data")
5547 if new_node.secondary_ip != new_node.primary_ip:
5548 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5549 False)
5551 node_verify_list = [self.cfg.GetMasterNode()]
5552 node_verify_param = {
5553 constants.NV_NODELIST: ([node], {}),
5554 # TODO: do a node-net-test as well?
5555 }
5557 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5558 self.cfg.GetClusterName())
5559 for verifier in node_verify_list:
5560 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5561 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5562 if nl_payload:
5563 for failed in nl_payload:
5564 feedback_fn("ssh/hostname verification failed"
5565 " (checking from %s): %s" %
5566 (verifier, nl_payload[failed]))
5567 raise errors.OpExecError("ssh/hostname verification failed")
5569 if self.op.readd:
5570 _RedistributeAncillaryFiles(self)
5571 self.context.ReaddNode(new_node)
5572 # make sure we redistribute the config
5573 self.cfg.Update(new_node, feedback_fn)
5574 # and make sure the new node will not have old files around
5575 if not new_node.master_candidate:
5576 result = self.rpc.call_node_demote_from_mc(new_node.name)
5577 msg = result.fail_msg
5578 if msg:
5579 self.LogWarning("Node failed to demote itself from master"
5580 " candidate status: %s" % msg)
5581 else:
5582 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5583 additional_vm=self.op.vm_capable)
5584 self.context.AddNode(new_node, self.proc.GetECId())
5587 class LUNodeSetParams(LogicalUnit):
5588 """Modifies the parameters of a node.
5590 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5591 to the node role (as _ROLE_*)
5592 @cvar _R2F: a dictionary from node role to tuples of flags
5593 @cvar _FLAGS: a list of attribute names corresponding to the flags
5595 """
5596 HPATH = "node-modify"
5597 HTYPE = constants.HTYPE_NODE
5598 REQ_BGL = False
5599 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5600 _F2R = {
5601 (True, False, False): _ROLE_CANDIDATE,
5602 (False, True, False): _ROLE_DRAINED,
5603 (False, False, True): _ROLE_OFFLINE,
5604 (False, False, False): _ROLE_REGULAR,
5605 }
5606 _R2F = dict((v, k) for k, v in _F2R.items())
5607 _FLAGS = ["master_candidate", "drained", "offline"]
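# Example (illustrative): flag tuples follow the _FLAGS order
# (master_candidate, drained, offline), so
#   _F2R[(False, True, False)] == _ROLE_DRAINED
#   _R2F[_ROLE_OFFLINE] == (False, False, True)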
5609 def CheckArguments(self):
5610 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5611 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5612 self.op.master_capable, self.op.vm_capable,
5613 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5614 self.op.disk_state]
5615 if all_mods.count(None) == len(all_mods):
5616 raise errors.OpPrereqError("Please pass at least one modification",
5617 errors.ECODE_INVAL)
5618 if all_mods.count(True) > 1:
5619 raise errors.OpPrereqError("Can't set the node into more than one"
5620 " state at the same time",
5623 # Boolean value that tells us whether we might be demoting from MC
5624 self.might_demote = (self.op.master_candidate == False or
5625 self.op.offline == True or
5626 self.op.drained == True or
5627 self.op.master_capable == False)
5629 if self.op.secondary_ip:
5630 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5631 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5632 " address" % self.op.secondary_ip,
5635 self.lock_all = self.op.auto_promote and self.might_demote
5636 self.lock_instances = self.op.secondary_ip is not None
5638 def _InstanceFilter(self, instance):
5639 """Filter for getting affected instances.
5642 return (instance.disk_template in constants.DTS_INT_MIRROR and
5643 self.op.node_name in instance.all_nodes)
5645 def ExpandNames(self):
5646 if self.lock_all:
5647 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5648 else:
5649 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5651 # Since modifying a node can have severe effects on currently running
5652 # operations the resource lock is at least acquired in shared mode
5653 self.needed_locks[locking.LEVEL_NODE_RES] = \
5654 self.needed_locks[locking.LEVEL_NODE]
5656 # Get node resource and instance locks in shared mode; they are not used
5657 # for anything but read-only access
5658 self.share_locks[locking.LEVEL_NODE_RES] = 1
5659 self.share_locks[locking.LEVEL_INSTANCE] = 1
5661 if self.lock_instances:
5662 self.needed_locks[locking.LEVEL_INSTANCE] = \
5663 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
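# Editorial sketch of the resulting lock declaration when a secondary IP
# change is requested (node and instance names are illustrative):
#
#   self.needed_locks == {
#     locking.LEVEL_NODE: "node1.example.com",
#     locking.LEVEL_NODE_RES: "node1.example.com",
#     locking.LEVEL_INSTANCE: frozenset(["inst1.example.com"]),
#   }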
5665 def BuildHooksEnv(self):
5668 This runs on the master node.
5672 "OP_TARGET": self.op.node_name,
5673 "MASTER_CANDIDATE": str(self.op.master_candidate),
5674 "OFFLINE": str(self.op.offline),
5675 "DRAINED": str(self.op.drained),
5676 "MASTER_CAPABLE": str(self.op.master_capable),
5677 "VM_CAPABLE": str(self.op.vm_capable),
5680 def BuildHooksNodes(self):
5681 """Build hooks nodes.
5684 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5685 return (nl, nl)
5687 def CheckPrereq(self):
5688 """Check prerequisites.
5690 This only checks the instance list against the existing names.
5693 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5695 if self.lock_instances:
5696 affected_instances = \
5697 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5699 # Verify instance locks
5700 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5701 wanted_instances = frozenset(affected_instances.keys())
5702 if wanted_instances - owned_instances:
5703 raise errors.OpPrereqError("Instances affected by changing node %s's"
5704 " secondary IP address have changed since"
5705 " locks were acquired, wanted '%s', have"
5706 " '%s'; retry the operation" %
5708 utils.CommaJoin(wanted_instances),
5709 utils.CommaJoin(owned_instances)),
5711 else:
5712 affected_instances = None
5714 if (self.op.master_candidate is not None or
5715 self.op.drained is not None or
5716 self.op.offline is not None):
5717 # we can't change the master's node flags
5718 if self.op.node_name == self.cfg.GetMasterNode():
5719 raise errors.OpPrereqError("The master role can be changed"
5720 " only via master-failover",
5723 if self.op.master_candidate and not node.master_capable:
5724 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5725 " it a master candidate" % node.name,
5728 if self.op.vm_capable == False:
5729 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5730 if ipri or isec:
5731 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5732 " the vm_capable flag" % node.name,
5733 errors.ECODE_STATE)
5735 if node.master_candidate and self.might_demote and not self.lock_all:
5736 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5737 # check if after removing the current node, we're missing master
5738 # candidates
5739 (mc_remaining, mc_should, _) = \
5740 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5741 if mc_remaining < mc_should:
5742 raise errors.OpPrereqError("Not enough master candidates, please"
5743 " pass auto promote option to allow"
5744 " promotion", errors.ECODE_STATE)
5746 self.old_flags = old_flags = (node.master_candidate,
5747 node.drained, node.offline)
5748 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5749 self.old_role = old_role = self._F2R[old_flags]
5751 # Check for ineffective changes
5752 for attr in self._FLAGS:
5753 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5754 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5755 setattr(self.op, attr, None)
5757 # Past this point, any flag change to False means a transition
5758 # away from the respective state, as only real changes are kept
5760 # TODO: We might query the real power state if it supports OOB
5761 if _SupportsOob(self.cfg, node):
5762 if self.op.offline is False and not (node.powered or
5763 self.op.powered == True):
5764 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5765 " offline status can be reset") %
5767 elif self.op.powered is not None:
5768 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5769 " as it does not support out-of-band"
5770 " handling") % self.op.node_name)
5772 # If we're being deofflined/drained, we'll MC ourself if needed
5773 if (self.op.drained == False or self.op.offline == False or
5774 (self.op.master_capable and not node.master_capable)):
5775 if _DecideSelfPromotion(self):
5776 self.op.master_candidate = True
5777 self.LogInfo("Auto-promoting node to master candidate")
5779 # If we're no longer master capable, we'll demote ourselves from MC
5780 if self.op.master_capable == False and node.master_candidate:
5781 self.LogInfo("Demoting from master candidate")
5782 self.op.master_candidate = False
5785 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5786 if self.op.master_candidate:
5787 new_role = self._ROLE_CANDIDATE
5788 elif self.op.drained:
5789 new_role = self._ROLE_DRAINED
5790 elif self.op.offline:
5791 new_role = self._ROLE_OFFLINE
5792 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5793 # False is still in new flags, which means we're un-setting (the
5794 # offline/drained) flags
5795 new_role = self._ROLE_REGULAR
5796 else: # no new flags, nothing, keep old role
5797 new_role = old_role
5799 self.new_role = new_role
5801 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5802 # Trying to transition out of offline status
5803 # TODO: Use standard RPC runner, but make sure it works when the node is
5804 # still marked offline
5805 result = rpc.BootstrapRunner().call_version([node.name])[node.name]
5806 if result.fail_msg:
5807 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5808 " to report its version: %s" %
5809 (node.name, result.fail_msg),
5810 errors.ECODE_STATE)
5811 else:
5812 self.LogWarning("Transitioning node from offline to online state"
5813 " without using re-add. Please make sure the node"
5814 " is healthy!")
5816 if self.op.secondary_ip:
5817 # Ok even without locking, because this can't be changed by any LU
5818 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5819 master_singlehomed = master.secondary_ip == master.primary_ip
5820 if master_singlehomed and self.op.secondary_ip:
5821 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5822 " homed cluster", errors.ECODE_INVAL)
5824 assert not (frozenset(affected_instances) -
5825 self.owned_locks(locking.LEVEL_INSTANCE))
5827 if node.offline:
5828 if affected_instances:
5829 raise errors.OpPrereqError("Cannot change secondary IP address:"
5830 " offline node has instances (%s)"
5831 " configured to use it" %
5832 utils.CommaJoin(affected_instances.keys()))
5833 else:
5834 # On online nodes, check that no instances are running, and that
5835 # the node has the new ip and we can reach it.
5836 for instance in affected_instances.values():
5837 _CheckInstanceState(self, instance, INSTANCE_DOWN,
5838 msg="cannot change secondary ip")
5840 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5841 if master.name != node.name:
5842 # check reachability from master secondary ip to new secondary ip
5843 if not netutils.TcpPing(self.op.secondary_ip,
5844 constants.DEFAULT_NODED_PORT,
5845 source=master.secondary_ip):
5846 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5847 " based ping to node daemon port",
5848 errors.ECODE_ENVIRON)
5850 if self.op.ndparams:
5851 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5852 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5853 self.new_ndparams = new_ndparams
5855 if self.op.hv_state:
5856 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
5857 self.node.hv_state_static)
5859 if self.op.disk_state:
5860 self.new_disk_state = \
5861 _MergeAndVerifyDiskState(self.op.disk_state,
5862 self.node.disk_state_static)
5864 def Exec(self, feedback_fn):
5865 """Modifies a node.
5867 """
5868 node = self.node
5869 old_role = self.old_role
5870 new_role = self.new_role
5872 result = []
5874 if self.op.ndparams:
5875 node.ndparams = self.new_ndparams
5877 if self.op.powered is not None:
5878 node.powered = self.op.powered
5880 if self.op.hv_state:
5881 node.hv_state_static = self.new_hv_state
5883 if self.op.disk_state:
5884 node.disk_state_static = self.new_disk_state
5886 for attr in ["master_capable", "vm_capable"]:
5887 val = getattr(self.op, attr)
5888 if val is not None:
5889 setattr(node, attr, val)
5890 result.append((attr, str(val)))
5892 if new_role != old_role:
5893 # Tell the node to demote itself, if no longer MC and not offline
5894 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5895 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5896 if msg:
5897 self.LogWarning("Node failed to demote itself: %s", msg)
5899 new_flags = self._R2F[new_role]
5900 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5901 if of != nf:
5902 result.append((desc, str(nf)))
5903 (node.master_candidate, node.drained, node.offline) = new_flags
5905 # we locked all nodes, we adjust the CP before updating this node
5906 if self.lock_all:
5907 _AdjustCandidatePool(self, [node.name])
5909 if self.op.secondary_ip:
5910 node.secondary_ip = self.op.secondary_ip
5911 result.append(("secondary_ip", self.op.secondary_ip))
5913 # this will trigger configuration file update, if needed
5914 self.cfg.Update(node, feedback_fn)
5916 # this will trigger job queue propagation or cleanup if the mc
5917 # flag changed
5918 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5919 self.context.ReaddNode(node)
5921 return result
5924 class LUNodePowercycle(NoHooksLU):
5925 """Powercycles a node.
5930 def CheckArguments(self):
5931 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5932 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5933 raise errors.OpPrereqError("The node is the master and the force"
5934 " parameter was not set",
5937 def ExpandNames(self):
5938 """Locking for PowercycleNode.
5940 This is a last-resort option and shouldn't block on other
5941 jobs. Therefore, we grab no locks.
5944 self.needed_locks = {}
5946 def Exec(self, feedback_fn):
5950 result = self.rpc.call_node_powercycle(self.op.node_name,
5951 self.cfg.GetHypervisorType())
5952 result.Raise("Failed to schedule the reboot")
5953 return result.payload
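# Editorial usage sketch (the opcode name is assumed from this LU's
# LUNodePowercycle naming; "force" is required only when targeting the
# master node):
#
#   op = opcodes.OpNodePowercycle(node_name="node1.example.com",
#                                 force=False)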
5956 class LUClusterQuery(NoHooksLU):
5957 """Query cluster configuration.
5962 def ExpandNames(self):
5963 self.needed_locks = {}
5965 def Exec(self, feedback_fn):
5966 """Return cluster config.
5969 cluster = self.cfg.GetClusterInfo()
5971 os_hvp = {}
5972 # Filter just for enabled hypervisors
5973 for os_name, hv_dict in cluster.os_hvp.items():
5974 os_hvp[os_name] = {}
5975 for hv_name, hv_params in hv_dict.items():
5976 if hv_name in cluster.enabled_hypervisors:
5977 os_hvp[os_name][hv_name] = hv_params
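# Editorial example of the filtering above: per-OS hypervisor overrides
# for disabled hypervisors are dropped, e.g.
#
#   cluster.os_hvp = {"debian": {"xen-pvm": {"kernel_path": "/vmlinuz"},
#                                "fake": {}}}
#   cluster.enabled_hypervisors = ["xen-pvm"]
#   # -> os_hvp == {"debian": {"xen-pvm": {"kernel_path": "/vmlinuz"}}}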
5979 # Convert ip_family to ip_version
5980 primary_ip_version = constants.IP4_VERSION
5981 if cluster.primary_ip_family == netutils.IP6Address.family:
5982 primary_ip_version = constants.IP6_VERSION
5985 "software_version": constants.RELEASE_VERSION,
5986 "protocol_version": constants.PROTOCOL_VERSION,
5987 "config_version": constants.CONFIG_VERSION,
5988 "os_api_version": max(constants.OS_API_VERSIONS),
5989 "export_version": constants.EXPORT_VERSION,
5990 "architecture": (platform.architecture()[0], platform.machine()),
5991 "name": cluster.cluster_name,
5992 "master": cluster.master_node,
5993 "default_hypervisor": cluster.primary_hypervisor,
5994 "enabled_hypervisors": cluster.enabled_hypervisors,
5995 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
5996 for hypervisor_name in cluster.enabled_hypervisors]),
5998 "beparams": cluster.beparams,
5999 "osparams": cluster.osparams,
6000 "ipolicy": cluster.ipolicy,
6001 "nicparams": cluster.nicparams,
6002 "ndparams": cluster.ndparams,
6003 "candidate_pool_size": cluster.candidate_pool_size,
6004 "master_netdev": cluster.master_netdev,
6005 "master_netmask": cluster.master_netmask,
6006 "use_external_mip_script": cluster.use_external_mip_script,
6007 "volume_group_name": cluster.volume_group_name,
6008 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6009 "file_storage_dir": cluster.file_storage_dir,
6010 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6011 "maintain_node_health": cluster.maintain_node_health,
6012 "ctime": cluster.ctime,
6013 "mtime": cluster.mtime,
6014 "uuid": cluster.uuid,
6015 "tags": list(cluster.GetTags()),
6016 "uid_pool": cluster.uid_pool,
6017 "default_iallocator": cluster.default_iallocator,
6018 "reserved_lvs": cluster.reserved_lvs,
6019 "primary_ip_version": primary_ip_version,
6020 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6021 "hidden_os": cluster.hidden_os,
6022 "blacklisted_os": cluster.blacklisted_os,
6028 class LUClusterConfigQuery(NoHooksLU):
6029 """Return configuration values.
6033 _FIELDS_DYNAMIC = utils.FieldSet()
6034 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
6035 "watcher_pause", "volume_group_name")
6037 def CheckArguments(self):
6038 _CheckOutputFields(static=self._FIELDS_STATIC,
6039 dynamic=self._FIELDS_DYNAMIC,
6040 selected=self.op.output_fields)
6042 def ExpandNames(self):
6043 self.needed_locks = {}
6045 def Exec(self, feedback_fn):
6046 """Dump a representation of the cluster config to the standard output.
6048 """
6049 values = []
6050 for field in self.op.output_fields:
6051 if field == "cluster_name":
6052 entry = self.cfg.GetClusterName()
6053 elif field == "master_node":
6054 entry = self.cfg.GetMasterNode()
6055 elif field == "drain_flag":
6056 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
6057 elif field == "watcher_pause":
6058 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
6059 elif field == "volume_group_name":
6060 entry = self.cfg.GetVGName()
6061 else:
6062 raise errors.ParameterError(field)
6063 values.append(entry)
6065 return values
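# Editorial example (opcode name assumed from this LU's naming; values
# are illustrative): the selected values come back in request order.
#
#   op = opcodes.OpClusterConfigQuery(output_fields=["cluster_name",
#                                                    "master_node"])
#   # -> ["cluster.example.com", "node1.example.com"]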
6067 class LUInstanceActivateDisks(NoHooksLU):
6068 """Bring up an instance's disks.
6073 def ExpandNames(self):
6074 self._ExpandAndLockInstance()
6075 self.needed_locks[locking.LEVEL_NODE] = []
6076 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6078 def DeclareLocks(self, level):
6079 if level == locking.LEVEL_NODE:
6080 self._LockInstancesNodes()
6082 def CheckPrereq(self):
6083 """Check prerequisites.
6085 This checks that the instance is in the cluster.
6088 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6089 assert self.instance is not None, \
6090 "Cannot retrieve locked instance %s" % self.op.instance_name
6091 _CheckNodeOnline(self, self.instance.primary_node)
6093 def Exec(self, feedback_fn):
6094 """Activate the disks.
6097 disks_ok, disks_info = \
6098 _AssembleInstanceDisks(self, self.instance,
6099 ignore_size=self.op.ignore_size)
6101 raise errors.OpExecError("Cannot activate block devices")
6106 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6108 """Prepare the block devices for an instance.
6110 This sets up the block devices on all nodes.
6112 @type lu: L{LogicalUnit}
6113 @param lu: the logical unit on whose behalf we execute
6114 @type instance: L{objects.Instance}
6115 @param instance: the instance for whose disks we assemble
6116 @type disks: list of L{objects.Disk} or None
6117 @param disks: which disks to assemble (or all, if None)
6118 @type ignore_secondaries: boolean
6119 @param ignore_secondaries: if true, errors on secondary nodes
6120 won't result in an error return from the function
6121 @type ignore_size: boolean
6122 @param ignore_size: if true, the current known size of the disk
6123 will not be used during the disk activation, useful for cases
6124 when the size is wrong
6125 @return: False if the operation failed, otherwise a list of
6126 (host, instance_visible_name, node_visible_name)
6127 with the mapping from node devices to instance devices
6129 """
6130 device_info = []
6131 disks_ok = True
6132 iname = instance.name
6133 disks = _ExpandCheckDisks(instance, disks)
6135 # With the two passes mechanism we try to reduce the window of
6136 # opportunity for the race condition of switching DRBD to primary
6137 # before handshaking occurred, but we do not eliminate it
6139 # The proper fix would be to wait (with some limits) until the
6140 # connection has been made and drbd transitions from WFConnection
6141 # into any other network-connected state (Connected, SyncTarget,
6144 # 1st pass, assemble on all nodes in secondary mode
6145 for idx, inst_disk in enumerate(disks):
6146 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6147 if ignore_size:
6148 node_disk = node_disk.Copy()
6149 node_disk.UnsetSize()
6150 lu.cfg.SetDiskID(node_disk, node)
6151 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
6152 msg = result.fail_msg
6153 if msg:
6154 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6155 " (is_primary=False, pass=1): %s",
6156 inst_disk.iv_name, node, msg)
6157 if not ignore_secondaries:
6158 return False
6160 # FIXME: race condition on drbd migration to primary
6162 # 2nd pass, do only the primary node
6163 for idx, inst_disk in enumerate(disks):
6164 dev_path = None
6166 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6167 if node != instance.primary_node:
6168 continue
6169 if ignore_size:
6170 node_disk = node_disk.Copy()
6171 node_disk.UnsetSize()
6172 lu.cfg.SetDiskID(node_disk, node)
6173 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
6174 msg = result.fail_msg
6175 if msg:
6176 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6177 " (is_primary=True, pass=2): %s",
6178 inst_disk.iv_name, node, msg)
6179 disks_ok = False
6180 else:
6181 dev_path = result.payload
6183 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6185 # leave the disks configured for the primary node
6186 # this is a workaround that would be fixed better by
6187 # improving the logical/physical id handling
6188 for disk in disks:
6189 lu.cfg.SetDiskID(disk, instance.primary_node)
6191 return disks_ok, device_info
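# Editorial sketch of consuming the return value above:
#
#   disks_ok, device_info = _AssembleInstanceDisks(lu, instance)
#   if not disks_ok:
#     raise errors.OpExecError("Cannot activate block devices")
#   for node, iv_name, dev_path in device_info:
#     lu.LogInfo("%s: %s assembled as %s", node, iv_name, dev_path)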
6194 def _StartInstanceDisks(lu, instance, force):
6195 """Start the disks of an instance.
6198 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6199 ignore_secondaries=force)
6200 if not disks_ok:
6201 _ShutdownInstanceDisks(lu, instance)
6202 if force is not None and not force:
6203 lu.proc.LogWarning("", hint="If the message above refers to a"
6205 " you can retry the operation using '--force'.")
6206 raise errors.OpExecError("Disk consistency error")
6209 class LUInstanceDeactivateDisks(NoHooksLU):
6210 """Shutdown an instance's disks.
6215 def ExpandNames(self):
6216 self._ExpandAndLockInstance()
6217 self.needed_locks[locking.LEVEL_NODE] = []
6218 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6220 def DeclareLocks(self, level):
6221 if level == locking.LEVEL_NODE:
6222 self._LockInstancesNodes()
6224 def CheckPrereq(self):
6225 """Check prerequisites.
6227 This checks that the instance is in the cluster.
6230 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6231 assert self.instance is not None, \
6232 "Cannot retrieve locked instance %s" % self.op.instance_name
6234 def Exec(self, feedback_fn):
6235 """Deactivate the disks
6238 instance = self.instance
6239 if self.op.force:
6240 _ShutdownInstanceDisks(self, instance)
6241 else:
6242 _SafeShutdownInstanceDisks(self, instance)
6245 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6246 """Shutdown block devices of an instance.
6248 This function checks if an instance is running, before calling
6249 _ShutdownInstanceDisks.
6252 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6253 _ShutdownInstanceDisks(lu, instance, disks=disks)
6256 def _ExpandCheckDisks(instance, disks):
6257 """Return the instance disks selected by the disks list
6259 @type disks: list of L{objects.Disk} or None
6260 @param disks: selected disks
6261 @rtype: list of L{objects.Disk}
6262 @return: selected instance disks to act on
6264 """
6265 if disks is None:
6266 return instance.disks
6268 if not set(disks).issubset(instance.disks):
6269 raise errors.ProgrammerError("Can only act on disks belonging to the"
6274 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6275 """Shutdown block devices of an instance.
6277 This does the shutdown on all nodes of the instance.
6279 If the ignore_primary is false, errors on the primary node are
6280 ignored.
6282 """
6283 all_result = True
6284 disks = _ExpandCheckDisks(instance, disks)
6286 for disk in disks:
6287 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6288 lu.cfg.SetDiskID(top_disk, node)
6289 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
6290 msg = result.fail_msg
6291 if msg:
6292 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6293 disk.iv_name, node, msg)
6294 if ((node == instance.primary_node and not ignore_primary) or
6295 (node != instance.primary_node and not result.offline)):
6296 all_result = False
6298 return all_result
6300 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6301 """Checks if a node has enough free memory.
6303 This function checks if a given node has the needed amount of free
6304 memory. In case the node has less memory or we cannot get the
6305 information from the node, this function raises an OpPrereqError
6308 @type lu: C{LogicalUnit}
6309 @param lu: a logical unit from which we get configuration data
6311 @param node: the node to check
6312 @type reason: C{str}
6313 @param reason: string to use in the error message
6314 @type requested: C{int}
6315 @param requested: the amount of memory in MiB to check for
6316 @type hypervisor_name: C{str}
6317 @param hypervisor_name: the hypervisor to ask for memory stats
6318 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6319 we cannot check the node
6322 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6323 nodeinfo[node].Raise("Can't get data from node %s" % node,
6324 prereq=True, ecode=errors.ECODE_ENVIRON)
6325 (_, _, (hv_info, )) = nodeinfo[node].payload
6327 free_mem = hv_info.get("memory_free", None)
6328 if not isinstance(free_mem, int):
6329 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6330 " was '%s'" % (node, free_mem),
6331 errors.ECODE_ENVIRON)
6332 if requested > free_mem:
6333 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6334 " needed %s MiB, available %s MiB" %
6335 (node, reason, requested, free_mem),
6336 errors.ECODE_NORES)
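# Editorial usage sketch, mirroring the call sites in this module (the
# variable names are those used by the callers, e.g. LUInstanceStartup):
#
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MAXMEM], instance.hypervisor)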
6339 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6340 """Checks if nodes have enough free disk space in the all VGs.
6342 This function checks if all given nodes have the needed amount of
6343 free disk. In case any node has less disk or we cannot get the
6344 information from the node, this function raises an OpPrereqError
6347 @type lu: C{LogicalUnit}
6348 @param lu: a logical unit from which we get configuration data
6349 @type nodenames: C{list}
6350 @param nodenames: the list of node names to check
6351 @type req_sizes: C{dict}
6352 @param req_sizes: the hash of vg and corresponding amount of disk in
6354 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6355 or we cannot check the node
6358 for vg, req_size in req_sizes.items():
6359 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
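# Editorial example of the req_sizes mapping consumed above (VG names
# and sizes are illustrative):
#
#   req_sizes = {"xenvg": 10240, "fastvg": 2048}   # VG name -> MiB
#   _CheckNodesFreeDiskPerVG(self, ["node1", "node2"], req_sizes)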
6362 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6363 """Checks if nodes have enough free disk space in the specified VG.
6365 This function checks if all given nodes have the needed amount of
6366 free disk. In case any node has less disk or we cannot get the
6367 information from the node, this function raises an OpPrereqError
6370 @type lu: C{LogicalUnit}
6371 @param lu: a logical unit from which we get configuration data
6372 @type nodenames: C{list}
6373 @param nodenames: the list of node names to check
6375 @param vg: the volume group to check
6376 @type requested: C{int}
6377 @param requested: the amount of disk in MiB to check for
6378 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6379 or we cannot check the node
6382 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6383 for node in nodenames:
6384 info = nodeinfo[node]
6385 info.Raise("Cannot get current information from node %s" % node,
6386 prereq=True, ecode=errors.ECODE_ENVIRON)
6387 (_, (vg_info, ), _) = info.payload
6388 vg_free = vg_info.get("vg_free", None)
6389 if not isinstance(vg_free, int):
6390 raise errors.OpPrereqError("Can't compute free disk space on node"
6391 " %s for vg %s, result was '%s'" %
6392 (node, vg, vg_free), errors.ECODE_ENVIRON)
6393 if requested > vg_free:
6394 raise errors.OpPrereqError("Not enough disk space on target node %s"
6395 " vg %s: required %d MiB, available %d MiB" %
6396 (node, vg, requested, vg_free),
6397 errors.ECODE_NORES)
6400 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6401 """Checks if nodes have enough physical CPUs
6403 This function checks if all given nodes have the needed number of
6404 physical CPUs. In case any node has fewer CPUs or we cannot get the
6405 information from the node, this function raises an OpPrereqError
6408 @type lu: C{LogicalUnit}
6409 @param lu: a logical unit from which we get configuration data
6410 @type nodenames: C{list}
6411 @param nodenames: the list of node names to check
6412 @type requested: C{int}
6413 @param requested: the minimum acceptable number of physical CPUs
6414 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6415 or we cannot check the node
6418 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6419 for node in nodenames:
6420 info = nodeinfo[node]
6421 info.Raise("Cannot get current information from node %s" % node,
6422 prereq=True, ecode=errors.ECODE_ENVIRON)
6423 (_, _, (hv_info, )) = info.payload
6424 num_cpus = hv_info.get("cpu_total", None)
6425 if not isinstance(num_cpus, int):
6426 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6427 " on node %s, result was '%s'" %
6428 (node, num_cpus), errors.ECODE_ENVIRON)
6429 if requested > num_cpus:
6430 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6431 "required" % (node, num_cpus, requested),
6435 class LUInstanceStartup(LogicalUnit):
6436 """Starts an instance.
6439 HPATH = "instance-start"
6440 HTYPE = constants.HTYPE_INSTANCE
6443 def CheckArguments(self):
6445 if self.op.beparams:
6446 # fill the beparams dict
6447 objects.UpgradeBeParams(self.op.beparams)
6448 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6450 def ExpandNames(self):
6451 self._ExpandAndLockInstance()
6453 def BuildHooksEnv(self):
6456 This runs on master, primary and secondary nodes of the instance.
6460 "FORCE": self.op.force,
6463 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6465 return env
6467 def BuildHooksNodes(self):
6468 """Build hooks nodes.
6471 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6472 return (nl, nl)
6474 def CheckPrereq(self):
6475 """Check prerequisites.
6477 This checks that the instance is in the cluster.
6480 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6481 assert self.instance is not None, \
6482 "Cannot retrieve locked instance %s" % self.op.instance_name
6485 if self.op.hvparams:
6486 # check hypervisor parameter syntax (locally)
6487 cluster = self.cfg.GetClusterInfo()
6488 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6489 filled_hvp = cluster.FillHV(instance)
6490 filled_hvp.update(self.op.hvparams)
6491 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6492 hv_type.CheckParameterSyntax(filled_hvp)
6493 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6495 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6497 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6499 if self.primary_offline and self.op.ignore_offline_nodes:
6500 self.proc.LogWarning("Ignoring offline primary node")
6502 if self.op.hvparams or self.op.beparams:
6503 self.proc.LogWarning("Overridden parameters are ignored")
6504 else:
6505 _CheckNodeOnline(self, instance.primary_node)
6507 bep = self.cfg.GetClusterInfo().FillBE(instance)
6508 bep.update(self.op.beparams)
6510 # check bridges existence
6511 _CheckInstanceBridgesExist(self, instance)
6513 remote_info = self.rpc.call_instance_info(instance.primary_node,
6515 instance.hypervisor)
6516 remote_info.Raise("Error checking node %s" % instance.primary_node,
6517 prereq=True, ecode=errors.ECODE_ENVIRON)
6518 if not remote_info.payload: # not running already
6519 _CheckNodeFreeMemory(self, instance.primary_node,
6520 "starting instance %s" % instance.name,
6521 bep[constants.BE_MAXMEM], instance.hypervisor)
6523 def Exec(self, feedback_fn):
6524 """Start the instance.
6527 instance = self.instance
6528 force = self.op.force
6530 if not self.op.no_remember:
6531 self.cfg.MarkInstanceUp(instance.name)
6533 if self.primary_offline:
6534 assert self.op.ignore_offline_nodes
6535 self.proc.LogInfo("Primary node offline, marked instance as started")
6536 else:
6537 node_current = instance.primary_node
6539 _StartInstanceDisks(self, instance, force)
6541 result = \
6542 self.rpc.call_instance_start(node_current,
6543 (instance, self.op.hvparams,
6544 self.op.beparams),
6545 self.op.startup_paused)
6546 msg = result.fail_msg
6547 if msg:
6548 _ShutdownInstanceDisks(self, instance)
6549 raise errors.OpExecError("Could not start instance: %s" % msg)
6552 class LUInstanceReboot(LogicalUnit):
6553 """Reboot an instance.
6556 HPATH = "instance-reboot"
6557 HTYPE = constants.HTYPE_INSTANCE
6560 def ExpandNames(self):
6561 self._ExpandAndLockInstance()
6563 def BuildHooksEnv(self):
6566 This runs on master, primary and secondary nodes of the instance.
6570 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6571 "REBOOT_TYPE": self.op.reboot_type,
6572 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6575 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6577 return env
6579 def BuildHooksNodes(self):
6580 """Build hooks nodes.
6583 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6584 return (nl, nl)
6586 def CheckPrereq(self):
6587 """Check prerequisites.
6589 This checks that the instance is in the cluster.
6592 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6593 assert self.instance is not None, \
6594 "Cannot retrieve locked instance %s" % self.op.instance_name
6595 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6596 _CheckNodeOnline(self, instance.primary_node)
6598 # check bridges existence
6599 _CheckInstanceBridgesExist(self, instance)
6601 def Exec(self, feedback_fn):
6602 """Reboot the instance.
6605 instance = self.instance
6606 ignore_secondaries = self.op.ignore_secondaries
6607 reboot_type = self.op.reboot_type
6609 remote_info = self.rpc.call_instance_info(instance.primary_node,
6611 instance.hypervisor)
6612 remote_info.Raise("Error checking node %s" % instance.primary_node)
6613 instance_running = bool(remote_info.payload)
6615 node_current = instance.primary_node
6617 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6618 constants.INSTANCE_REBOOT_HARD]:
6619 for disk in instance.disks:
6620 self.cfg.SetDiskID(disk, node_current)
6621 result = self.rpc.call_instance_reboot(node_current, instance,
6622 reboot_type,
6623 self.op.shutdown_timeout)
6624 result.Raise("Could not reboot instance")
6625 else:
6626 if instance_running:
6627 result = self.rpc.call_instance_shutdown(node_current, instance,
6628 self.op.shutdown_timeout)
6629 result.Raise("Could not shutdown instance for full reboot")
6630 _ShutdownInstanceDisks(self, instance)
6632 self.LogInfo("Instance %s was already stopped, starting now",
6634 _StartInstanceDisks(self, instance, ignore_secondaries)
6635 result = self.rpc.call_instance_start(node_current,
6636 (instance, None, None), False)
6637 msg = result.fail_msg
6638 if msg:
6639 _ShutdownInstanceDisks(self, instance)
6640 raise errors.OpExecError("Could not start instance for"
6641 " full reboot: %s" % msg)
6643 self.cfg.MarkInstanceUp(instance.name)
6646 class LUInstanceShutdown(LogicalUnit):
6647 """Shutdown an instance.
6650 HPATH = "instance-stop"
6651 HTYPE = constants.HTYPE_INSTANCE
6654 def ExpandNames(self):
6655 self._ExpandAndLockInstance()
6657 def BuildHooksEnv(self):
6660 This runs on master, primary and secondary nodes of the instance.
6663 env = _BuildInstanceHookEnvByObject(self, self.instance)
6664 env["TIMEOUT"] = self.op.timeout
6667 def BuildHooksNodes(self):
6668 """Build hooks nodes.
6671 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6672 return (nl, nl)
6674 def CheckPrereq(self):
6675 """Check prerequisites.
6677 This checks that the instance is in the cluster.
6680 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6681 assert self.instance is not None, \
6682 "Cannot retrieve locked instance %s" % self.op.instance_name
6684 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6686 self.primary_offline = \
6687 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6689 if self.primary_offline and self.op.ignore_offline_nodes:
6690 self.proc.LogWarning("Ignoring offline primary node")
6691 else:
6692 _CheckNodeOnline(self, self.instance.primary_node)
6694 def Exec(self, feedback_fn):
6695 """Shutdown the instance.
6698 instance = self.instance
6699 node_current = instance.primary_node
6700 timeout = self.op.timeout
6702 if not self.op.no_remember:
6703 self.cfg.MarkInstanceDown(instance.name)
6705 if self.primary_offline:
6706 assert self.op.ignore_offline_nodes
6707 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6708 else:
6709 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6710 msg = result.fail_msg
6711 if msg:
6712 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6714 _ShutdownInstanceDisks(self, instance)
6717 class LUInstanceReinstall(LogicalUnit):
6718 """Reinstall an instance.
6721 HPATH = "instance-reinstall"
6722 HTYPE = constants.HTYPE_INSTANCE
6725 def ExpandNames(self):
6726 self._ExpandAndLockInstance()
6728 def BuildHooksEnv(self):
6731 This runs on master, primary and secondary nodes of the instance.
6734 return _BuildInstanceHookEnvByObject(self, self.instance)
6736 def BuildHooksNodes(self):
6737 """Build hooks nodes.
6740 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6741 return (nl, nl)
6743 def CheckPrereq(self):
6744 """Check prerequisites.
6746 This checks that the instance is in the cluster and is not running.
6749 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6750 assert instance is not None, \
6751 "Cannot retrieve locked instance %s" % self.op.instance_name
6752 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6753 " offline, cannot reinstall")
6754 for node in instance.secondary_nodes:
6755 _CheckNodeOnline(self, node, "Instance secondary node offline,"
6756 " cannot reinstall")
6758 if instance.disk_template == constants.DT_DISKLESS:
6759 raise errors.OpPrereqError("Instance '%s' has no disks" %
6760 self.op.instance_name,
6761 errors.ECODE_INVAL)
6762 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
6764 if self.op.os_type is not None:
6766 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6767 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6768 instance_os = self.op.os_type
6769 else:
6770 instance_os = instance.os
6772 nodelist = list(instance.all_nodes)
6774 if self.op.osparams:
6775 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6776 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6777 self.os_inst = i_osdict # the new dict (without defaults)
6781 self.instance = instance
6783 def Exec(self, feedback_fn):
6784 """Reinstall the instance.
6787 inst = self.instance
6789 if self.op.os_type is not None:
6790 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6791 inst.os = self.op.os_type
6792 # Write to configuration
6793 self.cfg.Update(inst, feedback_fn)
6795 _StartInstanceDisks(self, inst, None)
6797 feedback_fn("Running the instance OS create scripts...")
6798 # FIXME: pass debug option from opcode to backend
6799 result = self.rpc.call_instance_os_add(inst.primary_node,
6800 (inst, self.os_inst), True,
6801 self.op.debug_level)
6802 result.Raise("Could not install OS for instance %s on node %s" %
6803 (inst.name, inst.primary_node))
6805 _ShutdownInstanceDisks(self, inst)
6808 class LUInstanceRecreateDisks(LogicalUnit):
6809 """Recreate an instance's missing disks.
6812 HPATH = "instance-recreate-disks"
6813 HTYPE = constants.HTYPE_INSTANCE
6816 def CheckArguments(self):
6817 # normalise the disk list
6818 self.op.disks = sorted(frozenset(self.op.disks))
6820 def ExpandNames(self):
6821 self._ExpandAndLockInstance()
6822 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6823 if self.op.nodes:
6824 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6825 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6826 else:
6827 self.needed_locks[locking.LEVEL_NODE] = []
6829 def DeclareLocks(self, level):
6830 if level == locking.LEVEL_NODE:
6831 # if we replace the nodes, we only need to lock the old primary,
6832 # otherwise we need to lock all nodes for disk re-creation
6833 primary_only = bool(self.op.nodes)
6834 self._LockInstancesNodes(primary_only=primary_only)
6835 elif level == locking.LEVEL_NODE_RES:
6837 self.needed_locks[locking.LEVEL_NODE_RES] = \
6838 self.needed_locks[locking.LEVEL_NODE][:]
6840 def BuildHooksEnv(self):
6843 This runs on master, primary and secondary nodes of the instance.
6846 return _BuildInstanceHookEnvByObject(self, self.instance)
6848 def BuildHooksNodes(self):
6849 """Build hooks nodes.
6852 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6853 return (nl, nl)
6855 def CheckPrereq(self):
6856 """Check prerequisites.
6858 This checks that the instance is in the cluster and is not running.
6861 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6862 assert instance is not None, \
6863 "Cannot retrieve locked instance %s" % self.op.instance_name
6864 if self.op.nodes:
6865 if len(self.op.nodes) != len(instance.all_nodes):
6866 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6867 " %d replacement nodes were specified" %
6868 (instance.name, len(instance.all_nodes),
6869 len(self.op.nodes)),
6870 errors.ECODE_INVAL)
6871 assert instance.disk_template != constants.DT_DRBD8 or \
6872 len(self.op.nodes) == 2
6873 assert instance.disk_template != constants.DT_PLAIN or \
6874 len(self.op.nodes) == 1
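# Editorial note: the asserts above encode the disk-template invariants
# relied upon elsewhere in this module: DRBD8 instances span exactly two
# nodes, plain (LVM) instances exactly one.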
6875 primary_node = self.op.nodes[0]
6876 else:
6877 primary_node = instance.primary_node
6878 _CheckNodeOnline(self, primary_node)
6880 if instance.disk_template == constants.DT_DISKLESS:
6881 raise errors.OpPrereqError("Instance '%s' has no disks" %
6882 self.op.instance_name, errors.ECODE_INVAL)
6883 # if we replace nodes *and* the old primary is offline, we don't
6885 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
6886 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
6887 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6888 if not (self.op.nodes and old_pnode.offline):
6889 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
6890 msg="cannot recreate disks")
6892 if not self.op.disks:
6893 self.op.disks = range(len(instance.disks))
6894 else:
6895 for idx in self.op.disks:
6896 if idx >= len(instance.disks):
6897 raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6899 if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6900 raise errors.OpPrereqError("Can't recreate disks partially and"
6901 " change the nodes at the same time",
6903 self.instance = instance
6905 def Exec(self, feedback_fn):
6906 """Recreate the disks.
6909 instance = self.instance
6911 assert (self.owned_locks(locking.LEVEL_NODE) ==
6912 self.owned_locks(locking.LEVEL_NODE_RES))
6914 to_skip = []
6915 mods = [] # keeps track of needed logical_id changes
6917 for idx, disk in enumerate(instance.disks):
6918 if idx not in self.op.disks: # disk idx has not been passed in
6919 to_skip.append(idx)
6920 continue
6921 # update secondaries for disks, if needed
6922 if self.op.nodes:
6923 if disk.dev_type == constants.LD_DRBD8:
6924 # need to update the nodes and minors
6925 assert len(self.op.nodes) == 2
6926 assert len(disk.logical_id) == 6 # otherwise disk internals
6928 (_, _, old_port, _, _, old_secret) = disk.logical_id
6929 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6930 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6931 new_minors[0], new_minors[1], old_secret)
6932 assert len(disk.logical_id) == len(new_id)
6933 mods.append((idx, new_id))
6935 # now that we have passed all asserts above, we can apply the mods
6936 # in a single run (to avoid partial changes)
6937 for idx, new_id in mods:
6938 instance.disks[idx].logical_id = new_id
6940 # change primary node, if needed
6941 if self.op.nodes:
6942 instance.primary_node = self.op.nodes[0]
6943 self.LogWarning("Changing the instance's nodes, you will have to"
6944 " remove any disks left on the older nodes manually")
6947 self.cfg.Update(instance, feedback_fn)
6949 _CreateDisks(self, instance, to_skip=to_skip)
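# Editorial sketch of the DRBD8 logical_id tuple rewritten above (the
# field names are for illustration only):
#
#   (node_a, node_b, port, minor_a, minor_b, secret) = disk.logical_id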
6952 class LUInstanceRename(LogicalUnit):
6953 """Rename an instance.
6956 HPATH = "instance-rename"
6957 HTYPE = constants.HTYPE_INSTANCE
6959 def CheckArguments(self):
6963 if self.op.ip_check and not self.op.name_check:
6964 # TODO: make the ip check more flexible and not depend on the name check
6965 raise errors.OpPrereqError("IP address check requires a name check",
6968 def BuildHooksEnv(self):
6971 This runs on master, primary and secondary nodes of the instance.
6974 env = _BuildInstanceHookEnvByObject(self, self.instance)
6975 env["INSTANCE_NEW_NAME"] = self.op.new_name
6978 def BuildHooksNodes(self):
6979 """Build hooks nodes.
6982 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6983 return (nl, nl)
6985 def CheckPrereq(self):
6986 """Check prerequisites.
6988 This checks that the instance is in the cluster and is not running.
6991 self.op.instance_name = _ExpandInstanceName(self.cfg,
6992 self.op.instance_name)
6993 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6994 assert instance is not None
6995 _CheckNodeOnline(self, instance.primary_node)
6996 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
6997 msg="cannot rename")
6998 self.instance = instance
7000 new_name = self.op.new_name
7001 if self.op.name_check:
7002 hostname = netutils.GetHostname(name=new_name)
7003 if hostname.name != new_name:
7004 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
7006 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
7007 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
7008 " same as given hostname '%s'") %
7009 (hostname.name, self.op.new_name),
7011 new_name = self.op.new_name = hostname.name
7012 if (self.op.ip_check and
7013 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7014 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7015 (hostname.ip, new_name),
7016 errors.ECODE_NOTUNIQUE)
7018 instance_list = self.cfg.GetInstanceList()
7019 if new_name in instance_list and new_name != instance.name:
7020 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7021 new_name, errors.ECODE_EXISTS)
7023 def Exec(self, feedback_fn):
7024 """Rename the instance.
7027 inst = self.instance
7028 old_name = inst.name
7030 rename_file_storage = False
7031 if (inst.disk_template in constants.DTS_FILEBASED and
7032 self.op.new_name != inst.name):
7033 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7034 rename_file_storage = True
7036 self.cfg.RenameInstance(inst.name, self.op.new_name)
7037 # Change the instance lock. This is definitely safe while we hold the BGL.
7038 # Otherwise the new lock would have to be added in acquired mode.
7040 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7041 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7043 # re-read the instance from the configuration after rename
7044 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7046 if rename_file_storage:
7047 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7048 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7049 old_file_storage_dir,
7050 new_file_storage_dir)
7051 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7052 " (but the instance has been renamed in Ganeti)" %
7053 (inst.primary_node, old_file_storage_dir,
7054 new_file_storage_dir))
7056 _StartInstanceDisks(self, inst, None)
7057 try:
7058 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7059 old_name, self.op.debug_level)
7060 msg = result.fail_msg
7062 msg = ("Could not run OS rename script for instance %s on node %s"
7063 " (but the instance has been renamed in Ganeti): %s" %
7064 (inst.name, inst.primary_node, msg))
7065 self.proc.LogWarning(msg)
7066 finally:
7067 _ShutdownInstanceDisks(self, inst)
7069 return inst.name
7072 class LUInstanceRemove(LogicalUnit):
7073 """Remove an instance.
7076 HPATH = "instance-remove"
7077 HTYPE = constants.HTYPE_INSTANCE
7080 def ExpandNames(self):
7081 self._ExpandAndLockInstance()
7082 self.needed_locks[locking.LEVEL_NODE] = []
7083 self.needed_locks[locking.LEVEL_NODE_RES] = []
7084 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7086 def DeclareLocks(self, level):
7087 if level == locking.LEVEL_NODE:
7088 self._LockInstancesNodes()
7089 elif level == locking.LEVEL_NODE_RES:
7091 self.needed_locks[locking.LEVEL_NODE_RES] = \
7092 self.needed_locks[locking.LEVEL_NODE][:]
7094 def BuildHooksEnv(self):
7097 This runs on master, primary and secondary nodes of the instance.
7100 env = _BuildInstanceHookEnvByObject(self, self.instance)
7101 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7104 def BuildHooksNodes(self):
7105 """Build hooks nodes.
7108 nl = [self.cfg.GetMasterNode()]
7109 nl_post = list(self.instance.all_nodes) + nl
7110 return (nl, nl_post)
7112 def CheckPrereq(self):
7113 """Check prerequisites.
7115 This checks that the instance is in the cluster.
7118 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7119 assert self.instance is not None, \
7120 "Cannot retrieve locked instance %s" % self.op.instance_name
7122 def Exec(self, feedback_fn):
7123 """Remove the instance.
7126 instance = self.instance
7127 logging.info("Shutting down instance %s on node %s",
7128 instance.name, instance.primary_node)
7130 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7131 self.op.shutdown_timeout)
7132 msg = result.fail_msg
7134 if self.op.ignore_failures:
7135 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7137 raise errors.OpExecError("Could not shutdown instance %s on"
7139 (instance.name, instance.primary_node, msg))
7141 assert (self.owned_locks(locking.LEVEL_NODE) ==
7142 self.owned_locks(locking.LEVEL_NODE_RES))
7143 assert not (set(instance.all_nodes) -
7144 self.owned_locks(locking.LEVEL_NODE)), \
7145 "Not owning correct locks"
7147 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7150 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7151 """Utility function to remove an instance.
7154 logging.info("Removing block devices for instance %s", instance.name)
7156 if not _RemoveDisks(lu, instance):
7157 if not ignore_failures:
7158 raise errors.OpExecError("Can't remove instance's disks")
7159 feedback_fn("Warning: can't remove instance's disks")
7161 logging.info("Removing instance %s out of cluster config", instance.name)
7163 lu.cfg.RemoveInstance(instance.name)
7165 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7166 "Instance lock removal conflict"
7168 # Remove lock for the instance
7169 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7172 class LUInstanceQuery(NoHooksLU):
7173 """Logical unit for querying instances.
7176 # pylint: disable=W0142
7179 def CheckArguments(self):
7180 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7181 self.op.output_fields, self.op.use_locking)
7183 def ExpandNames(self):
7184 self.iq.ExpandNames(self)
7186 def DeclareLocks(self, level):
7187 self.iq.DeclareLocks(self, level)
7189 def Exec(self, feedback_fn):
7190 return self.iq.OldStyleQuery(self)
7193 class LUInstanceFailover(LogicalUnit):
7194 """Failover an instance.
7197 HPATH = "instance-failover"
7198 HTYPE = constants.HTYPE_INSTANCE
7201 def CheckArguments(self):
7202 """Check the arguments.
7205 self.iallocator = getattr(self.op, "iallocator", None)
7206 self.target_node = getattr(self.op, "target_node", None)
7208 def ExpandNames(self):
7209 self._ExpandAndLockInstance()
7211 if self.op.target_node is not None:
7212 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7214 self.needed_locks[locking.LEVEL_NODE] = []
7215 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7217 ignore_consistency = self.op.ignore_consistency
7218 shutdown_timeout = self.op.shutdown_timeout
7219 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7222 ignore_consistency=ignore_consistency,
7223 shutdown_timeout=shutdown_timeout,
7224 ignore_ipolicy=self.op.ignore_ipolicy)
7225 self.tasklets = [self._migrater]
7227 def DeclareLocks(self, level):
7228 if level == locking.LEVEL_NODE:
7229 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7230 if instance.disk_template in constants.DTS_EXT_MIRROR:
7231 if self.op.target_node is None:
7232 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7234 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7235 self.op.target_node]
7236 del self.recalculate_locks[locking.LEVEL_NODE]
7238 self._LockInstancesNodes()
7240 def BuildHooksEnv(self):
7243 This runs on master, primary and secondary nodes of the instance.
7246 instance = self._migrater.instance
7247 source_node = instance.primary_node
7248 target_node = self.op.target_node
7250 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7251 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7252 "OLD_PRIMARY": source_node,
7253 "NEW_PRIMARY": target_node,
7256 if instance.disk_template in constants.DTS_INT_MIRROR:
7257 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7258 env["NEW_SECONDARY"] = source_node
7260 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7262 env.update(_BuildInstanceHookEnvByObject(self, instance))
7264 return env
7266 def BuildHooksNodes(self):
7267 """Build hooks nodes.
7270 instance = self._migrater.instance
7271 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7272 return (nl, nl + [instance.primary_node])
7275 class LUInstanceMigrate(LogicalUnit):
7276 """Migrate an instance.
7278 This is migration without shutting down, compared to the failover,
7279 which is done with shutdown.
7282 HPATH = "instance-migrate"
7283 HTYPE = constants.HTYPE_INSTANCE
7286 def ExpandNames(self):
7287 self._ExpandAndLockInstance()
7289 if self.op.target_node is not None:
7290 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7292 self.needed_locks[locking.LEVEL_NODE] = []
7293 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7295 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7296 cleanup=self.op.cleanup,
7298 fallback=self.op.allow_failover,
7299 ignore_ipolicy=self.op.ignore_ipolicy)
7300 self.tasklets = [self._migrater]
7302 def DeclareLocks(self, level):
7303 if level == locking.LEVEL_NODE:
7304 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7305 if instance.disk_template in constants.DTS_EXT_MIRROR:
7306 if self.op.target_node is None:
7307 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7309 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7310 self.op.target_node]
7311 del self.recalculate_locks[locking.LEVEL_NODE]
7313 self._LockInstancesNodes()
7315 def BuildHooksEnv(self):
7318 This runs on master, primary and secondary nodes of the instance.
7321 instance = self._migrater.instance
7322 source_node = instance.primary_node
7323 target_node = self.op.target_node
7324 env = _BuildInstanceHookEnvByObject(self, instance)
7326 "MIGRATE_LIVE": self._migrater.live,
7327 "MIGRATE_CLEANUP": self.op.cleanup,
7328 "OLD_PRIMARY": source_node,
7329 "NEW_PRIMARY": target_node,
7332 if instance.disk_template in constants.DTS_INT_MIRROR:
7333 env["OLD_SECONDARY"] = target_node
7334 env["NEW_SECONDARY"] = source_node
7336 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7340 def BuildHooksNodes(self):
7341 """Build hooks nodes.
7344 instance = self._migrater.instance
7345 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7346 return (nl, nl + [instance.primary_node])
7349 class LUInstanceMove(LogicalUnit):
7350 """Move an instance by data-copying.
7353 HPATH = "instance-move"
7354 HTYPE = constants.HTYPE_INSTANCE
7357 def ExpandNames(self):
7358 self._ExpandAndLockInstance()
7359 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7360 self.op.target_node = target_node
7361 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7362 self.needed_locks[locking.LEVEL_NODE_RES] = []
7363 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7365 def DeclareLocks(self, level):
7366 if level == locking.LEVEL_NODE:
7367 self._LockInstancesNodes(primary_only=True)
7368 elif level == locking.LEVEL_NODE_RES:
7370 self.needed_locks[locking.LEVEL_NODE_RES] = \
7371 self.needed_locks[locking.LEVEL_NODE][:]
7373 def BuildHooksEnv(self):
7376 This runs on master, primary and secondary nodes of the instance.
7380 "TARGET_NODE": self.op.target_node,
7381 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7383 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7386 def BuildHooksNodes(self):
7387 """Build hooks nodes.
7390 nl = [
7391 self.cfg.GetMasterNode(),
7392 self.instance.primary_node,
7393 self.op.target_node,
7394 ]
7395 return (nl, nl)
7397 def CheckPrereq(self):
7398 """Check prerequisites.
7400 This checks that the instance is in the cluster.
7403 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7404 assert self.instance is not None, \
7405 "Cannot retrieve locked instance %s" % self.op.instance_name
7407 node = self.cfg.GetNodeInfo(self.op.target_node)
7408 assert node is not None, \
7409 "Cannot retrieve locked node %s" % self.op.target_node
7411 self.target_node = target_node = node.name
7413 if target_node == instance.primary_node:
7414 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7415 (instance.name, target_node),
7416 errors.ECODE_STATE)
7418 bep = self.cfg.GetClusterInfo().FillBE(instance)
7420 for idx, dsk in enumerate(instance.disks):
7421 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7422 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7423 " cannot copy" % idx, errors.ECODE_STATE)
7425 _CheckNodeOnline(self, target_node)
7426 _CheckNodeNotDrained(self, target_node)
7427 _CheckNodeVmCapable(self, target_node)
7428 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(),
7429 self.cfg.GetNodeGroup(node.group))
7430 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
7431 ignore=self.op.ignore_ipolicy)
7433 if instance.admin_state == constants.ADMINST_UP:
7434 # check memory requirements on the secondary node
7435 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7436 instance.name, bep[constants.BE_MAXMEM],
7437 instance.hypervisor)
7439 self.LogInfo("Not checking memory on the secondary node as"
7440 " instance will not be started")
7442 # check bridge existence
7443 _CheckInstanceBridgesExist(self, instance, node=target_node)
7445 def Exec(self, feedback_fn):
7446 """Move an instance.
7448 The move is done by shutting it down on its present node, copying
7449 the data over (slow) and starting it on the new node.
7452 instance = self.instance
7454 source_node = instance.primary_node
7455 target_node = self.target_node
7457 self.LogInfo("Shutting down instance %s on source node %s",
7458 instance.name, source_node)
7460 assert (self.owned_locks(locking.LEVEL_NODE) ==
7461 self.owned_locks(locking.LEVEL_NODE_RES))
7463 result = self.rpc.call_instance_shutdown(source_node, instance,
7464 self.op.shutdown_timeout)
7465 msg = result.fail_msg
7466 if msg:
7467 if self.op.ignore_consistency:
7468 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7469 " Proceeding anyway. Please make sure node"
7470 " %s is down. Error details: %s",
7471 instance.name, source_node, source_node, msg)
7473 raise errors.OpExecError("Could not shutdown instance %s on"
7475 (instance.name, source_node, msg))
7477 # create the target disks
7478 try:
7479 _CreateDisks(self, instance, target_node=target_node)
7480 except errors.OpExecError:
7481 self.LogWarning("Device creation failed, reverting...")
7482 try:
7483 _RemoveDisks(self, instance, target_node=target_node)
7484 finally:
7485 self.cfg.ReleaseDRBDMinors(instance.name)
7486 raise
7488 cluster_name = self.cfg.GetClusterInfo().cluster_name
7490 errs = []
7491 # activate, get path, copy the data over
7492 for idx, disk in enumerate(instance.disks):
7493 self.LogInfo("Copying data for disk %d", idx)
7494 result = self.rpc.call_blockdev_assemble(target_node, disk,
7495 instance.name, True, idx)
7496 if result.fail_msg:
7497 self.LogWarning("Can't assemble newly created disk %d: %s",
7498 idx, result.fail_msg)
7499 errs.append(result.fail_msg)
7500 break
7501 dev_path = result.payload
7502 result = self.rpc.call_blockdev_export(source_node, disk,
7503 target_node, dev_path,
7504 cluster_name)
7505 if result.fail_msg:
7506 self.LogWarning("Can't copy data over for disk %d: %s",
7507 idx, result.fail_msg)
7508 errs.append(result.fail_msg)
7509 break
7512 self.LogWarning("Some disks failed to copy, aborting")
7514 _RemoveDisks(self, instance, target_node=target_node)
7516 self.cfg.ReleaseDRBDMinors(instance.name)
7517 raise errors.OpExecError("Errors during disk copy: %s" %
7520 instance.primary_node = target_node
7521 self.cfg.Update(instance, feedback_fn)
7523 self.LogInfo("Removing the disks on the original node")
7524 _RemoveDisks(self, instance, target_node=source_node)
7526 # Only start the instance if it's marked as up
7527 if instance.admin_state == constants.ADMINST_UP:
7528 self.LogInfo("Starting instance %s on node %s",
7529 instance.name, target_node)
7531 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7532 ignore_secondaries=True)
7534 _ShutdownInstanceDisks(self, instance)
7535 raise errors.OpExecError("Can't activate the instance's disks")
7537 result = self.rpc.call_instance_start(target_node,
7538 (instance, None, None), False)
7539 msg = result.fail_msg
7541 _ShutdownInstanceDisks(self, instance)
7542 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7543 (instance.name, target_node, msg))
7546 class LUNodeMigrate(LogicalUnit):
7547 """Migrate all instances from a node.
7550 HPATH = "node-migrate"
7551 HTYPE = constants.HTYPE_NODE
7554 def CheckArguments(self):
7557 def ExpandNames(self):
7558 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7560 self.share_locks = _ShareAll()
7561 self.needed_locks = {
7562 locking.LEVEL_NODE: [self.op.node_name],
7565 def BuildHooksEnv(self):
7568 This runs on the master, the primary and all the secondaries.
7572 "NODE_NAME": self.op.node_name,
7575 def BuildHooksNodes(self):
7576 """Build hooks nodes.
7579 nl = [self.cfg.GetMasterNode()]
7582 def CheckPrereq(self):
7585 def Exec(self, feedback_fn):
7586 # Prepare jobs for migrating instances
7588 [opcodes.OpInstanceMigrate(instance_name=inst.name,
7591 iallocator=self.op.iallocator,
7592 target_node=self.op.target_node,
7593 ignore_ipolicy=self.op.ignore_ipolicy)]
7594 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7597 # TODO: Run iallocator in this opcode and pass correct placement options to
7598 # OpInstanceMigrate. Since other jobs can modify the cluster between
7599 # running the iallocator and the actual migration, a good consistency model
7600 # will have to be found.
7602 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7603 frozenset([self.op.node_name]))
7605 return ResultWithJobs(jobs)
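# Sketch of the result above (hypothetical instance names): for a node whose
# primary instances are "a" and "b", jobs is
#   [[opcodes.OpInstanceMigrate(instance_name="a", ...)],
#    [opcodes.OpInstanceMigrate(instance_name="b", ...)]]
# i.e. one single-opcode job per primary instance, each submitted
# independently.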
7608 class TLMigrateInstance(Tasklet):
7609 """Tasklet class for instance migration.
7612 @ivar live: whether the migration will be done live or non-live;
7613 this variable is initialized only after CheckPrereq has run
7614 @type cleanup: boolean
7615 @ivar cleanup: Whether we clean up from a failed migration
7616 @type iallocator: string
7617 @ivar iallocator: The iallocator used to determine target_node
7618 @type target_node: string
7619 @ivar target_node: If given, the target_node to reallocate the instance to
7620 @type failover: boolean
7621 @ivar failover: Whether operation results in failover or migration
7622 @type fallback: boolean
7623 @ivar fallback: Whether fallback to failover is allowed if migration is not possible
7625 @type ignore_consistency: boolean
7626 @ivar ignore_consistency: Whether we should ignore consistency between source and target node
7628 @type shutdown_timeout: int
7629 @ivar shutdown_timeout: In case of failover, the timeout for the instance shutdown
7630 @type ignore_ipolicy: bool
7631 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
7636 _MIGRATION_POLL_INTERVAL = 1 # seconds
7637 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7639 def __init__(self, lu, instance_name, cleanup=False,
7640 failover=False, fallback=False,
7641 ignore_consistency=False,
7642 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
7643 ignore_ipolicy=False):
7644 """Initializes this class.
7647 Tasklet.__init__(self, lu)
7650 self.instance_name = instance_name
7651 self.cleanup = cleanup
7652 self.live = False # will be overridden later
7653 self.failover = failover
7654 self.fallback = fallback
7655 self.ignore_consistency = ignore_consistency
7656 self.shutdown_timeout = shutdown_timeout
7657 self.ignore_ipolicy = ignore_ipolicy
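# A usage sketch (hypothetical wiring; the keyword values come from the
# owning LU's opcode). The migration LUs typically build the tasklet in
# ExpandNames, along the lines of:
#   self._migrater = TLMigrateInstance(self, self.op.instance_name,
#                                      cleanup=self.op.cleanup,
#                                      failover=False, fallback=True)
#   self.tasklets = [self._migrater]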
7659 def CheckPrereq(self):
7660 """Check prerequisites.
7662 This checks that the instance is in the cluster.
7665 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7666 instance = self.cfg.GetInstanceInfo(instance_name)
7667 assert instance is not None
7668 self.instance = instance
7669 cluster = self.cfg.GetClusterInfo()
7671 if (not self.cleanup and
7672 instance.admin_state != constants.ADMINST_UP and
7673 not self.failover and self.fallback):
7674 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7675 " switching to failover")
7676 self.failover = True
7678 if instance.disk_template not in constants.DTS_MIRRORED:
7683 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7684 " %s" % (instance.disk_template, text),
7687 if instance.disk_template in constants.DTS_EXT_MIRROR:
7688 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7690 if self.lu.op.iallocator:
7691 self._RunAllocator()
7693 # We set self.target_node as it is required by
7695 self.target_node = self.lu.op.target_node
7697 # Check that the target node is correct in terms of instance policy
7698 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
7699 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
7700 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
7701 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
7702 ignore=self.ignore_ipolicy)
7704 # self.target_node is already populated, either directly or by the iallocator run
7706 target_node = self.target_node
7707 if self.target_node == instance.primary_node:
7708 raise errors.OpPrereqError("Cannot migrate instance %s"
7709 " to its primary (%s)" %
7710 (instance.name, instance.primary_node))
7712 if len(self.lu.tasklets) == 1:
7713 # It is safe to release locks only when we're the only tasklet in the LU
7715 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7716 keep=[instance.primary_node, self.target_node])
7719 secondary_nodes = instance.secondary_nodes
7720 if not secondary_nodes:
7721 raise errors.ConfigurationError("No secondary node but using"
7722 " %s disk template" %
7723 instance.disk_template)
7724 target_node = secondary_nodes[0]
7725 if self.lu.op.iallocator or (self.lu.op.target_node and
7726 self.lu.op.target_node != target_node):
7728 text = "failed over"
7731 raise errors.OpPrereqError("Instances with disk template %s cannot"
7732 " be %s to arbitrary nodes"
7733 " (neither an iallocator nor a target"
7734 " node can be passed)" %
7735 (instance.disk_template, text),
7737 nodeinfo = self.cfg.GetNodeInfo(target_node)
7738 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
7739 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
7740 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
7741 ignore=self.ignore_ipolicy)
7743 i_be = cluster.FillBE(instance)
7745 # check memory requirements on the secondary node
7746 if not self.failover or instance.admin_state == constants.ADMINST_UP:
7747 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7748 instance.name, i_be[constants.BE_MAXMEM],
7749 instance.hypervisor)
7751 self.lu.LogInfo("Not checking memory on the secondary node as"
7752 " instance will not be started")
7754 # check if failover must be forced instead of migration
7755 if (not self.cleanup and not self.failover and
7756 i_be[constants.BE_ALWAYS_FAILOVER]):
7758 self.lu.LogInfo("Instance configured to always failover; fallback"
7760 self.failover = True
7762 raise errors.OpPrereqError("This instance has been configured to"
7763 " always failover, please allow failover",
7766 # check bridge existence
7767 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7769 if not self.cleanup:
7770 _CheckNodeNotDrained(self.lu, target_node)
7771 if not self.failover:
7772 result = self.rpc.call_instance_migratable(instance.primary_node,
7774 if result.fail_msg and self.fallback:
7775 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7777 self.failover = True
7779 result.Raise("Can't migrate, please use failover",
7780 prereq=True, ecode=errors.ECODE_STATE)
7782 assert not (self.failover and self.cleanup)
7784 if not self.failover:
7785 if self.lu.op.live is not None and self.lu.op.mode is not None:
7786 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7787 " parameters are accepted",
7789 if self.lu.op.live is not None:
7791 self.lu.op.mode = constants.HT_MIGRATION_LIVE
7793 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7794 # reset the 'live' parameter to None so that repeated
7795 # invocations of CheckPrereq do not raise an exception
7796 self.lu.op.live = None
7797 elif self.lu.op.mode is None:
7798 # read the default value from the hypervisor
7799 i_hv = cluster.FillHV(self.instance, skip_globals=False)
7800 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7802 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7804 # Failover is never live
7807 def _RunAllocator(self):
7808 """Run the allocator based on input opcode.
7811 # FIXME: add a self.ignore_ipolicy option
7812 ial = IAllocator(self.cfg, self.rpc,
7813 mode=constants.IALLOCATOR_MODE_RELOC,
7814 name=self.instance_name,
7815 # TODO See why hail breaks with a single node below
7816 relocate_from=[self.instance.primary_node,
7817 self.instance.primary_node],
7820 ial.Run(self.lu.op.iallocator)
7823 raise errors.OpPrereqError("Can't compute nodes using"
7824 " iallocator '%s': %s" %
7825 (self.lu.op.iallocator, ial.info),
7827 if len(ial.result) != ial.required_nodes:
7828 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7829 " of nodes (%s), required %s" %
7830 (self.lu.op.iallocator, len(ial.result),
7831 ial.required_nodes), errors.ECODE_FAULT)
7832 self.target_node = ial.result[0]
7833 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7834 self.instance_name, self.lu.op.iallocator,
7835 utils.CommaJoin(ial.result))
7837 def _WaitUntilSync(self):
7838 """Poll with custom rpc for disk sync.
7840 This uses our own step-based rpc call.
7843 self.feedback_fn("* wait until resync is done")
7847 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7849 self.instance.disks)
7851 for node, nres in result.items():
7852 nres.Raise("Cannot resync disks on node %s" % node)
7853 node_done, node_percent = nres.payload
7854 all_done = all_done and node_done
7855 if node_percent is not None:
7856 min_percent = min(min_percent, node_percent)
7858 if min_percent < 100:
7859 self.feedback_fn(" - progress: %.1f%%" % min_percent)
7862 def _EnsureSecondary(self, node):
7863 """Demote a node to secondary.
7866 self.feedback_fn("* switching node %s to secondary mode" % node)
7868 for dev in self.instance.disks:
7869 self.cfg.SetDiskID(dev, node)
7871 result = self.rpc.call_blockdev_close(node, self.instance.name,
7872 self.instance.disks)
7873 result.Raise("Cannot change disk to secondary on node %s" % node)
7875 def _GoStandalone(self):
7876 """Disconnect from the network.
7879 self.feedback_fn("* changing into standalone mode")
7880 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7881 self.instance.disks)
7882 for node, nres in result.items():
7883 nres.Raise("Cannot disconnect disks on node %s" % node)
7885 def _GoReconnect(self, multimaster):
7886 """Reconnect to the network.
7892 msg = "single-master"
7893 self.feedback_fn("* changing disks into %s mode" % msg)
7894 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7895 self.instance.disks,
7896 self.instance.name, multimaster)
7897 for node, nres in result.items():
7898 nres.Raise("Cannot change disks config on node %s" % node)
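# For reference, _ExecMigration below drives these helpers in sequence for
# internally mirrored (DRBD) instances: _GoStandalone() followed by
# _GoReconnect(True) switches the disks to multi-master before the live
# migration, and _GoStandalone() plus _GoReconnect(False) restores
# single-master mode once the instance runs on the new primary.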
7900 def _ExecCleanup(self):
7901 """Try to cleanup after a failed migration.
7903 The cleanup is done by:
7904 - check that the instance is running only on one node
7905 (and update the config if needed)
7906 - change disks on its secondary node to secondary
7907 - wait until disks are fully synchronized
7908 - disconnect from the network
7909 - change disks into single-master mode
7910 - wait again until disks are fully synchronized
7913 instance = self.instance
7914 target_node = self.target_node
7915 source_node = self.source_node
7917 # check running on only one node
7918 self.feedback_fn("* checking where the instance actually runs"
7919 " (if this hangs, the hypervisor might be in"
7921 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7922 for node, result in ins_l.items():
7923 result.Raise("Can't contact node %s" % node)
7925 runningon_source = instance.name in ins_l[source_node].payload
7926 runningon_target = instance.name in ins_l[target_node].payload
7928 if runningon_source and runningon_target:
7929 raise errors.OpExecError("Instance seems to be running on two nodes,"
7930 " or the hypervisor is confused; you will have"
7931 " to ensure manually that it runs only on one"
7932 " and restart this operation")
7934 if not (runningon_source or runningon_target):
7935 raise errors.OpExecError("Instance does not seem to be running at all;"
7936 " in this case it's safer to repair by"
7937 " running 'gnt-instance stop' to ensure disk"
7938 " shutdown, and then restarting it")
7940 if runningon_target:
7941 # the migration has actually succeeded, we need to update the config
7942 self.feedback_fn("* instance running on secondary node (%s),"
7943 " updating config" % target_node)
7944 instance.primary_node = target_node
7945 self.cfg.Update(instance, self.feedback_fn)
7946 demoted_node = source_node
7948 self.feedback_fn("* instance confirmed to be running on its"
7949 " primary node (%s)" % source_node)
7950 demoted_node = target_node
7952 if instance.disk_template in constants.DTS_INT_MIRROR:
7953 self._EnsureSecondary(demoted_node)
7955 self._WaitUntilSync()
7956 except errors.OpExecError:
7957 # we ignore errors here, since if the device is standalone, it
7958 # won't be able to sync
7960 self._GoStandalone()
7961 self._GoReconnect(False)
7962 self._WaitUntilSync()
7964 self.feedback_fn("* done")
7966 def _RevertDiskStatus(self):
7967 """Try to revert the disk status after a failed migration.
7970 target_node = self.target_node
7971 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7975 self._EnsureSecondary(target_node)
7976 self._GoStandalone()
7977 self._GoReconnect(False)
7978 self._WaitUntilSync()
7979 except errors.OpExecError, err:
7980 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7981 " please try to recover the instance manually;"
7982 " error '%s'" % str(err))
7984 def _AbortMigration(self):
7985 """Call the hypervisor code to abort a started migration.
7988 instance = self.instance
7989 target_node = self.target_node
7990 source_node = self.source_node
7991 migration_info = self.migration_info
7993 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
7997 abort_msg = abort_result.fail_msg
7999 logging.error("Aborting migration failed on target node %s: %s",
8000 target_node, abort_msg)
8001 # Don't raise an exception here, as we still have to try to revert the
8002 # disk status, even if this step failed.
8004 abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
8005 instance, False, self.live)
8006 abort_msg = abort_result.fail_msg
8008 logging.error("Aborting migration failed on source node %s: %s",
8009 source_node, abort_msg)
8011 def _ExecMigration(self):
8012 """Migrate an instance.
8014 The migration is done by:
8015 - change the disks into dual-master mode
8016 - wait until disks are fully synchronized again
8017 - migrate the instance
8018 - change disks on the new secondary node (the old primary) to secondary
8019 - wait until disks are fully synchronized
8020 - change disks into single-master mode
8023 instance = self.instance
8024 target_node = self.target_node
8025 source_node = self.source_node
8027 # Check for hypervisor version mismatch and warn the user.
8028 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8029 None, [self.instance.hypervisor])
8030 for ninfo in nodeinfo.values():
8031 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8033 (_, _, (src_info, )) = nodeinfo[source_node].payload
8034 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8036 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8037 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8038 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8039 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8040 if src_version != dst_version:
8041 self.feedback_fn("* warning: hypervisor version mismatch between"
8042 " source (%s) and target (%s) node" %
8043 (src_version, dst_version))
8045 self.feedback_fn("* checking disk consistency between source and target")
8046 for dev in instance.disks:
8047 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
8048 raise errors.OpExecError("Disk %s is degraded or not fully"
8049 " synchronized on target node,"
8050 " aborting migration" % dev.iv_name)
8052 # First get the migration information from the remote node
8053 result = self.rpc.call_migration_info(source_node, instance)
8054 msg = result.fail_msg
8056 log_err = ("Failed fetching source migration information from %s: %s" %
8058 logging.error(log_err)
8059 raise errors.OpExecError(log_err)
8061 self.migration_info = migration_info = result.payload
8063 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8064 # Then switch the disks to master/master mode
8065 self._EnsureSecondary(target_node)
8066 self._GoStandalone()
8067 self._GoReconnect(True)
8068 self._WaitUntilSync()
8070 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8071 result = self.rpc.call_accept_instance(target_node,
8074 self.nodes_ip[target_node])
8076 msg = result.fail_msg
8078 logging.error("Instance pre-migration failed, trying to revert"
8079 " disk status: %s", msg)
8080 self.feedback_fn("Pre-migration failed, aborting")
8081 self._AbortMigration()
8082 self._RevertDiskStatus()
8083 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8084 (instance.name, msg))
8086 self.feedback_fn("* migrating instance to %s" % target_node)
8087 result = self.rpc.call_instance_migrate(source_node, instance,
8088 self.nodes_ip[target_node],
8090 msg = result.fail_msg
8092 logging.error("Instance migration failed, trying to revert"
8093 " disk status: %s", msg)
8094 self.feedback_fn("Migration failed, aborting")
8095 self._AbortMigration()
8096 self._RevertDiskStatus()
8097 raise errors.OpExecError("Could not migrate instance %s: %s" %
8098 (instance.name, msg))
8100 self.feedback_fn("* starting memory transfer")
8101 last_feedback = time.time()
8103 result = self.rpc.call_instance_get_migration_status(source_node,
8105 msg = result.fail_msg
8106 ms = result.payload # MigrationStatus instance
8107 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8108 logging.error("Instance migration failed, trying to revert"
8109 " disk status: %s", msg)
8110 self.feedback_fn("Migration failed, aborting")
8111 self._AbortMigration()
8112 self._RevertDiskStatus()
8113 raise errors.OpExecError("Could not migrate instance %s: %s" %
8114 (instance.name, msg))
8116 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8117 self.feedback_fn("* memory transfer complete")
8120 if (utils.TimeoutExpired(last_feedback,
8121 self._MIGRATION_FEEDBACK_INTERVAL) and
8122 ms.transferred_ram is not None):
8123 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8124 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8125 last_feedback = time.time()
8127 time.sleep(self._MIGRATION_POLL_INTERVAL)
8129 result = self.rpc.call_instance_finalize_migration_src(source_node,
8133 msg = result.fail_msg
8135 logging.error("Instance migration succeeded, but finalization failed"
8136 " on the source node: %s", msg)
8137 raise errors.OpExecError("Could not finalize instance migration: %s" %
8140 instance.primary_node = target_node
8142 # distribute new instance config to the other nodes
8143 self.cfg.Update(instance, self.feedback_fn)
8145 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8149 msg = result.fail_msg
8151 logging.error("Instance migration succeeded, but finalization failed"
8152 " on the target node: %s", msg)
8153 raise errors.OpExecError("Could not finalize instance migration: %s" %
8156 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8157 self._EnsureSecondary(source_node)
8158 self._WaitUntilSync()
8159 self._GoStandalone()
8160 self._GoReconnect(False)
8161 self._WaitUntilSync()
8163 self.feedback_fn("* done")
8165 def _ExecFailover(self):
8166 """Failover an instance.
8168 The failover is done by shutting it down on its present node and
8169 starting it on the secondary.
8172 instance = self.instance
8173 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8175 source_node = instance.primary_node
8176 target_node = self.target_node
8178 if instance.admin_state == constants.ADMINST_UP:
8179 self.feedback_fn("* checking disk consistency between source and target")
8180 for dev in instance.disks:
8181 # for drbd, these are drbd over lvm
8182 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
8183 if primary_node.offline:
8184 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8186 (primary_node.name, dev.iv_name, target_node))
8187 elif not self.ignore_consistency:
8188 raise errors.OpExecError("Disk %s is degraded on target node,"
8189 " aborting failover" % dev.iv_name)
8191 self.feedback_fn("* not checking disk consistency as instance is not"
8194 self.feedback_fn("* shutting down instance on source node")
8195 logging.info("Shutting down instance %s on node %s",
8196 instance.name, source_node)
8198 result = self.rpc.call_instance_shutdown(source_node, instance,
8199 self.shutdown_timeout)
8200 msg = result.fail_msg
8202 if self.ignore_consistency or primary_node.offline:
8203 self.lu.LogWarning("Could not shut down instance %s on node %s,"
8204 " proceeding anyway; please make sure node"
8205 " %s is down; error details: %s",
8206 instance.name, source_node, source_node, msg)
8208 raise errors.OpExecError("Could not shut down instance %s on"
8210 (instance.name, source_node, msg))
8212 self.feedback_fn("* deactivating the instance's disks on source node")
8213 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8214 raise errors.OpExecError("Can't shut down the instance's disks")
8216 instance.primary_node = target_node
8217 # distribute new instance config to the other nodes
8218 self.cfg.Update(instance, self.feedback_fn)
8220 # Only start the instance if it's marked as up
8221 if instance.admin_state == constants.ADMINST_UP:
8222 self.feedback_fn("* activating the instance's disks on target node %s" %
8224 logging.info("Starting instance %s on node %s",
8225 instance.name, target_node)
8227 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8228 ignore_secondaries=True)
8230 _ShutdownInstanceDisks(self.lu, instance)
8231 raise errors.OpExecError("Can't activate the instance's disks")
8233 self.feedback_fn("* starting the instance on the target node %s" %
8235 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8237 msg = result.fail_msg
8239 _ShutdownInstanceDisks(self.lu, instance)
8240 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8241 (instance.name, target_node, msg))
8243 def Exec(self, feedback_fn):
8244 """Perform the migration.
8247 self.feedback_fn = feedback_fn
8248 self.source_node = self.instance.primary_node
8250 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8251 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8252 self.target_node = self.instance.secondary_nodes[0]
8253 # Otherwise self.target_node has been populated either
8254 # directly, or through an iallocator.
8256 self.all_nodes = [self.source_node, self.target_node]
8257 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8258 in self.cfg.GetMultiNodeInfo(self.all_nodes))
8261 feedback_fn("Failover instance %s" % self.instance.name)
8262 self._ExecFailover()
8264 feedback_fn("Migrating instance %s" % self.instance.name)
8267 return self._ExecCleanup()
8269 return self._ExecMigration()
8272 def _CreateBlockDev(lu, node, instance, device, force_create,
8274 """Create a tree of block devices on a given node.
8276 If this device type has to be created on secondaries, create it and all its children.
8279 If not, just recurse to children keeping the same 'force' value.
8281 @param lu: the lu on whose behalf we execute
8282 @param node: the node on which to create the device
8283 @type instance: L{objects.Instance}
8284 @param instance: the instance which owns the device
8285 @type device: L{objects.Disk}
8286 @param device: the device to create
8287 @type force_create: boolean
8288 @param force_create: whether to force creation of this device; this
8289 will be changed to True whenever we find a device which has the
8290 CreateOnSecondary() attribute
8291 @param info: the extra 'metadata' we should attach to the device
8292 (this will be represented as a LVM tag)
8293 @type force_open: boolean
8294 @param force_open: this parameter will be passed to the
8295 L{backend.BlockdevCreate} function where it specifies
8296 whether we run on primary or not, and it affects both
8297 the child assembly and the device's own Open() execution
8300 if device.CreateOnSecondary():
8304 for child in device.children:
8305 _CreateBlockDev(lu, node, instance, child, force_create,
8308 if not force_create:
8311 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
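# Illustration of the recursion above: for a DRBD8 device, which reports
# CreateOnSecondary() as true (see the force_create docstring), the flag is
# turned on, so the two child LVs (data and meta) are created first and the
# DRBD device is then created on top of them, on secondaries as well as on
# the primary.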
8314 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8315 """Create a single block device on a given node.
8317 This will not recurse over children of the device, so they must be created in advance.
8320 @param lu: the lu on whose behalf we execute
8321 @param node: the node on which to create the device
8322 @type instance: L{objects.Instance}
8323 @param instance: the instance which owns the device
8324 @type device: L{objects.Disk}
8325 @param device: the device to create
8326 @param info: the extra 'metadata' we should attach to the device
8327 (this will be represented as a LVM tag)
8328 @type force_open: boolean
8329 @param force_open: this parameter will be passed to the
8330 L{backend.BlockdevCreate} function where it specifies
8331 whether we run on primary or not, and it affects both
8332 the child assembly and the device's own Open() execution
8335 lu.cfg.SetDiskID(device, node)
8336 result = lu.rpc.call_blockdev_create(node, device, device.size,
8337 instance.name, force_open, info)
8338 result.Raise("Can't create block device %s on"
8339 " node %s for instance %s" % (device, node, instance.name))
8340 if device.physical_id is None:
8341 device.physical_id = result.payload
8344 def _GenerateUniqueNames(lu, exts):
8345 """Generate a suitable LV name.
8347 This will generate a logical volume name for the given instance.
8352 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8353 results.append("%s%s" % (new_id, val))
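# Example (hypothetical ID): _GenerateUniqueNames(lu, [".disk0", ".disk1"])
# returns something like ["<unique-id>.disk0", "<unique-id>.disk1"], where
# <unique-id> is produced by cfg.GenerateUniqueID().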
8357 def _ComputeLDParams(disk_template, disk_params):
8358 """Computes Logical Disk parameters from Disk Template parameters.
8360 @type disk_template: string
8361 @param disk_template: disk template, one of L{constants.DISK_TEMPLATES}
8362 @type disk_params: dict
8363 @param disk_params: disk template parameters; dict(template_name -> parameters)
8365 @return: a list of dicts, one for each node of the disk hierarchy. Each dict
8366 contains the LD parameters of the node. The tree is flattened in-order.
8369 if disk_template not in constants.DISK_TEMPLATES:
8370 raise errors.ProgrammerError("Unknown disk template %s" % disk_template)
8373 dt_params = disk_params[disk_template]
8374 if disk_template == constants.DT_DRBD8:
8376 constants.LDP_RESYNC_RATE: dt_params[constants.DRBD_RESYNC_RATE],
8377 constants.LDP_BARRIERS: dt_params[constants.DRBD_DISK_BARRIERS],
8378 constants.LDP_NO_META_FLUSH: dt_params[constants.DRBD_META_BARRIERS],
8379 constants.LDP_DEFAULT_METAVG: dt_params[constants.DRBD_DEFAULT_METAVG],
8380 constants.LDP_DISK_CUSTOM: dt_params[constants.DRBD_DISK_CUSTOM],
8381 constants.LDP_NET_CUSTOM: dt_params[constants.DRBD_NET_CUSTOM],
8382 constants.LDP_DYNAMIC_RESYNC: dt_params[constants.DRBD_DYNAMIC_RESYNC],
8383 constants.LDP_PLAN_AHEAD: dt_params[constants.DRBD_PLAN_AHEAD],
8384 constants.LDP_FILL_TARGET: dt_params[constants.DRBD_FILL_TARGET],
8385 constants.LDP_DELAY_TARGET: dt_params[constants.DRBD_DELAY_TARGET],
8386 constants.LDP_MAX_RATE: dt_params[constants.DRBD_MAX_RATE],
8387 constants.LDP_MIN_RATE: dt_params[constants.DRBD_MIN_RATE],
8391 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_DRBD8],
8394 result.append(drbd_params)
8398 constants.LDP_STRIPES: dt_params[constants.DRBD_DATA_STRIPES],
8401 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8403 result.append(data_params)
8407 constants.LDP_STRIPES: dt_params[constants.DRBD_META_STRIPES],
8410 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8412 result.append(meta_params)
8414 elif (disk_template == constants.DT_FILE or
8415 disk_template == constants.DT_SHARED_FILE):
8416 result.append(constants.DISK_LD_DEFAULTS[constants.LD_FILE])
8418 elif disk_template == constants.DT_PLAIN:
8420 constants.LDP_STRIPES: dt_params[constants.LV_STRIPES],
8423 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8425 result.append(params)
8427 elif disk_template == constants.DT_BLOCK:
8428 result.append(constants.DISK_LD_DEFAULTS[constants.LD_BLOCKDEV])
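# Usage sketch: the result is consumed positionally, matching the in-order
# flattening of the disk tree; e.g. for DRBD (cf. _GenerateDiskTemplate):
#   drbd_params, data_params, meta_params = \
#       _ComputeLDParams(constants.DT_DRBD8, disk_params)
# while the plain, file-based and block templates yield a single-entry list.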
8433 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8434 iv_name, p_minor, s_minor, drbd_params, data_params,
8436 """Generate a drbd8 device complete with its children.
8439 assert len(vgnames) == len(names) == 2
8440 port = lu.cfg.AllocatePort()
8441 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8443 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8444 logical_id=(vgnames[0], names[0]),
8446 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8447 logical_id=(vgnames[1], names[1]),
8449 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8450 logical_id=(primary, secondary, port,
8453 children=[dev_data, dev_meta],
8454 iv_name=iv_name, params=drbd_params)
8458 def _GenerateDiskTemplate(lu, template_name,
8459 instance_name, primary_node,
8460 secondary_nodes, disk_info,
8461 file_storage_dir, file_driver,
8462 base_index, feedback_fn, disk_params):
8463 """Generate the entire disk layout for a given template type.
8466 # TODO: compute space requirements
8468 vgname = lu.cfg.GetVGName()
8469 disk_count = len(disk_info)
8471 ld_params = _ComputeLDParams(template_name, disk_params)
8472 if template_name == constants.DT_DISKLESS:
8474 elif template_name == constants.DT_PLAIN:
8475 if len(secondary_nodes) != 0:
8476 raise errors.ProgrammerError("Wrong template configuration")
8478 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8479 for i in range(disk_count)])
8480 for idx, disk in enumerate(disk_info):
8481 disk_index = idx + base_index
8482 vg = disk.get(constants.IDISK_VG, vgname)
8483 feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
8484 disk_dev = objects.Disk(dev_type=constants.LD_LV,
8485 size=disk[constants.IDISK_SIZE],
8486 logical_id=(vg, names[idx]),
8487 iv_name="disk/%d" % disk_index,
8488 mode=disk[constants.IDISK_MODE],
8489 params=ld_params[0])
8490 disks.append(disk_dev)
8491 elif template_name == constants.DT_DRBD8:
8492 drbd_params, data_params, meta_params = ld_params
8493 if len(secondary_nodes) != 1:
8494 raise errors.ProgrammerError("Wrong template configuration")
8495 remote_node = secondary_nodes[0]
8496 minors = lu.cfg.AllocateDRBDMinor(
8497 [primary_node, remote_node] * len(disk_info), instance_name)
8500 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8501 for i in range(disk_count)]):
8502 names.append(lv_prefix + "_data")
8503 names.append(lv_prefix + "_meta")
8504 for idx, disk in enumerate(disk_info):
8505 disk_index = idx + base_index
8506 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8507 data_vg = disk.get(constants.IDISK_VG, vgname)
8508 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8509 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8510 disk[constants.IDISK_SIZE],
8512 names[idx * 2:idx * 2 + 2],
8513 "disk/%d" % disk_index,
8514 minors[idx * 2], minors[idx * 2 + 1],
8515 drbd_params, data_params, meta_params)
8516 disk_dev.mode = disk[constants.IDISK_MODE]
8517 disks.append(disk_dev)
8518 elif template_name == constants.DT_FILE:
8519 if len(secondary_nodes) != 0:
8520 raise errors.ProgrammerError("Wrong template configuration")
8522 opcodes.RequireFileStorage()
8524 for idx, disk in enumerate(disk_info):
8525 disk_index = idx + base_index
8526 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8527 size=disk[constants.IDISK_SIZE],
8528 iv_name="disk/%d" % disk_index,
8529 logical_id=(file_driver,
8530 "%s/disk%d" % (file_storage_dir,
8532 mode=disk[constants.IDISK_MODE],
8533 params=ld_params[0])
8534 disks.append(disk_dev)
8535 elif template_name == constants.DT_SHARED_FILE:
8536 if len(secondary_nodes) != 0:
8537 raise errors.ProgrammerError("Wrong template configuration")
8539 opcodes.RequireSharedFileStorage()
8541 for idx, disk in enumerate(disk_info):
8542 disk_index = idx + base_index
8543 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8544 size=disk[constants.IDISK_SIZE],
8545 iv_name="disk/%d" % disk_index,
8546 logical_id=(file_driver,
8547 "%s/disk%d" % (file_storage_dir,
8549 mode=disk[constants.IDISK_MODE],
8550 params=ld_params[0])
8551 disks.append(disk_dev)
8552 elif template_name == constants.DT_BLOCK:
8553 if len(secondary_nodes) != 0:
8554 raise errors.ProgrammerError("Wrong template configuration")
8556 for idx, disk in enumerate(disk_info):
8557 disk_index = idx + base_index
8558 disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
8559 size=disk[constants.IDISK_SIZE],
8560 logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
8561 disk[constants.IDISK_ADOPT]),
8562 iv_name="disk/%d" % disk_index,
8563 mode=disk[constants.IDISK_MODE],
8564 params=ld_params[0])
8565 disks.append(disk_dev)
8568 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
8572 def _GetInstanceInfoText(instance):
8573 """Compute that text that should be added to the disk's metadata.
8576 return "originstname+%s" % instance.name
8579 def _CalcEta(time_taken, written, total_size):
8580 """Calculates the ETA based on size written and total size.
8582 @param time_taken: The time taken so far
8583 @param written: amount written so far
8584 @param total_size: The total size of data to be written
8585 @return: The remaining time in seconds
8588 avg_time = time_taken / float(written)
8589 return (total_size - written) * avg_time
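# Worked example (hypothetical numbers): after 120s spent writing 512 MiB
# out of 2048 MiB, avg_time is 120 / 512.0 = 0.234375 s/MiB, so the ETA is
# (2048 - 512) * 0.234375 = 360.0 seconds.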
8592 def _WipeDisks(lu, instance):
8593 """Wipes instance disks.
8595 @type lu: L{LogicalUnit}
8596 @param lu: the logical unit on whose behalf we execute
8597 @type instance: L{objects.Instance}
8598 @param instance: the instance whose disks we should wipe
8599 @return: the success of the wipe
8602 node = instance.primary_node
8604 for device in instance.disks:
8605 lu.cfg.SetDiskID(device, node)
8607 logging.info("Pause sync of instance %s disks", instance.name)
8608 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
8610 for idx, success in enumerate(result.payload):
8612 logging.warn("pause-sync of instance %s for disks %d failed",
8616 for idx, device in enumerate(instance.disks):
8617 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk, but
8618 # at most MAX_WIPE_CHUNK
8619 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8620 constants.MIN_WIPE_CHUNK_PERCENT)
8621 # we _must_ make this an int, otherwise rounding errors will occur
8623 wipe_chunk_size = int(wipe_chunk_size)
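# Worked example (assuming MAX_WIPE_CHUNK = 1024 MiB and
# MIN_WIPE_CHUNK_PERCENT = 10; the actual values live in constants.py):
# a 5000 MiB disk yields min(1024, 500.0) -> 500 MiB chunks, while a
# 51200 MiB disk is capped at 1024 MiB per chunk.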
8625 lu.LogInfo("* Wiping disk %d", idx)
8626 logging.info("Wiping disk %d for instance %s, node %s using"
8627 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8632 start_time = time.time()
8634 while offset < size:
8635 wipe_size = min(wipe_chunk_size, size - offset)
8636 logging.debug("Wiping disk %d, offset %s, chunk %s",
8637 idx, offset, wipe_size)
8638 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
8639 result.Raise("Could not wipe disk %d at offset %d for size %d" %
8640 (idx, offset, wipe_size))
8643 if now - last_output >= 60:
8644 eta = _CalcEta(now - start_time, offset, size)
8645 lu.LogInfo(" - done: %.1f%% ETA: %s" %
8646 (offset / float(size) * 100, utils.FormatSeconds(eta)))
8649 logging.info("Resume sync of instance %s disks", instance.name)
8651 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
8653 for idx, success in enumerate(result.payload):
8655 lu.LogWarning("Resume sync of disk %d failed, please have a"
8656 " look at the status and troubleshoot the issue", idx)
8657 logging.warn("resume-sync of instance %s for disks %d failed",
8661 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8662 """Create all disks for an instance.
8664 This abstracts away some work from AddInstance.
8666 @type lu: L{LogicalUnit}
8667 @param lu: the logical unit on whose behalf we execute
8668 @type instance: L{objects.Instance}
8669 @param instance: the instance whose disks we should create
8671 @param to_skip: list of indices to skip
8672 @type target_node: string
8673 @param target_node: if passed, overrides the target node for creation
8675 @return: the success of the creation
8678 info = _GetInstanceInfoText(instance)
8679 if target_node is None:
8680 pnode = instance.primary_node
8681 all_nodes = instance.all_nodes
8686 if instance.disk_template in constants.DTS_FILEBASED:
8687 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8688 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8690 result.Raise("Failed to create directory '%s' on"
8691 " node %s" % (file_storage_dir, pnode))
8693 # Note: this needs to be kept in sync with adding of disks in
8694 # LUInstanceSetParams
8695 for idx, device in enumerate(instance.disks):
8696 if to_skip and idx in to_skip:
8698 logging.info("Creating volume %s for instance %s",
8699 device.iv_name, instance.name)
8701 for node in all_nodes:
8702 f_create = node == pnode
8703 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
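# Note on the flags above: on the primary node both force_create and
# force_open are True, so the full device tree is created and opened there;
# on the other nodes both start out False and creation is governed by
# _CreateBlockDev's CreateOnSecondary() logic.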
8706 def _RemoveDisks(lu, instance, target_node=None):
8707 """Remove all disks for an instance.
8709 This abstracts away some work from `AddInstance()` and
8710 `RemoveInstance()`. Note that in case some of the devices couldn't
8711 be removed, the removal will continue with the other ones (compare
8712 with `_CreateDisks()`).
8714 @type lu: L{LogicalUnit}
8715 @param lu: the logical unit on whose behalf we execute
8716 @type instance: L{objects.Instance}
8717 @param instance: the instance whose disks we should remove
8718 @type target_node: string
8719 @param target_node: used to override the node on which to remove the disks
8721 @return: the success of the removal
8724 logging.info("Removing block devices for instance %s", instance.name)
8727 for device in instance.disks:
8729 edata = [(target_node, device)]
8731 edata = device.ComputeNodeTree(instance.primary_node)
8732 for node, disk in edata:
8733 lu.cfg.SetDiskID(disk, node)
8734 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
8736 lu.LogWarning("Could not remove block device %s on node %s,"
8737 " continuing anyway: %s", device.iv_name, node, msg)
8740 # if this is a DRBD disk, return its port to the pool
8741 if device.dev_type in constants.LDS_DRBD:
8742 tcp_port = device.logical_id[2]
8743 lu.cfg.AddTcpUdpPort(tcp_port)
8745 if instance.disk_template == constants.DT_FILE:
8746 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8750 tgt = instance.primary_node
8751 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
8753 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
8754 file_storage_dir, instance.primary_node, result.fail_msg)
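# Sketch of the port bookkeeping above: per _GenerateDRBD8Branch, a DRBD
# disk's logical_id starts with (primary, secondary, port, ...), so
# logical_id[2] is the TCP port handed back to the cluster pool here.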
8760 def _ComputeDiskSizePerVG(disk_template, disks):
8761 """Compute disk size requirements in the volume group
8764 def _compute(disks, payload):
8765 """Universal algorithm.
8770 vgs[disk[constants.IDISK_VG]] = \
8771 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
8775 # Required free disk space as a function of disk and swap space
8777 constants.DT_DISKLESS: {},
8778 constants.DT_PLAIN: _compute(disks, 0),
8779 # 128 MB are added for drbd metadata for each disk
8780 constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
8781 constants.DT_FILE: {},
8782 constants.DT_SHARED_FILE: {},
8785 if disk_template not in req_size_dict:
8786 raise errors.ProgrammerError("Disk template '%s' size requirement"
8787 " is unknown" % disk_template)
8789 return req_size_dict[disk_template]
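# Worked example: for two DRBD disks of 1024 and 2048 MiB in volume group
# "xenvg", _compute adds DRBD_META_SIZE (128 MB, per the comment above) to
# each disk, giving {"xenvg": 1024 + 128 + 2048 + 128} = {"xenvg": 3328}.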
8792 def _ComputeDiskSize(disk_template, disks):
8793 """Compute disk size requirements in the volume group
8796 # Required free disk space as a function of disk and swap space
8798 constants.DT_DISKLESS: None,
8799 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
8800 # 128 MB are added for drbd metadata for each disk
8802 sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
8803 constants.DT_FILE: None,
8804 constants.DT_SHARED_FILE: 0,
8805 constants.DT_BLOCK: 0,
8808 if disk_template not in req_size_dict:
8809 raise errors.ProgrammerError("Disk template '%s' size requirement"
8810 " is unknown" % disk_template)
8812 return req_size_dict[disk_template]
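# Worked example: _ComputeDiskSize(constants.DT_DRBD8,
# [{constants.IDISK_SIZE: 1024}]) yields 1024 + 128 = 1152 MiB, while the
# file-based and block templates require no space in the volume group.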
8815 def _FilterVmNodes(lu, nodenames):
8816 """Filters out non-vm_capable nodes from a list.
8818 @type lu: L{LogicalUnit}
8819 @param lu: the logical unit for which we check
8820 @type nodenames: list
8821 @param nodenames: the list of nodes on which we should check
8823 @return: the list of vm-capable nodes
8826 non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
8827 return [name for name in nodenames if name not in non_vm_nodes]
8830 def _CheckHVParams(lu, nodenames, hvname, hvparams):
8831 """Hypervisor parameter validation.
8833 This function abstracts the hypervisor parameter validation to be
8834 used in both instance create and instance modify.
8836 @type lu: L{LogicalUnit}
8837 @param lu: the logical unit for which we check
8838 @type nodenames: list
8839 @param nodenames: the list of nodes on which we should check
8840 @type hvname: string
8841 @param hvname: the name of the hypervisor we should use
8842 @type hvparams: dict
8843 @param hvparams: the parameters which we need to check
8844 @raise errors.OpPrereqError: if the parameters are not valid
8847 nodenames = _FilterVmNodes(lu, nodenames)
8849 cluster = lu.cfg.GetClusterInfo()
8850 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
8852 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
8853 for node in nodenames:
8857 info.Raise("Hypervisor parameter validation failed on node %s" % node)
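# A typical call site (sketch; variable names are assumptions): validating
# the merged parameters during an instance modification:
#   _CheckHVParams(self, nodelist, instance.hypervisor, hvparams_full)
# Only vm-capable nodes are consulted, per _FilterVmNodes above.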
8860 def _CheckOSParams(lu, required, nodenames, osname, osparams):
8861 """OS parameters validation.
8863 @type lu: L{LogicalUnit}
8864 @param lu: the logical unit for which we check
8865 @type required: boolean
8866 @param required: whether the validation should fail if the OS is not
8868 @type nodenames: list
8869 @param nodenames: the list of nodes on which we should check
8870 @type osname: string
8871 @param osname: the name of the OS we should use
8872 @type osparams: dict
8873 @param osparams: the parameters which we need to check
8874 @raise errors.OpPrereqError: if the parameters are not valid
8877 nodenames = _FilterVmNodes(lu, nodenames)
8878 result = lu.rpc.call_os_validate(nodenames, required, osname,
8879 [constants.OS_VALIDATE_PARAMETERS],
8881 for node, nres in result.items():
8882 # we don't check for offline cases since this should be run only
8883 # against the master node and/or an instance's nodes
8884 nres.Raise("OS Parameters validation failed on node %s" % node)
8885 if not nres.payload:
8886 lu.LogInfo("OS %s not found on node %s, validation skipped",
8890 class LUInstanceCreate(LogicalUnit):
8891 """Create an instance.
8894 HPATH = "instance-add"
8895 HTYPE = constants.HTYPE_INSTANCE
8898 def CheckArguments(self):
8902 # do not require name_check to ease forward/backward compatibility
8904 if self.op.no_install and self.op.start:
8905 self.LogInfo("No-installation mode selected, disabling startup")
8906 self.op.start = False
8907 # validate/normalize the instance name
8908 self.op.instance_name = \
8909 netutils.Hostname.GetNormalizedName(self.op.instance_name)
8911 if self.op.ip_check and not self.op.name_check:
8912 # TODO: make the ip check more flexible and not depend on the name check
8913 raise errors.OpPrereqError("Cannot do IP address check without a name"
8914 " check", errors.ECODE_INVAL)
8916 # check nics' parameter names
8917 for nic in self.op.nics:
8918 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8920 # check disks: parameter names and consistent adopt/no-adopt strategy
8921 has_adopt = has_no_adopt = False
8922 for disk in self.op.disks:
8923 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8924 if constants.IDISK_ADOPT in disk:
8928 if has_adopt and has_no_adopt:
8929 raise errors.OpPrereqError("Either all disks are adopted or none is",
8932 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8933 raise errors.OpPrereqError("Disk adoption is not supported for the"
8934 " '%s' disk template" %
8935 self.op.disk_template,
8937 if self.op.iallocator is not None:
8938 raise errors.OpPrereqError("Disk adoption not allowed with an"
8939 " iallocator script", errors.ECODE_INVAL)
8940 if self.op.mode == constants.INSTANCE_IMPORT:
8941 raise errors.OpPrereqError("Disk adoption not allowed for"
8942 " instance import", errors.ECODE_INVAL)
8944 if self.op.disk_template in constants.DTS_MUST_ADOPT:
8945 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8946 " but no 'adopt' parameter given" %
8947 self.op.disk_template,
8950 self.adopt_disks = has_adopt
8952 # instance name verification
8953 if self.op.name_check:
8954 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8955 self.op.instance_name = self.hostname1.name
8956 # used in CheckPrereq for ip ping check
8957 self.check_ip = self.hostname1.ip
8959 self.check_ip = None
8961 # file storage checks
8962 if (self.op.file_driver and
8963 self.op.file_driver not in constants.FILE_DRIVER):
8964 raise errors.OpPrereqError("Invalid file driver name '%s'" %
8965 self.op.file_driver, errors.ECODE_INVAL)
8967 if self.op.disk_template == constants.DT_FILE:
8968 opcodes.RequireFileStorage()
8969 elif self.op.disk_template == constants.DT_SHARED_FILE:
8970 opcodes.RequireSharedFileStorage()
8972 ### Node/iallocator related checks
8973 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8975 if self.op.pnode is not None:
8976 if self.op.disk_template in constants.DTS_INT_MIRROR:
8977 if self.op.snode is None:
8978 raise errors.OpPrereqError("The networked disk templates need"
8979 " a mirror node", errors.ECODE_INVAL)
8981 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8983 self.op.snode = None
8985 self._cds = _GetClusterDomainSecret()
8987 if self.op.mode == constants.INSTANCE_IMPORT:
8988 # On import force_variant must be True, because if we forced it at
8989 # initial install, our only chance when importing it back is that it works
8991 self.op.force_variant = True
8993 if self.op.no_install:
8994 self.LogInfo("No-installation mode has no effect during import")
8996 elif self.op.mode == constants.INSTANCE_CREATE:
8997 if self.op.os_type is None:
8998 raise errors.OpPrereqError("No guest OS specified",
9000 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9001 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9002 " installation" % self.op.os_type,
9004 if self.op.disk_template is None:
9005 raise errors.OpPrereqError("No disk template specified",
9008 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9009 # Check handshake to ensure both clusters have the same domain secret
9010 src_handshake = self.op.source_handshake
9011 if not src_handshake:
9012 raise errors.OpPrereqError("Missing source handshake",
9015 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9018 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9021 # Load and check source CA
9022 self.source_x509_ca_pem = self.op.source_x509_ca
9023 if not self.source_x509_ca_pem:
9024 raise errors.OpPrereqError("Missing source X509 CA",
9028 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9030 except OpenSSL.crypto.Error, err:
9031 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9032 (err, ), errors.ECODE_INVAL)
9034 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9035 if errcode is not None:
9036 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9039 self.source_x509_ca = cert
9041 src_instance_name = self.op.source_instance_name
9042 if not src_instance_name:
9043 raise errors.OpPrereqError("Missing source instance name",
9046 self.source_instance_name = \
9047 netutils.GetHostname(name=src_instance_name).name
9050 raise errors.OpPrereqError("Invalid instance creation mode %r" %
9051 self.op.mode, errors.ECODE_INVAL)
9053 def ExpandNames(self):
9054 """ExpandNames for CreateInstance.
9056 Figure out the right locks for instance creation.
9059 self.needed_locks = {}
9061 instance_name = self.op.instance_name
9062 # this is just a preventive check, but someone might still add this
9063 # instance in the meantime, and creation will fail at lock-add time
9064 if instance_name in self.cfg.GetInstanceList():
9065 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9066 instance_name, errors.ECODE_EXISTS)
9068 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9070 if self.op.iallocator:
9071 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9072 # specifying a group on instance creation and then selecting nodes from that group
9074 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9075 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9077 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9078 nodelist = [self.op.pnode]
9079 if self.op.snode is not None:
9080 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9081 nodelist.append(self.op.snode)
9082 self.needed_locks[locking.LEVEL_NODE] = nodelist
9083 # Lock resources of instance's primary and secondary nodes (copy to
9084 # prevent accidental modification)
9085 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9087 # in case of import lock the source node too
9088 if self.op.mode == constants.INSTANCE_IMPORT:
9089 src_node = self.op.src_node
9090 src_path = self.op.src_path
9092 if src_path is None:
9093 self.op.src_path = src_path = self.op.instance_name
9095 if src_node is None:
9096 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9097 self.op.src_node = None
9098 if os.path.isabs(src_path):
9099 raise errors.OpPrereqError("Importing an instance from a path"
9100 " requires a source node option",
9103 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9104 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9105 self.needed_locks[locking.LEVEL_NODE].append(src_node)
9106 if not os.path.isabs(src_path):
9107 self.op.src_path = src_path = \
9108 utils.PathJoin(constants.EXPORT_DIR, src_path)
9110 def _RunAllocator(self):
9111 """Run the allocator based on input opcode.
9114 nics = [n.ToDict() for n in self.nics]
9115 ial = IAllocator(self.cfg, self.rpc,
9116 mode=constants.IALLOCATOR_MODE_ALLOC,
9117 name=self.op.instance_name,
9118 disk_template=self.op.disk_template,
9121 vcpus=self.be_full[constants.BE_VCPUS],
9122 memory=self.be_full[constants.BE_MAXMEM],
9125 hypervisor=self.op.hypervisor,
9128 ial.Run(self.op.iallocator)
9131 raise errors.OpPrereqError("Can't compute nodes using"
9132 " iallocator '%s': %s" %
9133 (self.op.iallocator, ial.info),
9135 if len(ial.result) != ial.required_nodes:
9136 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9137 " of nodes (%s), required %s" %
9138 (self.op.iallocator, len(ial.result),
9139 ial.required_nodes), errors.ECODE_FAULT)
9140 self.op.pnode = ial.result[0]
9141 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9142 self.op.instance_name, self.op.iallocator,
9143 utils.CommaJoin(ial.result))
9144 if ial.required_nodes == 2:
9145 self.op.snode = ial.result[1]
9147 def BuildHooksEnv(self):
9150 This runs on master, primary and secondary nodes of the instance.
9154 "ADD_MODE": self.op.mode,
9156 if self.op.mode == constants.INSTANCE_IMPORT:
9157 env["SRC_NODE"] = self.op.src_node
9158 env["SRC_PATH"] = self.op.src_path
9159 env["SRC_IMAGES"] = self.src_images
9161 env.update(_BuildInstanceHookEnv(
9162 name=self.op.instance_name,
9163 primary_node=self.op.pnode,
9164 secondary_nodes=self.secondaries,
9165 status=self.op.start,
9166 os_type=self.op.os_type,
9167 minmem=self.be_full[constants.BE_MINMEM],
9168 maxmem=self.be_full[constants.BE_MAXMEM],
9169 vcpus=self.be_full[constants.BE_VCPUS],
9170 nics=_NICListToTuple(self, self.nics),
9171 disk_template=self.op.disk_template,
9172 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9173 for d in self.disks],
9176 hypervisor_name=self.op.hypervisor,
9182 def BuildHooksNodes(self):
9183 """Build hooks nodes.
9186 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9189 def _ReadExportInfo(self):
9190 """Reads the export information from disk.
9192 It will override the opcode source node and path with the actual
9193 information, if these two were not specified before.
9195 @return: the export information
9198 assert self.op.mode == constants.INSTANCE_IMPORT
9200 src_node = self.op.src_node
9201 src_path = self.op.src_path
9203 if src_node is None:
9204 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9205 exp_list = self.rpc.call_export_list(locked_nodes)
9207 for node in exp_list:
9208 if exp_list[node].fail_msg:
9210 if src_path in exp_list[node].payload:
9212 self.op.src_node = src_node = node
9213 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
9217 raise errors.OpPrereqError("No export found for relative path %s" %
9218 src_path, errors.ECODE_INVAL)
9220 _CheckNodeOnline(self, src_node)
9221 result = self.rpc.call_export_info(src_node, src_path)
9222 result.Raise("No export or invalid export found in dir %s" % src_path)
9224 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9225 if not export_info.has_section(constants.INISECT_EXP):
9226 raise errors.ProgrammerError("Corrupted export config",
9227 errors.ECODE_ENVIRON)
9229 ei_version = export_info.get(constants.INISECT_EXP, "version")
9230 if (int(ei_version) != constants.EXPORT_VERSION):
9231 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9232 (ei_version, constants.EXPORT_VERSION),
9233 errors.ECODE_ENVIRON)
9236 def _ReadExportParams(self, einfo):
9237 """Use export parameters as defaults.
9239 In case the opcode doesn't specify (as in override) some instance
9240 parameters, then try to use them from the export information, if
9244 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9246 if self.op.disk_template is None:
9247 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9248 self.op.disk_template = einfo.get(constants.INISECT_INS,
9250 if self.op.disk_template not in constants.DISK_TEMPLATES:
9251 raise errors.OpPrereqError("Disk template specified in configuration"
9252 " file is not one of the allowed values:"
9253 " %s" % " ".join(constants.DISK_TEMPLATES))
9255 raise errors.OpPrereqError("No disk template specified and the export"
9256 " is missing the disk_template information",
9259 if not self.op.disks:
9261 # TODO: import the disk iv_name too
9262 for idx in range(constants.MAX_DISKS):
9263 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9264 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9265 disks.append({constants.IDISK_SIZE: disk_sz})
9266 self.op.disks = disks
9267 if not disks and self.op.disk_template != constants.DT_DISKLESS:
9268 raise errors.OpPrereqError("No disk info specified and the export"
9269 " is missing the disk information",
9272 if not self.op.nics:
9274 for idx in range(constants.MAX_NICS):
9275 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9277 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9278 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9285 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9286 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9288 if (self.op.hypervisor is None and
9289 einfo.has_option(constants.INISECT_INS, "hypervisor")):
9290 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9292 if einfo.has_section(constants.INISECT_HYP):
9293 # use the export parameters but do not override the ones
9294 # specified by the user
9295 for name, value in einfo.items(constants.INISECT_HYP):
9296 if name not in self.op.hvparams:
9297 self.op.hvparams[name] = value
9299 if einfo.has_section(constants.INISECT_BEP):
9300 # use the parameters, without overriding
9301 for name, value in einfo.items(constants.INISECT_BEP):
9302 if name not in self.op.beparams:
9303 self.op.beparams[name] = value
9304 # Compatibility for the old "memory" be param
9305 if name == constants.BE_MEMORY:
9306 if constants.BE_MAXMEM not in self.op.beparams:
9307 self.op.beparams[constants.BE_MAXMEM] = value
9308 if constants.BE_MINMEM not in self.op.beparams:
9309 self.op.beparams[constants.BE_MINMEM] = value
9310     else:
9311       # try to read the parameters old style, from the main section
9312 for name in constants.BES_PARAMETERS:
9313 if (name not in self.op.beparams and
9314 einfo.has_option(constants.INISECT_INS, name)):
9315 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9317 if einfo.has_section(constants.INISECT_OSP):
9318 # use the parameters, without overriding
9319 for name, value in einfo.items(constants.INISECT_OSP):
9320 if name not in self.op.osparams:
9321 self.op.osparams[name] = value
9323 def _RevertToDefaults(self, cluster):
9324 """Revert the instance parameters to the default values.
9328 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9329 for name in self.op.hvparams.keys():
9330 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9331 del self.op.hvparams[name]
9333 be_defs = cluster.SimpleFillBE({})
9334 for name in self.op.beparams.keys():
9335 if name in be_defs and be_defs[name] == self.op.beparams[name]:
9336 del self.op.beparams[name]
9338 nic_defs = cluster.SimpleFillNIC({})
9339 for nic in self.op.nics:
9340 for name in constants.NICS_PARAMETERS:
9341         if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9342           del nic[name]
9343
9344 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9345 for name in self.op.osparams.keys():
9346 if name in os_defs and os_defs[name] == self.op.osparams[name]:
9347 del self.op.osparams[name]
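  # Illustrative example (hypothetical values, not from this module): with
  # identify_defaults enabled, a parameter equal to the cluster default is
  # dropped so the instance keeps tracking the default. Assuming the cluster
  # default for the "root_path" hvparam were "/dev/vda1":
  #
  #   self.op.hvparams == {"root_path": "/dev/vda1", "acpi": False}
  #   # after _RevertToDefaults:
  #   self.op.hvparams == {"acpi": False}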
9349 def _CalculateFileStorageDir(self):
9350 """Calculate final instance file storage dir.
9353 # file storage dir calculation/check
9354 self.instance_file_storage_dir = None
9355 if self.op.disk_template in constants.DTS_FILEBASED:
9356       # build the full file storage dir path
9357       joinargs = []
9358
9359       if self.op.disk_template == constants.DT_SHARED_FILE:
9360         get_fsd_fn = self.cfg.GetSharedFileStorageDir
9361       else:
9362         get_fsd_fn = self.cfg.GetFileStorageDir
9363
9364 cfg_storagedir = get_fsd_fn()
9365 if not cfg_storagedir:
9366 raise errors.OpPrereqError("Cluster file storage dir not defined")
9367 joinargs.append(cfg_storagedir)
9369 if self.op.file_storage_dir is not None:
9370 joinargs.append(self.op.file_storage_dir)
9372 joinargs.append(self.op.instance_name)
9374 # pylint: disable=W0142
9375 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
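  # Rough illustration (made-up values): if the cluster file storage dir is
  # "/srv/ganeti/file-storage", the opcode sets file_storage_dir="web" and
  # instance_name="inst1.example.com", the code above computes:
  #
  #   utils.PathJoin("/srv/ganeti/file-storage", "web", "inst1.example.com")
  #   == "/srv/ganeti/file-storage/web/inst1.example.com"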
9377 def CheckPrereq(self): # pylint: disable=R0914
9378 """Check prerequisites.
9381 self._CalculateFileStorageDir()
9383 if self.op.mode == constants.INSTANCE_IMPORT:
9384 export_info = self._ReadExportInfo()
9385 self._ReadExportParams(export_info)
9387 if (not self.cfg.GetVGName() and
9388 self.op.disk_template not in constants.DTS_NOT_LVM):
9389 raise errors.OpPrereqError("Cluster does not support lvm-based"
9390 " instances", errors.ECODE_STATE)
9392 if (self.op.hypervisor is None or
9393 self.op.hypervisor == constants.VALUE_AUTO):
9394 self.op.hypervisor = self.cfg.GetHypervisorType()
9396 cluster = self.cfg.GetClusterInfo()
9397 enabled_hvs = cluster.enabled_hypervisors
9398 if self.op.hypervisor not in enabled_hvs:
9399 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9400 " cluster (%s)" % (self.op.hypervisor,
9401 ",".join(enabled_hvs)),
9404 # Check tag validity
9405 for tag in self.op.tags:
9406 objects.TaggableObject.ValidateTag(tag)
9408 # check hypervisor parameter syntax (locally)
9409 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9410     filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9411                                       self.op.hvparams)
9412 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9413 hv_type.CheckParameterSyntax(filled_hvp)
9414 self.hv_full = filled_hvp
9415 # check that we don't specify global parameters on an instance
9416 _CheckGlobalHvParams(self.op.hvparams)
9418 # fill and remember the beparams dict
9419 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9420 for param, value in self.op.beparams.iteritems():
9421 if value == constants.VALUE_AUTO:
9422 self.op.beparams[param] = default_beparams[param]
9423 objects.UpgradeBeParams(self.op.beparams)
9424 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9425 self.be_full = cluster.SimpleFillBE(self.op.beparams)
9427 # build os parameters
9428 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9430     # now that hvp/bep are in final format, let's reset to defaults,
9431     # if told to do so
9432     if self.op.identify_defaults:
9433       self._RevertToDefaults(cluster)
9434
9435     # NIC buildup
9436     self.nics = []
9437     for idx, nic in enumerate(self.op.nics):
9438 nic_mode_req = nic.get(constants.INIC_MODE, None)
9439 nic_mode = nic_mode_req
9440 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9441 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9443 # in routed mode, for the first nic, the default ip is 'auto'
9444 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9445         default_ip_mode = constants.VALUE_AUTO
9446       else:
9447         default_ip_mode = constants.VALUE_NONE
9449 # ip validity checks
9450 ip = nic.get(constants.INIC_IP, default_ip_mode)
9451       if ip is None or ip.lower() == constants.VALUE_NONE:
9452         nic_ip = None
9453       elif ip.lower() == constants.VALUE_AUTO:
9454         if not self.op.name_check:
9455           raise errors.OpPrereqError("IP address set to auto but name checks"
9456                                      " have been skipped",
9457                                      errors.ECODE_INVAL)
9458         nic_ip = self.hostname1.ip
9459       else:
9460         if not netutils.IPAddress.IsValid(ip):
9461           raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9462                                      errors.ECODE_INVAL)
9463
9464         nic_ip = ip
9465 # TODO: check the ip address for uniqueness
9466 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9467 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9470 # MAC address verification
9471 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9472 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9473 mac = utils.NormalizeAndValidateMac(mac)
9474
9475         try:
9476           self.cfg.ReserveMAC(mac, self.proc.GetECId())
9477 except errors.ReservationError:
9478 raise errors.OpPrereqError("MAC address %s already in use"
9479 " in cluster" % mac,
9480 errors.ECODE_NOTUNIQUE)
9482 # Build nic parameters
9483 link = nic.get(constants.INIC_LINK, None)
9484 if link == constants.VALUE_AUTO:
9485 link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9486       nicparams = {}
9487       if nic_mode_req:
9488         nicparams[constants.NIC_MODE] = nic_mode
9489       if link:
9490         nicparams[constants.NIC_LINK] = link
9491
9492 check_params = cluster.SimpleFillNIC(nicparams)
9493 objects.NIC.CheckParameterSyntax(check_params)
9494 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9496 # disk checks/pre-build
9497 default_vg = self.cfg.GetVGName()
9498     self.disks = []
9499     for disk in self.op.disks:
9500 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9501 if mode not in constants.DISK_ACCESS_SET:
9502 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9503 mode, errors.ECODE_INVAL)
9504       size = disk.get(constants.IDISK_SIZE, None)
9505       if size is None:
9506         raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9507       try:
9508         size = int(size)
9509       except (TypeError, ValueError):
9510         raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9511                                    errors.ECODE_INVAL)
9512
9513       data_vg = disk.get(constants.IDISK_VG, default_vg)
9514       new_disk = {
9515         constants.IDISK_SIZE: size,
9516         constants.IDISK_MODE: mode,
9517         constants.IDISK_VG: data_vg,
9518         }
9519       if constants.IDISK_METAVG in disk:
9520 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9521 if constants.IDISK_ADOPT in disk:
9522 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9523 self.disks.append(new_disk)
9525     if self.op.mode == constants.INSTANCE_IMPORT:
9526       disk_images = []
9527       for idx in range(len(self.disks)):
9528 option = "disk%d_dump" % idx
9529 if export_info.has_option(constants.INISECT_INS, option):
9530 # FIXME: are the old os-es, disk sizes, etc. useful?
9531 export_name = export_info.get(constants.INISECT_INS, option)
9532 image = utils.PathJoin(self.op.src_path, export_name)
9533 disk_images.append(image)
9534         else:
9535           disk_images.append(False)
9536
9537 self.src_images = disk_images
9539 old_name = export_info.get(constants.INISECT_INS, "name")
9540 if self.op.instance_name == old_name:
9541 for idx, nic in enumerate(self.nics):
9542 if nic.mac == constants.VALUE_AUTO:
9543 nic_mac_ini = "nic%d_mac" % idx
9544 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9546 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9548 # ip ping checks (we use the same ip that was resolved in ExpandNames)
9549 if self.op.ip_check:
9550 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9551 raise errors.OpPrereqError("IP %s of instance %s already in use" %
9552 (self.check_ip, self.op.instance_name),
9553 errors.ECODE_NOTUNIQUE)
9555 #### mac address generation
9556 # By generating here the mac address both the allocator and the hooks get
9557 # the real final mac address rather than the 'auto' or 'generate' value.
9558 # There is a race condition between the generation and the instance object
9559 # creation, which means that we know the mac is valid now, but we're not
9560 # sure it will be when we actually add the instance. If things go bad
9561 # adding the instance will abort because of a duplicate mac, and the
9562 # creation job will fail.
9563 for nic in self.nics:
9564 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9565 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
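    # Illustration (hypothetical value): after this loop every NIC holds a
    # concrete address such as "aa:00:00:dd:ac:fb", drawn from the cluster
    # MAC prefix, instead of the VALUE_AUTO/VALUE_GENERATE placeholders.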
9569 if self.op.iallocator is not None:
9570 self._RunAllocator()
9572 # Release all unneeded node locks
9573     _ReleaseLocks(self, locking.LEVEL_NODE,
9574                   keep=filter(None, [self.op.pnode, self.op.snode,
9575                                      self.op.src_node]))
9576     _ReleaseLocks(self, locking.LEVEL_NODE_RES,
9577                   keep=filter(None, [self.op.pnode, self.op.snode,
9578                                      self.op.src_node]))
9579
9580 #### node related checks
9582 # check primary node
9583 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9584 assert self.pnode is not None, \
9585 "Cannot retrieve locked node %s" % self.op.pnode
9587 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9588 pnode.name, errors.ECODE_STATE)
9590 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9591 pnode.name, errors.ECODE_STATE)
9592 if not pnode.vm_capable:
9593 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9594 " '%s'" % pnode.name, errors.ECODE_STATE)
9596 self.secondaries = []
9598 # mirror node verification
9599 if self.op.disk_template in constants.DTS_INT_MIRROR:
9600 if self.op.snode == pnode.name:
9601 raise errors.OpPrereqError("The secondary node cannot be the"
9602 " primary node", errors.ECODE_INVAL)
9603 _CheckNodeOnline(self, self.op.snode)
9604 _CheckNodeNotDrained(self, self.op.snode)
9605 _CheckNodeVmCapable(self, self.op.snode)
9606 self.secondaries.append(self.op.snode)
9608 snode = self.cfg.GetNodeInfo(self.op.snode)
9609 if pnode.group != snode.group:
9610 self.LogWarning("The primary and secondary nodes are in two"
9611 " different node groups; the disk parameters"
9612 " from the first disk's node group will be"
9615 nodenames = [pnode.name] + self.secondaries
9617     # Verify instance specs
9618     ispec = {
9619       constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
9620       constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
9621       constants.ISPEC_DISK_COUNT: len(self.disks),
9622       constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
9623       constants.ISPEC_NIC_COUNT: len(self.nics),
9624       }
9625
9626     group_info = self.cfg.GetNodeGroup(pnode.group)
9627 ipolicy = _CalculateGroupIPolicy(cluster, group_info)
9628 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
9629 if not self.op.ignore_ipolicy and res:
9630 raise errors.OpPrereqError(("Instance allocation to group %s violates"
9631 " policy: %s") % (pnode.group,
9632                                                     utils.CommaJoin(res)),
9633                                  errors.ECODE_INVAL)
9634
9635 # disk parameters (not customizable at instance or node level)
9636 # just use the primary node parameters, ignoring the secondary.
9637 self.diskparams = group_info.diskparams
9639 if not self.adopt_disks:
9640 # Check lv size requirements, if not adopting
9641 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9642 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9644 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9645 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9646 disk[constants.IDISK_ADOPT])
9647 for disk in self.disks])
9648 if len(all_lvs) != len(self.disks):
9649 raise errors.OpPrereqError("Duplicate volume names given for adoption",
9651 for lv_name in all_lvs:
9653 # FIXME: lv_name here is "vg/lv" need to ensure that other calls
9654 # to ReserveLV uses the same syntax
9655 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9656 except errors.ReservationError:
9657 raise errors.OpPrereqError("LV named %s used by another instance" %
9658 lv_name, errors.ECODE_NOTUNIQUE)
9660 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9661 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9663 node_lvs = self.rpc.call_lv_list([pnode.name],
9664 vg_names.payload.keys())[pnode.name]
9665 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9666 node_lvs = node_lvs.payload
9668       delta = all_lvs.difference(node_lvs.keys())
9669       if delta:
9670         raise errors.OpPrereqError("Missing logical volume(s): %s" %
9671                                    utils.CommaJoin(delta),
9672                                    errors.ECODE_INVAL)
9673       online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9674       if online_lvs:
9675         raise errors.OpPrereqError("Online logical volumes found, cannot"
9676                                    " adopt: %s" % utils.CommaJoin(online_lvs),
9677                                    errors.ECODE_STATE)
9678 # update the size of disk based on what is found
9679 for dsk in self.disks:
9680 dsk[constants.IDISK_SIZE] = \
9681 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9682 dsk[constants.IDISK_ADOPT])][0]))
9684 elif self.op.disk_template == constants.DT_BLOCK:
9685 # Normalize and de-duplicate device paths
9686 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9687 for disk in self.disks])
9688 if len(all_disks) != len(self.disks):
9689 raise errors.OpPrereqError("Duplicate disk names given for adoption",
9691 baddisks = [d for d in all_disks
9692 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9694 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9695 " cannot be adopted" %
9696 (", ".join(baddisks),
9697 constants.ADOPTABLE_BLOCKDEV_ROOT),
9700 node_disks = self.rpc.call_bdev_sizes([pnode.name],
9701 list(all_disks))[pnode.name]
9702       node_disks.Raise("Cannot get block device information from node %s" %
9703                        pnode.name)
9704       node_disks = node_disks.payload
9705       delta = all_disks.difference(node_disks.keys())
9706       if delta:
9707         raise errors.OpPrereqError("Missing block device(s): %s" %
9708                                    utils.CommaJoin(delta),
9709                                    errors.ECODE_INVAL)
9710 for dsk in self.disks:
9711 dsk[constants.IDISK_SIZE] = \
9712 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
9714 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
9716 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
9717 # check OS parameters (remotely)
9718 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
9720 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
9722 # memory check on primary node
9723 #TODO(dynmem): use MINMEM for checking
9724     if self.op.start:
9725       _CheckNodeFreeMemory(self, self.pnode.name,
9726                            "creating instance %s" % self.op.instance_name,
9727                            self.be_full[constants.BE_MAXMEM],
9728                            self.op.hypervisor)
9729
9730 self.dry_run_result = list(nodenames)
9732 def Exec(self, feedback_fn):
9733 """Create and add the instance to the cluster.
9736 instance = self.op.instance_name
9737 pnode_name = self.pnode.name
9739 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
9740 self.owned_locks(locking.LEVEL_NODE)), \
9741 "Node locks differ from node resource locks"
9743 ht_kind = self.op.hypervisor
9744 if ht_kind in constants.HTS_REQ_PORT:
9745       network_port = self.cfg.AllocatePort()
9746     else:
9747       network_port = None
9748
9749     disks = _GenerateDiskTemplate(self,
9750                                   self.op.disk_template,
9751                                   instance, pnode_name,
9752                                   self.secondaries,
9753                                   self.disks,
9754                                   self.instance_file_storage_dir,
9755                                   self.op.file_driver,
9756                                   0,
9757                                   feedback_fn,
9758                                   self.diskparams)
9759
9760 iobj = objects.Instance(name=instance, os=self.op.os_type,
9761 primary_node=pnode_name,
9762 nics=self.nics, disks=disks,
9763 disk_template=self.op.disk_template,
9764 admin_state=constants.ADMINST_DOWN,
9765 network_port=network_port,
9766 beparams=self.op.beparams,
9767 hvparams=self.op.hvparams,
9768 hypervisor=self.op.hypervisor,
9769                             osparams=self.op.osparams,
9770                             )
9771
9772     if self.op.tags:
9773       for tag in self.op.tags:
9774         iobj.AddTag(tag)
9775
9776 if self.adopt_disks:
9777 if self.op.disk_template == constants.DT_PLAIN:
9778 # rename LVs to the newly-generated names; we need to construct
9779 # 'fake' LV disks with the old data, plus the new unique_id
9780 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
9781         rename_to = []
9782         for t_dsk, a_dsk in zip(tmp_disks, self.disks):
9783 rename_to.append(t_dsk.logical_id)
9784 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
9785 self.cfg.SetDiskID(t_dsk, pnode_name)
9786 result = self.rpc.call_blockdev_rename(pnode_name,
9787 zip(tmp_disks, rename_to))
9788 result.Raise("Failed to rename adoped LVs")
9790 feedback_fn("* creating instance disks...")
9792 _CreateDisks(self, iobj)
9793 except errors.OpExecError:
9794 self.LogWarning("Device creation failed, reverting...")
9796 _RemoveDisks(self, iobj)
9798 self.cfg.ReleaseDRBDMinors(instance)
9801 feedback_fn("adding instance %s to cluster config" % instance)
9803 self.cfg.AddInstance(iobj, self.proc.GetECId())
9805 # Declare that we don't want to remove the instance lock anymore, as we've
9806 # added the instance to the config
9807 del self.remove_locks[locking.LEVEL_INSTANCE]
9809 if self.op.mode == constants.INSTANCE_IMPORT:
9810 # Release unused nodes
9811       _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
9812     else:
9813       # Release all nodes
9814       _ReleaseLocks(self, locking.LEVEL_NODE)
9815
9816     disk_abort = False
9817     if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
9818 feedback_fn("* wiping instance disks...")
9819       try:
9820         _WipeDisks(self, iobj)
9821       except errors.OpExecError, err:
9822         logging.exception("Wiping disks failed")
9823         self.LogWarning("Wiping instance disks failed (%s)", err)
9824         disk_abort = True
9825
9826     if disk_abort:
9827       # Something is already wrong with the disks, don't do anything else
9828       pass
9829 elif self.op.wait_for_sync:
9830 disk_abort = not _WaitForSync(self, iobj)
9831 elif iobj.disk_template in constants.DTS_INT_MIRROR:
9832 # make sure the disks are not degraded (still sync-ing is ok)
9833 feedback_fn("* checking mirrors status")
9834 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
9835     else:
9836       disk_abort = False
9837
9838     if disk_abort:
9839       _RemoveDisks(self, iobj)
9840 self.cfg.RemoveInstance(iobj.name)
9841 # Make sure the instance lock gets removed
9842 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
9843 raise errors.OpExecError("There are some degraded disks for"
9846 # Release all node resource locks
9847 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
9849 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
9850 if self.op.mode == constants.INSTANCE_CREATE:
9851 if not self.op.no_install:
9852 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
9853 not self.op.wait_for_sync)
9855 feedback_fn("* pausing disk sync to install instance OS")
9856 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9858 for idx, success in enumerate(result.payload):
9860 logging.warn("pause-sync of instance %s for disk %d failed",
9863 feedback_fn("* running the instance OS create scripts...")
9864 # FIXME: pass debug option from opcode to backend
9866 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
9867 self.op.debug_level)
9869 feedback_fn("* resuming disk sync")
9870 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9872 for idx, success in enumerate(result.payload):
9874 logging.warn("resume-sync of instance %s for disk %d failed",
9877 os_add_result.Raise("Could not add os for instance %s"
9878 " on node %s" % (instance, pnode_name))
9880 elif self.op.mode == constants.INSTANCE_IMPORT:
9881 feedback_fn("* running the instance OS import scripts...")
9885 for idx, image in enumerate(self.src_images):
9889 # FIXME: pass debug option from opcode to backend
9890 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
9891 constants.IEIO_FILE, (image, ),
9892 constants.IEIO_SCRIPT,
9893 (iobj.disks[idx], idx),
9895 transfers.append(dt)
9898 masterd.instance.TransferInstanceData(self, feedback_fn,
9899 self.op.src_node, pnode_name,
9900 self.pnode.secondary_ip,
9902 if not compat.all(import_result):
9903 self.LogWarning("Some disks for instance %s on node %s were not"
9904 " imported successfully" % (instance, pnode_name))
9906 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9907 feedback_fn("* preparing remote import...")
9908 # The source cluster will stop the instance before attempting to make a
9909 # connection. In some cases stopping an instance can take a long time,
9910 # hence the shutdown timeout is added to the connection timeout.
9911 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
9912 self.op.source_shutdown_timeout)
9913 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9915 assert iobj.primary_node == self.pnode.name
9916       disk_results = \
9917         masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
9918 self.source_x509_ca,
9919 self._cds, timeouts)
9920 if not compat.all(disk_results):
9921 # TODO: Should the instance still be started, even if some disks
9922 # failed to import (valid for local imports, too)?
9923 self.LogWarning("Some disks for instance %s on node %s were not"
9924 " imported successfully" % (instance, pnode_name))
9926 # Run rename script on newly imported instance
9927 assert iobj.name == instance
9928 feedback_fn("Running rename script for %s" % instance)
9929 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
9930 self.source_instance_name,
9931 self.op.debug_level)
9933 self.LogWarning("Failed to run rename script for %s on node"
9934 " %s: %s" % (instance, pnode_name, result.fail_msg))
9936     else:
9937       # also checked in the prereq part
9938       raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
9939                                    % self.op.mode)
9940
9941 assert not self.owned_locks(locking.LEVEL_NODE_RES)
9942
9943     if self.op.start:
9944       iobj.admin_state = constants.ADMINST_UP
9945 self.cfg.Update(iobj, feedback_fn)
9946 logging.info("Starting instance %s on node %s", instance, pnode_name)
9947 feedback_fn("* starting instance...")
9948       result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
9949                                             False)
9950       result.Raise("Could not start instance")
9951
9952 return list(iobj.all_nodes)
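# A rough usage sketch, not taken from this module: clients drive this LU by
# submitting an OpInstanceCreate opcode. Field names follow the op attributes
# read above; the concrete values are invented for illustration:
#
#   op = opcodes.OpInstanceCreate(instance_name="inst1.example.com",
#                                 mode=constants.INSTANCE_CREATE,
#                                 disk_template=constants.DT_DRBD8,
#                                 disks=[{constants.IDISK_SIZE: 10240}],
#                                 nics=[{}],
#                                 os_type="debootstrap",
#                                 pnode="node1.example.com",
#                                 snode="node2.example.com",
#                                 wait_for_sync=True)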
9955 class LUInstanceConsole(NoHooksLU):
9956 """Connect to an instance's console.
9958 This is somewhat special in that it returns the command line that
9959   you need to run on the master node in order to connect to the
9960   console.
9961
9962   """
9963   REQ_BGL = False
9964
9965   def ExpandNames(self):
9966 self.share_locks = _ShareAll()
9967 self._ExpandAndLockInstance()
9969 def CheckPrereq(self):
9970 """Check prerequisites.
9972     This checks that the instance is in the cluster.
9973
9974     """
9975     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9976 assert self.instance is not None, \
9977 "Cannot retrieve locked instance %s" % self.op.instance_name
9978 _CheckNodeOnline(self, self.instance.primary_node)
9980 def Exec(self, feedback_fn):
9981 """Connect to the console of an instance
9984 instance = self.instance
9985 node = instance.primary_node
9987 node_insts = self.rpc.call_instance_list([node],
9988 [instance.hypervisor])[node]
9989 node_insts.Raise("Can't get node information from %s" % node)
9991 if instance.name not in node_insts.payload:
9992 if instance.admin_state == constants.ADMINST_UP:
9993 state = constants.INSTST_ERRORDOWN
9994 elif instance.admin_state == constants.ADMINST_DOWN:
9995 state = constants.INSTST_ADMINDOWN
9996       else:
9997         state = constants.INSTST_ADMINOFFLINE
9998 raise errors.OpExecError("Instance %s is not running (state %s)" %
9999 (instance.name, state))
10001 logging.debug("Connecting to console of %s on %s", instance.name, node)
10003 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10006 def _GetInstanceConsole(cluster, instance):
10007 """Returns console information for an instance.
10009 @type cluster: L{objects.Cluster}
10010   @type instance: L{objects.Instance}
10011   @rtype: dict
10012
10013   """
10014   hyper = hypervisor.GetHypervisor(instance.hypervisor)
10015 # beparams and hvparams are passed separately, to avoid editing the
10016 # instance and then saving the defaults in the instance itself.
10017 hvparams = cluster.FillHV(instance)
10018 beparams = cluster.FillBE(instance)
10019 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10021 assert console.instance == instance.name
10022 assert console.Validate()
10024 return console.ToDict()
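# Sketch of the return value (keys assumed from objects.InstanceConsole; the
# values are made up): for an instance reachable over SSH this would be
# something like
#
#   {"instance": "inst1.example.com", "kind": constants.CONS_SSH,
#    "host": "node1.example.com", "user": "root", "command": [...]}
#
# The exact fields depend on the hypervisor's GetInstanceConsole().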
10027 class LUInstanceReplaceDisks(LogicalUnit):
10028 """Replace the disks of an instance.
10031 HPATH = "mirrors-replace"
10032 HTYPE = constants.HTYPE_INSTANCE
10035 def CheckArguments(self):
10036 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
10037 self.op.iallocator)
10039 def ExpandNames(self):
10040 self._ExpandAndLockInstance()
10042 assert locking.LEVEL_NODE not in self.needed_locks
10043 assert locking.LEVEL_NODE_RES not in self.needed_locks
10044 assert locking.LEVEL_NODEGROUP not in self.needed_locks
10046 assert self.op.iallocator is None or self.op.remote_node is None, \
10047 "Conflicting options"
10049 if self.op.remote_node is not None:
10050 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10052 # Warning: do not remove the locking of the new secondary here
10053 # unless DRBD8.AddChildren is changed to work in parallel;
10054 # currently it doesn't since parallel invocations of
10055 # FindUnusedMinor will conflict
10056 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10057 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10058     else:
10059       self.needed_locks[locking.LEVEL_NODE] = []
10060 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10062 if self.op.iallocator is not None:
10063 # iallocator will select a new node in the same group
10064 self.needed_locks[locking.LEVEL_NODEGROUP] = []
10066 self.needed_locks[locking.LEVEL_NODE_RES] = []
10068 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10069 self.op.iallocator, self.op.remote_node,
10070 self.op.disks, False, self.op.early_release)
10072 self.tasklets = [self.replacer]
10074 def DeclareLocks(self, level):
10075 if level == locking.LEVEL_NODEGROUP:
10076 assert self.op.remote_node is None
10077 assert self.op.iallocator is not None
10078 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10080 self.share_locks[locking.LEVEL_NODEGROUP] = 1
10081 # Lock all groups used by instance optimistically; this requires going
10082 # via the node before it's locked, requiring verification later on
10083 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10084 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10086 elif level == locking.LEVEL_NODE:
10087 if self.op.iallocator is not None:
10088 assert self.op.remote_node is None
10089 assert not self.needed_locks[locking.LEVEL_NODE]
10091 # Lock member nodes of all locked groups
10092 self.needed_locks[locking.LEVEL_NODE] = [node_name
10093 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10094 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
10095       else:
10096         self._LockInstancesNodes()
10097 elif level == locking.LEVEL_NODE_RES:
10099 self.needed_locks[locking.LEVEL_NODE_RES] = \
10100 self.needed_locks[locking.LEVEL_NODE]
10102 def BuildHooksEnv(self):
10103 """Build hooks env.
10105 This runs on the master, the primary and all the secondaries.
10108 instance = self.replacer.instance
10110 "MODE": self.op.mode,
10111 "NEW_SECONDARY": self.op.remote_node,
10112 "OLD_SECONDARY": instance.secondary_nodes[0],
10114 env.update(_BuildInstanceHookEnvByObject(self, instance))
10117 def BuildHooksNodes(self):
10118 """Build hooks nodes.
10121 instance = self.replacer.instance
10123 self.cfg.GetMasterNode(),
10124 instance.primary_node,
10126 if self.op.remote_node is not None:
10127 nl.append(self.op.remote_node)
10130 def CheckPrereq(self):
10131 """Check prerequisites.
10134 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
10135 self.op.iallocator is None)
10137 # Verify if node group locks are still correct
10138 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10139     if owned_groups:
10140       _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
10141
10142 return LogicalUnit.CheckPrereq(self)
10145 class TLReplaceDisks(Tasklet):
10146 """Replaces disks for an instance.
10148   Note: Locking is not within the scope of this class.
10149
10150   """
10151   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10152                disks, delay_iallocator, early_release):
10153     """Initializes this class.
10154
10155     """
10156     Tasklet.__init__(self, lu)
10157
10158     # Parameters
10159     self.instance_name = instance_name
10160     self.mode = mode
10161     self.iallocator_name = iallocator_name
10162     self.remote_node = remote_node
10163     self.disks = disks
10164     self.delay_iallocator = delay_iallocator
10165     self.early_release = early_release
10166
10167     # Runtime data
10168 self.instance = None
10169 self.new_node = None
10170 self.target_node = None
10171 self.other_node = None
10172 self.remote_node_info = None
10173 self.node_secondary_ip = None
10175   @staticmethod
10176   def CheckArguments(mode, remote_node, iallocator):
10177     """Helper function for users of this class.
10178
10179     """
10180 # check for valid parameter combination
10181 if mode == constants.REPLACE_DISK_CHG:
10182 if remote_node is None and iallocator is None:
10183 raise errors.OpPrereqError("When changing the secondary either an"
10184 " iallocator script must be used or the"
10185 " new node given", errors.ECODE_INVAL)
10187 if remote_node is not None and iallocator is not None:
10188 raise errors.OpPrereqError("Give either the iallocator or the new"
10189 " secondary, not both", errors.ECODE_INVAL)
10191 elif remote_node is not None or iallocator is not None:
10192 # Not replacing the secondary
10193 raise errors.OpPrereqError("The iallocator and new node options can"
10194 " only be used when changing the"
10195 " secondary node", errors.ECODE_INVAL)
10197   @staticmethod
10198   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10199     """Compute a new secondary node using an IAllocator.
10200
10201     """
10202     ial = IAllocator(lu.cfg, lu.rpc,
10203 mode=constants.IALLOCATOR_MODE_RELOC,
10204 name=instance_name,
10205 relocate_from=list(relocate_from))
10207 ial.Run(iallocator_name)
10209 if not ial.success:
10210 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10211 " %s" % (iallocator_name, ial.info),
10212 errors.ECODE_NORES)
10214 if len(ial.result) != ial.required_nodes:
10215 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10216 " of nodes (%s), required %s" %
10217                                  (iallocator_name,
10218                                   len(ial.result), ial.required_nodes),
10219 errors.ECODE_FAULT)
10221 remote_node_name = ial.result[0]
10223 lu.LogInfo("Selected new secondary for instance '%s': %s",
10224 instance_name, remote_node_name)
10226 return remote_node_name
10228 def _FindFaultyDisks(self, node_name):
10229 """Wrapper for L{_FindFaultyInstanceDisks}.
10232 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10235 def _CheckDisksActivated(self, instance):
10236 """Checks if the instance disks are activated.
10238     @param instance: The instance to check disks
10239     @return: True if they are activated, False otherwise
10240
10241     """
10242     nodes = instance.all_nodes
10243
10244     for idx, dev in enumerate(instance.disks):
10245       for node in nodes:
10246         self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10247         self.cfg.SetDiskID(dev, node)
10248
10249         result = self.rpc.call_blockdev_find(node, dev)
10250
10251         if result.offline:
10252           continue
10253         elif result.fail_msg or not result.payload:
10254           return False
10255
10256     return True
10257
10258 def CheckPrereq(self):
10259 """Check prerequisites.
10261     This checks that the instance is in the cluster.
10262
10263     """
10264     self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10265 assert instance is not None, \
10266 "Cannot retrieve locked instance %s" % self.instance_name
10268 if instance.disk_template != constants.DT_DRBD8:
10269 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10270 " instances", errors.ECODE_INVAL)
10272 if len(instance.secondary_nodes) != 1:
10273 raise errors.OpPrereqError("The instance has a strange layout,"
10274 " expected one secondary but found %d" %
10275 len(instance.secondary_nodes),
10276 errors.ECODE_FAULT)
10278 if not self.delay_iallocator:
10279 self._CheckPrereq2()
10281 def _CheckPrereq2(self):
10282 """Check prerequisites, second part.
10284 This function should always be part of CheckPrereq. It was separated and is
10285 now called from Exec because during node evacuation iallocator was only
10286     called with an unmodified cluster model, not taking planned changes into
10287     account.
10288
10289     """
10290     instance = self.instance
10291 secondary_node = instance.secondary_nodes[0]
10293 if self.iallocator_name is None:
10294 remote_node = self.remote_node
10295     else:
10296       remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10297 instance.name, instance.secondary_nodes)
10299 if remote_node is None:
10300 self.remote_node_info = None
10301     else:
10302       assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10303 "Remote node '%s' is not locked" % remote_node
10305 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10306 assert self.remote_node_info is not None, \
10307 "Cannot retrieve locked node %s" % remote_node
10309 if remote_node == self.instance.primary_node:
10310 raise errors.OpPrereqError("The specified node is the primary node of"
10311 " the instance", errors.ECODE_INVAL)
10313 if remote_node == secondary_node:
10314 raise errors.OpPrereqError("The specified node is already the"
10315 " secondary node of the instance",
10316 errors.ECODE_INVAL)
10318 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10319 constants.REPLACE_DISK_CHG):
10320 raise errors.OpPrereqError("Cannot specify disks to be replaced",
10321 errors.ECODE_INVAL)
10323 if self.mode == constants.REPLACE_DISK_AUTO:
10324 if not self._CheckDisksActivated(instance):
10325 raise errors.OpPrereqError("Please run activate-disks on instance %s"
10326 " first" % self.instance_name,
10327 errors.ECODE_STATE)
10328 faulty_primary = self._FindFaultyDisks(instance.primary_node)
10329 faulty_secondary = self._FindFaultyDisks(secondary_node)
10331 if faulty_primary and faulty_secondary:
10332 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10333 " one node and can not be repaired"
10334 " automatically" % self.instance_name,
10335 errors.ECODE_STATE)
10337       if faulty_primary:
10338         self.disks = faulty_primary
10339 self.target_node = instance.primary_node
10340 self.other_node = secondary_node
10341 check_nodes = [self.target_node, self.other_node]
10342 elif faulty_secondary:
10343 self.disks = faulty_secondary
10344 self.target_node = secondary_node
10345 self.other_node = instance.primary_node
10346         check_nodes = [self.target_node, self.other_node]
10347       else:
10348         self.disks = []
10349         check_nodes = []
10350
10351     else:
10352       # Non-automatic modes
10353 if self.mode == constants.REPLACE_DISK_PRI:
10354 self.target_node = instance.primary_node
10355 self.other_node = secondary_node
10356 check_nodes = [self.target_node, self.other_node]
10358 elif self.mode == constants.REPLACE_DISK_SEC:
10359 self.target_node = secondary_node
10360 self.other_node = instance.primary_node
10361 check_nodes = [self.target_node, self.other_node]
10363 elif self.mode == constants.REPLACE_DISK_CHG:
10364 self.new_node = remote_node
10365 self.other_node = instance.primary_node
10366 self.target_node = secondary_node
10367 check_nodes = [self.new_node, self.other_node]
10369 _CheckNodeNotDrained(self.lu, remote_node)
10370 _CheckNodeVmCapable(self.lu, remote_node)
10372 old_node_info = self.cfg.GetNodeInfo(secondary_node)
10373 assert old_node_info is not None
10374 if old_node_info.offline and not self.early_release:
10375 # doesn't make sense to delay the release
10376 self.early_release = True
10377 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10378 " early-release mode", secondary_node)
10380     else:
10381       raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10382                                    self.mode)
10383
10384 # If not specified all disks should be replaced
10385     if not self.disks:
10386       self.disks = range(len(self.instance.disks))
10387
10388 # TODO: compute disk parameters
10389 primary_node_info = self.cfg.GetNodeInfo(instance.primary_node)
10390 secondary_node_info = self.cfg.GetNodeInfo(secondary_node)
10391 if primary_node_info.group != secondary_node_info.group:
10392 self.lu.LogInfo("The instance primary and secondary nodes are in two"
10393 " different node groups; the disk parameters of the"
10394 " primary node's group will be applied.")
10396 self.diskparams = self.cfg.GetNodeGroup(primary_node_info.group).diskparams
10398 for node in check_nodes:
10399 _CheckNodeOnline(self.lu, node)
10401     touched_nodes = frozenset(node_name for node_name in [self.new_node,
10402                                                           self.other_node,
10403                                                           self.target_node]
10404                               if node_name is not None)
10405
10406 # Release unneeded node and node resource locks
10407 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10408 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10410 # Release any owned node group
10411 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10412 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10414 # Check whether disks are valid
10415 for disk_idx in self.disks:
10416 instance.FindDisk(disk_idx)
10418 # Get secondary node IP addresses
10419 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10420 in self.cfg.GetMultiNodeInfo(touched_nodes))
10422 def Exec(self, feedback_fn):
10423 """Execute disk replacement.
10425     This dispatches the disk replacement to the appropriate handler.
10426
10427     """
10428     if self.delay_iallocator:
10429 self._CheckPrereq2()
10430
10431     if __debug__:
10432       # Verify owned locks before starting operation
10433 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10434 assert set(owned_nodes) == set(self.node_secondary_ip), \
10435 ("Incorrect node locks, owning %s, expected %s" %
10436 (owned_nodes, self.node_secondary_ip.keys()))
10437 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10438 self.lu.owned_locks(locking.LEVEL_NODE_RES))
10440 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10441 assert list(owned_instances) == [self.instance_name], \
10442 "Instance '%s' not locked" % self.instance_name
10444 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10445 "Should not own any node group lock at this point"
10448 feedback_fn("No disks need replacement")
10451 feedback_fn("Replacing disk(s) %s for %s" %
10452 (utils.CommaJoin(self.disks), self.instance.name))
10454 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10456 # Activate the instance disks if we're replacing them on a down instance
10457     if activate_disks:
10458       _StartInstanceDisks(self.lu, self.instance, True)
10459
10460     try:
10461       # Should we replace the secondary node?
10462       if self.new_node is not None:
10463         fn = self._ExecDrbd8Secondary
10464       else:
10465         fn = self._ExecDrbd8DiskOnly
10466
10467       result = fn(feedback_fn)
10468     finally:
10469       # Deactivate the instance disks if we're replacing them on a
10470       # down instance
10471       if activate_disks:
10472         _SafeShutdownInstanceDisks(self.lu, self.instance)
10473
10474 assert not self.lu.owned_locks(locking.LEVEL_NODE)
10475
10476     if __debug__:
10477       # Verify owned locks
10478 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10479 nodes = frozenset(self.node_secondary_ip)
10480 assert ((self.early_release and not owned_nodes) or
10481 (not self.early_release and not (set(owned_nodes) - nodes))), \
10482 ("Not owning the correct locks, early_release=%s, owned=%r,"
10483 " nodes=%r" % (self.early_release, owned_nodes, nodes))
10487 def _CheckVolumeGroup(self, nodes):
10488 self.lu.LogInfo("Checking volume groups")
10490 vgname = self.cfg.GetVGName()
10492 # Make sure volume group exists on all involved nodes
10493 results = self.rpc.call_vg_list(nodes)
10495 raise errors.OpExecError("Can't list volume groups on the nodes")
10498 res = results[node]
10499 res.Raise("Error checking node %s" % node)
10500 if vgname not in res.payload:
10501 raise errors.OpExecError("Volume group '%s' not found on node %s" %
10504 def _CheckDisksExistence(self, nodes):
10505 # Check disk existence
10506 for idx, dev in enumerate(self.instance.disks):
10507       if idx not in self.disks:
10508         continue
10509
10510       for node in nodes:
10511         self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10512         self.cfg.SetDiskID(dev, node)
10513
10514         result = self.rpc.call_blockdev_find(node, dev)
10515
10516         msg = result.fail_msg
10517         if msg or not result.payload:
10518           if not msg:
10519             msg = "disk not found"
10520           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10521                                    (idx, node, msg))
10522
10523 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10524 for idx, dev in enumerate(self.instance.disks):
10525       if idx not in self.disks:
10526         continue
10527
10528       self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10529                       (idx, node_name))
10530
10531       if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
10532                                    ldisk=ldisk):
10533 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10534 " replace disks for instance %s" %
10535 (node_name, self.instance.name))
10537 def _CreateNewStorage(self, node_name):
10538 """Create new storage on the primary or secondary node.
10540 This is only used for same-node replaces, not for changing the
10541     secondary node, hence we don't want to modify the existing disk.
10542
10543     """
10544     iv_names = {}
10545
10546     for idx, dev in enumerate(self.instance.disks):
10547       if idx not in self.disks:
10548         continue
10549
10550       self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10552 self.cfg.SetDiskID(dev, node_name)
10554 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10555 names = _GenerateUniqueNames(self.lu, lv_names)
10557 _, data_p, meta_p = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
10559 vg_data = dev.children[0].logical_id[0]
10560 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10561 logical_id=(vg_data, names[0]), params=data_p)
10562 vg_meta = dev.children[1].logical_id[0]
10563 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10564 logical_id=(vg_meta, names[1]), params=meta_p)
10566 new_lvs = [lv_data, lv_meta]
10567 old_lvs = [child.Copy() for child in dev.children]
10568 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10570 # we pass force_create=True to force the LVM creation
10571 for new_lv in new_lvs:
10572 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
10573                         _GetInstanceInfoText(self.instance), False)
10574
10575     return iv_names
10576
10577   def _CheckDevices(self, node_name, iv_names):
10578 for name, (dev, _, _) in iv_names.iteritems():
10579 self.cfg.SetDiskID(dev, node_name)
10581 result = self.rpc.call_blockdev_find(node_name, dev)
10583 msg = result.fail_msg
10584       if msg or not result.payload:
10585         if not msg:
10586           msg = "disk not found"
10587         raise errors.OpExecError("Can't find DRBD device %s: %s" %
10588                                  (name, msg))
10590 if result.payload.is_degraded:
10591 raise errors.OpExecError("DRBD device %s is degraded!" % name)
10593 def _RemoveOldStorage(self, node_name, iv_names):
10594 for name, (_, old_lvs, _) in iv_names.iteritems():
10595 self.lu.LogInfo("Remove logical volumes for %s" % name)
10596
10597       for lv in old_lvs:
10598         self.cfg.SetDiskID(lv, node_name)
10599
10600         msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10601         if msg:
10602           self.lu.LogWarning("Can't remove old LV: %s" % msg,
10603 hint="remove unused LVs manually")
10605 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10606 """Replace a disk on the primary or secondary for DRBD 8.
10608 The algorithm for replace is quite complicated:
10610 1. for each disk to be replaced:
10612 1. create new LVs on the target node with unique names
10613 1. detach old LVs from the drbd device
10614 1. rename old LVs to name_replaced.<time_t>
10615 1. rename new LVs to old LVs
10616 1. attach the new LVs (with the old names now) to the drbd device
10618 1. wait for sync across all devices
10620 1. for each modified disk:
10622       1. remove old LVs (which have the name name_replaced.<time_t>)
10623
10624     Failures are not very well handled.
10625
10626     """
10627     steps_total = 6
10628
10629     # Step: check device activation
10630 self.lu.LogStep(1, steps_total, "Check device existence")
10631 self._CheckDisksExistence([self.other_node, self.target_node])
10632 self._CheckVolumeGroup([self.target_node, self.other_node])
10634 # Step: check other node consistency
10635 self.lu.LogStep(2, steps_total, "Check peer consistency")
10636 self._CheckDisksConsistency(self.other_node,
10637                                 self.other_node == self.instance.primary_node,
10638                                 False)
10639
10640 # Step: create new storage
10641 self.lu.LogStep(3, steps_total, "Allocate new storage")
10642 iv_names = self._CreateNewStorage(self.target_node)
10644 # Step: for each lv, detach+rename*2+attach
10645 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10646 for dev, old_lvs, new_lvs in iv_names.itervalues():
10647 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10649       result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10650                                                      old_lvs)
10651       result.Raise("Can't detach drbd from local storage on node"
10652 " %s for device %s" % (self.target_node, dev.iv_name))
10654 #cfg.Update(instance)
10656 # ok, we created the new LVs, so now we know we have the needed
10657 # storage; as such, we proceed on the target node to rename
10658 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
10659 # using the assumption that logical_id == physical_id (which in
10660 # turn is the unique_id on that node)
10662 # FIXME(iustin): use a better name for the replaced LVs
10663 temp_suffix = int(time.time())
10664 ren_fn = lambda d, suff: (d.physical_id[0],
10665 d.physical_id[1] + "_replaced-%s" % suff)
10667 # Build the rename list based on what LVs exist on the node
10668 rename_old_to_new = []
10669 for to_ren in old_lvs:
10670 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
10671 if not result.fail_msg and result.payload:
10673 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
10675 self.lu.LogInfo("Renaming the old LVs on the target node")
10676       result = self.rpc.call_blockdev_rename(self.target_node,
10677                                              rename_old_to_new)
10678       result.Raise("Can't rename old LVs on node %s" % self.target_node)
10679
10680 # Now we rename the new LVs to the old LVs
10681 self.lu.LogInfo("Renaming the new LVs on the target node")
10682 rename_new_to_old = [(new, old.physical_id)
10683 for old, new in zip(old_lvs, new_lvs)]
10684       result = self.rpc.call_blockdev_rename(self.target_node,
10685                                              rename_new_to_old)
10686       result.Raise("Can't rename new LVs on node %s" % self.target_node)
10687
10688 # Intermediate steps of in memory modifications
10689 for old, new in zip(old_lvs, new_lvs):
10690 new.logical_id = old.logical_id
10691 self.cfg.SetDiskID(new, self.target_node)
10693 # We need to modify old_lvs so that removal later removes the
10694       # right LVs, not the newly added ones; note that old_lvs is a
10695       # copy here
10696       for disk in old_lvs:
10697 disk.logical_id = ren_fn(disk, temp_suffix)
10698 self.cfg.SetDiskID(disk, self.target_node)
10700 # Now that the new lvs have the old name, we can add them to the device
10701 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
10702       result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
10703                                                   new_lvs)
10704       msg = result.fail_msg
10705       if msg:
10706         for new_lv in new_lvs:
10707           msg2 = self.rpc.call_blockdev_remove(self.target_node,
10708                                                new_lv).fail_msg
10709           if msg2:
10710             self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
10711                                hint=("cleanup manually the unused logical"
10712                                      " volumes"))
10713         raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
10714
10715 cstep = itertools.count(5)
10717 if self.early_release:
10718 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10719 self._RemoveOldStorage(self.target_node, iv_names)
10720 # TODO: Check if releasing locks early still makes sense
10721 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10723 # Release all resource locks except those used by the instance
10724 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10725 keep=self.node_secondary_ip.keys())
10727 # Release all node locks while waiting for sync
10728 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10730 # TODO: Can the instance lock be downgraded here? Take the optional disk
10731 # shutdown in the caller into consideration.
10734 # This can fail as the old devices are degraded and _WaitForSync
10735 # does a combined result over all disks, so we don't check its return value
10736 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10737 _WaitForSync(self.lu, self.instance)
10739 # Check all devices manually
10740 self._CheckDevices(self.instance.primary_node, iv_names)
10742 # Step: remove old storage
10743 if not self.early_release:
10744 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10745 self._RemoveOldStorage(self.target_node, iv_names)
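  # Illustration of the rename dance in step 4 (LV names are hypothetical):
  #
  #   old data LV: xenvg/aa11.disk0_data -> xenvg/aa11.disk0_data_replaced-1394452367
  #   new data LV: xenvg/bb22.disk0_data -> xenvg/aa11.disk0_data
  #
  # i.e. the freshly created LVs take over the old names, so the DRBD device
  # ends up re-attached to same-named, healthy backing volumes.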
10747 def _ExecDrbd8Secondary(self, feedback_fn):
10748 """Replace the secondary node for DRBD 8.
10750 The algorithm for replace is quite complicated:
10751 - for all disks of the instance:
10752 - create new LVs on the new node with same names
10753 - shutdown the drbd device on the old secondary
10754 - disconnect the drbd network on the primary
10755 - create the drbd device on the new secondary
10756 - network attach the drbd on the primary, using an artifice:
10757 the drbd code for Attach() will connect to the network if it
10758 finds a device which is connected to the good local disks but
10759 not network enabled
10760 - wait for sync across all devices
10761 - remove all disks from the old secondary
10763     Failures are not very well handled.
10764
10765     """
10766     steps_total = 6
10767
10768     pnode = self.instance.primary_node
10770 # Step: check device activation
10771 self.lu.LogStep(1, steps_total, "Check device existence")
10772 self._CheckDisksExistence([self.instance.primary_node])
10773 self._CheckVolumeGroup([self.instance.primary_node])
10775 # Step: check other node consistency
10776 self.lu.LogStep(2, steps_total, "Check peer consistency")
10777 self._CheckDisksConsistency(self.instance.primary_node, True, True)
10779 # Step: create new storage
10780 self.lu.LogStep(3, steps_total, "Allocate new storage")
10781 for idx, dev in enumerate(self.instance.disks):
10782 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
10783 (self.new_node, idx))
10784 # we pass force_create=True to force LVM creation
10785 for new_lv in dev.children:
10786 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
10787 _GetInstanceInfoText(self.instance), False)
10789     # Step 4: drbd minors and drbd setups changes
10790 # after this, we must manually remove the drbd minors on both the
10791 # error and the success paths
10792 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10793 minors = self.cfg.AllocateDRBDMinor([self.new_node
10794 for dev in self.instance.disks],
10795 self.instance.name)
10796 logging.debug("Allocated minors %r", minors)
10797
10798     iv_names = {}
10799     for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
10800 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
10801 (self.new_node, idx))
10802 # create new devices on new_node; note that we create two IDs:
10803 # one without port, so the drbd will be activated without
10804 # networking information on the new node at this stage, and one
10805 # with network, for the latter activation in step 4
10806 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
10807       if self.instance.primary_node == o_node1:
10808         p_minor = o_minor1
10809       else:
10810         assert self.instance.primary_node == o_node2, "Three-node instance?"
10811         p_minor = o_minor2
10812
10813       new_alone_id = (self.instance.primary_node, self.new_node, None,
10814 p_minor, new_minor, o_secret)
10815 new_net_id = (self.instance.primary_node, self.new_node, o_port,
10816 p_minor, new_minor, o_secret)
10818 iv_names[idx] = (dev, dev.children, new_net_id)
10819 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
10821 drbd_params, _, _ = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
10822 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
10823 logical_id=new_alone_id,
10824 children=dev.children,
10826 params=drbd_params)
10828 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
10829 _GetInstanceInfoText(self.instance), False)
10830 except errors.GenericError:
10831 self.cfg.ReleaseDRBDMinors(self.instance.name)
10834 # We have new devices, shutdown the drbd on the old secondary
10835 for idx, dev in enumerate(self.instance.disks):
10836 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
10837 self.cfg.SetDiskID(dev, self.target_node)
10838       msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
10839       if msg:
10840         self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
10841                            " node: %s" % (idx, msg),
10842 hint=("Please cleanup this device manually as"
10843 " soon as possible"))
10845 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
10846 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
10847 self.instance.disks)[pnode]
10849     msg = result.fail_msg
10850     if msg:
10851       # detaches didn't succeed (unlikely)
10852       self.cfg.ReleaseDRBDMinors(self.instance.name)
10853 raise errors.OpExecError("Can't detach the disks from the network on"
10854 " old node: %s" % (msg,))
10856 # if we managed to detach at least one, we update all the disks of
10857 # the instance to point to the new secondary
10858 self.lu.LogInfo("Updating instance configuration")
10859 for dev, _, new_logical_id in iv_names.itervalues():
10860 dev.logical_id = new_logical_id
10861 self.cfg.SetDiskID(dev, self.instance.primary_node)
10863 self.cfg.Update(self.instance, feedback_fn)
10865 # Release all node locks (the configuration has been updated)
10866 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10868 # and now perform the drbd attach
10869 self.lu.LogInfo("Attaching primary drbds to new secondary"
10870 " (standalone => connected)")
10871     result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
10872                                             self.new_node],
10873                                            self.node_secondary_ip,
10874                                            self.instance.disks,
10875                                            self.instance.name,
10876                                            False)
10877     for to_node, to_result in result.items():
10878       msg = to_result.fail_msg
10879       if msg:
10880         self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
10881                            to_node, msg,
10882                            hint=("please do a gnt-instance info to see the"
10883 " status of disks"))
10885 cstep = itertools.count(5)
10887 if self.early_release:
10888 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10889 self._RemoveOldStorage(self.target_node, iv_names)
10890 # TODO: Check if releasing locks early still makes sense
10891 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10893 # Release all resource locks except those used by the instance
10894 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10895 keep=self.node_secondary_ip.keys())
10897 # TODO: Can the instance lock be downgraded here? Take the optional disk
10898 # shutdown in the caller into consideration.
10901 # This can fail as the old devices are degraded and _WaitForSync
10902 # does a combined result over all disks, so we don't check its return value
10903 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10904 _WaitForSync(self.lu, self.instance)
10906 # Check all devices manually
10907 self._CheckDevices(self.instance.primary_node, iv_names)
10909 # Step: remove old storage
10910 if not self.early_release:
10911 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10912 self._RemoveOldStorage(self.target_node, iv_names)
10915 class LURepairNodeStorage(NoHooksLU):
10916 """Repairs the volume group on a node.
10921 def CheckArguments(self):
10922 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10924 storage_type = self.op.storage_type
10926 if (constants.SO_FIX_CONSISTENCY not in
10927 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
10928 raise errors.OpPrereqError("Storage units of type '%s' can not be"
10929 " repaired" % storage_type,
10930 errors.ECODE_INVAL)
10932 def ExpandNames(self):
10933 self.needed_locks = {
10934       locking.LEVEL_NODE: [self.op.node_name],
10935       }
10936
10937 def _CheckFaultyDisks(self, instance, node_name):
10938 """Ensure faulty disks abort the opcode or at least warn."""
10940 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
10942 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
10943 " node '%s'" % (instance.name, node_name),
10944 errors.ECODE_STATE)
10945 except errors.OpPrereqError, err:
10946 if self.op.ignore_consistency:
10947         self.proc.LogWarning(str(err.args[0]))
10948       else:
10949         raise
10950
10951 def CheckPrereq(self):
10952 """Check prerequisites.
10955 # Check whether any instance on this node has faulty disks
10956 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
10957 if inst.admin_state != constants.ADMINST_UP:
10958         continue
10959       check_nodes = set(inst.all_nodes)
10960 check_nodes.discard(self.op.node_name)
10961 for inst_node_name in check_nodes:
10962 self._CheckFaultyDisks(inst, inst_node_name)
10964 def Exec(self, feedback_fn):
10965 feedback_fn("Repairing storage unit '%s' on %s ..." %
10966 (self.op.name, self.op.node_name))
10968 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_execute(self.op.node_name,
                                           self.op.storage_type, st_args,
                                           self.op.name,
                                           constants.SO_FIX_CONSISTENCY)
10973 result.Raise("Failed to repair storage unit '%s' on %s" %
10974 (self.op.name, self.op.node_name))
10977 class LUNodeEvacuate(NoHooksLU):
10978 """Evacuates instances off a list of nodes.
10983 _MODE2IALLOCATOR = {
10984 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
10985 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
    constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
    }
10988 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
10989 assert (frozenset(_MODE2IALLOCATOR.values()) ==
10990 constants.IALLOCATOR_NEVAC_MODES)
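  # The two asserts above run at module import time and keep the opcode-level
  # evacuation modes and the iallocator "nevac" modes in one-to-one
  # correspondence; e.g. _MODE2IALLOCATOR[constants.NODE_EVAC_PRI] maps to
  # constants.IALLOCATOR_NEVAC_PRI, so adding a mode to only one of the two
  # sets breaks the module on load rather than at evacuation time.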
10992 def CheckArguments(self):
10993 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10995 def ExpandNames(self):
10996 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10998 if self.op.remote_node is not None:
10999 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11000 assert self.op.remote_node
11002 if self.op.remote_node == self.op.node_name:
11003 raise errors.OpPrereqError("Can not use evacuated node as a new"
11004 " secondary node", errors.ECODE_INVAL)
11006 if self.op.mode != constants.NODE_EVAC_SEC:
11007 raise errors.OpPrereqError("Without the use of an iallocator only"
11008 " secondary instances can be evacuated",
11009 errors.ECODE_INVAL)
11012 self.share_locks = _ShareAll()
11013 self.needed_locks = {
11014 locking.LEVEL_INSTANCE: [],
11015 locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }
11019 # Determine nodes (via group) optimistically, needs verification once locks
11020 # have been acquired
11021 self.lock_nodes = self._DetermineNodes()
11023 def _DetermineNodes(self):
11024 """Gets the list of nodes to operate on.
11027 if self.op.remote_node is None:
11028 # Iallocator will choose any node(s) in the same group
11029 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
    else:
      group_nodes = frozenset([self.op.remote_node])
11033 # Determine nodes to be locked
11034 return set([self.op.node_name]) | group_nodes
11036 def _DetermineInstances(self):
11037 """Builds list of instances to operate on.
11040 assert self.op.mode in constants.NODE_EVAC_MODES
11042 if self.op.mode == constants.NODE_EVAC_PRI:
11043 # Primary instances only
11044 inst_fn = _GetNodePrimaryInstances
11045 assert self.op.remote_node is None, \
11046 "Evacuating primary instances requires iallocator"
11047 elif self.op.mode == constants.NODE_EVAC_SEC:
11048 # Secondary instances only
11049 inst_fn = _GetNodeSecondaryInstances
    else:
      # All instances
      assert self.op.mode == constants.NODE_EVAC_ALL
11053 inst_fn = _GetNodeInstances
11054 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11056 raise errors.OpPrereqError("Due to an issue with the iallocator"
11057 " interface it is not possible to evacuate"
11058 " all instances at once; specify explicitly"
11059 " whether to evacuate primary or secondary"
11061 errors.ECODE_INVAL)
11063 return inst_fn(self.cfg, self.op.node_name)
11065 def DeclareLocks(self, level):
11066 if level == locking.LEVEL_INSTANCE:
11067 # Lock instances optimistically, needs verification once node and group
11068 # locks have been acquired
11069 self.needed_locks[locking.LEVEL_INSTANCE] = \
11070 set(i.name for i in self._DetermineInstances())
11072 elif level == locking.LEVEL_NODEGROUP:
11073 # Lock node groups for all potential target nodes optimistically, needs
11074 # verification once nodes have been acquired
11075 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11076 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11078 elif level == locking.LEVEL_NODE:
11079 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
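  # Locking pattern used by this LU (a sketch, based on the code above):
  # instance, group and node locks are computed *optimistically* here, before
  # they are actually acquired, and CheckPrereq below re-runs the same
  # computations to verify nothing changed in between; a mismatch aborts the
  # opcode with a "retry the operation" error instead of acting on stale data.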
11081 def CheckPrereq(self):
11083 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11084 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11085 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11087 need_nodes = self._DetermineNodes()
11089 if not owned_nodes.issuperset(need_nodes):
11090 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11091 " locks were acquired, current nodes are"
11092 " are '%s', used to be '%s'; retry the"
11094 (self.op.node_name,
11095 utils.CommaJoin(need_nodes),
11096 utils.CommaJoin(owned_nodes)),
11097 errors.ECODE_STATE)
11099 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11100 if owned_groups != wanted_groups:
11101 raise errors.OpExecError("Node groups changed since locks were acquired,"
11102 " current groups are '%s', used to be '%s';"
11103 " retry the operation" %
11104 (utils.CommaJoin(wanted_groups),
11105 utils.CommaJoin(owned_groups)))
11107 # Determine affected instances
11108 self.instances = self._DetermineInstances()
11109 self.instance_names = [i.name for i in self.instances]
11111 if set(self.instance_names) != owned_instances:
11112 raise errors.OpExecError("Instances on node '%s' changed since locks"
11113 " were acquired, current instances are '%s',"
11114 " used to be '%s'; retry the operation" %
11115 (self.op.node_name,
11116 utils.CommaJoin(self.instance_names),
11117 utils.CommaJoin(owned_instances)))
11119 if self.instance_names:
11120 self.LogInfo("Evacuating instances from node '%s': %s",
11122 utils.CommaJoin(utils.NiceSort(self.instance_names)))
11124 self.LogInfo("No instances to evacuate from node '%s'",
11127 if self.op.remote_node is not None:
11128 for i in self.instances:
11129 if i.primary_node == self.op.remote_node:
11130 raise errors.OpPrereqError("Node %s is the primary node of"
11131 " instance %s, cannot use it as"
11133 (self.op.remote_node, i.name),
11134 errors.ECODE_INVAL)
11136 def Exec(self, feedback_fn):
11137 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
    if not self.instance_names:
      # No instances to evacuate
      jobs = []

    elif self.op.iallocator is not None:
11144 # TODO: Implement relocation to other group
11145 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
11146 evac_mode=self._MODE2IALLOCATOR[self.op.mode],
11147 instances=list(self.instance_names))
11149 ial.Run(self.op.iallocator)
11151 if not ial.success:
11152 raise errors.OpPrereqError("Can't compute node evacuation using"
11153 " iallocator '%s': %s" %
11154 (self.op.iallocator, ial.info),
11155 errors.ECODE_NORES)
11157 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
11159 elif self.op.remote_node is not None:
11160 assert self.op.mode == constants.NODE_EVAC_SEC
      jobs = [
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
                                        remote_node=self.op.remote_node,
                                        disks=[],
                                        mode=constants.REPLACE_DISK_CHG,
                                        early_release=self.op.early_release)]
        for instance_name in self.instance_names
        ]

    else:
      raise errors.ProgrammerError("No iallocator or remote node")
11173 return ResultWithJobs(jobs)
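# Shape of the "jobs" value above (illustrative): a list of lists of opcodes,
# one inner list per job to submit, e.g. one single-opcode job per instance:
#   jobs = [[opcodes.OpInstanceReplaceDisks(instance_name="inst0", ...)],
#           [opcodes.OpInstanceReplaceDisks(instance_name="inst1", ...)]]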
11176 def _SetOpEarlyRelease(early_release, op):
11177 """Sets C{early_release} flag on opcodes if available.
11181 op.early_release = early_release
11182 except AttributeError:
11183 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
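# Usage sketch (illustrative): this helper is meant to be curried over a list
# of freshly loaded opcodes, e.g.:
#   ops = map(compat.partial(_SetOpEarlyRelease, True), ops)
# Opcodes without an early_release slot are passed through unchanged.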
11188 def _NodeEvacDest(use_nodes, group, nodes):
11189 """Returns group or nodes depending on caller's choice.
11193 return utils.CommaJoin(nodes)
11198 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11199 """Unpacks the result of change-group and node-evacuate iallocator requests.
11201 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11202 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11204 @type lu: L{LogicalUnit}
11205 @param lu: Logical unit instance
11206 @type alloc_result: tuple/list
11207 @param alloc_result: Result from iallocator
11208 @type early_release: bool
11209 @param early_release: Whether to release locks early if possible
11210 @type use_nodes: bool
  @param use_nodes: Whether to display node names instead of groups

  """
  (moved, failed, jobs) = alloc_result

  if failed:
    failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11218 for (name, reason) in failed)
11219 lu.LogWarning("Unable to evacuate instances %s", failreason)
11220 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11223 lu.LogInfo("Instances to be moved: %s",
11224 utils.CommaJoin("%s (to %s)" %
11225 (name, _NodeEvacDest(use_nodes, group, nodes))
11226 for (name, group, nodes) in moved))
  return [map(compat.partial(_SetOpEarlyRelease, early_release),
              map(opcodes.OpCode.LoadOpCode, ops))
          for ops in jobs]
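# Shape of alloc_result (a sketch; exact contents depend on the iallocator):
#   moved  = [(instance_name, target_group_uuid, [node, ...]), ...]
#   failed = [(instance_name, failure_reason), ...]
#   jobs   = [[serialized_opcode_dict, ...], ...]
# where each serialized opcode is revived via opcodes.OpCode.LoadOpCode and
# then has its early_release flag set through _SetOpEarlyRelease above.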
11233 class LUInstanceGrowDisk(LogicalUnit):
11234 """Grow a disk of an instance.
11237 HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False
11241 def ExpandNames(self):
11242 self._ExpandAndLockInstance()
11243 self.needed_locks[locking.LEVEL_NODE] = []
11244 self.needed_locks[locking.LEVEL_NODE_RES] = []
11245 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11247 def DeclareLocks(self, level):
11248 if level == locking.LEVEL_NODE:
11249 self._LockInstancesNodes()
11250 elif level == locking.LEVEL_NODE_RES:
11252 self.needed_locks[locking.LEVEL_NODE_RES] = \
11253 self.needed_locks[locking.LEVEL_NODE][:]
11255 def BuildHooksEnv(self):
11256 """Build hooks env.
    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "DISK": self.op.disk,
      "AMOUNT": self.op.amount,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env
11268 def BuildHooksNodes(self):
11269 """Build hooks nodes.
11272 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11275 def CheckPrereq(self):
11276 """Check prerequisites.
    This checks that the instance is in the cluster.

    """
11281 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11282 assert instance is not None, \
11283 "Cannot retrieve locked instance %s" % self.op.instance_name
11284 nodenames = list(instance.all_nodes)
11285 for node in nodenames:
11286 _CheckNodeOnline(self, node)
11288 self.instance = instance
11290 if instance.disk_template not in constants.DTS_GROWABLE:
11291 raise errors.OpPrereqError("Instance's disk layout does not support"
11292 " growing", errors.ECODE_INVAL)
11294 self.disk = instance.FindDisk(self.op.disk)
11296 if instance.disk_template not in (constants.DT_FILE,
11297 constants.DT_SHARED_FILE):
      # TODO: check the free disk space for file, when that feature will be
      # supported
      _CheckNodesFreeDiskPerVG(self, nodenames,
11301 self.disk.ComputeGrowth(self.op.amount))
11303 def Exec(self, feedback_fn):
11304 """Execute disk grow.
11307 instance = self.instance
11310 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11311 assert (self.owned_locks(locking.LEVEL_NODE) ==
11312 self.owned_locks(locking.LEVEL_NODE_RES))
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block device to grow")
11318 feedback_fn("Growing disk %s of instance '%s' by %s" %
11319 (self.op.disk, instance.name,
11320 utils.FormatUnit(self.op.amount, "h")))
11322 # First run all grow ops in dry-run mode
11323 for node in instance.all_nodes:
11324 self.cfg.SetDiskID(disk, node)
11325 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
11326 result.Raise("Grow request failed to node %s" % node)
11328 # We know that (as far as we can test) operations across different
11329 # nodes will succeed, time to run it for real
11330 for node in instance.all_nodes:
11331 self.cfg.SetDiskID(disk, node)
11332 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
11333 result.Raise("Grow request failed to node %s" % node)
11335 # TODO: Rewrite code to work properly
11336 # DRBD goes into sync mode for a short amount of time after executing the
11337 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
11338 # calling "resize" in sync mode fails. Sleeping for a short amount of
    # time is a work-around.
    time.sleep(5)
11342 disk.RecordGrow(self.op.amount)
11343 self.cfg.Update(instance, feedback_fn)
11345 # Changes have been recorded, release node lock
11346 _ReleaseLocks(self, locking.LEVEL_NODE)
11348 # Downgrade lock while waiting for sync
11349 self.glm.downgrade(locking.LEVEL_INSTANCE)
11351 if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
      if disk_abort:
        self.proc.LogWarning("Disk sync-ing has not returned a good"
                             " status; please check the instance")
11356 if instance.admin_state != constants.ADMINST_UP:
11357 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
11358 elif instance.admin_state != constants.ADMINST_UP:
11359 self.proc.LogWarning("Not shutting down the disk even if the instance is"
11360 " not supposed to be running because no wait for"
11361 " sync mode was requested")
11363 assert self.owned_locks(locking.LEVEL_NODE_RES)
11364 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11367 class LUInstanceQueryData(NoHooksLU):
11368 """Query runtime instance data.
11373 def ExpandNames(self):
11374 self.needed_locks = {}
11376 # Use locking if requested or when non-static information is wanted
11377 if not (self.op.static or self.op.use_locking):
11378 self.LogWarning("Non-static data requested, locks need to be acquired")
11379 self.op.use_locking = True
11381 if self.op.instances or not self.op.use_locking:
11382 # Expand instance names right here
11383 self.wanted_names = _GetWantedInstances(self, self.op.instances)
    else:
      # Will use acquired locks
      self.wanted_names = None
11388 if self.op.use_locking:
11389 self.share_locks = _ShareAll()
11391 if self.wanted_names is None:
11392 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
      else:
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
11396 self.needed_locks[locking.LEVEL_NODE] = []
11397 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11399 def DeclareLocks(self, level):
11400 if self.op.use_locking and level == locking.LEVEL_NODE:
11401 self._LockInstancesNodes()
11403 def CheckPrereq(self):
11404 """Check prerequisites.
    This only checks the optional instance list against the existing names.

    """
11409 if self.wanted_names is None:
11410 assert self.op.use_locking, "Locking was not used"
11411 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
11413 self.wanted_instances = \
11414 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
11416 def _ComputeBlockdevStatus(self, node, instance_name, dev):
11417 """Returns the status of a block device
11420 if self.op.static or not node:
11423 self.cfg.SetDiskID(dev, node)
11425 result = self.rpc.call_blockdev_find(node, dev)
11429 result.Raise("Can't compute disk status for %s" % instance_name)
11431 status = result.payload
11435 return (status.dev_path, status.major, status.minor,
11436 status.sync_percent, status.estimated_time,
11437 status.is_degraded, status.ldisk_status)
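  # Example return value (illustrative): a healthy device might yield
  #   (dev_path, major, minor, sync_percent, estimated_time, is_degraded,
  #    ldisk_status)
  # e.g. ("/dev/drbd0", 147, 0, 90.5, 120, False, <an LDS_* constant>);
  # None is returned for static queries, offline nodes and missing devices.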
11439 def _ComputeDiskStatus(self, instance, snode, dev):
11440 """Compute block device status.
11443 if dev.dev_type in constants.LDS_DRBD:
11444 # we change the snode then (otherwise we use the one passed in)
11445 if dev.logical_id[0] == instance.primary_node:
11446 snode = dev.logical_id[1]
11448 snode = dev.logical_id[0]
11450 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
11451 instance.name, dev)
11452 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
11455 dev_children = map(compat.partial(self._ComputeDiskStatus,
11462 "iv_name": dev.iv_name,
11463 "dev_type": dev.dev_type,
11464 "logical_id": dev.logical_id,
11465 "physical_id": dev.physical_id,
11466 "pstatus": dev_pstatus,
11467 "sstatus": dev_sstatus,
11468 "children": dev_children,
11473 def Exec(self, feedback_fn):
11474 """Gather and return data"""
    result = {}

    cluster = self.cfg.GetClusterInfo()
11479 pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
11480 for i in self.wanted_instances)
    for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
      if self.op.static or pnode.offline:
        remote_state = None
        if pnode.offline:
          self.LogWarning("Primary node %s is marked offline, returning static"
                          " information only for instance %s" %
                          (pnode.name, instance.name))
      else:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        remote_info = remote_info.payload
        if remote_info and "state" in remote_info:
          remote_state = "up"
        else:
          if instance.admin_state == constants.ADMINST_UP:
            remote_state = "down"
          else:
            remote_state = instance.admin_state
      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
                  instance.disks)
11505 result[instance.name] = {
11506 "name": instance.name,
11507 "config_state": instance.admin_state,
11508 "run_state": remote_state,
11509 "pnode": instance.primary_node,
11510 "snodes": instance.secondary_nodes,
11512 # this happens to be the same format used for hooks
11513 "nics": _NICListToTuple(self, instance.nics),
11514 "disk_template": instance.disk_template,
11516 "hypervisor": instance.hypervisor,
11517 "network_port": instance.network_port,
11518 "hv_instance": instance.hvparams,
11519 "hv_actual": cluster.FillHV(instance, skip_globals=True),
11520 "be_instance": instance.beparams,
11521 "be_actual": cluster.FillBE(instance),
11522 "os_instance": instance.osparams,
11523 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
11524 "serial_no": instance.serial_no,
11525 "mtime": instance.mtime,
11526 "ctime": instance.ctime,
11527 "uuid": instance.uuid,
11533 class LUInstanceSetParams(LogicalUnit):
11534 """Modifies an instances's parameters.
11537 HPATH = "instance-modify"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False
11541 def CheckArguments(self):
11542 if not (self.op.nics or self.op.disks or self.op.disk_template or
11543 self.op.hvparams or self.op.beparams or self.op.os_name or
11544 self.op.online_inst or self.op.offline_inst):
11545 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
11547 if self.op.hvparams:
11548 _CheckGlobalHvParams(self.op.hvparams)
    # Disk validation
    disk_addremove = 0
    for disk_op, disk_dict in self.op.disks:
11553 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
      if disk_op == constants.DDM_REMOVE:
        disk_addremove += 1
        continue
      elif disk_op == constants.DDM_ADD:
        disk_addremove += 1
      else:
        if not isinstance(disk_op, int):
11561 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
11562 if not isinstance(disk_dict, dict):
11563 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
11564 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11566 if disk_op == constants.DDM_ADD:
11567 mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
11568 if mode not in constants.DISK_ACCESS_SET:
11569 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
11570 errors.ECODE_INVAL)
        size = disk_dict.get(constants.IDISK_SIZE, None)
        if size is None:
          raise errors.OpPrereqError("Required disk parameter size missing",
                                     errors.ECODE_INVAL)
        try:
          size = int(size)
        except (TypeError, ValueError), err:
11578 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
11579 str(err), errors.ECODE_INVAL)
11580 disk_dict[constants.IDISK_SIZE] = size
      else:
        # modification of disk
        if constants.IDISK_SIZE in disk_dict:
11584 raise errors.OpPrereqError("Disk size change not possible, use"
11585 " grow-disk", errors.ECODE_INVAL)
11587 if disk_addremove > 1:
11588 raise errors.OpPrereqError("Only one disk add or remove operation"
11589 " supported at a time", errors.ECODE_INVAL)
11591 if self.op.disks and self.op.disk_template is not None:
11592 raise errors.OpPrereqError("Disk template conversion and other disk"
11593 " changes not supported at the same time",
11594 errors.ECODE_INVAL)
11596 if (self.op.disk_template and
11597 self.op.disk_template in constants.DTS_INT_MIRROR and
11598 self.op.remote_node is None):
11599 raise errors.OpPrereqError("Changing the disk template to a mirrored"
11600 " one requires specifying a secondary node",
11601 errors.ECODE_INVAL)
    # NIC validation
    nic_addremove = 0
    for nic_op, nic_dict in self.op.nics:
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
      if nic_op == constants.DDM_REMOVE:
        nic_addremove += 1
        continue
      elif nic_op == constants.DDM_ADD:
        nic_addremove += 1
      else:
        if not isinstance(nic_op, int):
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
11615 if not isinstance(nic_dict, dict):
11616 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
11617 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11619 # nic_dict should be a dict
11620 nic_ip = nic_dict.get(constants.INIC_IP, None)
11621 if nic_ip is not None:
11622 if nic_ip.lower() == constants.VALUE_NONE:
11623 nic_dict[constants.INIC_IP] = None
        else:
          if not netutils.IPAddress.IsValid(nic_ip):
11626 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
11627 errors.ECODE_INVAL)
11629 nic_bridge = nic_dict.get("bridge", None)
11630 nic_link = nic_dict.get(constants.INIC_LINK, None)
11631 if nic_bridge and nic_link:
11632 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
11633 " at the same time", errors.ECODE_INVAL)
11634 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
11635 nic_dict["bridge"] = None
11636 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
11637 nic_dict[constants.INIC_LINK] = None
11639 if nic_op == constants.DDM_ADD:
11640 nic_mac = nic_dict.get(constants.INIC_MAC, None)
11641 if nic_mac is None:
11642 nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
11644 if constants.INIC_MAC in nic_dict:
11645 nic_mac = nic_dict[constants.INIC_MAC]
11646 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11647 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
11649 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
11650 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
11651 " modifying an existing nic",
11652 errors.ECODE_INVAL)
11654 if nic_addremove > 1:
11655 raise errors.OpPrereqError("Only one NIC add or remove operation"
11656 " supported at a time", errors.ECODE_INVAL)
11658 def ExpandNames(self):
11659 self._ExpandAndLockInstance()
11660 # Can't even acquire node locks in shared mode as upcoming changes in
11661 # Ganeti 2.6 will start to modify the node object on disk conversion
11662 self.needed_locks[locking.LEVEL_NODE] = []
11663 self.needed_locks[locking.LEVEL_NODE_RES] = []
11664 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11666 def DeclareLocks(self, level):
11667 if level == locking.LEVEL_NODE:
11668 self._LockInstancesNodes()
11669 if self.op.disk_template and self.op.remote_node:
11670 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11671 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
11672 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
11674 self.needed_locks[locking.LEVEL_NODE_RES] = \
11675 self.needed_locks[locking.LEVEL_NODE][:]
11677 def BuildHooksEnv(self):
11678 """Build hooks env.
    This runs on the master, primary and secondaries.

    """
    args = {}
11684 if constants.BE_MINMEM in self.be_new:
11685 args["minmem"] = self.be_new[constants.BE_MINMEM]
11686 if constants.BE_MAXMEM in self.be_new:
11687 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
11688 if constants.BE_VCPUS in self.be_new:
11689 args["vcpus"] = self.be_new[constants.BE_VCPUS]
11690 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
11691 # information at all.
    if self.op.nics:
      args["nics"] = []
      nic_override = dict(self.op.nics)
11695 for idx, nic in enumerate(self.instance.nics):
        if idx in nic_override:
          this_nic_override = nic_override[idx]
        else:
          this_nic_override = {}
        if constants.INIC_IP in this_nic_override:
          ip = this_nic_override[constants.INIC_IP]
        else:
          ip = nic.ip
        if constants.INIC_MAC in this_nic_override:
          mac = this_nic_override[constants.INIC_MAC]
        else:
          mac = nic.mac
        if idx in self.nic_pnew:
          nicparams = self.nic_pnew[idx]
        else:
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
11712 mode = nicparams[constants.NIC_MODE]
11713 link = nicparams[constants.NIC_LINK]
11714 args["nics"].append((ip, mac, mode, link))
11715 if constants.DDM_ADD in nic_override:
11716 ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
11717 mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
11718 nicparams = self.nic_pnew[constants.DDM_ADD]
11719 mode = nicparams[constants.NIC_MODE]
11720 link = nicparams[constants.NIC_LINK]
11721 args["nics"].append((ip, mac, mode, link))
11722 elif constants.DDM_REMOVE in nic_override:
11723 del args["nics"][-1]
11725 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
11726 if self.op.disk_template:
11727 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
11731 def BuildHooksNodes(self):
11732 """Build hooks nodes.
11735 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11738 def CheckPrereq(self):
11739 """Check prerequisites.
    This only checks the instance list against the existing names.

    """
11744 # checking the new params on the primary/secondary nodes
11746 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11747 cluster = self.cluster = self.cfg.GetClusterInfo()
11748 assert self.instance is not None, \
11749 "Cannot retrieve locked instance %s" % self.op.instance_name
11750 pnode = instance.primary_node
11751 nodelist = list(instance.all_nodes)
11752 pnode_info = self.cfg.GetNodeInfo(pnode)
11753 self.diskparams = self.cfg.GetNodeGroup(pnode_info.group).diskparams
11756 if self.op.os_name and not self.op.force:
11757 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
11758 self.op.force_variant)
11759 instance_os = self.op.os_name
    else:
      instance_os = instance.os
11763 if self.op.disk_template:
11764 if instance.disk_template == self.op.disk_template:
11765 raise errors.OpPrereqError("Instance already has disk template %s" %
11766 instance.disk_template, errors.ECODE_INVAL)
11768 if (instance.disk_template,
11769 self.op.disk_template) not in self._DISK_CONVERSIONS:
11770 raise errors.OpPrereqError("Unsupported disk template conversion from"
11771 " %s to %s" % (instance.disk_template,
11772 self.op.disk_template),
11773 errors.ECODE_INVAL)
11774 _CheckInstanceState(self, instance, INSTANCE_DOWN,
11775 msg="cannot change disk template")
11776 if self.op.disk_template in constants.DTS_INT_MIRROR:
11777 if self.op.remote_node == pnode:
11778 raise errors.OpPrereqError("Given new secondary node %s is the same"
11779 " as the primary node of the instance" %
11780 self.op.remote_node, errors.ECODE_STATE)
11781 _CheckNodeOnline(self, self.op.remote_node)
11782 _CheckNodeNotDrained(self, self.op.remote_node)
11783 # FIXME: here we assume that the old instance type is DT_PLAIN
11784 assert instance.disk_template == constants.DT_PLAIN
11785 disks = [{constants.IDISK_SIZE: d.size,
11786 constants.IDISK_VG: d.logical_id[0]}
11787 for d in instance.disks]
11788 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
11789 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
11791 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
11792 snode_group = self.cfg.GetNodeGroup(snode_info.group)
11793 ipolicy = _CalculateGroupIPolicy(cluster, snode_group)
11794 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
11795 ignore=self.op.ignore_ipolicy)
11796 if pnode_info.group != snode_info.group:
11797 self.LogWarning("The primary and secondary nodes are in two"
11798 " different node groups; the disk parameters"
11799 " from the first disk's node group will be"
11802 # hvparams processing
11803 if self.op.hvparams:
11804 hv_type = instance.hypervisor
11805 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
11806 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
11807 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
11810 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
11811 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
11812 self.hv_proposed = self.hv_new = hv_new # the new actual values
11813 self.hv_inst = i_hvdict # the new dict (without defaults)
    else:
      self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
                                              instance.hvparams)
      self.hv_new = self.hv_inst = {}
11819 # beparams processing
11820 if self.op.beparams:
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
                                   use_none=True)
      objects.UpgradeBeParams(i_bedict)
11824 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
11825 be_new = cluster.SimpleFillBE(i_bedict)
11826 self.be_proposed = self.be_new = be_new # the new actual values
11827 self.be_inst = i_bedict # the new dict (without defaults)
    else:
      self.be_new = self.be_inst = {}
11830 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
11831 be_old = cluster.FillBE(instance)
    # CPU param validation -- checking every time a parameter is
    # changed to cover all cases where either CPU mask or vcpus have
    # changed
    if (constants.BE_VCPUS in self.be_proposed and
        constants.HV_CPU_MASK in self.hv_proposed):
      cpu_list = \
        utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
11840 # Verify mask is consistent with number of vCPUs. Can skip this
11841 # test if only 1 entry in the CPU mask, which means same mask
11842 # is applied to all vCPUs.
11843 if (len(cpu_list) > 1 and
11844 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
11845 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
11847 (self.be_proposed[constants.BE_VCPUS],
11848 self.hv_proposed[constants.HV_CPU_MASK]),
11849 errors.ECODE_INVAL)
11851 # Only perform this test if a new CPU mask is given
11852 if constants.HV_CPU_MASK in self.hv_new:
11853 # Calculate the largest CPU number requested
11854 max_requested_cpu = max(map(max, cpu_list))
11855 # Check that all of the instance's nodes have enough physical CPUs to
11856 # satisfy the requested CPU mask
11857 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
11858 max_requested_cpu + 1, instance.hypervisor)
11860 # osparams processing
11861 if self.op.osparams:
11862 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
11863 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
11864 self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = {}

    self.warn = []

    #TODO(dynmem): do the appropriate check involving MINMEM
11871 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
11872 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
11873 mem_check_list = [pnode]
11874 if be_new[constants.BE_AUTO_BALANCE]:
11875 # either we changed auto_balance to yes or it was from before
11876 mem_check_list.extend(instance.secondary_nodes)
11877 instance_info = self.rpc.call_instance_info(pnode, instance.name,
11878 instance.hypervisor)
11879 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
11880 [instance.hypervisor])
11881 pninfo = nodeinfo[pnode]
      msg = pninfo.fail_msg
      if msg:
        # Assume the primary node is unreachable and go ahead
        self.warn.append("Can't get info from primary node %s: %s" %
                         (pnode, msg))
      else:
        (_, _, (pnhvinfo, )) = pninfo.payload
11889 if not isinstance(pnhvinfo.get("memory_free", None), int):
11890 self.warn.append("Node data from primary node %s doesn't contain"
11891 " free memory information" % pnode)
11892 elif instance_info.fail_msg:
11893 self.warn.append("Can't get instance runtime information: %s" %
11894 instance_info.fail_msg)
        else:
          if instance_info.payload:
            current_mem = int(instance_info.payload["memory"])
          else:
            # Assume instance not running
            # (there is a slight race condition here, but it's not very
            # probable, and we have no other way to check)
            # TODO: Describe race condition
            current_mem = 0
11904 #TODO(dynmem): do the appropriate check involving MINMEM
          miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
                      pnhvinfo["memory_free"])
          if miss_mem > 0:
            raise errors.OpPrereqError("This change will prevent the instance"
                                       " from starting, due to %d MB of memory"
                                       " missing on its primary node" %
                                       miss_mem,
                                       errors.ECODE_NORES)
11914 if be_new[constants.BE_AUTO_BALANCE]:
11915 for node, nres in nodeinfo.items():
          if node not in instance.secondary_nodes:
            continue
          nres.Raise("Can't get info from secondary node %s" % node,
11919 prereq=True, ecode=errors.ECODE_STATE)
11920 (_, _, (nhvinfo, )) = nres.payload
11921 if not isinstance(nhvinfo.get("memory_free", None), int):
11922 raise errors.OpPrereqError("Secondary node %s didn't return free"
11923 " memory information" % node,
11924 errors.ECODE_STATE)
11925 #TODO(dynmem): do the appropriate check involving MINMEM
11926 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
11927 raise errors.OpPrereqError("This change will prevent the instance"
11928 " from failover to its secondary node"
11929 " %s, due to not enough memory" % node,
11930 errors.ECODE_STATE)
    # NIC processing
    self.nic_pnew = {}
    self.nic_pinst = {}
11935 for nic_op, nic_dict in self.op.nics:
11936 if nic_op == constants.DDM_REMOVE:
11937 if not instance.nics:
11938 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
                                     errors.ECODE_INVAL)
        continue
      if nic_op != constants.DDM_ADD:
        # an existing nic
        if not instance.nics:
11944 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
11945 " no NICs" % nic_op,
11946 errors.ECODE_INVAL)
11947 if nic_op < 0 or nic_op >= len(instance.nics):
11948 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
11950 (nic_op, len(instance.nics) - 1),
11951 errors.ECODE_INVAL)
11952 old_nic_params = instance.nics[nic_op].nicparams
11953 old_nic_ip = instance.nics[nic_op].ip
      else:
        old_nic_params = {}
        old_nic_ip = None
11958 update_params_dict = dict([(key, nic_dict[key])
11959 for key in constants.NICS_PARAMETERS
11960 if key in nic_dict])
11962 if "bridge" in nic_dict:
11963 update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
11965 new_nic_params = _GetUpdatedParams(old_nic_params,
11966 update_params_dict)
11967 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
11968 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
11969 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
11970 self.nic_pinst[nic_op] = new_nic_params
11971 self.nic_pnew[nic_op] = new_filled_nic_params
11972 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
11974 if new_nic_mode == constants.NIC_MODE_BRIDGED:
11975 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
11976 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
11978 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
11980 self.warn.append(msg)
11982 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
11983 if new_nic_mode == constants.NIC_MODE_ROUTED:
11984 if constants.INIC_IP in nic_dict:
11985 nic_ip = nic_dict[constants.INIC_IP]
        else:
          nic_ip = old_nic_ip

        if nic_ip is None:
          raise errors.OpPrereqError("Cannot set the nic ip to None"
11990 " on a routed nic", errors.ECODE_INVAL)
11991 if constants.INIC_MAC in nic_dict:
11992 nic_mac = nic_dict[constants.INIC_MAC]
11993 if nic_mac is None:
11994 raise errors.OpPrereqError("Cannot set the nic mac to None",
11995 errors.ECODE_INVAL)
11996 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11997 # otherwise generate the mac
11998 nic_dict[constants.INIC_MAC] = \
11999 self.cfg.GenerateMAC(self.proc.GetECId())
          else:
            # or validate/reserve the current one
            try:
              self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
12004 except errors.ReservationError:
12005 raise errors.OpPrereqError("MAC address %s already in use"
12006 " in cluster" % nic_mac,
12007 errors.ECODE_NOTUNIQUE)
12010 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
12011 raise errors.OpPrereqError("Disk operations not supported for"
12012 " diskless instances",
12013 errors.ECODE_INVAL)
12014 for disk_op, _ in self.op.disks:
12015 if disk_op == constants.DDM_REMOVE:
12016 if len(instance.disks) == 1:
12017 raise errors.OpPrereqError("Cannot remove the last disk of"
12018 " an instance", errors.ECODE_INVAL)
12019 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12020 msg="cannot remove disks")
12022 if (disk_op == constants.DDM_ADD and
12023 len(instance.disks) >= constants.MAX_DISKS):
12024 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
12025 " add more" % constants.MAX_DISKS,
12026 errors.ECODE_STATE)
12027 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
12029 if disk_op < 0 or disk_op >= len(instance.disks):
12030 raise errors.OpPrereqError("Invalid disk index %s, valid values"
12032 (disk_op, len(instance.disks)),
12033 errors.ECODE_INVAL)
12035 # disabling the instance
12036 if self.op.offline_inst:
12037 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12038 msg="cannot change instance state to offline")
12040 # enabling the instance
12041 if self.op.online_inst:
12042 _CheckInstanceState(self, instance, INSTANCE_OFFLINE,
12043 msg="cannot make instance go online")
12045 def _ConvertPlainToDrbd(self, feedback_fn):
12046 """Converts an instance from plain to drbd.
12049 feedback_fn("Converting template to drbd")
12050 instance = self.instance
12051 pnode = instance.primary_node
12052 snode = self.op.remote_node
12054 assert instance.disk_template == constants.DT_PLAIN
12056 # create a fake disk info for _GenerateDiskTemplate
12057 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
12058 constants.IDISK_VG: d.logical_id[0]}
12059 for d in instance.disks]
12060 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
12061 instance.name, pnode, [snode],
                                      disk_info, None, None, 0, feedback_fn,
                                      self.diskparams)
12064 info = _GetInstanceInfoText(instance)
12065 feedback_fn("Creating aditional volumes...")
12066 # first, create the missing data and meta devices
12067 for disk in new_disks:
12068 # unfortunately this is... not too nice
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
                            info, True)
12071 for child in disk.children:
12072 _CreateSingleBlockDev(self, snode, instance, child, info, True)
    # at this stage, all new LVs have been created, we can rename the
    # old ones
12075 feedback_fn("Renaming original volumes...")
12076 rename_list = [(o, n.children[0].logical_id)
12077 for (o, n) in zip(instance.disks, new_disks)]
12078 result = self.rpc.call_blockdev_rename(pnode, rename_list)
12079 result.Raise("Failed to rename original LVs")
12081 feedback_fn("Initializing DRBD devices...")
12082 # all child devices are in place, we can now create the DRBD devices
12083 for disk in new_disks:
12084 for node in [pnode, snode]:
12085 f_create = node == pnode
12086 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
12088 # at this point, the instance has been modified
12089 instance.disk_template = constants.DT_DRBD8
12090 instance.disks = new_disks
12091 self.cfg.Update(instance, feedback_fn)
12093 # Release node locks while waiting for sync
12094 _ReleaseLocks(self, locking.LEVEL_NODE)
12096 # disks are created, waiting for sync
12097 disk_abort = not _WaitForSync(self, instance,
                                  oneshot=not self.op.wait_for_sync)
    if disk_abort:
      raise errors.OpExecError("There are some degraded disks for"
12101 " this instance, please cleanup manually")
12103 # Node resource locks will be released by caller
12105 def _ConvertDrbdToPlain(self, feedback_fn):
12106 """Converts an instance from drbd to plain.
12109 instance = self.instance
12111 assert len(instance.secondary_nodes) == 1
12112 assert instance.disk_template == constants.DT_DRBD8
12114 pnode = instance.primary_node
12115 snode = instance.secondary_nodes[0]
12116 feedback_fn("Converting template to plain")
12118 old_disks = instance.disks
12119 new_disks = [d.children[0] for d in old_disks]
12121 # copy over size and mode
12122 for parent, child in zip(old_disks, new_disks):
12123 child.size = parent.size
12124 child.mode = parent.mode
12126 # update instance structure
12127 instance.disks = new_disks
12128 instance.disk_template = constants.DT_PLAIN
12129 self.cfg.Update(instance, feedback_fn)
12131 # Release locks in case removing disks takes a while
12132 _ReleaseLocks(self, locking.LEVEL_NODE)
12134 feedback_fn("Removing volumes on the secondary node...")
12135 for disk in old_disks:
12136 self.cfg.SetDiskID(disk, snode)
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
      if msg:
        self.LogWarning("Could not remove block device %s on node %s,"
12140 " continuing anyway: %s", disk.iv_name, snode, msg)
12142 feedback_fn("Removing unneeded volumes on the primary node...")
12143 for idx, disk in enumerate(old_disks):
12144 meta = disk.children[1]
12145 self.cfg.SetDiskID(meta, pnode)
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
      if msg:
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
12149 " continuing anyway: %s", idx, pnode, msg)
12151 # this is a DRBD disk, return its port to the pool
12152 for disk in old_disks:
12153 tcp_port = disk.logical_id[2]
12154 self.cfg.AddTcpUdpPort(tcp_port)
12156 # Node resource locks will be released by caller
12158 def Exec(self, feedback_fn):
12159 """Modifies an instance.
    All parameters take effect only at the next restart of the instance.

    """
12164 # Process here the warnings from CheckPrereq, as we don't have a
12165 # feedback_fn there.
12166 for warn in self.warn:
12167 feedback_fn("WARNING: %s" % warn)
12169 assert ((self.op.disk_template is None) ^
12170 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
12171 "Not owning any node resource locks"
    result = []
    instance = self.instance

    # disk changes
12176 for disk_op, disk_dict in self.op.disks:
12177 if disk_op == constants.DDM_REMOVE:
12178 # remove the last disk
12179 device = instance.disks.pop()
12180 device_idx = len(instance.disks)
12181 for node, disk in device.ComputeNodeTree(instance.primary_node):
12182 self.cfg.SetDiskID(disk, node)
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
          if msg:
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
12186 " continuing anyway", device_idx, node, msg)
12187 result.append(("disk/%d" % device_idx, "remove"))
12189 # if this is a DRBD disk, return its port to the pool
12190 if device.dev_type in constants.LDS_DRBD:
12191 tcp_port = device.logical_id[2]
12192 self.cfg.AddTcpUdpPort(tcp_port)
12193 elif disk_op == constants.DDM_ADD:
12195 if instance.disk_template in (constants.DT_FILE,
12196 constants.DT_SHARED_FILE):
12197 file_driver, file_path = instance.disks[0].logical_id
12198 file_path = os.path.dirname(file_path)
        else:
          file_driver = file_path = None
12201 disk_idx_base = len(instance.disks)
12202 new_disk = _GenerateDiskTemplate(self,
12203 instance.disk_template,
12204 instance.name, instance.primary_node,
                                         instance.secondary_nodes,
                                         [disk_dict],
                                         file_path,
                                         file_driver,
                                         disk_idx_base,
                                         feedback_fn,
                                         self.diskparams)[0]
12212 instance.disks.append(new_disk)
12213 info = _GetInstanceInfoText(instance)
12215 logging.info("Creating volume %s for instance %s",
12216 new_disk.iv_name, instance.name)
12217 # Note: this needs to be kept in sync with _CreateDisks
12219 for node in instance.all_nodes:
          f_create = node == instance.primary_node
          try:
            _CreateBlockDev(self, node, instance, new_disk,
                            f_create, info, f_create)
          except errors.OpExecError, err:
            self.LogWarning("Failed to create volume %s (%s) on"
                            " node %s: %s",
                            new_disk.iv_name, new_disk, node, err)
12228 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
12229 (new_disk.size, new_disk.mode)))
      else:
        # change a given disk
12232 instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
12233 result.append(("disk.mode/%d" % disk_op,
12234 disk_dict[constants.IDISK_MODE]))
12236 if self.op.disk_template:
12238 check_nodes = set(instance.all_nodes)
12239 if self.op.remote_node:
12240 check_nodes.add(self.op.remote_node)
12241 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
12242 owned = self.owned_locks(level)
12243 assert not (check_nodes - owned), \
12244 ("Not owning the correct locks, owning %r, expected at least %r" %
12245 (owned, check_nodes))
      r_shut = _ShutdownInstanceDisks(self, instance)
      if not r_shut:
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
12250 " proceed with disk template conversion")
12251 mode = (instance.disk_template, self.op.disk_template)
      try:
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
      except:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise
12257 result.append(("disk_template", self.op.disk_template))
12259 assert instance.disk_template == self.op.disk_template, \
12260 ("Expected disk template '%s', found '%s'" %
12261 (self.op.disk_template, instance.disk_template))
12263 # Release node and resource locks if there are any (they might already have
12264 # been released during disk conversion)
12265 _ReleaseLocks(self, locking.LEVEL_NODE)
12266 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
    # NIC changes
    for nic_op, nic_dict in self.op.nics:
12270 if nic_op == constants.DDM_REMOVE:
12271 # remove the last nic
12272 del instance.nics[-1]
12273 result.append(("nic.%d" % len(instance.nics), "remove"))
12274 elif nic_op == constants.DDM_ADD:
12275 # mac and bridge should be set, by now
12276 mac = nic_dict[constants.INIC_MAC]
12277 ip = nic_dict.get(constants.INIC_IP, None)
12278 nicparams = self.nic_pinst[constants.DDM_ADD]
12279 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
12280 instance.nics.append(new_nic)
12281 result.append(("nic.%d" % (len(instance.nics) - 1),
12282 "add:mac=%s,ip=%s,mode=%s,link=%s" %
12283 (new_nic.mac, new_nic.ip,
12284 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
                        )))
      else:
        for key in (constants.INIC_MAC, constants.INIC_IP):
12289 if key in nic_dict:
12290 setattr(instance.nics[nic_op], key, nic_dict[key])
12291 if nic_op in self.nic_pinst:
12292 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
12293 for key, val in nic_dict.iteritems():
12294 result.append(("nic.%s/%d" % (key, nic_op), val))
    # hvparams changes
    if self.op.hvparams:
12298 instance.hvparams = self.hv_inst
12299 for key, val in self.op.hvparams.iteritems():
12300 result.append(("hv/%s" % key, val))
    # beparams changes
    if self.op.beparams:
12304 instance.beparams = self.be_inst
12305 for key, val in self.op.beparams.iteritems():
12306 result.append(("be/%s" % key, val))
    # OS change
    if self.op.os_name:
12310 instance.os = self.op.os_name
    # osparams changes
    if self.op.osparams:
12314 instance.osparams = self.os_inst
12315 for key, val in self.op.osparams.iteritems():
12316 result.append(("os/%s" % key, val))
12318 # online/offline instance
12319 if self.op.online_inst:
12320 self.cfg.MarkInstanceDown(instance.name)
12321 result.append(("admin_state", constants.ADMINST_DOWN))
12322 if self.op.offline_inst:
12323 self.cfg.MarkInstanceOffline(instance.name)
12324 result.append(("admin_state", constants.ADMINST_OFFLINE))
12326 self.cfg.Update(instance, feedback_fn)
12328 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
12329 self.owned_locks(locking.LEVEL_NODE)), \
12330 "All node locks should have been released by now"
12334 _DISK_CONVERSIONS = {
12335 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }
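  # Dispatch sketch: Exec looks up the converter by (old, new) template pair,
  # e.g. self._DISK_CONVERSIONS[(constants.DT_PLAIN, constants.DT_DRBD8)]
  # resolves to _ConvertPlainToDrbd; pairs missing from this table were
  # already rejected in CheckPrereq.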
12340 class LUInstanceChangeGroup(LogicalUnit):
12341 HPATH = "instance-change-group"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False
12345 def ExpandNames(self):
12346 self.share_locks = _ShareAll()
12347 self.needed_locks = {
12348 locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }
12352 self._ExpandAndLockInstance()
12354 if self.op.target_groups:
12355 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12356 self.op.target_groups)
    else:
      self.req_target_uuids = None
12360 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12362 def DeclareLocks(self, level):
12363 if level == locking.LEVEL_NODEGROUP:
12364 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12366 if self.req_target_uuids:
12367 lock_groups = set(self.req_target_uuids)
12369 # Lock all groups used by instance optimistically; this requires going
12370 # via the node before it's locked, requiring verification later on
12371 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
12372 lock_groups.update(instance_groups)
      else:
        # No target groups, need to lock all of them
12375 lock_groups = locking.ALL_SET
12377 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12379 elif level == locking.LEVEL_NODE:
12380 if self.req_target_uuids:
12381 # Lock all nodes used by instances
12382 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12383 self._LockInstancesNodes()
12385 # Lock all nodes in all potential target groups
12386 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
12387 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
12388 member_nodes = [node_name
12389 for group in lock_groups
12390 for node_name in self.cfg.GetNodeGroup(group).members]
12391 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
      else:
        # Lock all nodes as all groups are potential targets
12394 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12396 def CheckPrereq(self):
12397 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12398 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12399 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12401 assert (self.req_target_uuids is None or
12402 owned_groups.issuperset(self.req_target_uuids))
12403 assert owned_instances == set([self.op.instance_name])
12405 # Get instance information
12406 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12408 # Check if node groups for locked instance are still correct
12409 assert owned_nodes.issuperset(self.instance.all_nodes), \
12410 ("Instance %s's nodes changed while we kept the lock" %
12411 self.op.instance_name)
    inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
                                           owned_groups)
12416 if self.req_target_uuids:
12417 # User requested specific target groups
12418 self.target_uuids = self.req_target_uuids
    else:
      # All groups except those used by the instance are potential targets
12421 self.target_uuids = owned_groups - inst_groups
12423 conflicting_groups = self.target_uuids & inst_groups
12424 if conflicting_groups:
12425 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
12426 " used by the instance '%s'" %
12427 (utils.CommaJoin(conflicting_groups),
12428 self.op.instance_name),
12429 errors.ECODE_INVAL)
12431 if not self.target_uuids:
12432 raise errors.OpPrereqError("There are no possible target groups",
12433 errors.ECODE_INVAL)
12435 def BuildHooksEnv(self):
12436 """Build hooks env.
12439 assert self.target_uuids
12442 "TARGET_GROUPS": " ".join(self.target_uuids),
12445 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12449 def BuildHooksNodes(self):
12450 """Build hooks nodes.
12453 mn = self.cfg.GetMasterNode()
12454 return ([mn], [mn])
12456 def Exec(self, feedback_fn):
12457 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12459 assert instances == [self.op.instance_name], "Instance not locked"
12461 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12462 instances=instances, target_groups=list(self.target_uuids))
12464 ial.Run(self.op.iallocator)
12466 if not ial.success:
12467 raise errors.OpPrereqError("Can't compute solution for changing group of"
12468 " instance '%s' using iallocator '%s': %s" %
                                 (self.op.instance_name, self.op.iallocator,
                                  ial.info),
                                 errors.ECODE_NORES)
12473 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12475 self.LogInfo("Iallocator returned %s job(s) for changing group of"
12476 " instance '%s'", len(jobs), self.op.instance_name)
12478 return ResultWithJobs(jobs)
12481 class LUBackupQuery(NoHooksLU):
12482 """Query the exports list
12487 def ExpandNames(self):
12488 self.needed_locks = {}
12489 self.share_locks[locking.LEVEL_NODE] = 1
12490 if not self.op.nodes:
12491 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
12494 _GetWantedNodes(self, self.op.nodes)
12496 def Exec(self, feedback_fn):
12497 """Compute the list of all the exported system images.
12500 @return: a dictionary with the structure node->(export-list)
12501 where export-list is a list of the instances exported on
12505 self.nodes = self.owned_locks(locking.LEVEL_NODE)
12506 rpcresult = self.rpc.call_export_list(self.nodes)
12508 for node in rpcresult:
12509 if rpcresult[node].fail_msg:
        result[node] = False
      else:
        result[node] = rpcresult[node].payload

    return result
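  # Example result (illustrative): False marks a node whose export list
  # could not be retrieved:
  #   {"node1.example.com": ["inst1.example.com"],
  #    "node2.example.com": False}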
12517 class LUBackupPrepare(NoHooksLU):
12518 """Prepares an instance for an export and returns useful information.
12523 def ExpandNames(self):
12524 self._ExpandAndLockInstance()
12526 def CheckPrereq(self):
12527 """Check prerequisites.
12530 instance_name = self.op.instance_name
12532 self.instance = self.cfg.GetInstanceInfo(instance_name)
12533 assert self.instance is not None, \
12534 "Cannot retrieve locked instance %s" % self.op.instance_name
12535 _CheckNodeOnline(self, self.instance.primary_node)
12537 self._cds = _GetClusterDomainSecret()
12539 def Exec(self, feedback_fn):
12540 """Prepares an instance for an export.
12543 instance = self.instance
12545 if self.op.mode == constants.EXPORT_MODE_REMOTE:
12546 salt = utils.GenerateSecret(8)
12548 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
12549 result = self.rpc.call_x509_cert_create(instance.primary_node,
12550 constants.RIE_CERT_VALIDITY)
12551 result.Raise("Can't create X509 key and certificate on %s" % result.node)
12553 (name, cert_pem) = result.payload
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                             cert_pem)

      return {
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
                          salt),
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
        }

    return None
12568 class LUBackupExport(LogicalUnit):
12569 """Export an instance to an image in the cluster.
12572 HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False
12576 def CheckArguments(self):
12577 """Check the arguments.
12580 self.x509_key_name = self.op.x509_key_name
12581 self.dest_x509_ca_pem = self.op.destination_x509_ca
12583 if self.op.mode == constants.EXPORT_MODE_REMOTE:
12584 if not self.x509_key_name:
12585 raise errors.OpPrereqError("Missing X509 key name for encryption",
12586 errors.ECODE_INVAL)
12588 if not self.dest_x509_ca_pem:
12589 raise errors.OpPrereqError("Missing destination X509 CA",
12590 errors.ECODE_INVAL)
12592 def ExpandNames(self):
12593 self._ExpandAndLockInstance()
12595 # Lock all nodes for local exports
12596 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12597 # FIXME: lock only instance primary and destination node
      # Sad but true, for now we have to lock all nodes, as we don't know where
12600 # the previous export might be, and in this LU we search for it and
12601 # remove it from its current node. In the future we could fix this by:
12602 # - making a tasklet to search (share-lock all), then create the
12603 # new one, then one to remove, after
12604 # - removing the removal operation altogether
12605 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12607 def DeclareLocks(self, level):
12608 """Last minute lock declaration."""
12609 # All nodes are locked anyway, so nothing to do here.
12611 def BuildHooksEnv(self):
12612 """Build hooks env.
    This will run on the master, primary node and target node.

    """
    env = {
12618 "EXPORT_MODE": self.op.mode,
12619 "EXPORT_NODE": self.op.target_node,
12620 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
12621 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
12622 # TODO: Generic function for boolean env variables
12623 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
12626 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12630 def BuildHooksNodes(self):
12631 """Build hooks nodes.
12634 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
12636 if self.op.mode == constants.EXPORT_MODE_LOCAL:
      nl.append(self.op.target_node)

    return (nl, nl)
12641 def CheckPrereq(self):
12642 """Check prerequisites.
    This checks that the instance and node names are valid.

    """
12647 instance_name = self.op.instance_name
12649 self.instance = self.cfg.GetInstanceInfo(instance_name)
12650 assert self.instance is not None, \
12651 "Cannot retrieve locked instance %s" % self.op.instance_name
12652 _CheckNodeOnline(self, self.instance.primary_node)
12654 if (self.op.remove_instance and
12655 self.instance.admin_state == constants.ADMINST_UP and
12656 not self.op.shutdown):
12657 raise errors.OpPrereqError("Can not remove instance without shutting it"
12660 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12661 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
12662 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
12663 assert self.dst_node is not None
12665 _CheckNodeOnline(self, self.dst_node.name)
12666 _CheckNodeNotDrained(self, self.dst_node.name)
      self._cds = None
      self.dest_disk_info = None
12670 self.dest_x509_ca = None
12672 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
12673 self.dst_node = None
12675 if len(self.op.target_node) != len(self.instance.disks):
12676 raise errors.OpPrereqError(("Received destination information for %s"
12677 " disks, but instance %s has %s disks") %
12678 (len(self.op.target_node), instance_name,
12679 len(self.instance.disks)),
12680 errors.ECODE_INVAL)
12682 cds = _GetClusterDomainSecret()
12684 # Check X509 key name
12685 try:
12686 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
12687 except (TypeError, ValueError), err:
12688 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
12690 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
12691 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
12692 errors.ECODE_INVAL)
12694 # Load and verify CA
12695 try:
12696 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
12697 except OpenSSL.crypto.Error, err:
12698 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
12699 (err, ), errors.ECODE_INVAL)
12701 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
12702 if errcode is not None:
12703 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
12704 (msg, ), errors.ECODE_INVAL)
12706 self.dest_x509_ca = cert
12708 # Verify target information
12709 disk_info = []
12710 for idx, disk_data in enumerate(self.op.target_node):
12711 try:
12712 (host, port, magic) = \
12713 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
12714 except errors.GenericError, err:
12715 raise errors.OpPrereqError("Target info for disk %s: %s" %
12716 (idx, err), errors.ECODE_INVAL)
12718 disk_info.append((host, port, magic))
12720 assert len(disk_info) == len(self.op.target_node)
12721 self.dest_disk_info = disk_info
12723 else:
12724 raise errors.ProgrammerError("Unhandled export mode %r" %
12725 self.op.mode)
12727 # instance disk type verification
12728 # TODO: Implement export support for file-based disks
12729 for disk in self.instance.disks:
12730 if disk.dev_type == constants.LD_FILE:
12731 raise errors.OpPrereqError("Export not supported for instances with"
12732 " file-based disks", errors.ECODE_INVAL)
12734 def _CleanupExports(self, feedback_fn):
12735 """Removes exports of current instance from all other nodes.
12737 If an instance in a cluster with nodes A..D was exported to node C, its
12738 exports will be removed from the nodes A, B and D.
12740 """
12741 assert self.op.mode != constants.EXPORT_MODE_REMOTE
12743 nodelist = self.cfg.GetNodeList()
12744 nodelist.remove(self.dst_node.name)
12746 # on one-node clusters nodelist will be empty after the removal
12747 # if we proceed the backup would be removed because OpBackupQuery
12748 # substitutes an empty list with the full cluster node list.
12749 iname = self.instance.name
12750 if nodelist:
12751 feedback_fn("Removing old exports for instance %s" % iname)
12752 exportlist = self.rpc.call_export_list(nodelist)
12753 for node in exportlist:
12754 if exportlist[node].fail_msg:
12755 continue
12756 if iname in exportlist[node].payload:
12757 msg = self.rpc.call_export_remove(node, iname).fail_msg
12758 if msg:
12759 self.LogWarning("Could not remove older export for instance %s"
12760 " on node %s: %s", iname, node, msg)
12762 def Exec(self, feedback_fn):
12763 """Export an instance to an image in the cluster.
12766 assert self.op.mode in constants.EXPORT_MODES
12768 instance = self.instance
12769 src_node = instance.primary_node
12771 if self.op.shutdown:
12772 # shutdown the instance, but not the disks
12773 feedback_fn("Shutting down instance %s" % instance.name)
12774 result = self.rpc.call_instance_shutdown(src_node, instance,
12775 self.op.shutdown_timeout)
12776 # TODO: Maybe ignore failures if ignore_remove_failures is set
12777 result.Raise("Could not shutdown instance %s on"
12778 " node %s" % (instance.name, src_node))
12780 # set the disks ID correctly since call_instance_start needs the
12781 # correct drbd minor to create the symlinks
12782 for disk in instance.disks:
12783 self.cfg.SetDiskID(disk, src_node)
12785 activate_disks = (instance.admin_state != constants.ADMINST_UP)
12787 if activate_disks:
12788 # Activate the instance disks if we're exporting a stopped instance
12789 feedback_fn("Activating disks for %s" % instance.name)
12790 _StartInstanceDisks(self, instance, None)
12792 try:
12793 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
12794 instance)
12796 helper.CreateSnapshots()
12797 try:
12798 if (self.op.shutdown and
12799 instance.admin_state == constants.ADMINST_UP and
12800 not self.op.remove_instance):
12801 assert not activate_disks
12802 feedback_fn("Starting instance %s" % instance.name)
12803 result = self.rpc.call_instance_start(src_node,
12804 (instance, None, None), False)
12805 msg = result.fail_msg
12806 if msg:
12807 feedback_fn("Failed to start instance: %s" % msg)
12808 _ShutdownInstanceDisks(self, instance)
12809 raise errors.OpExecError("Could not start instance: %s" % msg)
12811 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12812 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
12813 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
12814 connect_timeout = constants.RIE_CONNECT_TIMEOUT
12815 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
12817 (key_name, _, _) = self.x509_key_name
12819 dest_ca_pem = \
12820 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
12821 self.dest_x509_ca)
12823 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
12824 key_name, dest_ca_pem,
12825 timeouts)
12827 finally:
12828 helper.Cleanup()
12829 # Check for backwards compatibility
12830 assert len(dresults) == len(instance.disks)
12831 assert compat.all(isinstance(i, bool) for i in dresults), \
12832 "Not all results are boolean: %r" % dresults
12836 feedback_fn("Deactivating disks for %s" % instance.name)
12837 _ShutdownInstanceDisks(self, instance)
12839 if not (compat.all(dresults) and fin_resu):
12840 failures = []
12841 if not fin_resu:
12842 failures.append("export finalization")
12843 if not compat.all(dresults):
12844 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
12845 if not dsk)
12846 failures.append("disk export: disk(s) %s" % fdsk)
12848 raise errors.OpExecError("Export failed, errors in %s" %
12849 utils.CommaJoin(failures))
12851 # At this point, the export was successful, we can cleanup/finish
12853 # Remove instance if requested
12854 if self.op.remove_instance:
12855 feedback_fn("Removing instance %s" % instance.name)
12856 _RemoveInstance(self, feedback_fn, instance,
12857 self.op.ignore_remove_failures)
12859 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12860 self._CleanupExports(feedback_fn)
12862 return fin_resu, dresults
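# A sketch of how a caller could interpret the value returned above
# (hypothetical "lu_result" variable):
#
#   (fin_resu, dresults) = lu_result
#   if fin_resu and compat.all(dresults):
#     pass  # export finalized and every disk copied successfully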
12865 class LUBackupRemove(NoHooksLU):
12866 """Remove exports related to the named instance.
12871 def ExpandNames(self):
12872 self.needed_locks = {}
12873 # We need all nodes to be locked in order for RemoveExport to work, but we
12874 # don't need to lock the instance itself, as nothing will happen to it (and
12875 # we can remove exports also for a removed instance)
12876 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12878 def Exec(self, feedback_fn):
12879 """Remove any export.
12881 """
12882 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
12883 # If the instance was not found we'll try with the name that was passed in.
12884 # This will only work if it was an FQDN, though.
12885 fqdn_warn = False
12886 if not instance_name:
12887 fqdn_warn = True
12888 instance_name = self.op.instance_name
12890 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
12891 exportlist = self.rpc.call_export_list(locked_nodes)
12892 found = False
12893 for node in exportlist:
12894 msg = exportlist[node].fail_msg
12895 if msg:
12896 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
12897 continue
12898 if instance_name in exportlist[node].payload:
12899 found = True
12900 result = self.rpc.call_export_remove(node, instance_name)
12901 msg = result.fail_msg
12902 if msg:
12903 logging.error("Could not remove export for instance %s"
12904 " on node %s: %s", instance_name, node, msg)
12906 if fqdn_warn and not found:
12907 feedback_fn("Export not found. If trying to remove an export belonging"
12908 " to a deleted instance please use its Fully Qualified"
12912 class LUGroupAdd(LogicalUnit):
12913 """Logical unit for creating node groups.
12915 """
12916 HPATH = "group-add"
12917 HTYPE = constants.HTYPE_GROUP
12918 REQ_BGL = False
12920 def ExpandNames(self):
12921 # We need the new group's UUID here so that we can create and acquire the
12922 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
12923 # that it should not check whether the UUID exists in the configuration.
12924 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
12925 self.needed_locks = {}
12926 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
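# To summarize the flow sketched in the comment above: the UUID is generated
# before any lock exists, add_locks creates and acquires the new group lock
# under that UUID, and Exec() passes check_uuid=False so that
# cfg.AddNodeGroup accepts the pre-generated UUID instead of creating its own.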
12928 def CheckPrereq(self):
12929 """Check prerequisites.
12931 This checks that the given group name is not an existing node group
12932 already.
12934 """
12935 try:
12936 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12937 except errors.OpPrereqError:
12938 pass
12939 else:
12940 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
12941 " node group (UUID: %s)" %
12942 (self.op.group_name, existing_uuid),
12943 errors.ECODE_EXISTS)
12945 if self.op.ndparams:
12946 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12948 if self.op.hv_state:
12949 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
12950 else:
12951 self.new_hv_state = None
12953 if self.op.disk_state:
12954 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
12955 else:
12956 self.new_disk_state = None
12958 if self.op.diskparams:
12959 for templ in constants.DISK_TEMPLATES:
12960 if templ not in self.op.diskparams:
12961 self.op.diskparams[templ] = {}
12962 utils.ForceDictType(self.op.diskparams[templ], constants.DISK_DT_TYPES)
12963 else:
12964 self.op.diskparams = self.cfg.GetClusterInfo().diskparams
12966 if self.op.ipolicy:
12967 cluster = self.cfg.GetClusterInfo()
12968 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
12969 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy)
12971 def BuildHooksEnv(self):
12972 """Build hooks env.
12976 "GROUP_NAME": self.op.group_name,
12979 def BuildHooksNodes(self):
12980 """Build hooks nodes.
12983 mn = self.cfg.GetMasterNode()
12984 return ([mn], [mn])
12986 def Exec(self, feedback_fn):
12987 """Add the node group to the cluster.
12990 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
12991 uuid=self.group_uuid,
12992 alloc_policy=self.op.alloc_policy,
12993 ndparams=self.op.ndparams,
12994 diskparams=self.op.diskparams,
12995 ipolicy=self.op.ipolicy,
12996 hv_state_static=self.new_hv_state,
12997 disk_state_static=self.new_disk_state)
12999 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
13000 del self.remove_locks[locking.LEVEL_NODEGROUP]
13003 class LUGroupAssignNodes(NoHooksLU):
13004 """Logical unit for assigning nodes to groups.
13009 def ExpandNames(self):
13010 # These raise errors.OpPrereqError on their own:
13011 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13012 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
13014 # We want to lock all the affected nodes and groups. We have readily
13015 # available the list of nodes, and the *destination* group. To gather the
13016 # list of "source" groups, we need to fetch node information later on.
13017 self.needed_locks = {
13018 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
13019 locking.LEVEL_NODE: self.op.nodes,
13020 }
13022 def DeclareLocks(self, level):
13023 if level == locking.LEVEL_NODEGROUP:
13024 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
13026 # Try to get all affected nodes' groups without having the group or node
13027 # lock yet. Needs verification later in the code flow.
13028 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
13030 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
13032 def CheckPrereq(self):
13033 """Check prerequisites.
13036 assert self.needed_locks[locking.LEVEL_NODEGROUP]
13037 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
13038 frozenset(self.op.nodes))
13040 expected_locks = (set([self.group_uuid]) |
13041 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
13042 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
13043 if actual_locks != expected_locks:
13044 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
13045 " current groups are '%s', used to be '%s'" %
13046 (utils.CommaJoin(expected_locks),
13047 utils.CommaJoin(actual_locks)))
13049 self.node_data = self.cfg.GetAllNodesInfo()
13050 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13051 instance_data = self.cfg.GetAllInstancesInfo()
13053 if self.group is None:
13054 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13055 (self.op.group_name, self.group_uuid))
13057 (new_splits, previous_splits) = \
13058 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
13059 for node in self.op.nodes],
13060 self.node_data, instance_data)
13062 if new_splits:
13063 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
13065 if not self.op.force:
13066 raise errors.OpExecError("The following instances get split by this"
13067 " change and --force was not given: %s" %
13068 fmt_new_splits)
13069 else:
13070 self.LogWarning("This operation will split the following instances: %s",
13071 fmt_new_splits)
13073 if previous_splits:
13074 self.LogWarning("In addition, these already-split instances continue"
13075 " to be split across groups: %s",
13076 utils.CommaJoin(utils.NiceSort(previous_splits)))
13078 def Exec(self, feedback_fn):
13079 """Assign nodes to a new group.
13082 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
13084 self.cfg.AssignGroupNodes(mods)
13086 @staticmethod
13087 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
13088 """Check for split instances after a node assignment.
13090 This method considers a series of node assignments as an atomic operation,
13091 and returns information about split instances after applying the set of
13094 In particular, it returns information about newly split instances, and
13095 instances that were already split, and remain so after the change.
13097 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
13100 @type changes: list of (node_name, new_group_uuid) pairs.
13101 @param changes: list of node assignments to consider.
13102 @param node_data: a dict with data for all nodes
13103 @param instance_data: a dict with all instances to consider
13104 @rtype: a two-tuple
13105 @return: a list of instances that were previously okay and result split as a
13106 consequence of this change, and a list of instances that were previously
13107 split and this change does not fix.
13110 changed_nodes = dict((node, group) for node, group in changes
13111 if node_data[node].group != group)
13113 all_split_instances = set()
13114 previously_split_instances = set()
13116 def InstanceNodes(instance):
13117 return [instance.primary_node] + list(instance.secondary_nodes)
13119 for inst in instance_data.values():
13120 if inst.disk_template not in constants.DTS_INT_MIRROR:
13121 continue
13123 instance_nodes = InstanceNodes(inst)
13125 if len(set(node_data[node].group for node in instance_nodes)) > 1:
13126 previously_split_instances.add(inst.name)
13128 if len(set(changed_nodes.get(node, node_data[node].group)
13129 for node in instance_nodes)) > 1:
13130 all_split_instances.add(inst.name)
13132 return (list(all_split_instances - previously_split_instances),
13133 list(previously_split_instances & all_split_instances))
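# Worked example (hypothetical names): instance "inst1" uses DRBD on nodes
# ("node1", "node2"), both currently in group "g1". For
# changes = [("node1", "g2")] the method computes
# changed_nodes = {"node1": "g2"}; "inst1" was not split before (both nodes
# in "g1") but would span {"g1", "g2"} afterwards, so it appears in the
# first returned list and not in the second.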
13136 class _GroupQuery(_QueryBase):
13137 FIELDS = query.GROUP_FIELDS
13139 def ExpandNames(self, lu):
13140 lu.needed_locks = {}
13142 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
13143 self._cluster = lu.cfg.GetClusterInfo()
13144 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
13146 if not self.names:
13147 self.wanted = [name_to_uuid[name]
13148 for name in utils.NiceSort(name_to_uuid.keys())]
13149 else:
13150 # Accept names to be either names or UUIDs.
13151 missing = []
13152 self.wanted = []
13153 all_uuid = frozenset(self._all_groups.keys())
13155 for name in self.names:
13156 if name in all_uuid:
13157 self.wanted.append(name)
13158 elif name in name_to_uuid:
13159 self.wanted.append(name_to_uuid[name])
13160 else:
13161 missing.append(name)
13163 if missing:
13164 raise errors.OpPrereqError("Some groups do not exist: %s" %
13165 utils.CommaJoin(missing),
13166 errors.ECODE_NOENT)
13168 def DeclareLocks(self, lu, level):
13169 pass
13171 def _GetQueryData(self, lu):
13172 """Computes the list of node groups and their attributes.
13175 do_nodes = query.GQ_NODE in self.requested_data
13176 do_instances = query.GQ_INST in self.requested_data
13178 group_to_nodes = None
13179 group_to_instances = None
13181 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
13182 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
13183 # latter GetAllInstancesInfo() is not enough, for we have to go through
13184 # instance->node. Hence, we will need to process nodes even if we only need
13185 # instance information.
13186 if do_nodes or do_instances:
13187 all_nodes = lu.cfg.GetAllNodesInfo()
13188 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
13189 node_to_group = {}
13191 for node in all_nodes.values():
13192 if node.group in group_to_nodes:
13193 group_to_nodes[node.group].append(node.name)
13194 node_to_group[node.name] = node.group
13196 if do_instances:
13197 all_instances = lu.cfg.GetAllInstancesInfo()
13198 group_to_instances = dict((uuid, []) for uuid in self.wanted)
13200 for instance in all_instances.values():
13201 node = instance.primary_node
13202 if node in node_to_group:
13203 group_to_instances[node_to_group[node]].append(instance.name)
13205 if not do_nodes:
13206 # Do not pass on node information if it was not requested.
13207 group_to_nodes = None
13209 return query.GroupQueryData(self._cluster,
13210 [self._all_groups[uuid]
13211 for uuid in self.wanted],
13212 group_to_nodes, group_to_instances)
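# Shape of the maps computed above (sketch with hypothetical UUIDs/names):
#
#   group_to_nodes     = {"uuid-g1": ["node1", "node2"], ...}
#   group_to_instances = {"uuid-g1": ["inst1"], ...}
#
# Instances are attributed to the group of their primary node via the
# intermediate node_to_group mapping.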
13215 class LUGroupQuery(NoHooksLU):
13216 """Logical unit for querying node groups.
13221 def CheckArguments(self):
13222 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
13223 self.op.output_fields, False)
13225 def ExpandNames(self):
13226 self.gq.ExpandNames(self)
13228 def DeclareLocks(self, level):
13229 self.gq.DeclareLocks(self, level)
13231 def Exec(self, feedback_fn):
13232 return self.gq.OldStyleQuery(self)
13235 class LUGroupSetParams(LogicalUnit):
13236 """Modifies the parameters of a node group.
13239 HPATH = "group-modify"
13240 HTYPE = constants.HTYPE_GROUP
13243 def CheckArguments(self):
13244 all_changes = [
13245 self.op.ndparams,
13246 self.op.diskparams,
13247 self.op.alloc_policy,
13248 self.op.hv_state,
13249 self.op.disk_state,
13250 self.op.ipolicy,
13251 ]
13253 if all_changes.count(None) == len(all_changes):
13254 raise errors.OpPrereqError("Please pass at least one modification",
13255 errors.ECODE_INVAL)
13257 def ExpandNames(self):
13258 # This raises errors.OpPrereqError on its own:
13259 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13261 self.needed_locks = {
13262 locking.LEVEL_NODEGROUP: [self.group_uuid],
13263 }
13265 def CheckPrereq(self):
13266 """Check prerequisites.
13269 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13271 if self.group is None:
13272 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13273 (self.op.group_name, self.group_uuid))
13275 if self.op.ndparams:
13276 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
13277 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13278 self.new_ndparams = new_ndparams
13280 if self.op.diskparams:
13281 self.new_diskparams = dict()
13282 for templ in constants.DISK_TEMPLATES:
13283 if templ not in self.op.diskparams:
13284 self.op.diskparams[templ] = {}
13285 new_templ_params = _GetUpdatedParams(self.group.diskparams[templ],
13286 self.op.diskparams[templ])
13287 utils.ForceDictType(new_templ_params, constants.DISK_DT_TYPES)
13288 self.new_diskparams[templ] = new_templ_params
13290 if self.op.hv_state:
13291 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
13292 self.group.hv_state_static)
13294 if self.op.disk_state:
13295 self.new_disk_state = \
13296 _MergeAndVerifyDiskState(self.op.disk_state,
13297 self.group.disk_state_static)
13299 if self.op.ipolicy:
13300 g_ipolicy = {}
13301 for key, value in self.op.ipolicy.iteritems():
13302 g_ipolicy[key] = _GetUpdatedParams(self.group.ipolicy.get(key, {}),
13303 value,
13304 use_none=True)
13305 utils.ForceDictType(g_ipolicy[key], constants.ISPECS_PARAMETER_TYPES)
13306 self.new_ipolicy = g_ipolicy
13307 objects.InstancePolicy.CheckParameterSyntax(self.new_ipolicy)
13309 def BuildHooksEnv(self):
13310 """Build hooks env.
13314 "GROUP_NAME": self.op.group_name,
13315 "NEW_ALLOC_POLICY": self.op.alloc_policy,
13318 def BuildHooksNodes(self):
13319 """Build hooks nodes.
13322 mn = self.cfg.GetMasterNode()
13323 return ([mn], [mn])
13325 def Exec(self, feedback_fn):
13326 """Modifies the node group.
13331 if self.op.ndparams:
13332 self.group.ndparams = self.new_ndparams
13333 result.append(("ndparams", str(self.group.ndparams)))
13335 if self.op.diskparams:
13336 self.group.diskparams = self.new_diskparams
13337 result.append(("diskparams", str(self.group.diskparams)))
13339 if self.op.alloc_policy:
13340 self.group.alloc_policy = self.op.alloc_policy
13342 if self.op.hv_state:
13343 self.group.hv_state_static = self.new_hv_state
13345 if self.op.disk_state:
13346 self.group.disk_state_static = self.new_disk_state
13348 if self.op.ipolicy:
13349 self.group.ipolicy = self.new_ipolicy
13351 self.cfg.Update(self.group, feedback_fn)
13352 return result
13355 class LUGroupRemove(LogicalUnit):
13356 HPATH = "group-remove"
13357 HTYPE = constants.HTYPE_GROUP
13358 REQ_BGL = False
13360 def ExpandNames(self):
13361 # This will raise errors.OpPrereqError on its own:
13362 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13363 self.needed_locks = {
13364 locking.LEVEL_NODEGROUP: [self.group_uuid],
13365 }
13367 def CheckPrereq(self):
13368 """Check prerequisites.
13370 This checks that the given group name exists as a node group, that it is
13371 empty (i.e., contains no nodes), and that it is not the last group of the
13372 cluster.
13374 """
13375 # Verify that the group is empty.
13376 group_nodes = [node.name
13377 for node in self.cfg.GetAllNodesInfo().values()
13378 if node.group == self.group_uuid]
13380 if group_nodes:
13381 raise errors.OpPrereqError("Group '%s' not empty, has the following"
13382 " nodes: %s" %
13383 (self.op.group_name,
13384 utils.CommaJoin(utils.NiceSort(group_nodes))),
13385 errors.ECODE_STATE)
13387 # Verify the cluster would not be left group-less.
13388 if len(self.cfg.GetNodeGroupList()) == 1:
13389 raise errors.OpPrereqError("Group '%s' is the only group,"
13390 " cannot be removed" %
13391 self.op.group_name,
13392 errors.ECODE_STATE)
13394 def BuildHooksEnv(self):
13395 """Build hooks env.
13399 "GROUP_NAME": self.op.group_name,
13402 def BuildHooksNodes(self):
13403 """Build hooks nodes.
13406 mn = self.cfg.GetMasterNode()
13407 return ([mn], [mn])
13409 def Exec(self, feedback_fn):
13410 """Remove the node group.
13414 self.cfg.RemoveNodeGroup(self.group_uuid)
13415 except errors.ConfigurationError:
13416 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
13417 (self.op.group_name, self.group_uuid))
13419 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13422 class LUGroupRename(LogicalUnit):
13423 HPATH = "group-rename"
13424 HTYPE = constants.HTYPE_GROUP
13425 REQ_BGL = False
13427 def ExpandNames(self):
13428 # This raises errors.OpPrereqError on its own:
13429 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13431 self.needed_locks = {
13432 locking.LEVEL_NODEGROUP: [self.group_uuid],
13433 }
13435 def CheckPrereq(self):
13436 """Check prerequisites.
13438 Ensures requested new name is not yet used.
13440 """
13441 try:
13442 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
13443 except errors.OpPrereqError:
13444 pass
13445 else:
13446 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
13447 " node group (UUID: %s)" %
13448 (self.op.new_name, new_name_uuid),
13449 errors.ECODE_EXISTS)
13451 def BuildHooksEnv(self):
13452 """Build hooks env.
13456 "OLD_NAME": self.op.group_name,
13457 "NEW_NAME": self.op.new_name,
13460 def BuildHooksNodes(self):
13461 """Build hooks nodes.
13464 mn = self.cfg.GetMasterNode()
13466 all_nodes = self.cfg.GetAllNodesInfo()
13467 all_nodes.pop(mn, None)
13469 run_nodes = [mn]
13470 run_nodes.extend(node.name for node in all_nodes.values()
13471 if node.group == self.group_uuid)
13473 return (run_nodes, run_nodes)
13475 def Exec(self, feedback_fn):
13476 """Rename the node group.
13479 group = self.cfg.GetNodeGroup(self.group_uuid)
13482 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13483 (self.op.group_name, self.group_uuid))
13485 group.name = self.op.new_name
13486 self.cfg.Update(group, feedback_fn)
13488 return self.op.new_name
13491 class LUGroupEvacuate(LogicalUnit):
13492 HPATH = "group-evacuate"
13493 HTYPE = constants.HTYPE_GROUP
13494 REQ_BGL = False
13496 def ExpandNames(self):
13497 # This raises errors.OpPrereqError on its own:
13498 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13500 if self.op.target_groups:
13501 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
13502 self.op.target_groups)
13503 else:
13504 self.req_target_uuids = []
13506 if self.group_uuid in self.req_target_uuids:
13507 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
13508 " as a target group (targets are %s)" %
13510 utils.CommaJoin(self.req_target_uuids)),
13511 errors.ECODE_INVAL)
13513 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
13515 self.share_locks = _ShareAll()
13516 self.needed_locks = {
13517 locking.LEVEL_INSTANCE: [],
13518 locking.LEVEL_NODEGROUP: [],
13519 locking.LEVEL_NODE: [],
13520 }
13522 def DeclareLocks(self, level):
13523 if level == locking.LEVEL_INSTANCE:
13524 assert not self.needed_locks[locking.LEVEL_INSTANCE]
13526 # Lock instances optimistically, needs verification once node and group
13527 # locks have been acquired
13528 self.needed_locks[locking.LEVEL_INSTANCE] = \
13529 self.cfg.GetNodeGroupInstances(self.group_uuid)
13531 elif level == locking.LEVEL_NODEGROUP:
13532 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13534 if self.req_target_uuids:
13535 lock_groups = set([self.group_uuid] + self.req_target_uuids)
13537 # Lock all groups used by instances optimistically; this requires going
13538 # via the node before it's locked, requiring verification later on
13539 lock_groups.update(group_uuid
13540 for instance_name in
13541 self.owned_locks(locking.LEVEL_INSTANCE)
13542 for group_uuid in
13543 self.cfg.GetInstanceNodeGroups(instance_name))
13544 else:
13545 # No target groups, need to lock all of them
13546 lock_groups = locking.ALL_SET
13548 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13550 elif level == locking.LEVEL_NODE:
13551 # This will only lock the nodes in the group to be evacuated which
13552 # contain actual instances
13553 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13554 self._LockInstancesNodes()
13556 # Lock all nodes in group to be evacuated and target groups
13557 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13558 assert self.group_uuid in owned_groups
13559 member_nodes = [node_name
13560 for group in owned_groups
13561 for node_name in self.cfg.GetNodeGroup(group).members]
13562 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13564 def CheckPrereq(self):
13565 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13566 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13567 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13569 assert owned_groups.issuperset(self.req_target_uuids)
13570 assert self.group_uuid in owned_groups
13572 # Check if locked instances are still correct
13573 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
13575 # Get instance information
13576 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
13578 # Check if node groups for locked instances are still correct
13579 for instance_name in owned_instances:
13580 inst = self.instances[instance_name]
13581 assert owned_nodes.issuperset(inst.all_nodes), \
13582 "Instance %s's nodes changed while we kept the lock" % instance_name
13584 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
13585 owned_groups)
13587 assert self.group_uuid in inst_groups, \
13588 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
13590 if self.req_target_uuids:
13591 # User requested specific target groups
13592 self.target_uuids = self.req_target_uuids
13593 else:
13594 # All groups except the one to be evacuated are potential targets
13595 self.target_uuids = [group_uuid for group_uuid in owned_groups
13596 if group_uuid != self.group_uuid]
13598 if not self.target_uuids:
13599 raise errors.OpPrereqError("There are no possible target groups",
13600 errors.ECODE_INVAL)
13602 def BuildHooksEnv(self):
13603 """Build hooks env.
13607 "GROUP_NAME": self.op.group_name,
13608 "TARGET_GROUPS": " ".join(self.target_uuids),
13611 def BuildHooksNodes(self):
13612 """Build hooks nodes.
13615 mn = self.cfg.GetMasterNode()
13617 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
13619 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
13621 return (run_nodes, run_nodes)
13623 def Exec(self, feedback_fn):
13624 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13626 assert self.group_uuid not in self.target_uuids
13628 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
13629 instances=instances, target_groups=self.target_uuids)
13631 ial.Run(self.op.iallocator)
13633 if not ial.success:
13634 raise errors.OpPrereqError("Can't compute group evacuation using"
13635 " iallocator '%s': %s" %
13636 (self.op.iallocator, ial.info),
13637 errors.ECODE_NORES)
13639 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13641 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
13642 len(jobs), self.op.group_name)
13644 return ResultWithJobs(jobs)
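# Each element of "jobs" is itself a list of opcodes forming one job; a
# hypothetical result could look like [[opcodes.OpInstanceMigrate(...)],
# [opcodes.OpInstanceReplaceDisks(...)]], matching the _JOB_LIST result type
# validated for IAllocator results further below.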
13647 class TagsLU(NoHooksLU): # pylint: disable=W0223
13648 """Generic tags LU.
13650 This is an abstract class which is the parent of all the other tags LUs.
13652 """
13653 def ExpandNames(self):
13654 self.group_uuid = None
13655 self.needed_locks = {}
13656 if self.op.kind == constants.TAG_NODE:
13657 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
13658 self.needed_locks[locking.LEVEL_NODE] = self.op.name
13659 elif self.op.kind == constants.TAG_INSTANCE:
13660 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
13661 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
13662 elif self.op.kind == constants.TAG_NODEGROUP:
13663 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
13665 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
13666 # not possible to acquire the BGL based on opcode parameters)
13668 def CheckPrereq(self):
13669 """Check prerequisites.
13672 if self.op.kind == constants.TAG_CLUSTER:
13673 self.target = self.cfg.GetClusterInfo()
13674 elif self.op.kind == constants.TAG_NODE:
13675 self.target = self.cfg.GetNodeInfo(self.op.name)
13676 elif self.op.kind == constants.TAG_INSTANCE:
13677 self.target = self.cfg.GetInstanceInfo(self.op.name)
13678 elif self.op.kind == constants.TAG_NODEGROUP:
13679 self.target = self.cfg.GetNodeGroup(self.group_uuid)
13680 else:
13681 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
13682 str(self.op.kind), errors.ECODE_INVAL)
13685 class LUTagsGet(TagsLU):
13686 """Returns the tags of a given object.
13691 def ExpandNames(self):
13692 TagsLU.ExpandNames(self)
13694 # Share locks as this is only a read operation
13695 self.share_locks = _ShareAll()
13697 def Exec(self, feedback_fn):
13698 """Returns the tag list.
13701 return list(self.target.GetTags())
13704 class LUTagsSearch(NoHooksLU):
13705 """Searches the tags for a given pattern.
13710 def ExpandNames(self):
13711 self.needed_locks = {}
13713 def CheckPrereq(self):
13714 """Check prerequisites.
13716 This checks the pattern passed for validity by compiling it.
13718 """
13719 try:
13720 self.re = re.compile(self.op.pattern)
13721 except re.error, err:
13722 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
13723 (self.op.pattern, err), errors.ECODE_INVAL)
13725 def Exec(self, feedback_fn):
13726 """Returns the tag list.
13730 tgts = [("/cluster", cfg.GetClusterInfo())]
13731 ilist = cfg.GetAllInstancesInfo().values()
13732 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
13733 nlist = cfg.GetAllNodesInfo().values()
13734 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
13735 tgts.extend(("/nodegroup/%s" % n.name, n)
13736 for n in cfg.GetAllNodeGroupsInfo().values())
13737 results = []
13738 for path, target in tgts:
13739 for tag in target.GetTags():
13740 if self.re.search(tag):
13741 results.append((path, tag))
13742 return results
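# Example (hypothetical tags): searching for the pattern "^db" could return
#
#   [("/cluster", "dbfarm"), ("/instances/inst1.example.com", "dbserver")]
#
# i.e. (path, tag) pairs for every tagged object whose tags match the
# compiled expression.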
13745 class LUTagsSet(TagsLU):
13746 """Sets a tag on a given object.
13751 def CheckPrereq(self):
13752 """Check prerequisites.
13754 This checks the type and length of the tag name and value.
13756 """
13757 TagsLU.CheckPrereq(self)
13758 for tag in self.op.tags:
13759 objects.TaggableObject.ValidateTag(tag)
13761 def Exec(self, feedback_fn):
13762 """Sets the tag.
13764 """
13765 try:
13766 for tag in self.op.tags:
13767 self.target.AddTag(tag)
13768 except errors.TagError, err:
13769 raise errors.OpExecError("Error while setting tag: %s" % str(err))
13770 self.cfg.Update(self.target, feedback_fn)
13773 class LUTagsDel(TagsLU):
13774 """Delete a list of tags from a given object.
13779 def CheckPrereq(self):
13780 """Check prerequisites.
13782 This checks that we have the given tag.
13784 """
13785 TagsLU.CheckPrereq(self)
13786 for tag in self.op.tags:
13787 objects.TaggableObject.ValidateTag(tag)
13788 del_tags = frozenset(self.op.tags)
13789 cur_tags = self.target.GetTags()
13791 diff_tags = del_tags - cur_tags
13792 if diff_tags:
13793 diff_names = ("'%s'" % i for i in sorted(diff_tags))
13794 raise errors.OpPrereqError("Tag(s) %s not found" %
13795 (utils.CommaJoin(diff_names), ),
13796 errors.ECODE_NOENT)
13798 def Exec(self, feedback_fn):
13799 """Remove the tag from the object.
13802 for tag in self.op.tags:
13803 self.target.RemoveTag(tag)
13804 self.cfg.Update(self.target, feedback_fn)
13807 class LUTestDelay(NoHooksLU):
13808 """Sleep for a specified amount of time.
13810 This LU sleeps on the master and/or nodes for a specified amount of
13811 time.
13813 """
13814 REQ_BGL = False
13816 def ExpandNames(self):
13817 """Expand names and set required locks.
13819 This expands the node list, if any.
13821 """
13822 self.needed_locks = {}
13823 if self.op.on_nodes:
13824 # _GetWantedNodes can be used here, but is not always appropriate to use
13825 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
13826 # more information.
13827 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
13828 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
13830 def _TestDelay(self):
13831 """Do the actual sleep.
13834 if self.op.on_master:
13835 if not utils.TestDelay(self.op.duration):
13836 raise errors.OpExecError("Error during master delay test")
13837 if self.op.on_nodes:
13838 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
13839 for node, node_result in result.items():
13840 node_result.Raise("Failure during rpc call to node %s" % node)
13842 def Exec(self, feedback_fn):
13843 """Execute the test delay opcode, with the wanted repetitions.
13846 if self.op.repeat == 0:
13849 top_value = self.op.repeat - 1
13850 for i in range(self.op.repeat):
13851 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
13855 class LUTestJqueue(NoHooksLU):
13856 """Utility LU to test some aspects of the job queue.
13861 # Must be lower than default timeout for WaitForJobChange to see whether it
13862 # notices changed jobs
13863 _CLIENT_CONNECT_TIMEOUT = 20.0
13864 _CLIENT_CONFIRM_TIMEOUT = 60.0
13866 @classmethod
13867 def _NotifyUsingSocket(cls, cb, errcls):
13868 """Opens a Unix socket and waits for another program to connect.
13870 @type cb: callable
13871 @param cb: Callback to send socket name to client
13872 @type errcls: class
13873 @param errcls: Exception class to use for errors
13875 """
13876 # Using a temporary directory as there's no easy way to create temporary
13877 # sockets without writing a custom loop around tempfile.mktemp and
13878 # socket.bind
13879 tmpdir = tempfile.mkdtemp()
13880 try:
13881 tmpsock = utils.PathJoin(tmpdir, "sock")
13883 logging.debug("Creating temporary socket at %s", tmpsock)
13884 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
13885 try:
13886 sock.bind(tmpsock)
13887 sock.listen(1)
13889 # Send details to client
13890 cb(tmpsock)
13892 # Wait for client to connect before continuing
13893 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
13894 try:
13895 (conn, _) = sock.accept()
13896 except socket.error, err:
13897 raise errcls("Client didn't connect in time (%s)" % err)
13898 finally:
13899 sock.close()
13900 finally:
13901 # Remove as soon as client is connected
13902 shutil.rmtree(tmpdir)
13904 # Wait for client to close
13905 try:
13906 try:
13907 # pylint: disable=E1101
13908 # Instance of '_socketobject' has no ... member
13909 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
13910 conn.recv(1)
13911 except socket.error, err:
13912 raise errcls("Client failed to confirm notification (%s)" % err)
13913 finally:
13914 conn.close()
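# A minimal client-side counterpart (sketch, not part of this module): the
# test client connects to the path delivered via the callback and simply
# closes the connection to confirm the notification:
#
#   import socket
#   sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
#   sock.connect(sockname)  # path received from the callback above
#   sock.close()            # makes conn.recv(1) above return and unblock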
13916 def _SendNotification(self, test, arg, sockname):
13917 """Sends a notification to the client.
13920 @param test: Test name
13921 @param arg: Test argument (depends on test)
13922 @type sockname: string
13923 @param sockname: Socket path
13926 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
13928 def _Notify(self, prereq, test, arg):
13929 """Notifies the client of a test.
13931 @type prereq: bool
13932 @param prereq: Whether this is a prereq-phase test
13933 @type test: string
13934 @param test: Test name
13935 @param arg: Test argument (depends on test)
13937 """
13938 if prereq:
13939 errcls = errors.OpPrereqError
13940 else:
13941 errcls = errors.OpExecError
13943 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
13944 test, arg),
13945 errcls)
13947 def CheckArguments(self):
13948 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
13949 self.expandnames_calls = 0
13951 def ExpandNames(self):
13952 checkargs_calls = getattr(self, "checkargs_calls", 0)
13953 if checkargs_calls < 1:
13954 raise errors.ProgrammerError("CheckArguments was not called")
13956 self.expandnames_calls += 1
13958 if self.op.notify_waitlock:
13959 self._Notify(True, constants.JQT_EXPANDNAMES, None)
13961 self.LogInfo("Expanding names")
13963 # Get lock on master node (just to get a lock, not for a particular reason)
13964 self.needed_locks = {
13965 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
13966 }
13968 def Exec(self, feedback_fn):
13969 if self.expandnames_calls < 1:
13970 raise errors.ProgrammerError("ExpandNames was not called")
13972 if self.op.notify_exec:
13973 self._Notify(False, constants.JQT_EXEC, None)
13975 self.LogInfo("Executing")
13977 if self.op.log_messages:
13978 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
13979 for idx, msg in enumerate(self.op.log_messages):
13980 self.LogInfo("Sending log message %s", idx + 1)
13981 feedback_fn(constants.JQT_MSGPREFIX + msg)
13982 # Report how many test messages have been sent
13983 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
13985 if self.op.fail:
13986 raise errors.OpExecError("Opcode failure was requested")
13988 return True
13991 class IAllocator(object):
13992 """IAllocator framework.
13994 An IAllocator instance has three sets of attributes:
13995 - cfg that is needed to query the cluster
13996 - input data (all members of the _KEYS class attribute are required)
13997 - four buffer attributes (in|out_data|text), that represent the
13998 input (to the external script) in text and data structure format,
13999 and the output from it, again in two formats
14000 - the result variables from the script (success, info, nodes) for
14001 easy usage
14003 """
14004 # pylint: disable=R0902
14005 # lots of instance attributes
14007 def __init__(self, cfg, rpc_runner, mode, **kwargs):
14008 self.cfg = cfg
14009 self.rpc = rpc_runner
14010 # init buffer variables
14011 self.in_text = self.out_text = self.in_data = self.out_data = None
14012 # init all input fields so that pylint is happy
14013 self.mode = mode
14014 self.memory = self.disks = self.disk_template = None
14015 self.os = self.tags = self.nics = self.vcpus = None
14016 self.hypervisor = None
14017 self.relocate_from = None
14018 self.name = None
14019 self.instances = None
14020 self.evac_mode = None
14021 self.target_groups = []
14022 # computed fields
14023 self.required_nodes = None
14024 # init result fields
14025 self.success = self.info = self.result = None
14027 try:
14028 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
14029 except KeyError:
14030 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
14031 " IAllocator" % self.mode)
14033 keyset = [n for (n, _) in keydata]
14035 for key in kwargs:
14036 if key not in keyset:
14037 raise errors.ProgrammerError("Invalid input parameter '%s' to"
14038 " IAllocator" % key)
14039 setattr(self, key, kwargs[key])
14041 for key in keyset:
14042 if key not in kwargs:
14043 raise errors.ProgrammerError("Missing input parameter '%s' to"
14044 " IAllocator" % key)
14045 self._BuildInputData(compat.partial(fn, self), keydata)
14047 def _ComputeClusterData(self):
14048 """Compute the generic allocator input data.
14050 This is the data that is independent of the actual operation.
14052 """
14053 cfg = self.cfg
14054 cluster_info = cfg.GetClusterInfo()
14055 # cluster data
14056 data = {
14057 "version": constants.IALLOCATOR_VERSION,
14058 "cluster_name": cfg.GetClusterName(),
14059 "cluster_tags": list(cluster_info.GetTags()),
14060 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
14061 # we don't have job IDs
14062 }
14063 ninfo = cfg.GetAllNodesInfo()
14064 iinfo = cfg.GetAllInstancesInfo().values()
14065 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
14067 # node data
14068 node_list = [n.name for n in ninfo.values() if n.vm_capable]
14070 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
14071 hypervisor_name = self.hypervisor
14072 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
14073 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
14074 else:
14075 hypervisor_name = cluster_info.primary_hypervisor
14077 node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
14078 [hypervisor_name])
14079 node_iinfo = \
14080 self.rpc.call_all_instances_info(node_list,
14081 cluster_info.enabled_hypervisors)
14083 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
14085 config_ndata = self._ComputeBasicNodeData(ninfo)
14086 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
14087 i_list, config_ndata)
14088 assert len(data["nodes"]) == len(ninfo), \
14089 "Incomplete node data computed"
14091 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
14093 self.in_data = data
14095 @staticmethod
14096 def _ComputeNodeGroupData(cfg):
14097 """Compute node groups data.
14099 """
14100 ng = dict((guuid, {
14101 "name": gdata.name,
14102 "alloc_policy": gdata.alloc_policy,
14103 })
14104 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
14106 return ng
14108 @staticmethod
14109 def _ComputeBasicNodeData(node_cfg):
14110 """Compute global node data.
14113 @returns: a dict of name: (node dict, node config)
14115 """
14116 # fill in static (config-based) values
14117 node_results = dict((ninfo.name, {
14118 "tags": list(ninfo.GetTags()),
14119 "primary_ip": ninfo.primary_ip,
14120 "secondary_ip": ninfo.secondary_ip,
14121 "offline": ninfo.offline,
14122 "drained": ninfo.drained,
14123 "master_candidate": ninfo.master_candidate,
14124 "group": ninfo.group,
14125 "master_capable": ninfo.master_capable,
14126 "vm_capable": ninfo.vm_capable,
14128 for ninfo in node_cfg.values())
14130 return node_results
14132 @staticmethod
14133 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
14134 node_results):
14135 """Compute global node data.
14137 @param node_results: the basic node structures as filled from the config
14139 """
14140 #TODO(dynmem): compute the right data on MAX and MIN memory
14141 # make a copy of the current dict
14142 node_results = dict(node_results)
14143 for nname, nresult in node_data.items():
14144 assert nname in node_results, "Missing basic data for node %s" % nname
14145 ninfo = node_cfg[nname]
14147 if not (ninfo.offline or ninfo.drained):
14148 nresult.Raise("Can't get data for node %s" % nname)
14149 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
14150 nname)
14151 remote_info = _MakeLegacyNodeInfo(nresult.payload)
14153 for attr in ["memory_total", "memory_free", "memory_dom0",
14154 "vg_size", "vg_free", "cpu_total"]:
14155 if attr not in remote_info:
14156 raise errors.OpExecError("Node '%s' didn't return attribute"
14157 " '%s'" % (nname, attr))
14158 if not isinstance(remote_info[attr], int):
14159 raise errors.OpExecError("Node '%s' returned invalid value"
14160 " for '%s': %s" %
14161 (nname, attr, remote_info[attr]))
14162 # compute memory used by primary instances
14163 i_p_mem = i_p_up_mem = 0
14164 for iinfo, beinfo in i_list:
14165 if iinfo.primary_node == nname:
14166 i_p_mem += beinfo[constants.BE_MAXMEM]
14167 if iinfo.name not in node_iinfo[nname].payload:
14168 i_used_mem = 0
14169 else:
14170 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
14171 i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
14172 remote_info["memory_free"] -= max(0, i_mem_diff)
14174 if iinfo.admin_state == constants.ADMINST_UP:
14175 i_p_up_mem += beinfo[constants.BE_MAXMEM]
14177 # compute memory used by instances
14178 pnr_dyn = {
14179 "total_memory": remote_info["memory_total"],
14180 "reserved_memory": remote_info["memory_dom0"],
14181 "free_memory": remote_info["memory_free"],
14182 "total_disk": remote_info["vg_size"],
14183 "free_disk": remote_info["vg_free"],
14184 "total_cpus": remote_info["cpu_total"],
14185 "i_pri_memory": i_p_mem,
14186 "i_pri_up_memory": i_p_up_mem,
14188 pnr_dyn.update(node_results[nname])
14189 node_results[nname] = pnr_dyn
14191 return node_results
14193 @staticmethod
14194 def _ComputeInstanceData(cluster_info, i_list):
14195 """Compute global instance data.
14197 """
14198 instance_data = {}
14199 for iinfo, beinfo in i_list:
14200 nic_data = []
14201 for nic in iinfo.nics:
14202 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
14203 nic_dict = {
14204 "mac": nic.mac,
14205 "ip": nic.ip,
14206 "mode": filled_params[constants.NIC_MODE],
14207 "link": filled_params[constants.NIC_LINK],
14208 }
14209 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
14210 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
14211 nic_data.append(nic_dict)
14213 "tags": list(iinfo.GetTags()),
14214 "admin_state": iinfo.admin_state,
14215 "vcpus": beinfo[constants.BE_VCPUS],
14216 "memory": beinfo[constants.BE_MAXMEM],
14218 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
14220 "disks": [{constants.IDISK_SIZE: dsk.size,
14221 constants.IDISK_MODE: dsk.mode}
14222 for dsk in iinfo.disks],
14223 "disk_template": iinfo.disk_template,
14224 "hypervisor": iinfo.hypervisor,
14226 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
14228 instance_data[iinfo.name] = pir
14230 return instance_data
14232 def _AddNewInstance(self):
14233 """Add new instance data to allocator structure.
14235 This in combination with _AllocatorGetClusterData will create the
14236 correct structure needed as input for the allocator.
14238 The checks for the completeness of the opcode must have already been
14239 done.
14241 """
14242 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
14244 if self.disk_template in constants.DTS_INT_MIRROR:
14245 self.required_nodes = 2
14246 else:
14247 self.required_nodes = 1
14251 "disk_template": self.disk_template,
14254 "vcpus": self.vcpus,
14255 "memory": self.memory,
14256 "disks": self.disks,
14257 "disk_space_total": disk_space,
14259 "required_nodes": self.required_nodes,
14260 "hypervisor": self.hypervisor,
14265 def _AddRelocateInstance(self):
14266 """Add relocate instance data to allocator structure.
14268 This in combination with _IAllocatorGetClusterData will create the
14269 correct structure needed as input for the allocator.
14271 The checks for the completeness of the opcode must have already been
14272 done.
14274 """
14275 instance = self.cfg.GetInstanceInfo(self.name)
14276 if instance is None:
14277 raise errors.ProgrammerError("Unknown instance '%s' passed to"
14278 " IAllocator" % self.name)
14280 if instance.disk_template not in constants.DTS_MIRRORED:
14281 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
14282 errors.ECODE_INVAL)
14284 if instance.disk_template in constants.DTS_INT_MIRROR and \
14285 len(instance.secondary_nodes) != 1:
14286 raise errors.OpPrereqError("Instance has not exactly one secondary node",
14287 errors.ECODE_STATE)
14289 self.required_nodes = 1
14290 disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
14291 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
14295 "disk_space_total": disk_space,
14296 "required_nodes": self.required_nodes,
14297 "relocate_from": self.relocate_from,
14301 def _AddNodeEvacuate(self):
14302 """Get data for node-evacuate requests.
14306 "instances": self.instances,
14307 "evac_mode": self.evac_mode,
14310 def _AddChangeGroup(self):
14311 """Get data for node-evacuate requests.
14315 "instances": self.instances,
14316 "target_groups": self.target_groups,
14319 def _BuildInputData(self, fn, keydata):
14320 """Build input data structures.
14323 self._ComputeClusterData()
14326 request["type"] = self.mode
14327 for keyname, keytype in keydata:
14328 if keyname not in request:
14329 raise errors.ProgrammerError("Request parameter %s is missing" %
14330 keyname)
14331 val = request[keyname]
14332 if not keytype(val):
14333 raise errors.ProgrammerError("Request parameter %s doesn't pass"
14334 " validation, value %s, expected"
14335 " type %s" % (keyname, val, keytype))
14336 self.in_data["request"] = request
14338 self.in_text = serializer.Dump(self.in_data)
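# Sketch of the serialized input for an allocation request (hypothetical,
# abridged values; see _AddNewInstance and _ComputeClusterData above):
#
#   {"version": constants.IALLOCATOR_VERSION,
#    "cluster_name": "cluster.example.com",
#    "nodegroups": {...}, "nodes": {...}, "instances": {...},
#    "request": {"type": "allocate", "name": "inst1.example.com",
#                "memory": 1024, "disks": [{"size": 1024, "mode": "rw"}],
#                "required_nodes": 2}}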
14340 _STRING_LIST = ht.TListOf(ht.TString)
14341 _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
14342 # pylint: disable=E1101
14343 # Class '...' has no 'OP_ID' member
14344 "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
14345 opcodes.OpInstanceMigrate.OP_ID,
14346 opcodes.OpInstanceReplaceDisks.OP_ID])
14347 })))
14349 _NEVAC_MOVED = \
14350 ht.TListOf(ht.TAnd(ht.TIsLength(3),
14351 ht.TItems([ht.TNonEmptyString,
14352 ht.TNonEmptyString,
14353 ht.TListOf(ht.TNonEmptyString),
14354 ])))
14355 _NEVAC_FAILED = \
14356 ht.TListOf(ht.TAnd(ht.TIsLength(2),
14357 ht.TItems([ht.TNonEmptyString,
14358 ht.TMaybeString,
14359 ])))
14360 _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
14361 ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
14363 _MODE_DATA = {
14364 constants.IALLOCATOR_MODE_ALLOC:
14365 (_AddNewInstance,
14366 [
14367 ("name", ht.TString),
14368 ("memory", ht.TInt),
14369 ("disks", ht.TListOf(ht.TDict)),
14370 ("disk_template", ht.TString),
14371 ("os", ht.TString),
14372 ("tags", _STRING_LIST),
14373 ("nics", ht.TListOf(ht.TDict)),
14374 ("vcpus", ht.TInt),
14375 ("hypervisor", ht.TString),
14377 constants.IALLOCATOR_MODE_RELOC:
14378 (_AddRelocateInstance,
14379 [("name", ht.TString), ("relocate_from", _STRING_LIST)],
14381 constants.IALLOCATOR_MODE_NODE_EVAC:
14382 (_AddNodeEvacuate, [
14383 ("instances", _STRING_LIST),
14384 ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
14386 constants.IALLOCATOR_MODE_CHG_GROUP:
14387 (_AddChangeGroup, [
14388 ("instances", _STRING_LIST),
14389 ("target_groups", _STRING_LIST),
14393 def Run(self, name, validate=True, call_fn=None):
14394 """Run an instance allocator and return the results.
14397 if call_fn is None:
14398 call_fn = self.rpc.call_iallocator_runner
14400 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
14401 result.Raise("Failure while running the iallocator script")
14403 self.out_text = result.payload
14404 if validate:
14405 self._ValidateResult()
14407 def _ValidateResult(self):
14408 """Process the allocator results.
14410 This will process and if successful save the result in
14411 self.out_data and the other parameters.
14413 """
14414 try:
14415 rdict = serializer.Load(self.out_text)
14416 except Exception, err:
14417 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
14419 if not isinstance(rdict, dict):
14420 raise errors.OpExecError("Can't parse iallocator results: not a dict")
14422 # TODO: remove backwards compatibility in later versions
14423 if "nodes" in rdict and "result" not in rdict:
14424 rdict["result"] = rdict["nodes"]
14425 del rdict["nodes"]
14427 for key in "success", "info", "result":
14428 if key not in rdict:
14429 raise errors.OpExecError("Can't parse iallocator results:"
14430 " missing key '%s'" % key)
14431 setattr(self, key, rdict[key])
14433 if not self._result_check(self.result):
14434 raise errors.OpExecError("Iallocator returned invalid result,"
14435 " expected %s, got %s" %
14436 (self._result_check, self.result),
14437 errors.ECODE_INVAL)
14439 if self.mode == constants.IALLOCATOR_MODE_RELOC:
14440 assert self.relocate_from is not None
14441 assert self.required_nodes == 1
14443 node2group = dict((name, ndata["group"])
14444 for (name, ndata) in self.in_data["nodes"].items())
14446 fn = compat.partial(self._NodesToGroups, node2group,
14447 self.in_data["nodegroups"])
14449 instance = self.cfg.GetInstanceInfo(self.name)
14450 request_groups = fn(self.relocate_from + [instance.primary_node])
14451 result_groups = fn(rdict["result"] + [instance.primary_node])
14453 if self.success and not set(result_groups).issubset(request_groups):
14454 raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
14455 " differ from original groups (%s)" %
14456 (utils.CommaJoin(result_groups),
14457 utils.CommaJoin(request_groups)))
14459 elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
14460 assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
14462 self.out_data = rdict
14464 @staticmethod
14465 def _NodesToGroups(node2group, groups, nodes):
14466 """Returns a list of unique group names for a list of nodes.
14468 @type node2group: dict
14469 @param node2group: Map from node name to group UUID
14470 @type groups: dict
14471 @param groups: Group information
14472 @type nodes: list
14473 @param nodes: Node names
14475 """
14476 result = set()
14478 for node in nodes:
14479 try:
14480 group_uuid = node2group[node]
14481 except KeyError:
14482 # Ignore unknown node
14483 pass
14484 else:
14485 try:
14486 group = groups[group_uuid]
14487 except KeyError:
14488 # Can't find group, let's use UUID
14489 group_name = group_uuid
14490 else:
14491 group_name = group["name"]
14493 result.add(group_name)
14495 return sorted(result)
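# Example (hypothetical data):
#
#   node2group = {"node1": "uuid-a", "node2": "uuid-b"}
#   groups = {"uuid-a": {"name": "default"}}
#   IAllocator._NodesToGroups(node2group, groups, ["node1", "node2", "ghost"])
#   => ["default", "uuid-b"]
#
# Unknown nodes are ignored, groups without an entry fall back to their UUID
# and the result is sorted.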
14498 class LUTestAllocator(NoHooksLU):
14499 """Run allocator tests.
14501 This LU runs the allocator tests
14503 """
14504 def CheckPrereq(self):
14505 """Check prerequisites.
14507 This checks the opcode parameters depending on the director and mode test.
14509 """
14510 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
14511 for attr in ["memory", "disks", "disk_template",
14512 "os", "tags", "nics", "vcpus"]:
14513 if not hasattr(self.op, attr):
14514 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
14515 attr, errors.ECODE_INVAL)
14516 iname = self.cfg.ExpandInstanceName(self.op.name)
14517 if iname is not None:
14518 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
14519 iname, errors.ECODE_EXISTS)
14520 if not isinstance(self.op.nics, list):
14521 raise errors.OpPrereqError("Invalid parameter 'nics'",
14522 errors.ECODE_INVAL)
14523 if not isinstance(self.op.disks, list):
14524 raise errors.OpPrereqError("Invalid parameter 'disks'",
14525 errors.ECODE_INVAL)
14526 for row in self.op.disks:
14527 if (not isinstance(row, dict) or
14528 constants.IDISK_SIZE not in row or
14529 not isinstance(row[constants.IDISK_SIZE], int) or
14530 constants.IDISK_MODE not in row or
14531 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
14532 raise errors.OpPrereqError("Invalid contents of the 'disks'"
14533 " parameter", errors.ECODE_INVAL)
14534 if self.op.hypervisor is None:
14535 self.op.hypervisor = self.cfg.GetHypervisorType()
14536 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
14537 fname = _ExpandInstanceName(self.cfg, self.op.name)
14538 self.op.name = fname
14539 self.relocate_from = \
14540 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
14541 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
14542 constants.IALLOCATOR_MODE_NODE_EVAC):
14543 if not self.op.instances:
14544 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
14545 self.op.instances = _GetWantedInstances(self, self.op.instances)
14546 else:
14547 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
14548 self.op.mode, errors.ECODE_INVAL)
14550 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
14551 if self.op.allocator is None:
14552 raise errors.OpPrereqError("Missing allocator name",
14553 errors.ECODE_INVAL)
14554 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
14555 raise errors.OpPrereqError("Wrong allocator test '%s'" %
14556 self.op.direction, errors.ECODE_INVAL)
14558 def Exec(self, feedback_fn):
14559 """Run the allocator test.
14561 """
14562 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
14563 ial = IAllocator(self.cfg, self.rpc,
14564 mode=self.op.mode,
14565 name=self.op.name,
14566 memory=self.op.memory,
14567 disks=self.op.disks,
14568 disk_template=self.op.disk_template,
14569 os=self.op.os,
14570 tags=self.op.tags,
14571 nics=self.op.nics,
14572 vcpus=self.op.vcpus,
14573 hypervisor=self.op.hypervisor,
14574 )
14575 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
14576 ial = IAllocator(self.cfg, self.rpc,
14577 mode=self.op.mode,
14578 name=self.op.name,
14579 relocate_from=list(self.relocate_from),
14580 )
14581 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
14582 ial = IAllocator(self.cfg, self.rpc,
14583 mode=self.op.mode,
14584 instances=self.op.instances,
14585 target_groups=self.op.target_groups)
14586 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
14587 ial = IAllocator(self.cfg, self.rpc,
14588 mode=self.op.mode,
14589 instances=self.op.instances,
14590 evac_mode=self.op.evac_mode)
14591 else:
14592 raise errors.ProgrammerError("Uncaught mode %s in"
14593 " LUTestAllocator.Exec", self.op.mode)
14595 if self.op.direction == constants.IALLOCATOR_DIR_IN:
14596 result = ial.in_text
14597 else:
14598 ial.Run(self.op.allocator, validate=False)
14599 result = ial.out_text
14600 return result
14603 #: Query type implementations
14604 _QUERY_IMPL = {
14605 constants.QR_INSTANCE: _InstanceQuery,
14606 constants.QR_NODE: _NodeQuery,
14607 constants.QR_GROUP: _GroupQuery,
14608 constants.QR_OS: _OsQuery,
14609 }
14611 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
14614 def _GetQueryImplementation(name):
14615 """Returns the implementation for a query type.
14617 @param name: Query type, must be one of L{constants.QR_VIA_OP}
14619 """
14620 try:
14621 return _QUERY_IMPL[name]
14622 except KeyError:
14623 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
14624 errors.ECODE_INVAL)
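# Usage sketch: _GetQueryImplementation(constants.QR_GROUP) returns the
# _GroupQuery class registered in _QUERY_IMPL above, while unknown resource
# names surface as OpPrereqError instead of a bare KeyError.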