4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
45 from ganeti import ssh
46 from ganeti import utils
47 from ganeti import errors
48 from ganeti import hypervisor
49 from ganeti import locking
50 from ganeti import constants
51 from ganeti import objects
52 from ganeti import serializer
53 from ganeti import ssconf
54 from ganeti import uidpool
55 from ganeti import compat
56 from ganeti import masterd
57 from ganeti import netutils
58 from ganeti import query
59 from ganeti import qlang
60 from ganeti import opcodes
62 from ganeti import rpc
64 import ganeti.masterd.instance # pylint: disable=W0611
67 #: Size of DRBD meta block device
71 INSTANCE_UP = [constants.ADMINST_UP]
72 INSTANCE_DOWN = [constants.ADMINST_DOWN]
73 INSTANCE_OFFLINE = [constants.ADMINST_OFFLINE]
74 INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
75 INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
79 """Data container for LU results with jobs.
81 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
82 by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
83 contained in the C{jobs} attribute and include the job IDs in the opcode
87 def __init__(self, jobs, **kwargs):
88 """Initializes this class.
90 Additional return values can be specified as keyword arguments.
92   @type jobs: list of lists of L{opcodes.OpCode}
93 @param jobs: A list of lists of opcode objects
100 class LogicalUnit(object):
101 """Logical Unit base class.
103 Subclasses must follow these rules:
104 - implement ExpandNames
105 - implement CheckPrereq (except when tasklets are used)
106 - implement Exec (except when tasklets are used)
107 - implement BuildHooksEnv
108 - implement BuildHooksNodes
109 - redefine HPATH and HTYPE
110 - optionally redefine their run requirements:
111 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
113 Note that all commands require root permissions.
115 @ivar dry_run_result: the value (if any) that will be returned to the caller
116 in dry-run mode (signalled by opcode dry_run parameter)
123 def __init__(self, processor, op, context, rpc_runner):
124 """Constructor for LogicalUnit.
126 This needs to be overridden in derived classes in order to check op
130 self.proc = processor
132 self.cfg = context.cfg
133 self.glm = context.glm
135 self.owned_locks = context.glm.list_owned
136 self.context = context
137 self.rpc = rpc_runner
138 # Dicts used to declare locking needs to mcpu
139 self.needed_locks = None
140 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
142 self.remove_locks = {}
143 # Used to force good behavior when calling helper functions
144 self.recalculate_locks = {}
146 self.Log = processor.Log # pylint: disable=C0103
147 self.LogWarning = processor.LogWarning # pylint: disable=C0103
148 self.LogInfo = processor.LogInfo # pylint: disable=C0103
149 self.LogStep = processor.LogStep # pylint: disable=C0103
150 # support for dry-run
151 self.dry_run_result = None
152 # support for generic debug attribute
153 if (not hasattr(self.op, "debug_level") or
154 not isinstance(self.op.debug_level, int)):
155 self.op.debug_level = 0
160 # Validate opcode parameters and set defaults
161 self.op.Validate(True)
163 self.CheckArguments()
165 def CheckArguments(self):
166 """Check syntactic validity for the opcode arguments.
168 This method is for doing a simple syntactic check and ensuring
169 validity of opcode parameters, without any cluster-related
170 checks. While the same can be accomplished in ExpandNames and/or
171 CheckPrereq, doing these separately is better because:
173 - ExpandNames is left as purely a lock-related function
174 - CheckPrereq is run after we have acquired locks (and possible
177 The function is allowed to change the self.op attribute so that
178 later methods no longer need to worry about missing parameters.
183 def ExpandNames(self):
184 """Expand names for this LU.
186 This method is called before starting to execute the opcode, and it should
187 update all the parameters of the opcode to their canonical form (e.g. a
188 short node name must be fully expanded after this method has successfully
189 completed). This way locking, hooks, logging, etc. can work correctly.
191 LUs which implement this method must also populate the self.needed_locks
192 member, as a dict with lock levels as keys, and a list of needed lock names
195 - use an empty dict if you don't need any lock
196 - if you don't need any lock at a particular level omit that level
197 - don't put anything for the BGL level
198 - if you want all locks at a level use locking.ALL_SET as a value
200 If you need to share locks (rather than acquire them exclusively) at one
201 level you can modify self.share_locks, setting a true value (usually 1) for
202 that level. By default locks are not shared.
204 This function can also define a list of tasklets, which then will be
205 executed in order instead of the usual LU-level CheckPrereq and Exec
206 functions, if those are not defined by the LU.
210 # Acquire all nodes and one instance
211 self.needed_locks = {
212 locking.LEVEL_NODE: locking.ALL_SET,
213 locking.LEVEL_INSTANCE: ['instance1.example.com'],
215 # Acquire just two nodes
216 self.needed_locks = {
217 locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
220 self.needed_locks = {} # No, you can't leave it to the default value None
223 # The implementation of this method is mandatory only if the new LU is
224 # concurrent, so that old LUs don't need to be changed all at the same
227 self.needed_locks = {} # Exclusive LUs don't need locks.
229 raise NotImplementedError
231 def DeclareLocks(self, level):
232 """Declare LU locking needs for a level
234 While most LUs can just declare their locking needs at ExpandNames time,
235 sometimes there's the need to calculate some locks after having acquired
236 the previous ones. This function is called just before acquiring locks at a
237 particular level, but after acquiring the ones at lower levels, and permits
238 such calculations. It can be used to modify self.needed_locks, and by
239 default it does nothing.
241 This function is only called if you have something already set in
242 self.needed_locks for the level.
244 @param level: Locking level which is going to be locked
245 @type level: member of ganeti.locking.LEVELS
249 def CheckPrereq(self):
250 """Check prerequisites for this LU.
252 This method should check that the prerequisites for the execution
253 of this LU are fulfilled. It can do internode communication, but
254 it should be idempotent - no cluster or system changes are
257 The method should raise errors.OpPrereqError in case something is
258 not fulfilled. Its return value is ignored.
260 This method should also update all the parameters of the opcode to
261 their canonical form if it hasn't been done by ExpandNames before.
264 if self.tasklets is not None:
265 for (idx, tl) in enumerate(self.tasklets):
266 logging.debug("Checking prerequisites for tasklet %s/%s",
267 idx + 1, len(self.tasklets))
272 def Exec(self, feedback_fn):
275 This method should implement the actual work. It should raise
276 errors.OpExecError for failures that are somewhat dealt with in
280 if self.tasklets is not None:
281 for (idx, tl) in enumerate(self.tasklets):
282 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
285 raise NotImplementedError
287 def BuildHooksEnv(self):
288 """Build hooks environment for this LU.
291 @return: Dictionary containing the environment that will be used for
292 running the hooks for this LU. The keys of the dict must not be prefixed
293 with "GANETI_"--that'll be added by the hooks runner. The hooks runner
294 will extend the environment with additional variables. If no environment
295 should be defined, an empty dictionary should be returned (not C{None}).
296 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
300 raise NotImplementedError
302 def BuildHooksNodes(self):
303 """Build list of nodes to run LU's hooks.
305 @rtype: tuple; (list, list)
306 @return: Tuple containing a list of node names on which the hook
307 should run before the execution and a list of node names on which the
308 hook should run after the execution. "No nodes" should be expressed as an
309 empty list (and not None).
310 @note: If the C{HPATH} attribute of the LU class is C{None}, this function
314 raise NotImplementedError
316 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
317 """Notify the LU about the results of its hooks.
319 This method is called every time a hooks phase is executed, and notifies
320 the Logical Unit about the hooks' result. The LU can then use it to alter
321 its result based on the hooks. By default the method does nothing and the
322 previous result is passed back unchanged but any LU can define it if it
323 wants to use the local cluster hook-scripts somehow.
325 @param phase: one of L{constants.HOOKS_PHASE_POST} or
326 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
327 @param hook_results: the results of the multi-node hooks rpc call
328 @param feedback_fn: function used to send feedback back to the caller
329 @param lu_result: the previous Exec result this LU had, or None
331 @return: the new Exec result, based on the previous result
335 # API must be kept, thus we ignore the unused-argument and
336 # could-be-a-function warnings
337 # pylint: disable=W0613,R0201
340 def _ExpandAndLockInstance(self):
341 """Helper function to expand and lock an instance.
343 Many LUs that work on an instance take its name in self.op.instance_name
344 and need to expand it and then declare the expanded name for locking. This
345 function does it, and then updates self.op.instance_name to the expanded
346 name. It also initializes needed_locks as a dict, if this hasn't been done
350 if self.needed_locks is None:
351 self.needed_locks = {}
353 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
354 "_ExpandAndLockInstance called with instance-level locks set"
355 self.op.instance_name = _ExpandInstanceName(self.cfg,
356 self.op.instance_name)
357 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
359 def _LockInstancesNodes(self, primary_only=False,
360 level=locking.LEVEL_NODE):
361 """Helper function to declare instances' nodes for locking.
363 This function should be called after locking one or more instances to lock
364 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
365 with all primary or secondary nodes for instances already locked and
366 present in self.needed_locks[locking.LEVEL_INSTANCE].
368 It should be called from DeclareLocks, and for safety only works if
369 self.recalculate_locks[locking.LEVEL_NODE] is set.
371 In the future it may grow parameters to just lock some instance's nodes, or
372 to just lock primaries or secondary nodes, if needed.
374 It should be called in DeclareLocks in a way similar to::
376 if level == locking.LEVEL_NODE:
377 self._LockInstancesNodes()
379 @type primary_only: boolean
380 @param primary_only: only lock primary nodes of locked instances
381 @param level: Which lock level to use for locking nodes
384 assert level in self.recalculate_locks, \
385 "_LockInstancesNodes helper function called with no nodes to recalculate"
387 # TODO: check if we've really been called with the instance locks held
389 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
390 # future we might want to have different behaviors depending on the value
391 # of self.recalculate_locks[locking.LEVEL_NODE]
393 locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
394 for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
395 wanted_nodes.append(instance.primary_node)
397 wanted_nodes.extend(instance.secondary_nodes)
399 if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
400 self.needed_locks[level] = wanted_nodes
401 elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
402 self.needed_locks[level].extend(wanted_nodes)
404 raise errors.ProgrammerError("Unknown recalculation mode")
406 del self.recalculate_locks[level]
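# A minimal illustrative sketch (not part of the original module) of how a
# hypothetical instance-level LU could combine _ExpandAndLockInstance and
# _LockInstancesNodes as described in the docstrings above; the class name is
# made up and it implements no real opcode or hooks.
class _ExampleInstanceLU(LogicalUnit): # pylint: disable=W0223
  """Illustrative-only LU showing the usual instance/node locking pattern."""
  def ExpandNames(self):
    # Expand self.op.instance_name and lock it at the instance level
    self._ExpandAndLockInstance()
    # Node locks are computed later, once the instance lock is held
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      # Lock the primary and secondary nodes of the locked instance
      self._LockInstancesNodes()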
409 class NoHooksLU(LogicalUnit): # pylint: disable=W0223
410 """Simple LU which runs no hooks.
412 This LU is intended as a parent for other LogicalUnits which will
413 run no hooks, in order to reduce duplicate code.
419 def BuildHooksEnv(self):
420 """Empty BuildHooksEnv for NoHooksLu.
422 This just raises an error.
425 raise AssertionError("BuildHooksEnv called for NoHooksLUs")
427 def BuildHooksNodes(self):
428 """Empty BuildHooksNodes for NoHooksLU.
431 raise AssertionError("BuildHooksNodes called for NoHooksLU")
435 """Tasklet base class.
437 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
438 they can mix legacy code with tasklets. Locking needs to be done in the LU,
439 tasklets know nothing about locks.
441 Subclasses must follow these rules:
442 - Implement CheckPrereq
446 def __init__(self, lu):
453 def CheckPrereq(self):
454 """Check prerequisites for this tasklets.
456 This method should check whether the prerequisites for the execution of
457 this tasklet are fulfilled. It can do internode communication, but it
458 should be idempotent - no cluster or system changes are allowed.
460 The method should raise errors.OpPrereqError in case something is not
461 fulfilled. Its return value is ignored.
463 This method should also update all parameters to their canonical form if it
464 hasn't been done before.
469 def Exec(self, feedback_fn):
470 """Execute the tasklet.
472 This method should implement the actual work. It should raise
473 errors.OpExecError for failures that are somewhat dealt with in code, or
477 raise NotImplementedError
481 """Base for query utility classes.
484 #: Attribute holding field definitions
487 def __init__(self, qfilter, fields, use_locking):
488 """Initializes this class.
491 self.use_locking = use_locking
493 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
495 self.requested_data = self.query.RequestedData()
496 self.names = self.query.RequestedNames()
498 # Sort only if no names were requested
499 self.sort_by_name = not self.names
501 self.do_locking = None
504 def _GetNames(self, lu, all_names, lock_level):
505 """Helper function to determine names asked for in the query.
509 names = lu.owned_locks(lock_level)
513 if self.wanted == locking.ALL_SET:
514 assert not self.names
515 # caller didn't specify names, so ordering is not important
516 return utils.NiceSort(names)
518 # caller specified names and we must keep the same order
520 assert not self.do_locking or lu.glm.is_owned(lock_level)
522 missing = set(self.wanted).difference(names)
524 raise errors.OpExecError("Some items were removed before retrieving"
525 " their data: %s" % missing)
527 # Return expanded names
530 def ExpandNames(self, lu):
531 """Expand names for this query.
533 See L{LogicalUnit.ExpandNames}.
536 raise NotImplementedError()
538 def DeclareLocks(self, lu, level):
539 """Declare locks for this query.
541 See L{LogicalUnit.DeclareLocks}.
544 raise NotImplementedError()
546 def _GetQueryData(self, lu):
547 """Collects all data for this query.
549 @return: Query data object
552 raise NotImplementedError()
554 def NewStyleQuery(self, lu):
555 """Collect data and execute query.
558 return query.GetQueryResponse(self.query, self._GetQueryData(lu),
559 sort_by_name=self.sort_by_name)
561 def OldStyleQuery(self, lu):
562 """Collect data and execute query.
565 return self.query.OldStyleQuery(self._GetQueryData(lu),
566 sort_by_name=self.sort_by_name)
570 """Returns a dict declaring all lock levels shared.
573 return dict.fromkeys(locking.LEVELS, 1)
576 def _MakeLegacyNodeInfo(data):
577 """Formats the data returned by L{rpc.RpcRunner.call_node_info}.
579 Converts the data into a single dictionary. This is fine for most use cases,
580 but some require information from more than one volume group or hypervisor.
583 (bootid, (vg_info, ), (hv_info, )) = data
585 return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
590 def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
591 """Checks if the owned node groups are still correct for an instance.
593 @type cfg: L{config.ConfigWriter}
594 @param cfg: The cluster configuration
595 @type instance_name: string
596 @param instance_name: Instance name
597 @type owned_groups: set or frozenset
598 @param owned_groups: List of currently owned node groups
601 inst_groups = cfg.GetInstanceNodeGroups(instance_name)
603 if not owned_groups.issuperset(inst_groups):
604 raise errors.OpPrereqError("Instance %s's node groups changed since"
605 " locks were acquired, current groups are"
606 " are '%s', owning groups '%s'; retry the"
609 utils.CommaJoin(inst_groups),
610 utils.CommaJoin(owned_groups)),
616 def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
617 """Checks if the instances in a node group are still correct.
619 @type cfg: L{config.ConfigWriter}
620 @param cfg: The cluster configuration
621 @type group_uuid: string
622 @param group_uuid: Node group UUID
623 @type owned_instances: set or frozenset
624 @param owned_instances: List of currently owned instances
627 wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
628 if owned_instances != wanted_instances:
629 raise errors.OpPrereqError("Instances in node group '%s' changed since"
630 " locks were acquired, wanted '%s', have '%s';"
631 " retry the operation" %
633 utils.CommaJoin(wanted_instances),
634 utils.CommaJoin(owned_instances)),
637 return wanted_instances
640 def _SupportsOob(cfg, node):
641 """Tells if node supports OOB.
643 @type cfg: L{config.ConfigWriter}
644 @param cfg: The cluster configuration
645 @type node: L{objects.Node}
646 @param node: The node
647 @return: The OOB script if supported or an empty string otherwise
650 return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
653 def _GetWantedNodes(lu, nodes):
654 """Returns list of checked and expanded node names.
656 @type lu: L{LogicalUnit}
657 @param lu: the logical unit on whose behalf we execute
659 @param nodes: list of node names or None for all nodes
661 @return: the list of nodes, sorted
662 @raise errors.ProgrammerError: if the nodes parameter is wrong type
666 return [_ExpandNodeName(lu.cfg, name) for name in nodes]
668 return utils.NiceSort(lu.cfg.GetNodeList())
671 def _GetWantedInstances(lu, instances):
672 """Returns list of checked and expanded instance names.
674 @type lu: L{LogicalUnit}
675 @param lu: the logical unit on whose behalf we execute
676 @type instances: list
677 @param instances: list of instance names or None for all instances
679 @return: the list of instances, sorted
680 @raise errors.OpPrereqError: if the instances parameter is wrong type
681 @raise errors.OpPrereqError: if any of the passed instances is not found
685 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
687 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
691 def _GetUpdatedParams(old_params, update_dict,
692 use_default=True, use_none=False):
693 """Return the new version of a parameter dictionary.
695 @type old_params: dict
696 @param old_params: old parameters
697 @type update_dict: dict
698 @param update_dict: dict containing new parameter values, or
699 constants.VALUE_DEFAULT to reset the parameter to its default
701 @type use_default: boolean
702 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
703 values as 'to be deleted' values
704 @type use_none: boolean
705 @param use_none: whether to recognise C{None} values as 'to be
708 @return: the new parameter dictionary
711 params_copy = copy.deepcopy(old_params)
712 for key, val in update_dict.iteritems():
713 if ((use_default and val == constants.VALUE_DEFAULT) or
714 (use_none and val is None)):
720 params_copy[key] = val
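# Illustrative usage sketch (not from the original source), assuming the
# semantics documented above; the parameter names and values are made up.
def _ExampleGetUpdatedParamsUsage():
  old = {"vcpus": 2, "memory": 512}
  updates = {"memory": constants.VALUE_DEFAULT, "auto_balance": True}
  # With use_default=True (the default), "memory" is dropped and
  # "auto_balance" is added, yielding {"vcpus": 2, "auto_balance": True}
  return _GetUpdatedParams(old, updates)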
724 def _UpdateAndVerifySubDict(base, updates, type_check):
725 """Updates and verifies a dict with sub dicts of the same type.
727 @param base: The dict with the old data
728 @param updates: The dict with the new data
729 @param type_check: Dict suitable to ForceDictType to verify correct types
730 @returns: A new dict with updated and verified values
734 new = _GetUpdatedParams(old, value)
735 utils.ForceDictType(new, type_check)
738 ret = copy.deepcopy(base)
739 ret.update(dict((key, fn(base.get(key, {}), value))
740 for key, value in updates.items()))
744 def _MergeAndVerifyHvState(op_input, obj_input):
745 """Combines the hv state from an opcode with the one of the object
747 @param op_input: The input dict from the opcode
748 @param obj_input: The input dict from the objects
749 @return: The verified and updated dict
753 invalid_hvs = set(op_input) - constants.HYPER_TYPES
755 raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
756 " %s" % utils.CommaJoin(invalid_hvs),
758 if obj_input is None:
760 type_check = constants.HVSTS_PARAMETER_TYPES
761 return _UpdateAndVerifySubDict(obj_input, op_input, type_check)
766 def _MergeAndVerifyDiskState(op_input, obj_input):
767 """Combines the disk state from an opcode with the one of the object
769 @param op_input: The input dict from the opcode
770 @param obj_input: The input dict from the objects
771 @return: The verified and updated dict
774 invalid_dst = set(op_input) - constants.DS_VALID_TYPES
776 raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
777 utils.CommaJoin(invalid_dst),
779 type_check = constants.DSS_PARAMETER_TYPES
780 if obj_input is None:
782 return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
784 for key, value in op_input.items())
789 def _ReleaseLocks(lu, level, names=None, keep=None):
790 """Releases locks owned by an LU.
792 @type lu: L{LogicalUnit}
793 @param level: Lock level
794 @type names: list or None
795 @param names: Names of locks to release
796 @type keep: list or None
797 @param keep: Names of locks to retain
800 assert not (keep is not None and names is not None), \
801 "Only one of the 'names' and the 'keep' parameters can be given"
803 if names is not None:
804 should_release = names.__contains__
806 should_release = lambda name: name not in keep
808 should_release = None
810 owned = lu.owned_locks(level)
812 # Not owning any lock at this level, do nothing
819 # Determine which locks to release
821 if should_release(name):
826 assert len(lu.owned_locks(level)) == (len(retain) + len(release))
828 # Release just some locks
829 lu.glm.release(level, names=release)
831 assert frozenset(lu.owned_locks(level)) == frozenset(retain)
834 lu.glm.release(level)
836 assert not lu.glm.is_owned(level), "No locks should be owned"
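# Illustrative usage sketch (not from the original source): once an LU has
# narrowed down which nodes it still needs, it can drop the remaining node
# locks; "node1.example.com" is a made-up name.
def _ExampleReleaseNodeLocks(lu):
  # Keep only the node lock we still need and release all the others
  _ReleaseLocks(lu, locking.LEVEL_NODE, keep=["node1.example.com"])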
839 def _MapInstanceDisksToNodes(instances):
840 """Creates a map from (node, volume) to instance name.
842 @type instances: list of L{objects.Instance}
843 @rtype: dict; tuple of (node name, volume name) as key, instance name as value
846 return dict(((node, vol), inst.name)
847 for inst in instances
848 for (node, vols) in inst.MapLVsByNode().items()
852 def _RunPostHook(lu, node_name):
853 """Runs the post-hook for an opcode on a single node.
856 hm = lu.proc.BuildHooksManager(lu)
858 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
860 # pylint: disable=W0702
861 lu.LogWarning("Errors occurred running hooks on %s" % node_name)
864 def _CheckOutputFields(static, dynamic, selected):
865 """Checks whether all selected fields are valid.
867 @type static: L{utils.FieldSet}
868 @param static: static fields set
869 @type dynamic: L{utils.FieldSet}
870 @param dynamic: dynamic fields set
877 delta = f.NonMatching(selected)
879 raise errors.OpPrereqError("Unknown output fields selected: %s"
880 % ",".join(delta), errors.ECODE_INVAL)
883 def _CheckGlobalHvParams(params):
884 """Validates that given hypervisor params are not global ones.
886 This will ensure that instances don't get customised versions of
890 used_globals = constants.HVC_GLOBALS.intersection(params)
892 msg = ("The following hypervisor parameters are global and cannot"
893 " be customized at instance level, please modify them at"
894 " cluster level: %s" % utils.CommaJoin(used_globals))
895 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
898 def _CheckNodeOnline(lu, node, msg=None):
899 """Ensure that a given node is online.
901 @param lu: the LU on behalf of which we make the check
902 @param node: the node to check
903 @param msg: if passed, should be a message to replace the default one
904 @raise errors.OpPrereqError: if the node is offline
908 msg = "Can't use offline node"
909 if lu.cfg.GetNodeInfo(node).offline:
910 raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
913 def _CheckNodeNotDrained(lu, node):
914 """Ensure that a given node is not drained.
916 @param lu: the LU on behalf of which we make the check
917 @param node: the node to check
918 @raise errors.OpPrereqError: if the node is drained
921 if lu.cfg.GetNodeInfo(node).drained:
922 raise errors.OpPrereqError("Can't use drained node %s" % node,
926 def _CheckNodeVmCapable(lu, node):
927 """Ensure that a given node is vm capable.
929 @param lu: the LU on behalf of which we make the check
930 @param node: the node to check
931 @raise errors.OpPrereqError: if the node is not vm capable
934 if not lu.cfg.GetNodeInfo(node).vm_capable:
935 raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
939 def _CheckNodeHasOS(lu, node, os_name, force_variant):
940 """Ensure that a node supports a given OS.
942 @param lu: the LU on behalf of which we make the check
943 @param node: the node to check
944 @param os_name: the OS to query about
945 @param force_variant: whether to ignore variant errors
946 @raise errors.OpPrereqError: if the node is not supporting the OS
949 result = lu.rpc.call_os_get(node, os_name)
950 result.Raise("OS '%s' not in supported OS list for node %s" %
952 prereq=True, ecode=errors.ECODE_INVAL)
953 if not force_variant:
954 _CheckOSVariant(result.payload, os_name)
957 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
958 """Ensure that a node has the given secondary ip.
960 @type lu: L{LogicalUnit}
961 @param lu: the LU on behalf of which we make the check
963 @param node: the node to check
964 @type secondary_ip: string
965 @param secondary_ip: the ip to check
966 @type prereq: boolean
967 @param prereq: whether to throw a prerequisite or an execute error
968 @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
969 @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
972 result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
973 result.Raise("Failure checking secondary ip on node %s" % node,
974 prereq=prereq, ecode=errors.ECODE_ENVIRON)
975 if not result.payload:
976 msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
977 " please fix and re-run this command" % secondary_ip)
979 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
981 raise errors.OpExecError(msg)
984 def _GetClusterDomainSecret():
985 """Reads the cluster domain secret.
988 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
992 def _CheckInstanceState(lu, instance, req_states, msg=None):
993 """Ensure that an instance is in one of the required states.
995 @param lu: the LU on behalf of which we make the check
996 @param instance: the instance to check
997 @param msg: if passed, should be a message to replace the default one
998 @raise errors.OpPrereqError: if the instance is not in the required state
1002 msg = "can't use instance from outside %s states" % ", ".join(req_states)
1003 if instance.admin_state not in req_states:
1004 raise errors.OpPrereqError("Instance %s is marked to be %s, %s" %
1005 (instance.name, instance.admin_state, msg),
1008 if constants.ADMINST_UP not in req_states:
1009 pnode = instance.primary_node
1010 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
1011 ins_l.Raise("Can't contact node %s for instance information" % pnode,
1012 prereq=True, ecode=errors.ECODE_ENVIRON)
1014 if instance.name in ins_l.payload:
1015 raise errors.OpPrereqError("Instance %s is running, %s" %
1016 (instance.name, msg), errors.ECODE_STATE)
1019 def _CheckMinMaxSpecs(name, ipolicy, value):
1020 """Checks if value is in the desired range.
1022 @param name: name of the parameter for which we perform the check
1023 @param ipolicy: dictionary containing min, max and std values
1024 @param value: actual value that we want to use
1025 @return: None or element not meeting the criteria
1029 if value in [None, constants.VALUE_AUTO]:
1031 max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
1032 min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
1033 if value > max_v or min_v > value:
1034 return ("%s value %s is not in range [%s, %s]" %
1035 (name, value, min_v, max_v))
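# Illustrative usage sketch (not from the original source), assuming the
# ipolicy layout used above (ISPECS_MIN/ISPECS_MAX sub-dicts); the numbers
# are made up.
def _ExampleCheckMinMaxSpecsUsage():
  ipolicy = {
    constants.ISPECS_MIN: {constants.ISPEC_MEM_SIZE: 128},
    constants.ISPECS_MAX: {constants.ISPEC_MEM_SIZE: 4096},
    }
  # 8192 exceeds the maximum, so a violation message is returned; any value
  # within [128, 4096] (or None/VALUE_AUTO) would yield None instead
  return _CheckMinMaxSpecs(constants.ISPEC_MEM_SIZE, ipolicy, 8192)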
1039 def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
1040 nic_count, disk_sizes,
1041 _check_spec_fn=_CheckMinMaxSpecs):
1042 """Verifies ipolicy against provided specs.
1045 @param ipolicy: The ipolicy
1047 @param mem_size: The memory size
1048 @type cpu_count: int
1049 @param cpu_count: Used cpu cores
1050 @type disk_count: int
1051 @param disk_count: Number of disks used
1052 @type nic_count: int
1053 @param nic_count: Number of nics used
1054 @type disk_sizes: list of ints
1055 @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
1056 @param _check_spec_fn: The checking function (unittest only)
1057 @return: A list of violations, or an empty list if no violations are found
1060 assert disk_count == len(disk_sizes)
1063 (constants.ISPEC_MEM_SIZE, mem_size),
1064 (constants.ISPEC_CPU_COUNT, cpu_count),
1065 (constants.ISPEC_DISK_COUNT, disk_count),
1066 (constants.ISPEC_NIC_COUNT, nic_count),
1067 ] + map((lambda d: (constants.ISPEC_DISK_SIZE, d)), disk_sizes)
1070 (_check_spec_fn(name, ipolicy, value)
1071 for (name, value) in test_settings))
1074 def _ComputeIPolicyInstanceViolation(ipolicy, instance,
1075 _compute_fn=_ComputeIPolicySpecViolation):
1076 """Compute if instance meets the specs of ipolicy.
1079 @param ipolicy: The ipolicy to verify against
1080 @type instance: L{objects.Instance}
1081 @param instance: The instance to verify
1082 @param _compute_fn: The function to verify ipolicy (unittest only)
1083 @see: L{_ComputeIPolicySpecViolation}
1086 mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
1087 cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
1088 disk_count = len(instance.disks)
1089 disk_sizes = [disk.size for disk in instance.disks]
1090 nic_count = len(instance.nics)
1092 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1096 def _ComputeIPolicyInstanceSpecViolation(ipolicy, instance_spec,
1097 _compute_fn=_ComputeIPolicySpecViolation):
1098 """Compute if instance specs meets the specs of ipolicy.
1101 @param ipolicy: The ipolicy to verify against
1102 @type instance_spec: dict
1103 @param instance_spec: The instance spec to verify
1104 @param _compute_fn: The function to verify ipolicy (unittest only)
1105 @see: L{_ComputeIPolicySpecViolation}
1108 mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
1109 cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
1110 disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
1111 disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
1112 nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
1114 return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
1118 def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
1120 _compute_fn=_ComputeIPolicyInstanceViolation):
1121 """Compute if instance meets the specs of the new target group.
1123 @param ipolicy: The ipolicy to verify
1124 @param instance: The instance object to verify
1125 @param current_group: The current group of the instance
1126 @param target_group: The new group of the instance
1127 @param _compute_fn: The function to verify ipolicy (unittest only)
1128 @see: L{_ComputeIPolicySpecViolation}
1131 if current_group == target_group:
1134 return _compute_fn(ipolicy, instance)
1137 def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
1138 _compute_fn=_ComputeIPolicyNodeViolation):
1139 """Checks that the target node is correct in terms of instance policy.
1141 @param ipolicy: The ipolicy to verify
1142 @param instance: The instance object to verify
1143 @param node: The new node to relocate
1144 @param ignore: Ignore violations of the ipolicy
1145 @param _compute_fn: The function to verify ipolicy (unittest only)
1146 @see: L{_ComputeIPolicySpecViolation}
1149 res = _compute_fn(ipolicy, instance, lu.cfg.GetNodeInfo(instance.primary_node).group, node.group)
1152 msg = ("Instance does not meet target node group's (%s) instance"
1153 " policy: %s") % (node.group, utils.CommaJoin(res))
1157 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
1160 def _ExpandItemName(fn, name, kind):
1161 """Expand an item name.
1163 @param fn: the function to use for expansion
1164 @param name: requested item name
1165 @param kind: text description ('Node' or 'Instance')
1166 @return: the resolved (full) name
1167 @raise errors.OpPrereqError: if the item is not found
1170 full_name = fn(name)
1171 if full_name is None:
1172 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
1177 def _ExpandNodeName(cfg, name):
1178 """Wrapper over L{_ExpandItemName} for nodes."""
1179 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
1182 def _ExpandInstanceName(cfg, name):
1183 """Wrapper over L{_ExpandItemName} for instance."""
1184 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
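# Illustrative usage (not from the original source): given a config object
# cfg, _ExpandInstanceName(cfg, "inst1") returns the fully expanded instance
# name (e.g. a made-up "inst1.example.com") or raises errors.OpPrereqError if
# no such instance is known; _ExpandNodeName behaves the same way for nodes.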
1187 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1188 minmem, maxmem, vcpus, nics, disk_template, disks,
1189 bep, hvp, hypervisor_name, tags):
1190 """Builds instance related env variables for hooks
1192 This builds the hook environment from individual variables.
1195 @param name: the name of the instance
1196 @type primary_node: string
1197 @param primary_node: the name of the instance's primary node
1198 @type secondary_nodes: list
1199 @param secondary_nodes: list of secondary nodes as strings
1200 @type os_type: string
1201 @param os_type: the name of the instance's OS
1202 @type status: string
1203 @param status: the desired status of the instance
1204 @type minmem: string
1205 @param minmem: the minimum memory size of the instance
1206 @type maxmem: string
1207 @param maxmem: the maximum memory size of the instance
1209 @param vcpus: the count of VCPUs the instance has
1211 @param nics: list of tuples (ip, mac, mode, link) representing
1212 the NICs the instance has
1213 @type disk_template: string
1214 @param disk_template: the disk template of the instance
1216 @param disks: the list of (size, mode) pairs
1218 @param bep: the backend parameters for the instance
1220 @param hvp: the hypervisor parameters for the instance
1221 @type hypervisor_name: string
1222 @param hypervisor_name: the hypervisor for the instance
1224 @param tags: list of instance tags as strings
1226 @return: the hook environment for this instance
1231 "INSTANCE_NAME": name,
1232 "INSTANCE_PRIMARY": primary_node,
1233 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1234 "INSTANCE_OS_TYPE": os_type,
1235 "INSTANCE_STATUS": status,
1236 "INSTANCE_MINMEM": minmem,
1237 "INSTANCE_MAXMEM": maxmem,
1238 # TODO(2.7) remove deprecated "memory" value
1239 "INSTANCE_MEMORY": maxmem,
1240 "INSTANCE_VCPUS": vcpus,
1241 "INSTANCE_DISK_TEMPLATE": disk_template,
1242 "INSTANCE_HYPERVISOR": hypervisor_name,
1245 nic_count = len(nics)
1246 for idx, (ip, mac, mode, link) in enumerate(nics):
1249 env["INSTANCE_NIC%d_IP" % idx] = ip
1250 env["INSTANCE_NIC%d_MAC" % idx] = mac
1251 env["INSTANCE_NIC%d_MODE" % idx] = mode
1252 env["INSTANCE_NIC%d_LINK" % idx] = link
1253 if mode == constants.NIC_MODE_BRIDGED:
1254 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1258 env["INSTANCE_NIC_COUNT"] = nic_count
1261 disk_count = len(disks)
1262 for idx, (size, mode) in enumerate(disks):
1263 env["INSTANCE_DISK%d_SIZE" % idx] = size
1264 env["INSTANCE_DISK%d_MODE" % idx] = mode
1268 env["INSTANCE_DISK_COUNT"] = disk_count
1273 env["INSTANCE_TAGS"] = " ".join(tags)
1275 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1276 for key, value in source.items():
1277 env["INSTANCE_%s_%s" % (kind, key)] = value
1282 def _NICListToTuple(lu, nics):
1283 """Build a list of nic information tuples.
1285 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1286 value in LUInstanceQueryData.
1288 @type lu: L{LogicalUnit}
1289 @param lu: the logical unit on whose behalf we execute
1290 @type nics: list of L{objects.NIC}
1291 @param nics: list of nics to convert to hooks tuples
1295 cluster = lu.cfg.GetClusterInfo()
1299 filled_params = cluster.SimpleFillNIC(nic.nicparams)
1300 mode = filled_params[constants.NIC_MODE]
1301 link = filled_params[constants.NIC_LINK]
1302 hooks_nics.append((ip, mac, mode, link))
1306 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1307 """Builds instance related env variables for hooks from an object.
1309 @type lu: L{LogicalUnit}
1310 @param lu: the logical unit on whose behalf we execute
1311 @type instance: L{objects.Instance}
1312 @param instance: the instance for which we should build the
1314 @type override: dict
1315 @param override: dictionary with key/values that will override
1318 @return: the hook environment dictionary
1321 cluster = lu.cfg.GetClusterInfo()
1322 bep = cluster.FillBE(instance)
1323 hvp = cluster.FillHV(instance)
1325 "name": instance.name,
1326 "primary_node": instance.primary_node,
1327 "secondary_nodes": instance.secondary_nodes,
1328 "os_type": instance.os,
1329 "status": instance.admin_state,
1330 "maxmem": bep[constants.BE_MAXMEM],
1331 "minmem": bep[constants.BE_MINMEM],
1332 "vcpus": bep[constants.BE_VCPUS],
1333 "nics": _NICListToTuple(lu, instance.nics),
1334 "disk_template": instance.disk_template,
1335 "disks": [(disk.size, disk.mode) for disk in instance.disks],
1338 "hypervisor_name": instance.hypervisor,
1339 "tags": instance.tags,
1342 args.update(override)
1343 return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1346 def _AdjustCandidatePool(lu, exceptions):
1347 """Adjust the candidate pool after node operations.
1350 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1352 lu.LogInfo("Promoted nodes to master candidate role: %s",
1353 utils.CommaJoin(node.name for node in mod_list))
1354 for name in mod_list:
1355 lu.context.ReaddNode(name)
1356 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1358 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1362 def _DecideSelfPromotion(lu, exceptions=None):
1363 """Decide whether I should promote myself as a master candidate.
1366 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1367 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1368 # the new node will increase mc_should by one, so:
1369 mc_should = min(mc_should + 1, cp_size)
1370 return mc_now < mc_should
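# Illustrative arithmetic (not from the original source): with a candidate
# pool size of 10, 3 current candidates and a current target of 3, the new
# node raises the target to min(3 + 1, 10) = 4, so 3 < 4 and it should
# promote itself; once the pool is full the min() caps the target and no
# promotion happens.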
1373 def _CalculateGroupIPolicy(cluster, group):
1374 """Calculate instance policy for group.
1377 return cluster.SimpleFillIPolicy(group.ipolicy)
1380 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1381 """Check that the brigdes needed by a list of nics exist.
1384 cluster = lu.cfg.GetClusterInfo()
1385 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1386 brlist = [params[constants.NIC_LINK] for params in paramslist
1387 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1389 result = lu.rpc.call_bridges_exist(target_node, brlist)
1390 result.Raise("Error checking bridges on destination node '%s'" %
1391 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1394 def _CheckInstanceBridgesExist(lu, instance, node=None):
1395 """Check that the brigdes needed by an instance exist.
1399 node = instance.primary_node
1400 _CheckNicsBridgesExist(lu, instance.nics, node)
1403 def _CheckOSVariant(os_obj, name):
1404 """Check whether an OS name conforms to the os variants specification.
1406 @type os_obj: L{objects.OS}
1407 @param os_obj: OS object to check
1409 @param name: OS name passed by the user, to check for validity
1412 variant = objects.OS.GetVariant(name)
1413 if not os_obj.supported_variants:
1415 raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1416 " passed)" % (os_obj.name, variant),
1420 raise errors.OpPrereqError("OS name must include a variant",
1423 if variant not in os_obj.supported_variants:
1424 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1427 def _GetNodeInstancesInner(cfg, fn):
1428 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1431 def _GetNodeInstances(cfg, node_name):
1432 """Returns a list of all primary and secondary instances on a node.
1436 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1439 def _GetNodePrimaryInstances(cfg, node_name):
1440 """Returns primary instances on a node.
1443 return _GetNodeInstancesInner(cfg,
1444 lambda inst: node_name == inst.primary_node)
1447 def _GetNodeSecondaryInstances(cfg, node_name):
1448 """Returns secondary instances on a node.
1451 return _GetNodeInstancesInner(cfg,
1452 lambda inst: node_name in inst.secondary_nodes)
1455 def _GetStorageTypeArgs(cfg, storage_type):
1456 """Returns the arguments for a storage type.
1459 # Special case for file storage
1460 if storage_type == constants.ST_FILE:
1461 # storage.FileStorage wants a list of storage directories
1462 return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1467 def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1470 for dev in instance.disks:
1471 cfg.SetDiskID(dev, node_name)
1473 result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
1474 result.Raise("Failed to get disk status from node %s" % node_name,
1475 prereq=prereq, ecode=errors.ECODE_ENVIRON)
1477 for idx, bdev_status in enumerate(result.payload):
1478 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1484 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1485 """Check the sanity of iallocator and node arguments and use the
1486 cluster-wide iallocator if appropriate.
1488 Check that at most one of (iallocator, node) is specified. If none is
1489 specified, then the LU's opcode's iallocator slot is filled with the
1490 cluster-wide default iallocator.
1492 @type iallocator_slot: string
1493 @param iallocator_slot: the name of the opcode iallocator slot
1494 @type node_slot: string
1495 @param node_slot: the name of the opcode target node slot
1498 node = getattr(lu.op, node_slot, None)
1499 iallocator = getattr(lu.op, iallocator_slot, None)
1501 if node is not None and iallocator is not None:
1502 raise errors.OpPrereqError("Do not specify both, iallocator and node",
1504 elif node is None and iallocator is None:
1505 default_iallocator = lu.cfg.GetDefaultIAllocator()
1506 if default_iallocator:
1507 setattr(lu.op, iallocator_slot, default_iallocator)
1509 raise errors.OpPrereqError("No iallocator or node given and no"
1510 " cluster-wide default iallocator found;"
1511 " please specify either an iallocator or a"
1512 " node, or set a cluster-wide default"
1516 def _GetDefaultIAllocator(cfg, iallocator):
1517 """Decides on which iallocator to use.
1519 @type cfg: L{config.ConfigWriter}
1520 @param cfg: Cluster configuration object
1521 @type iallocator: string or None
1522 @param iallocator: Iallocator specified in opcode
1524 @return: Iallocator name
1528 # Use default iallocator
1529 iallocator = cfg.GetDefaultIAllocator()
1532 raise errors.OpPrereqError("No iallocator was specified, neither in the"
1533 " opcode nor as a cluster-wide default",
1539 class LUClusterPostInit(LogicalUnit):
1540 """Logical unit for running hooks after cluster initialization.
1543 HPATH = "cluster-init"
1544 HTYPE = constants.HTYPE_CLUSTER
1546 def BuildHooksEnv(self):
1551 "OP_TARGET": self.cfg.GetClusterName(),
1554 def BuildHooksNodes(self):
1555 """Build hooks nodes.
1558 return ([], [self.cfg.GetMasterNode()])
1560 def Exec(self, feedback_fn):
1567 class LUClusterDestroy(LogicalUnit):
1568 """Logical unit for destroying the cluster.
1571 HPATH = "cluster-destroy"
1572 HTYPE = constants.HTYPE_CLUSTER
1574 def BuildHooksEnv(self):
1579 "OP_TARGET": self.cfg.GetClusterName(),
1582 def BuildHooksNodes(self):
1583 """Build hooks nodes.
1588 def CheckPrereq(self):
1589 """Check prerequisites.
1591 This checks whether the cluster is empty.
1593 Any errors are signaled by raising errors.OpPrereqError.
1596 master = self.cfg.GetMasterNode()
1598 nodelist = self.cfg.GetNodeList()
1599 if len(nodelist) != 1 or nodelist[0] != master:
1600 raise errors.OpPrereqError("There are still %d node(s) in"
1601 " this cluster." % (len(nodelist) - 1),
1603 instancelist = self.cfg.GetInstanceList()
1605 raise errors.OpPrereqError("There are still %d instance(s) in"
1606 " this cluster." % len(instancelist),
1609 def Exec(self, feedback_fn):
1610 """Destroys the cluster.
1613 master_params = self.cfg.GetMasterNetworkParameters()
1615 # Run post hooks on master node before it's removed
1616 _RunPostHook(self, master_params.name)
1618 ems = self.cfg.GetUseExternalMipScript()
1619 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1622 self.LogWarning("Error disabling the master IP address: %s",
1625 return master_params.name
1628 def _VerifyCertificate(filename):
1629 """Verifies a certificate for L{LUClusterVerifyConfig}.
1631 @type filename: string
1632 @param filename: Path to PEM file
1636 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1637 utils.ReadFile(filename))
1638 except Exception, err: # pylint: disable=W0703
1639 return (LUClusterVerifyConfig.ETYPE_ERROR,
1640 "Failed to load X509 certificate %s: %s" % (filename, err))
1643 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1644 constants.SSL_CERT_EXPIRATION_ERROR)
1647 fnamemsg = "While verifying %s: %s" % (filename, msg)
1652 return (None, fnamemsg)
1653 elif errcode == utils.CERT_WARNING:
1654 return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1655 elif errcode == utils.CERT_ERROR:
1656 return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1658 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
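# Illustrative usage (not from the original source): callers such as
# LUClusterVerifyConfig iterate constants.ALL_CERT_FILES and treat the result
# as a (severity, message) pair, where the severity is None for a valid
# certificate, ETYPE_WARNING when it is about to expire and ETYPE_ERROR when
# it is unusable.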
1661 def _GetAllHypervisorParameters(cluster, instances):
1662 """Compute the set of all hypervisor parameters.
1664 @type cluster: L{objects.Cluster}
1665 @param cluster: the cluster object
1666 @param instances: list of L{objects.Instance}
1667 @param instances: additional instances from which to obtain parameters
1668 @rtype: list of (origin, hypervisor, parameters)
1669 @return: a list with all parameters found, indicating the hypervisor they
1670 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1675 for hv_name in cluster.enabled_hypervisors:
1676 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1678 for os_name, os_hvp in cluster.os_hvp.items():
1679 for hv_name, hv_params in os_hvp.items():
1681 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1682 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1684 # TODO: collapse identical parameter values in a single one
1685 for instance in instances:
1686 if instance.hvparams:
1687 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1688 cluster.FillHV(instance)))
1693 class _VerifyErrors(object):
1694 """Mix-in for cluster/group verify LUs.
1696 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1697 self.op and self._feedback_fn to be available.)
1701 ETYPE_FIELD = "code"
1702 ETYPE_ERROR = "ERROR"
1703 ETYPE_WARNING = "WARNING"
1705 def _Error(self, ecode, item, msg, *args, **kwargs):
1706 """Format an error message.
1708 Based on the opcode's error_codes parameter, either format a
1709 parseable error code, or a simpler error string.
1711 This must be called only from Exec and functions called from Exec.
1714 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1715 itype, etxt, _ = ecode
1716 # first complete the msg
1719 # then format the whole message
1720 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1721 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1727 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1728 # and finally report it via the feedback_fn
1729 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
1731 def _ErrorIf(self, cond, ecode, *args, **kwargs):
1732 """Log an error message if the passed condition is True.
1736 or self.op.debug_simulate_errors) # pylint: disable=E1101
1738 # If the error code is in the list of ignored errors, demote the error to a
1740 (_, etxt, _) = ecode
1741 if etxt in self.op.ignore_errors: # pylint: disable=E1101
1742 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1745 self._Error(ecode, *args, **kwargs)
1747 # do not mark the operation as failed for WARN-only cases
1748 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1749 self.bad = self.bad or cond
1752 class LUClusterVerify(NoHooksLU):
1753 """Submits all jobs necessary to verify the cluster.
1758 def ExpandNames(self):
1759 self.needed_locks = {}
1761 def Exec(self, feedback_fn):
1764 if self.op.group_name:
1765 groups = [self.op.group_name]
1766 depends_fn = lambda: None
1768 groups = self.cfg.GetNodeGroupList()
1770 # Verify global configuration
1772 opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1775 # Always depend on global verification
1776 depends_fn = lambda: [(-len(jobs), [])]
1778 jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1779 ignore_errors=self.op.ignore_errors,
1780 depends=depends_fn())]
1781 for group in groups)
1783 # Fix up all parameters
1784 for op in itertools.chain(*jobs): # pylint: disable=W0142
1785 op.debug_simulate_errors = self.op.debug_simulate_errors
1786 op.verbose = self.op.verbose
1787 op.error_codes = self.op.error_codes
1789 op.skip_checks = self.op.skip_checks
1790 except AttributeError:
1791 assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1793 return ResultWithJobs(jobs)
1796 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1797 """Verifies the cluster config.
1802 def _VerifyHVP(self, hvp_data):
1803 """Verifies locally the syntax of the hypervisor parameters.
1806 for item, hv_name, hv_params in hvp_data:
1807 msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1810 hv_class = hypervisor.GetHypervisor(hv_name)
1811 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1812 hv_class.CheckParameterSyntax(hv_params)
1813 except errors.GenericError, err:
1814 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1816 def ExpandNames(self):
1817 # Information can be safely retrieved as the BGL is acquired in exclusive
1819 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
1820 self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1821 self.all_node_info = self.cfg.GetAllNodesInfo()
1822 self.all_inst_info = self.cfg.GetAllInstancesInfo()
1823 self.needed_locks = {}
1825 def Exec(self, feedback_fn):
1826 """Verify integrity of cluster, performing various test on nodes.
1830 self._feedback_fn = feedback_fn
1832 feedback_fn("* Verifying cluster config")
1834 for msg in self.cfg.VerifyConfig():
1835 self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1837 feedback_fn("* Verifying cluster certificate files")
1839 for cert_filename in constants.ALL_CERT_FILES:
1840 (errcode, msg) = _VerifyCertificate(cert_filename)
1841 self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1843 feedback_fn("* Verifying hypervisor parameters")
1845 self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1846 self.all_inst_info.values()))
1848 feedback_fn("* Verifying all nodes belong to an existing group")
1850 # We do this verification here because, should this bogus circumstance
1851 # occur, it would never be caught by VerifyGroup, which only acts on
1852 # nodes/instances reachable from existing node groups.
1854 dangling_nodes = set(node.name for node in self.all_node_info.values()
1855 if node.group not in self.all_group_info)
1857 dangling_instances = {}
1858 no_node_instances = []
1860 for inst in self.all_inst_info.values():
1861 if inst.primary_node in dangling_nodes:
1862 dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1863 elif inst.primary_node not in self.all_node_info:
1864 no_node_instances.append(inst.name)
1869 utils.CommaJoin(dangling_instances.get(node.name,
1871 for node in dangling_nodes]
1873 self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
1875 "the following nodes (and their instances) belong to a non"
1876 " existing group: %s", utils.CommaJoin(pretty_dangling))
1878 self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
1880 "the following instances have a non-existing primary-node:"
1881 " %s", utils.CommaJoin(no_node_instances))
1886 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1887 """Verifies the status of a node group.
1890 HPATH = "cluster-verify"
1891 HTYPE = constants.HTYPE_CLUSTER
1894 _HOOKS_INDENT_RE = re.compile("^", re.M)
1896 class NodeImage(object):
1897 """A class representing the logical and physical status of a node.
1900 @ivar name: the node name to which this object refers
1901 @ivar volumes: a structure as returned from
1902 L{ganeti.backend.GetVolumeList} (runtime)
1903 @ivar instances: a list of running instances (runtime)
1904 @ivar pinst: list of configured primary instances (config)
1905 @ivar sinst: list of configured secondary instances (config)
1906 @ivar sbp: dictionary of {primary-node: list of instances} for all
1907 instances for which this node is secondary (config)
1908 @ivar mfree: free memory, as reported by hypervisor (runtime)
1909 @ivar dfree: free disk, as reported by the node (runtime)
1910 @ivar offline: the offline status (config)
1911 @type rpc_fail: boolean
1912 @ivar rpc_fail: whether the RPC verify call was successful (overall,
1913 not whether the individual keys were correct) (runtime)
1914 @type lvm_fail: boolean
1915 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1916 @type hyp_fail: boolean
1917 @ivar hyp_fail: whether the RPC call didn't return the instance list
1918 @type ghost: boolean
1919 @ivar ghost: whether this is a known node or not (config)
1920 @type os_fail: boolean
1921 @ivar os_fail: whether the RPC call didn't return valid OS data
1923 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1924 @type vm_capable: boolean
1925 @ivar vm_capable: whether the node can host instances
1928 def __init__(self, offline=False, name=None, vm_capable=True):
1937 self.offline = offline
1938 self.vm_capable = vm_capable
1939 self.rpc_fail = False
1940 self.lvm_fail = False
1941 self.hyp_fail = False
1943 self.os_fail = False
1946 def ExpandNames(self):
1947 # This raises errors.OpPrereqError on its own:
1948 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1950 # Get instances in node group; this is unsafe and needs verification later
1951 inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
1953 self.needed_locks = {
1954 locking.LEVEL_INSTANCE: inst_names,
1955 locking.LEVEL_NODEGROUP: [self.group_uuid],
1956 locking.LEVEL_NODE: [],
1959 self.share_locks = _ShareAll()
1961 def DeclareLocks(self, level):
1962 if level == locking.LEVEL_NODE:
1963 # Get members of node group; this is unsafe and needs verification later
1964 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1966 all_inst_info = self.cfg.GetAllInstancesInfo()
1968 # In Exec(), we warn about mirrored instances that have primary and
1969 # secondary living in separate node groups. To fully verify that
1970 # volumes for these instances are healthy, we will need to do an
1971 # extra call to their secondaries. We ensure here those nodes will be locked.
1973 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
1974 # Important: access only the instances whose lock is owned
1975 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1976 nodes.update(all_inst_info[inst].secondary_nodes)
1978 self.needed_locks[locking.LEVEL_NODE] = nodes
1980 def CheckPrereq(self):
1981 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
1982 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
1984 group_nodes = set(self.group_info.members)
1985 group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
1988 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1990 unlocked_instances = \
1991 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
1994 raise errors.OpPrereqError("Missing lock for nodes: %s" %
1995 utils.CommaJoin(unlocked_nodes))
1997 if unlocked_instances:
1998 raise errors.OpPrereqError("Missing lock for instances: %s" %
1999 utils.CommaJoin(unlocked_instances))
2001 self.all_node_info = self.cfg.GetAllNodesInfo()
2002 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2004 self.my_node_names = utils.NiceSort(group_nodes)
2005 self.my_inst_names = utils.NiceSort(group_instances)
2007 self.my_node_info = dict((name, self.all_node_info[name])
2008 for name in self.my_node_names)
2010 self.my_inst_info = dict((name, self.all_inst_info[name])
2011 for name in self.my_inst_names)
2013 # We detect here the nodes that will need the extra RPC calls for verifying
2014 # split LV volumes; they should be locked.
2015 extra_lv_nodes = set()
2017 for inst in self.my_inst_info.values():
2018 if inst.disk_template in constants.DTS_INT_MIRROR:
2019 group = self.my_node_info[inst.primary_node].group
2020 for nname in inst.secondary_nodes:
2021 if self.all_node_info[nname].group != group:
2022 extra_lv_nodes.add(nname)
2024 unlocked_lv_nodes = \
2025 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2027 if unlocked_lv_nodes:
2028 raise errors.OpPrereqError("these nodes could be locked: %s" %
2029 utils.CommaJoin(unlocked_lv_nodes))
2030 self.extra_lv_nodes = list(extra_lv_nodes)
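# Illustrative sketch of the extra-LV-node logic above, with hypothetical
# names: if instance "inst1" uses DRBD, has its primary node in this group
# and its secondary "nodeB" in another group, CheckPrereq computes
#   extra_lv_nodes == set(["nodeB"])
# so that Exec() can later query "nodeB" for the LVs backing inst1's disks.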
2032 def _VerifyNode(self, ninfo, nresult):
2033 """Perform some basic validation on data returned from a node.
2035 - check the result data structure is well formed and has all the mandatory fields
2037 - check ganeti version
2039 @type ninfo: L{objects.Node}
2040 @param ninfo: the node to check
2041 @param nresult: the results from the node
2043 @return: whether overall this call was successful (and we can expect
2044 reasonable values in the response)
2048 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2050 # main result, nresult should be a non-empty dict
2051 test = not nresult or not isinstance(nresult, dict)
2052 _ErrorIf(test, constants.CV_ENODERPC, node,
2053 "unable to verify node: no data returned")
2057 # compares ganeti version
2058 local_version = constants.PROTOCOL_VERSION
2059 remote_version = nresult.get("version", None)
2060 test = not (remote_version and
2061 isinstance(remote_version, (list, tuple)) and
2062 len(remote_version) == 2)
2063 _ErrorIf(test, constants.CV_ENODERPC, node,
2064 "connection to node returned invalid data")
2068 test = local_version != remote_version[0]
2069 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2070 "incompatible protocol versions: master %s,"
2071 " node %s", local_version, remote_version[0])
2075 # node seems compatible, we can actually try to look into its results
2077 # full package version
2078 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2079 constants.CV_ENODEVERSION, node,
2080 "software version mismatch: master %s, node %s",
2081 constants.RELEASE_VERSION, remote_version[1],
2082 code=self.ETYPE_WARNING)
2084 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2085 if ninfo.vm_capable and isinstance(hyp_result, dict):
2086 for hv_name, hv_result in hyp_result.iteritems():
2087 test = hv_result is not None
2088 _ErrorIf(test, constants.CV_ENODEHV, node,
2089 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2091 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2092 if ninfo.vm_capable and isinstance(hvp_result, list):
2093 for item, hv_name, hv_result in hvp_result:
2094 _ErrorIf(True, constants.CV_ENODEHV, node,
2095 "hypervisor %s parameter verify failure (source %s): %s",
2096 hv_name, item, hv_result)
2098 test = nresult.get(constants.NV_NODESETUP,
2099 ["Missing NODESETUP results"])
2100 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2105 def _VerifyNodeTime(self, ninfo, nresult,
2106 nvinfo_starttime, nvinfo_endtime):
2107 """Check the node time.
2109 @type ninfo: L{objects.Node}
2110 @param ninfo: the node to check
2111 @param nresult: the remote results for the node
2112 @param nvinfo_starttime: the start time of the RPC call
2113 @param nvinfo_endtime: the end time of the RPC call
2117 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2119 ntime = nresult.get(constants.NV_TIME, None)
2121 ntime_merged = utils.MergeTime(ntime)
2122 except (ValueError, TypeError):
2123 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2126 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2127 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2128 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2129 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2133 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2134 "Node time diverges by at least %s from master node time",
2137 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2138 """Check the node LVM results.
2140 @type ninfo: L{objects.Node}
2141 @param ninfo: the node to check
2142 @param nresult: the remote results for the node
2143 @param vg_name: the configured VG name
2150 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2152 # checks vg existence and size > 20G
2153 vglist = nresult.get(constants.NV_VGLIST, None)
2155 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2157 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2158 constants.MIN_VG_SIZE)
2159 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2162 pvlist = nresult.get(constants.NV_PVLIST, None)
2163 test = pvlist is None
2164 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2166 # check that ':' is not present in PV names, since it's a
2167 # special character for lvcreate (denotes the range of PEs to use on the PV)
2169 for _, pvname, owner_vg in pvlist:
2170 test = ":" in pvname
2171 _ErrorIf(test, constants.CV_ENODELVM, node,
2172 "Invalid character ':' in PV '%s' of VG '%s'",
2175 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2176 """Check the node bridges.
2178 @type ninfo: L{objects.Node}
2179 @param ninfo: the node to check
2180 @param nresult: the remote results for the node
2181 @param bridges: the expected list of bridges
2188 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2190 missing = nresult.get(constants.NV_BRIDGES, None)
2191 test = not isinstance(missing, list)
2192 _ErrorIf(test, constants.CV_ENODENET, node,
2193 "did not return valid bridge information")
2195 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2196 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2198 def _VerifyNodeUserScripts(self, ninfo, nresult):
2199 """Check the results of user scripts presence and executability on the node
2201 @type ninfo: L{objects.Node}
2202 @param ninfo: the node to check
2203 @param nresult: the remote results for the node
2208 test = not constants.NV_USERSCRIPTS in nresult
2209 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2210 "did not return user scripts information")
2212 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2214 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2215 "user scripts not present or not executable: %s" %
2216 utils.CommaJoin(sorted(broken_scripts)))
2218 def _VerifyNodeNetwork(self, ninfo, nresult):
2219 """Check the node network connectivity results.
2221 @type ninfo: L{objects.Node}
2222 @param ninfo: the node to check
2223 @param nresult: the remote results for the node
2227 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2229 test = constants.NV_NODELIST not in nresult
2230 _ErrorIf(test, constants.CV_ENODESSH, node,
2231 "node hasn't returned node ssh connectivity data")
2233 if nresult[constants.NV_NODELIST]:
2234 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2235 _ErrorIf(True, constants.CV_ENODESSH, node,
2236 "ssh communication with node '%s': %s", a_node, a_msg)
2238 test = constants.NV_NODENETTEST not in nresult
2239 _ErrorIf(test, constants.CV_ENODENET, node,
2240 "node hasn't returned node tcp connectivity data")
2242 if nresult[constants.NV_NODENETTEST]:
2243 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2245 _ErrorIf(True, constants.CV_ENODENET, node,
2246 "tcp communication with node '%s': %s",
2247 anode, nresult[constants.NV_NODENETTEST][anode])
2249 test = constants.NV_MASTERIP not in nresult
2250 _ErrorIf(test, constants.CV_ENODENET, node,
2251 "node hasn't returned node master IP reachability data")
2253 if not nresult[constants.NV_MASTERIP]:
2254 if node == self.master_node:
2255 msg = "the master node cannot reach the master IP (not configured?)"
2257 msg = "cannot reach the master IP"
2258 _ErrorIf(True, constants.CV_ENODENET, node, msg)
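# In the node's reply (hypothetical data), NV_NODELIST and NV_NODENETTEST
# only contain the failures, e.g.
#   nresult[constants.NV_NODELIST] == {"node3": "ssh: connect to host ..."}
# so an empty dictionary means full ssh/tcp connectivity from this node.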
2260 def _VerifyInstancePolicy(self, instance):
2261 """Verify instance specs against instance policy set on node group level.
2265 cluster = self.cfg.GetClusterInfo()
2266 full_beparams = cluster.FillBE(instance)
2267 ipolicy = cluster.SimpleFillIPolicy(self.group_info.ipolicy)
2269 mem_size = full_beparams.get(constants.BE_MAXMEM, None)
2270 cpu_count = full_beparams.get(constants.BE_VCPUS, None)
2271 disk_count = len(instance.disks)
2272 disk_sizes = [disk.size for disk in instance.disks]
2273 nic_count = len(instance.nics)
2276 (constants.ISPEC_MEM_SIZE, mem_size),
2277 (constants.ISPEC_CPU_COUNT, cpu_count),
2278 (constants.ISPEC_DISK_COUNT, disk_count),
2279 (constants.ISPEC_NIC_COUNT, nic_count),
2280 ] + map((lambda d: (constants.ISPEC_DISK_SIZE, d)), disk_sizes)
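# For example (hypothetical values), an instance with 2 GiB of maximum
# memory, 2 VCPUs, one 10 GiB disk and one NIC yields
#   test_settings == [(constants.ISPEC_MEM_SIZE, 2048),
#                     (constants.ISPEC_CPU_COUNT, 2),
#                     (constants.ISPEC_DISK_COUNT, 1),
#                     (constants.ISPEC_NIC_COUNT, 1),
#                     (constants.ISPEC_DISK_SIZE, 10240)]
# and each pair is checked against the group's ipolicy bounds below.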
2282 for (name, value) in test_settings:
2283 test_result = _CheckMinMaxSpecs(name, ipolicy, value)
2284 self._ErrorIf(test_result is not None,
2285 constants.CV_EINSTANCEPOLICY, instance.name,
2288 def _VerifyInstance(self, instance, instanceconfig, node_image,
2290 """Verify an instance.
2292 This function checks whether the required block devices are
2293 available on the instance's nodes.
2296 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2297 node_current = instanceconfig.primary_node
2299 node_vol_should = {}
2300 instanceconfig.MapLVsByNode(node_vol_should)
2302 self._VerifyInstancePolicy(instanceconfig)
2304 for node in node_vol_should:
2305 n_img = node_image[node]
2306 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2307 # ignore missing volumes on offline or broken nodes
2309 for volume in node_vol_should[node]:
2310 test = volume not in n_img.volumes
2311 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2312 "volume %s missing on node %s", volume, node)
2314 if instanceconfig.admin_state == constants.ADMINST_UP:
2315 pri_img = node_image[node_current]
2316 test = instance not in pri_img.instances and not pri_img.offline
2317 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2318 "instance not running on its primary node %s",
2321 diskdata = [(nname, success, status, idx)
2322 for (nname, disks) in diskstatus.items()
2323 for idx, (success, status) in enumerate(disks)]
2325 for nname, success, bdev_status, idx in diskdata:
2326 # the 'ghost node' construction in Exec() ensures that we have a
2328 snode = node_image[nname]
2329 bad_snode = snode.ghost or snode.offline
2330 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2331 not success and not bad_snode,
2332 constants.CV_EINSTANCEFAULTYDISK, instance,
2333 "couldn't retrieve status for disk/%s on %s: %s",
2334 idx, nname, bdev_status)
2335 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2336 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2337 constants.CV_EINSTANCEFAULTYDISK, instance,
2338 "disk/%s on %s is faulty", idx, nname)
2340 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2341 """Verify if there are any unknown volumes in the cluster.
2343 The .os, .swap and backup volumes are ignored. All other volumes are
2344 reported as unknown.
2346 @type reserved: L{ganeti.utils.FieldSet}
2347 @param reserved: a FieldSet of reserved volume names
2350 for node, n_img in node_image.items():
2351 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2352 # skip non-healthy nodes
2354 for volume in n_img.volumes:
2355 test = ((node not in node_vol_should or
2356 volume not in node_vol_should[node]) and
2357 not reserved.Matches(volume))
2358 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2359 "volume %s is unknown", volume)
2361 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2362 """Verify N+1 Memory Resilience.
2364 Check that if one single node dies we can still start all the
2365 instances it was primary for.
2368 cluster_info = self.cfg.GetClusterInfo()
2369 for node, n_img in node_image.items():
2370 # This code checks that every node which is now listed as
2371 # secondary has enough memory to host all instances it is
2372 # supposed to take over, should a single other node in the cluster fail.
2373 # FIXME: not ready for failover to an arbitrary node
2374 # FIXME: does not support file-backed instances
2375 # WARNING: we currently take into account down instances as well
2376 # as up ones, considering that even if they're down someone
2377 # might want to start them even in the event of a node failure.
2379 # we're skipping offline nodes from the N+1 warning, since
2380 # most likely we don't have good memory information from them;
2381 # we already list instances living on such nodes, and that's warning enough
2384 #TODO(dynmem): use MINMEM for checking
2385 #TODO(dynmem): also consider ballooning out other instances
2386 for prinode, instances in n_img.sbp.items():
2388 for instance in instances:
2389 bep = cluster_info.FillBE(instance_cfg[instance])
2390 if bep[constants.BE_AUTO_BALANCE]:
2391 needed_mem += bep[constants.BE_MAXMEM]
2392 test = n_img.mfree < needed_mem
2393 self._ErrorIf(test, constants.CV_ENODEN1, node,
2394 "not enough memory to accomodate instance failovers"
2395 " should node %s fail (%dMiB needed, %dMiB available)",
2396 prinode, needed_mem, n_img.mfree)
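# Sketch with made-up numbers: if node "nodeC" reports mfree == 2048 MiB but
# is secondary for instances primaried on "nodeD" with BE_MAXMEM of 1024 and
# 1536 MiB (both auto-balanced), then needed_mem == 2560 > 2048 and a
# CV_ENODEN1 error is reported for "nodeC".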
2399 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2400 (files_all, files_opt, files_mc, files_vm)):
2401 """Verifies file checksums collected from all nodes.
2403 @param errorif: Callback for reporting errors
2404 @param nodeinfo: List of L{objects.Node} objects
2405 @param master_node: Name of master node
2406 @param all_nvinfo: RPC results
2409 # Define functions determining which nodes to consider for a file
2412 (files_mc, lambda node: (node.master_candidate or
2413 node.name == master_node)),
2414 (files_vm, lambda node: node.vm_capable),
2417 # Build mapping from filename to list of nodes which should have the file
2419 for (files, fn) in files2nodefn:
2421 filenodes = nodeinfo
2423 filenodes = filter(fn, nodeinfo)
2424 nodefiles.update((filename,
2425 frozenset(map(operator.attrgetter("name"), filenodes)))
2426 for filename in files)
2428 assert set(nodefiles) == (files_all | files_mc | files_vm)
2430 fileinfo = dict((filename, {}) for filename in nodefiles)
2431 ignore_nodes = set()
2433 for node in nodeinfo:
2435 ignore_nodes.add(node.name)
2438 nresult = all_nvinfo[node.name]
2440 if nresult.fail_msg or not nresult.payload:
2443 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2445 test = not (node_files and isinstance(node_files, dict))
2446 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2447 "Node did not return file checksum data")
2449 ignore_nodes.add(node.name)
2452 # Build per-checksum mapping from filename to nodes having it
2453 for (filename, checksum) in node_files.items():
2454 assert filename in nodefiles
2455 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2457 for (filename, checksums) in fileinfo.items():
2458 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2460 # Nodes having the file
2461 with_file = frozenset(node_name
2462 for nodes in fileinfo[filename].values()
2463 for node_name in nodes) - ignore_nodes
2465 expected_nodes = nodefiles[filename] - ignore_nodes
2467 # Nodes missing file
2468 missing_file = expected_nodes - with_file
2470 if filename in files_opt:
2472 errorif(missing_file and missing_file != expected_nodes,
2473 constants.CV_ECLUSTERFILECHECK, None,
2474 "File %s is optional, but it must exist on all or no"
2475 " nodes (not found on %s)",
2476 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2478 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2479 "File %s is missing from node(s) %s", filename,
2480 utils.CommaJoin(utils.NiceSort(missing_file)))
2482 # Warn if a node has a file it shouldn't
2483 unexpected = with_file - expected_nodes
2485 constants.CV_ECLUSTERFILECHECK, None,
2486 "File %s should not exist on node(s) %s",
2487 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2489 # See if there are multiple versions of the file
2490 test = len(checksums) > 1
2492 variants = ["variant %s on %s" %
2493 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2494 for (idx, (checksum, nodes)) in
2495 enumerate(sorted(checksums.items()))]
2499 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2500 "File %s found with %s different checksums (%s)",
2501 filename, len(checksums), "; ".join(variants))
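# Shape of the data handled above, with hypothetical values:
#   nodefiles == {"/path/to/file": frozenset(["node1", "node2", "node3"])}
#   fileinfo  == {"/path/to/file": {"<checksum-a>": set(["node1", "node2"]),
#                                   "<checksum-b>": set(["node3"])}}
# Two checksum keys for one file trigger the "different checksums" error;
# a non-optional file absent from an expected node is reported as missing.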
2503 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2505 """Verifies and the node DRBD status.
2507 @type ninfo: L{objects.Node}
2508 @param ninfo: the node to check
2509 @param nresult: the remote results for the node
2510 @param instanceinfo: the dict of instances
2511 @param drbd_helper: the configured DRBD usermode helper
2512 @param drbd_map: the DRBD map as returned by
2513 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2517 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2520 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2521 test = (helper_result is None)
2522 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2523 "no drbd usermode helper returned")
2525 status, payload = helper_result
2527 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2528 "drbd usermode helper check unsuccessful: %s", payload)
2529 test = status and (payload != drbd_helper)
2530 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2531 "wrong drbd usermode helper: %s", payload)
2533 # compute the DRBD minors
2535 for minor, instance in drbd_map[node].items():
2536 test = instance not in instanceinfo
2537 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2538 "ghost instance '%s' in temporary DRBD map", instance)
2539 # ghost instance should not be running, but otherwise we
2540 # don't give double warnings (both ghost instance and
2541 # unallocated minor in use)
2543 node_drbd[minor] = (instance, False)
2545 instance = instanceinfo[instance]
2546 node_drbd[minor] = (instance.name,
2547 instance.admin_state == constants.ADMINST_UP)
2549 # and now check them
2550 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2551 test = not isinstance(used_minors, (tuple, list))
2552 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2553 "cannot parse drbd status file: %s", str(used_minors))
2555 # we cannot check drbd status
2558 for minor, (iname, must_exist) in node_drbd.items():
2559 test = minor not in used_minors and must_exist
2560 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2561 "drbd minor %d of instance %s is not active", minor, iname)
2562 for minor in used_minors:
2563 test = minor not in node_drbd
2564 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2565 "unallocated drbd minor %d is in use", minor)
2567 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2568 """Builds the node OS structures.
2570 @type ninfo: L{objects.Node}
2571 @param ninfo: the node to check
2572 @param nresult: the remote results for the node
2573 @param nimg: the node image object
2577 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2579 remote_os = nresult.get(constants.NV_OSLIST, None)
2580 test = (not isinstance(remote_os, list) or
2581 not compat.all(isinstance(v, list) and len(v) == 7
2582 for v in remote_os))
2584 _ErrorIf(test, constants.CV_ENODEOS, node,
2585 "node hasn't returned valid OS data")
2594 for (name, os_path, status, diagnose,
2595 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2597 if name not in os_dict:
2600 # parameters is a list of lists instead of list of tuples due to
2601 # JSON lacking a real tuple type, fix it:
2602 parameters = [tuple(v) for v in parameters]
2603 os_dict[name].append((os_path, status, diagnose,
2604 set(variants), set(parameters), set(api_ver)))
2606 nimg.oslist = os_dict
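# Resulting structure (hypothetical entry): nimg.oslist maps each OS name to
# a list of (path, status, diagnose, variants, parameters, api_versions)
# tuples, e.g.
#   {"debootstrap": [("/srv/ganeti/os/debootstrap", True, "",
#                     set(["default"]), set(), set([20]))]}
# More than one tuple for a name means the first entry shadows the others.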
2608 def _VerifyNodeOS(self, ninfo, nimg, base):
2609 """Verifies the node OS list.
2611 @type ninfo: L{objects.Node}
2612 @param ninfo: the node to check
2613 @param nimg: the node image object
2614 @param base: the 'template' node we match against (e.g. from the master)
2618 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2620 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2622 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2623 for os_name, os_data in nimg.oslist.items():
2624 assert os_data, "Empty OS status for OS %s?!" % os_name
2625 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2626 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2627 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2628 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2629 "OS '%s' has multiple entries (first one shadows the rest): %s",
2630 os_name, utils.CommaJoin([v[0] for v in os_data]))
2631 # comparisons with the 'base' image
2632 test = os_name not in base.oslist
2633 _ErrorIf(test, constants.CV_ENODEOS, node,
2634 "Extra OS %s not present on reference node (%s)",
2638 assert base.oslist[os_name], "Base node has empty OS status?"
2639 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2641 # base OS is invalid, skipping
2643 for kind, a, b in [("API version", f_api, b_api),
2644 ("variants list", f_var, b_var),
2645 ("parameters", beautify_params(f_param),
2646 beautify_params(b_param))]:
2647 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2648 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2649 kind, os_name, base.name,
2650 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2652 # check any missing OSes
2653 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2654 _ErrorIf(missing, constants.CV_ENODEOS, node,
2655 "OSes present on reference node %s but missing on this node: %s",
2656 base.name, utils.CommaJoin(missing))
2658 def _VerifyOob(self, ninfo, nresult):
2659 """Verifies out of band functionality of a node.
2661 @type ninfo: L{objects.Node}
2662 @param ninfo: the node to check
2663 @param nresult: the remote results for the node
2667 # We just have to verify the paths on master and/or master candidates
2668 # as the oob helper is invoked on the master
2669 if ((ninfo.master_candidate or ninfo.master_capable) and
2670 constants.NV_OOB_PATHS in nresult):
2671 for path_result in nresult[constants.NV_OOB_PATHS]:
2672 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2674 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2675 """Verifies and updates the node volume data.
2677 This function will update a L{NodeImage}'s internal structures
2678 with data from the remote call.
2680 @type ninfo: L{objects.Node}
2681 @param ninfo: the node to check
2682 @param nresult: the remote results for the node
2683 @param nimg: the node image object
2684 @param vg_name: the configured VG name
2688 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2690 nimg.lvm_fail = True
2691 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2694 elif isinstance(lvdata, basestring):
2695 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2696 utils.SafeEncode(lvdata))
2697 elif not isinstance(lvdata, dict):
2698 _ErrorIf(True, constants.CV_ENODELVM, node,
2699 "rpc call to node failed (lvlist)")
2701 nimg.volumes = lvdata
2702 nimg.lvm_fail = False
2704 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2705 """Verifies and updates the node instance list.
2707 If the listing was successful, then updates this node's instance
2708 list. Otherwise, it marks the RPC call as failed for the instance list key.
2711 @type ninfo: L{objects.Node}
2712 @param ninfo: the node to check
2713 @param nresult: the remote results for the node
2714 @param nimg: the node image object
2717 idata = nresult.get(constants.NV_INSTANCELIST, None)
2718 test = not isinstance(idata, list)
2719 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2720 "rpc call to node failed (instancelist): %s",
2721 utils.SafeEncode(str(idata)))
2723 nimg.hyp_fail = True
2725 nimg.instances = idata
2727 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2728 """Verifies and computes a node information map
2730 @type ninfo: L{objects.Node}
2731 @param ninfo: the node to check
2732 @param nresult: the remote results for the node
2733 @param nimg: the node image object
2734 @param vg_name: the configured VG name
2738 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2740 # try to read free memory (from the hypervisor)
2741 hv_info = nresult.get(constants.NV_HVINFO, None)
2742 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2743 _ErrorIf(test, constants.CV_ENODEHV, node,
2744 "rpc call to node failed (hvinfo)")
2747 nimg.mfree = int(hv_info["memory_free"])
2748 except (ValueError, TypeError):
2749 _ErrorIf(True, constants.CV_ENODERPC, node,
2750 "node returned invalid nodeinfo, check hypervisor")
2752 # FIXME: devise a free space model for file based instances as well
2753 if vg_name is not None:
2754 test = (constants.NV_VGLIST not in nresult or
2755 vg_name not in nresult[constants.NV_VGLIST])
2756 _ErrorIf(test, constants.CV_ENODELVM, node,
2757 "node didn't return data for the volume group '%s'"
2758 " - it is either missing or broken", vg_name)
2761 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2762 except (ValueError, TypeError):
2763 _ErrorIf(True, constants.CV_ENODERPC, node,
2764 "node returned invalid LVM info, check LVM status")
2766 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2767 """Gets per-disk status information for all instances.
2769 @type nodelist: list of strings
2770 @param nodelist: Node names
2771 @type node_image: dict of (name, L{objects.Node})
2772 @param node_image: Node objects
2773 @type instanceinfo: dict of (name, L{objects.Instance})
2774 @param instanceinfo: Instance objects
2775 @rtype: {instance: {node: [(success, payload)]}}
2776 @return: a dictionary of per-instance dictionaries with nodes as
2777 keys and disk information as values; the disk information is a
2778 list of tuples (success, payload)
2781 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2784 node_disks_devonly = {}
2785 diskless_instances = set()
2786 diskless = constants.DT_DISKLESS
2788 for nname in nodelist:
2789 node_instances = list(itertools.chain(node_image[nname].pinst,
2790 node_image[nname].sinst))
2791 diskless_instances.update(inst for inst in node_instances
2792 if instanceinfo[inst].disk_template == diskless)
2793 disks = [(inst, disk)
2794 for inst in node_instances
2795 for disk in instanceinfo[inst].disks]
2798 # No need to collect data
2801 node_disks[nname] = disks
2803 # Creating copies as SetDiskID below will modify the objects and that can
2804 # lead to incorrect data returned from nodes
2805 devonly = [dev.Copy() for (_, dev) in disks]
2808 self.cfg.SetDiskID(dev, nname)
2810 node_disks_devonly[nname] = devonly
2812 assert len(node_disks) == len(node_disks_devonly)
2814 # Collect data from all nodes with disks
2815 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2818 assert len(result) == len(node_disks)
2822 for (nname, nres) in result.items():
2823 disks = node_disks[nname]
2826 # No data from this node
2827 data = len(disks) * [(False, "node offline")]
2830 _ErrorIf(msg, constants.CV_ENODERPC, nname,
2831 "while getting disk information: %s", msg)
2833 # No data from this node
2834 data = len(disks) * [(False, msg)]
2837 for idx, i in enumerate(nres.payload):
2838 if isinstance(i, (tuple, list)) and len(i) == 2:
2841 logging.warning("Invalid result from node %s, entry %d: %s",
2843 data.append((False, "Invalid result from the remote node"))
2845 for ((inst, _), status) in zip(disks, data):
2846 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2848 # Add empty entries for diskless instances.
2849 for inst in diskless_instances:
2850 assert inst not in instdisk
2853 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2854 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2855 compat.all(isinstance(s, (tuple, list)) and
2856 len(s) == 2 for s in statuses)
2857 for inst, nnames in instdisk.items()
2858 for nname, statuses in nnames.items())
2859 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
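# The returned mapping, with hypothetical names, looks like
#   instdisk == {"inst1": {"node1": [(True, status0), (True, status1)],
#                          "node2": [(False, "node offline"), ...]}}
# i.e. one (success, payload) pair per disk and per node, and an empty dict
# for diskless instances (see the assertions above).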
2864 def _SshNodeSelector(group_uuid, all_nodes):
2865 """Create endless iterators for all potential SSH check hosts.
2868 nodes = [node for node in all_nodes
2869 if (node.group != group_uuid and
2871 keyfunc = operator.attrgetter("group")
2873 return map(itertools.cycle,
2874 [sorted(map(operator.attrgetter("name"), names))
2875 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2879 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2880 """Choose which nodes should talk to which other nodes.
2882 We will make nodes contact all nodes in their group, and one node from every other node group.
2885 @warning: This algorithm has a known issue if one node group is much
2886 smaller than others (e.g. just one node). In such a case all other
2887 nodes will talk to the single node.
2890 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2891 sel = cls._SshNodeSelector(group_uuid, all_nodes)
2893 return (online_nodes,
2894 dict((name, sorted([i.next() for i in sel]))
2895 for name in online_nodes))
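# Hypothetical example: with two other groups contributing nodes ["a", "b"]
# and ["c"], _SshNodeSelector yields one cycling iterator per foreign group,
# so for online nodes ["n1", "n2"] this returns
#   (["n1", "n2"], {"n1": ["a", "c"], "n2": ["b", "c"]})
# i.e. each node is told to ssh-check one node from every other group,
# assigned round-robin.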
2897 def BuildHooksEnv(self):
2900 Cluster-Verify hooks are only run in the post phase; if they fail, their
2901 output is logged in the verify output and the verification fails.
2905 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2908 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2909 for node in self.my_node_info.values())
2913 def BuildHooksNodes(self):
2914 """Build hooks nodes.
2917 return ([], self.my_node_names)
2919 def Exec(self, feedback_fn):
2920 """Verify integrity of the node group, performing various test on nodes.
2923 # This method has too many local variables. pylint: disable=R0914
2924 feedback_fn("* Verifying group '%s'" % self.group_info.name)
2926 if not self.my_node_names:
2928 feedback_fn("* Empty node group, skipping verification")
2932 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2933 verbose = self.op.verbose
2934 self._feedback_fn = feedback_fn
2936 vg_name = self.cfg.GetVGName()
2937 drbd_helper = self.cfg.GetDRBDHelper()
2938 cluster = self.cfg.GetClusterInfo()
2939 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2940 hypervisors = cluster.enabled_hypervisors
2941 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2943 i_non_redundant = [] # Non redundant instances
2944 i_non_a_balanced = [] # Non auto-balanced instances
2945 i_offline = 0 # Count of offline instances
2946 n_offline = 0 # Count of offline nodes
2947 n_drained = 0 # Count of nodes being drained
2948 node_vol_should = {}
2950 # FIXME: verify OS list
2953 filemap = _ComputeAncillaryFiles(cluster, False)
2955 # do local checksums
2956 master_node = self.master_node = self.cfg.GetMasterNode()
2957 master_ip = self.cfg.GetMasterIP()
2959 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2962 if self.cfg.GetUseExternalMipScript():
2963 user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
2965 node_verify_param = {
2966 constants.NV_FILELIST:
2967 utils.UniqueSequence(filename
2968 for files in filemap
2969 for filename in files),
2970 constants.NV_NODELIST:
2971 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
2972 self.all_node_info.values()),
2973 constants.NV_HYPERVISOR: hypervisors,
2974 constants.NV_HVPARAMS:
2975 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2976 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2977 for node in node_data_list
2978 if not node.offline],
2979 constants.NV_INSTANCELIST: hypervisors,
2980 constants.NV_VERSION: None,
2981 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2982 constants.NV_NODESETUP: None,
2983 constants.NV_TIME: None,
2984 constants.NV_MASTERIP: (master_node, master_ip),
2985 constants.NV_OSLIST: None,
2986 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2987 constants.NV_USERSCRIPTS: user_scripts,
2990 if vg_name is not None:
2991 node_verify_param[constants.NV_VGLIST] = None
2992 node_verify_param[constants.NV_LVLIST] = vg_name
2993 node_verify_param[constants.NV_PVLIST] = [vg_name]
2994 node_verify_param[constants.NV_DRBDLIST] = None
2997 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3000 # FIXME: this needs to be changed per node-group, not cluster-wide
3002 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3003 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3004 bridges.add(default_nicpp[constants.NIC_LINK])
3005 for instance in self.my_inst_info.values():
3006 for nic in instance.nics:
3007 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3008 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3009 bridges.add(full_nic[constants.NIC_LINK])
3012 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3014 # Build our expected cluster state
3015 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3017 vm_capable=node.vm_capable))
3018 for node in node_data_list)
3022 for node in self.all_node_info.values():
3023 path = _SupportsOob(self.cfg, node)
3024 if path and path not in oob_paths:
3025 oob_paths.append(path)
3028 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3030 for instance in self.my_inst_names:
3031 inst_config = self.my_inst_info[instance]
3033 for nname in inst_config.all_nodes:
3034 if nname not in node_image:
3035 gnode = self.NodeImage(name=nname)
3036 gnode.ghost = (nname not in self.all_node_info)
3037 node_image[nname] = gnode
3039 inst_config.MapLVsByNode(node_vol_should)
3041 pnode = inst_config.primary_node
3042 node_image[pnode].pinst.append(instance)
3044 for snode in inst_config.secondary_nodes:
3045 nimg = node_image[snode]
3046 nimg.sinst.append(instance)
3047 if pnode not in nimg.sbp:
3048 nimg.sbp[pnode] = []
3049 nimg.sbp[pnode].append(instance)
3051 # At this point, we have the in-memory data structures complete,
3052 # except for the runtime information, which we'll gather next
3054 # Due to the way our RPC system works, exact response times cannot be
3055 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3056 # time before and after executing the request, we can at least have a time window.
3058 nvinfo_starttime = time.time()
3059 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3061 self.cfg.GetClusterName())
3062 nvinfo_endtime = time.time()
3064 if self.extra_lv_nodes and vg_name is not None:
3066 self.rpc.call_node_verify(self.extra_lv_nodes,
3067 {constants.NV_LVLIST: vg_name},
3068 self.cfg.GetClusterName())
3070 extra_lv_nvinfo = {}
3072 all_drbd_map = self.cfg.ComputeDRBDMap()
3074 feedback_fn("* Gathering disk information (%s nodes)" %
3075 len(self.my_node_names))
3076 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3079 feedback_fn("* Verifying configuration file consistency")
3081 # If not all nodes are being checked, we need to make sure the master node
3082 # and a non-checked vm_capable node are in the list.
3083 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3085 vf_nvinfo = all_nvinfo.copy()
3086 vf_node_info = list(self.my_node_info.values())
3087 additional_nodes = []
3088 if master_node not in self.my_node_info:
3089 additional_nodes.append(master_node)
3090 vf_node_info.append(self.all_node_info[master_node])
3091 # Add the first vm_capable node we find which is not included
3092 for node in absent_nodes:
3093 nodeinfo = self.all_node_info[node]
3094 if nodeinfo.vm_capable and not nodeinfo.offline:
3095 additional_nodes.append(node)
3096 vf_node_info.append(self.all_node_info[node])
3098 key = constants.NV_FILELIST
3099 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3100 {key: node_verify_param[key]},
3101 self.cfg.GetClusterName()))
3103 vf_nvinfo = all_nvinfo
3104 vf_node_info = self.my_node_info.values()
3106 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3108 feedback_fn("* Verifying node status")
3112 for node_i in node_data_list:
3114 nimg = node_image[node]
3118 feedback_fn("* Skipping offline node %s" % (node,))
3122 if node == master_node:
3124 elif node_i.master_candidate:
3125 ntype = "master candidate"
3126 elif node_i.drained:
3132 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3134 msg = all_nvinfo[node].fail_msg
3135 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3138 nimg.rpc_fail = True
3141 nresult = all_nvinfo[node].payload
3143 nimg.call_ok = self._VerifyNode(node_i, nresult)
3144 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3145 self._VerifyNodeNetwork(node_i, nresult)
3146 self._VerifyNodeUserScripts(node_i, nresult)
3147 self._VerifyOob(node_i, nresult)
3150 self._VerifyNodeLVM(node_i, nresult, vg_name)
3151 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3154 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3155 self._UpdateNodeInstances(node_i, nresult, nimg)
3156 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3157 self._UpdateNodeOS(node_i, nresult, nimg)
3159 if not nimg.os_fail:
3160 if refos_img is None:
3162 self._VerifyNodeOS(node_i, nimg, refos_img)
3163 self._VerifyNodeBridges(node_i, nresult, bridges)
3165 # Check whether all running instances are primary for the node. (This
3166 # can no longer be done from _VerifyInstance below, since some of the
3167 # wrong instances could be from other node groups.)
3168 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3170 for inst in non_primary_inst:
3171 # FIXME: investigate best way to handle offline insts
3172 if inst.admin_state == constants.ADMINST_OFFLINE:
3174 feedback_fn("* Skipping offline instance %s" % inst.name)
3177 test = inst in self.all_inst_info
3178 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3179 "instance should not run on node %s", node_i.name)
3180 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3181 "node is running unknown instance %s", inst)
3183 for node, result in extra_lv_nvinfo.items():
3184 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3185 node_image[node], vg_name)
3187 feedback_fn("* Verifying instance status")
3188 for instance in self.my_inst_names:
3190 feedback_fn("* Verifying instance %s" % instance)
3191 inst_config = self.my_inst_info[instance]
3192 self._VerifyInstance(instance, inst_config, node_image,
3194 inst_nodes_offline = []
3196 pnode = inst_config.primary_node
3197 pnode_img = node_image[pnode]
3198 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3199 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3200 " primary node failed", instance)
3202 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3204 constants.CV_EINSTANCEBADNODE, instance,
3205 "instance is marked as running and lives on offline node %s",
3206 inst_config.primary_node)
3208 # If the instance is non-redundant we cannot survive losing its primary
3209 # node, so we are not N+1 compliant. On the other hand we have no disk
3210 # templates with more than one secondary so that situation is not well supported.
3212 # FIXME: does not support file-backed instances
3213 if not inst_config.secondary_nodes:
3214 i_non_redundant.append(instance)
3216 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3217 constants.CV_EINSTANCELAYOUT,
3218 instance, "instance has multiple secondary nodes: %s",
3219 utils.CommaJoin(inst_config.secondary_nodes),
3220 code=self.ETYPE_WARNING)
3222 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3223 pnode = inst_config.primary_node
3224 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3225 instance_groups = {}
3227 for node in instance_nodes:
3228 instance_groups.setdefault(self.all_node_info[node].group,
3232 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3233 # Sort so that we always list the primary node first.
3234 for group, nodes in sorted(instance_groups.items(),
3235 key=lambda (_, nodes): pnode in nodes,
3238 self._ErrorIf(len(instance_groups) > 1,
3239 constants.CV_EINSTANCESPLITGROUPS,
3240 instance, "instance has primary and secondary nodes in"
3241 " different groups: %s", utils.CommaJoin(pretty_list),
3242 code=self.ETYPE_WARNING)
3244 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3245 i_non_a_balanced.append(instance)
3247 for snode in inst_config.secondary_nodes:
3248 s_img = node_image[snode]
3249 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3250 snode, "instance %s, connection to secondary node failed",
3254 inst_nodes_offline.append(snode)
3256 # warn that the instance lives on offline nodes
3257 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3258 "instance has offline secondary node(s) %s",
3259 utils.CommaJoin(inst_nodes_offline))
3260 # ... or ghost/non-vm_capable nodes
3261 for node in inst_config.all_nodes:
3262 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3263 instance, "instance lives on ghost node %s", node)
3264 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3265 instance, "instance lives on non-vm_capable node %s", node)
3267 feedback_fn("* Verifying orphan volumes")
3268 reserved = utils.FieldSet(*cluster.reserved_lvs)
3270 # We will get spurious "unknown volume" warnings if any node of this group
3271 # is secondary for an instance whose primary is in another group. To avoid
3272 # them, we find these instances and add their volumes to node_vol_should.
3273 for inst in self.all_inst_info.values():
3274 for secondary in inst.secondary_nodes:
3275 if (secondary in self.my_node_info
3276 and inst.name not in self.my_inst_info):
3277 inst.MapLVsByNode(node_vol_should)
3280 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3282 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3283 feedback_fn("* Verifying N+1 Memory redundancy")
3284 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3286 feedback_fn("* Other Notes")
3288 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3289 % len(i_non_redundant))
3291 if i_non_a_balanced:
3292 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3293 % len(i_non_a_balanced))
3296 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3299 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3302 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3306 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3307 """Analyze the post-hooks' result
3309 This method analyses the hook result, handles it, and sends some
3310 nicely-formatted feedback back to the user.
3312 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3313 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3314 @param hooks_results: the results of the multi-node hooks rpc call
3315 @param feedback_fn: function used to send feedback back to the caller
3316 @param lu_result: previous Exec result
3317 @return: the new Exec result, based on the previous result
3321 # We only really run POST phase hooks, only for non-empty groups,
3322 # and are only interested in their results
3323 if not self.my_node_names:
3326 elif phase == constants.HOOKS_PHASE_POST:
3327 # Used to change hooks' output to proper indentation
3328 feedback_fn("* Hooks Results")
3329 assert hooks_results, "invalid result from hooks"
3331 for node_name in hooks_results:
3332 res = hooks_results[node_name]
3334 test = msg and not res.offline
3335 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3336 "Communication failure in hooks execution: %s", msg)
3337 if res.offline or msg:
3338 # No need to investigate payload if node is offline or gave an error
3341 for script, hkr, output in res.payload:
3342 test = hkr == constants.HKR_FAIL
3343 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3344 "Script %s failed, output:", script)
3346 output = self._HOOKS_INDENT_RE.sub(" ", output)
3347 feedback_fn("%s" % output)
3353 class LUClusterVerifyDisks(NoHooksLU):
3354 """Verifies the cluster disks status.
3359 def ExpandNames(self):
3360 self.share_locks = _ShareAll()
3361 self.needed_locks = {
3362 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3365 def Exec(self, feedback_fn):
3366 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3368 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3369 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3370 for group in group_names])
3373 class LUGroupVerifyDisks(NoHooksLU):
3374 """Verifies the status of all disks in a node group.
3379 def ExpandNames(self):
3380 # Raises errors.OpPrereqError on its own if group can't be found
3381 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3383 self.share_locks = _ShareAll()
3384 self.needed_locks = {
3385 locking.LEVEL_INSTANCE: [],
3386 locking.LEVEL_NODEGROUP: [],
3387 locking.LEVEL_NODE: [],
3390 def DeclareLocks(self, level):
3391 if level == locking.LEVEL_INSTANCE:
3392 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3394 # Lock instances optimistically, needs verification once node and group
3395 # locks have been acquired
3396 self.needed_locks[locking.LEVEL_INSTANCE] = \
3397 self.cfg.GetNodeGroupInstances(self.group_uuid)
3399 elif level == locking.LEVEL_NODEGROUP:
3400 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3402 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3403 set([self.group_uuid] +
3404 # Lock all groups used by instances optimistically; this requires
3405 # going via the node before it's locked, requiring verification
3408 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3409 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3411 elif level == locking.LEVEL_NODE:
3412 # This will only lock the nodes in the group to be verified which contain actual instances
3414 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3415 self._LockInstancesNodes()
3417 # Lock all nodes in group to be verified
3418 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3419 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3420 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3422 def CheckPrereq(self):
3423 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3424 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3425 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3427 assert self.group_uuid in owned_groups
3429 # Check if locked instances are still correct
3430 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3432 # Get instance information
3433 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3435 # Check if node groups for locked instances are still correct
3436 for (instance_name, inst) in self.instances.items():
3437 assert owned_nodes.issuperset(inst.all_nodes), \
3438 "Instance %s's nodes changed while we kept the lock" % instance_name
3440 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3443 assert self.group_uuid in inst_groups, \
3444 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3446 def Exec(self, feedback_fn):
3447 """Verify integrity of cluster disks.
3449 @rtype: tuple of three items
3450 @return: a tuple of (dict of node-to-node_error, list of instances
3451 which need activate-disks, dict of instance: (node, volume) for missing volumes)
3456 res_instances = set()
3459 nv_dict = _MapInstanceDisksToNodes([inst
3460 for inst in self.instances.values()
3461 if inst.admin_state == constants.ADMINST_UP])
3464 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3465 set(self.cfg.GetVmCapableNodeList()))
3467 node_lvs = self.rpc.call_lv_list(nodes, [])
3469 for (node, node_res) in node_lvs.items():
3470 if node_res.offline:
3473 msg = node_res.fail_msg
3475 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3476 res_nodes[node] = msg
3479 for lv_name, (_, _, lv_online) in node_res.payload.items():
3480 inst = nv_dict.pop((node, lv_name), None)
3481 if not (lv_online or inst is None):
3482 res_instances.add(inst)
3484 # any leftover items in nv_dict are missing LVs, let's arrange the data
3486 for key, inst in nv_dict.iteritems():
3487 res_missing.setdefault(inst, []).append(list(key))
3489 return (res_nodes, list(res_instances), res_missing)
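# A hypothetical return value (names and messages made up):
#   ({"node1": "Error while calling lv_list: ..."},      # per-node errors
#    ["inst1"],                                          # need activate-disks
#    {"inst2": [["node2", "xenvg/disk0"]]})              # missing LVs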
3492 class LUClusterRepairDiskSizes(NoHooksLU):
3493 """Verifies the cluster disks sizes.
3498 def ExpandNames(self):
3499 if self.op.instances:
3500 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3501 self.needed_locks = {
3502 locking.LEVEL_NODE_RES: [],
3503 locking.LEVEL_INSTANCE: self.wanted_names,
3505 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3507 self.wanted_names = None
3508 self.needed_locks = {
3509 locking.LEVEL_NODE_RES: locking.ALL_SET,
3510 locking.LEVEL_INSTANCE: locking.ALL_SET,
3512 self.share_locks = {
3513 locking.LEVEL_NODE_RES: 1,
3514 locking.LEVEL_INSTANCE: 0,
3517 def DeclareLocks(self, level):
3518 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3519 self._LockInstancesNodes(primary_only=True, level=level)
3521 def CheckPrereq(self):
3522 """Check prerequisites.
3524 This only checks the optional instance list against the existing names.
3527 if self.wanted_names is None:
3528 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3530 self.wanted_instances = \
3531 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3533 def _EnsureChildSizes(self, disk):
3534 """Ensure children of the disk have the needed disk size.
3536 This is valid mainly for DRBD8 and fixes an issue where the
3537 children have a smaller disk size.
3539 @param disk: an L{ganeti.objects.Disk} object
3542 if disk.dev_type == constants.LD_DRBD8:
3543 assert disk.children, "Empty children for DRBD8?"
3544 fchild = disk.children[0]
3545 mismatch = fchild.size < disk.size
3547 self.LogInfo("Child disk has size %d, parent %d, fixing",
3548 fchild.size, disk.size)
3549 fchild.size = disk.size
3551 # and we recurse on this child only, not on the metadev
3552 return self._EnsureChildSizes(fchild) or mismatch
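# Sketch with made-up sizes: a DRBD8 disk recorded as 10240 MiB whose data
# child is only 10236 MiB gets the child grown to 10240 in the configuration
# object, and True is returned so that Exec() writes the change back.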
3556 def Exec(self, feedback_fn):
3557 """Verify the size of cluster disks.
3560 # TODO: check child disks too
3561 # TODO: check differences in size between primary/secondary nodes
3563 for instance in self.wanted_instances:
3564 pnode = instance.primary_node
3565 if pnode not in per_node_disks:
3566 per_node_disks[pnode] = []
3567 for idx, disk in enumerate(instance.disks):
3568 per_node_disks[pnode].append((instance, idx, disk))
3570 assert not (frozenset(per_node_disks.keys()) -
3571 self.owned_locks(locking.LEVEL_NODE_RES)), \
3572 "Not owning correct locks"
3573 assert not self.owned_locks(locking.LEVEL_NODE)
3576 for node, dskl in per_node_disks.items():
3577 newl = [v[2].Copy() for v in dskl]
3579 self.cfg.SetDiskID(dsk, node)
3580 result = self.rpc.call_blockdev_getsize(node, newl)
3582 self.LogWarning("Failure in blockdev_getsize call to node"
3583 " %s, ignoring", node)
3585 if len(result.payload) != len(dskl):
3586 logging.warning("Invalid result from node %s: len(dksl)=%d,"
3587 " result.payload=%s", node, len(dskl), result.payload)
3588 self.LogWarning("Invalid result from node %s, ignoring node results",
3591 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3593 self.LogWarning("Disk %d of instance %s did not return size"
3594 " information, ignoring", idx, instance.name)
3596 if not isinstance(size, (int, long)):
3597 self.LogWarning("Disk %d of instance %s did not return valid"
3598 " size information, ignoring", idx, instance.name)
3601 if size != disk.size:
3602 self.LogInfo("Disk %d of instance %s has mismatched size,"
3603 " correcting: recorded %d, actual %d", idx,
3604 instance.name, disk.size, size)
3606 self.cfg.Update(instance, feedback_fn)
3607 changed.append((instance.name, idx, size))
3608 if self._EnsureChildSizes(disk):
3609 self.cfg.Update(instance, feedback_fn)
3610 changed.append((instance.name, idx, disk.size))
3614 class LUClusterRename(LogicalUnit):
3615 """Rename the cluster.
3618 HPATH = "cluster-rename"
3619 HTYPE = constants.HTYPE_CLUSTER
3621 def BuildHooksEnv(self):
3626 "OP_TARGET": self.cfg.GetClusterName(),
3627 "NEW_NAME": self.op.name,
3630 def BuildHooksNodes(self):
3631 """Build hooks nodes.
3634 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3636 def CheckPrereq(self):
3637 """Verify that the passed name is a valid one.
3640 hostname = netutils.GetHostname(name=self.op.name,
3641 family=self.cfg.GetPrimaryIPFamily())
3643 new_name = hostname.name
3644 self.ip = new_ip = hostname.ip
3645 old_name = self.cfg.GetClusterName()
3646 old_ip = self.cfg.GetMasterIP()
3647 if new_name == old_name and new_ip == old_ip:
3648 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3649 " cluster has changed",
3651 if new_ip != old_ip:
3652 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3653 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3654 " reachable on the network" %
3655 new_ip, errors.ECODE_NOTUNIQUE)
3657 self.op.name = new_name
3659 def Exec(self, feedback_fn):
3660 """Rename the cluster.
3663 clustername = self.op.name
3666 # shutdown the master IP
3667 master_params = self.cfg.GetMasterNetworkParameters()
3668 ems = self.cfg.GetUseExternalMipScript()
3669 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3671 result.Raise("Could not disable the master role")
3674 cluster = self.cfg.GetClusterInfo()
3675 cluster.cluster_name = clustername
3676 cluster.master_ip = new_ip
3677 self.cfg.Update(cluster, feedback_fn)
3679 # update the known hosts file
3680 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3681 node_list = self.cfg.GetOnlineNodeList()
3683 node_list.remove(master_params.name)
3686 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3688 master_params.ip = new_ip
3689 result = self.rpc.call_node_activate_master_ip(master_params.name,
3691 msg = result.fail_msg
3693 self.LogWarning("Could not re-enable the master role on"
3694 " the master, please restart manually: %s", msg)
3699 def _ValidateNetmask(cfg, netmask):
3700 """Checks if a netmask is valid.
3702 @type cfg: L{config.ConfigWriter}
3703 @param cfg: The cluster configuration
3705 @param netmask: the netmask to be verified
3706 @raise errors.OpPrereqError: if the validation fails
3709 ip_family = cfg.GetPrimaryIPFamily()
3711 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3712 except errors.ProgrammerError:
3713 raise errors.OpPrereqError("Invalid primary ip family: %s." %
3715 if not ipcls.ValidateNetmask(netmask):
3716 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3720 class LUClusterSetParams(LogicalUnit):
3721 """Change the parameters of the cluster.
3724 HPATH = "cluster-modify"
3725 HTYPE = constants.HTYPE_CLUSTER
3728 def CheckArguments(self):
3732 if self.op.uid_pool:
3733 uidpool.CheckUidPool(self.op.uid_pool)
3735 if self.op.add_uids:
3736 uidpool.CheckUidPool(self.op.add_uids)
3738 if self.op.remove_uids:
3739 uidpool.CheckUidPool(self.op.remove_uids)
3741 if self.op.master_netmask is not None:
3742 _ValidateNetmask(self.cfg, self.op.master_netmask)
3744 if self.op.diskparams:
3745 for dt_params in self.op.diskparams.values():
3746 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3748 def ExpandNames(self):
3749 # FIXME: in the future, other cluster parameters might not require checking on
3750 # all nodes in order to be modified.
3751 self.needed_locks = {
3752 locking.LEVEL_NODE: locking.ALL_SET,
3754 self.share_locks[locking.LEVEL_NODE] = 1
3756 def BuildHooksEnv(self):
3761 "OP_TARGET": self.cfg.GetClusterName(),
3762 "NEW_VG_NAME": self.op.vg_name,
3765 def BuildHooksNodes(self):
3766 """Build hooks nodes.
3769 mn = self.cfg.GetMasterNode()
3772 def CheckPrereq(self):
3773 """Check prerequisites.
3775 This checks that the given parameters do not conflict and
3776 that the given volume group is valid.
3779 if self.op.vg_name is not None and not self.op.vg_name:
3780 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3781 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3782 " instances exist", errors.ECODE_INVAL)
3784 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3785 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3786 raise errors.OpPrereqError("Cannot disable drbd helper while"
3787 " drbd-based instances exist",
3790 node_list = self.owned_locks(locking.LEVEL_NODE)
3792 # if vg_name not None, checks given volume group on all nodes
3794 vglist = self.rpc.call_vg_list(node_list)
3795 for node in node_list:
3796 msg = vglist[node].fail_msg
3798 # ignoring down node
3799 self.LogWarning("Error while gathering data on node %s"
3800 " (ignoring node): %s", node, msg)
3802 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3804 constants.MIN_VG_SIZE)
3806 raise errors.OpPrereqError("Error on node '%s': %s" %
3807 (node, vgstatus), errors.ECODE_ENVIRON)
3809 if self.op.drbd_helper:
3810 # checks given drbd helper on all nodes
3811 helpers = self.rpc.call_drbd_helper(node_list)
3812 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3814 self.LogInfo("Not checking drbd helper on offline node %s", node)
3816 msg = helpers[node].fail_msg
3818 raise errors.OpPrereqError("Error checking drbd helper on node"
3819 " '%s': %s" % (node, msg),
3820 errors.ECODE_ENVIRON)
3821 node_helper = helpers[node].payload
3822 if node_helper != self.op.drbd_helper:
3823 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3824 (node, node_helper), errors.ECODE_ENVIRON)
3826 self.cluster = cluster = self.cfg.GetClusterInfo()
3827 # validate params changes
3828 if self.op.beparams:
3829 objects.UpgradeBeParams(self.op.beparams)
3830 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3831 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3833 if self.op.ndparams:
3834 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3835 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3837 # TODO: we need a more general way to handle resetting
3838 # cluster-level parameters to default values
3839 if self.new_ndparams["oob_program"] == "":
3840 self.new_ndparams["oob_program"] = \
3841 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3843 if self.op.hv_state:
3844 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3845 self.cluster.hv_state_static)
3846 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3847 for hv, values in new_hv_state.items())
3849 if self.op.disk_state:
3850 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3851 self.cluster.disk_state_static)
3852 self.new_disk_state = \
3853 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3854 for name, values in svalues.items()))
3855 for storage, svalues in new_disk_state.items())
if self.op.ipolicy:
ipolicy = {}
3859 for key, value in self.op.ipolicy.items():
3860 utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
3861 ipolicy[key] = _GetUpdatedParams(cluster.ipolicy.get(key, {}),
value)
3863 objects.InstancePolicy.CheckParameterSyntax(ipolicy)
3864 self.new_ipolicy = ipolicy
3866 if self.op.nicparams:
3867 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3868 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3869 objects.NIC.CheckParameterSyntax(self.new_nicparams)
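# Illustration of the fill semantics used here (hypothetical values): with
# cluster defaults {"mode": "bridged", "link": "xen-br0"} and an override of
# {"mode": "routed"}, SimpleFillNIC/FillDict yield
# {"mode": "routed", "link": "xen-br0"}; the per-instance loop below applies
# the same layering to every NIC before re-validating it.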
3872 # check all instances for consistency
nic_errors = []
3873 for instance in self.cfg.GetAllInstancesInfo().values():
3874 for nic_idx, nic in enumerate(instance.nics):
3875 params_copy = copy.deepcopy(nic.nicparams)
3876 params_filled = objects.FillDict(self.new_nicparams, params_copy)
3878 # check parameter syntax
3880 objects.NIC.CheckParameterSyntax(params_filled)
3881 except errors.ConfigurationError, err:
3882 nic_errors.append("Instance %s, nic/%d: %s" %
3883 (instance.name, nic_idx, err))
3885 # if we're moving instances to routed, check that they have an ip
3886 target_mode = params_filled[constants.NIC_MODE]
3887 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3888 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3889 " address" % (instance.name, nic_idx))
3891 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3892 "\n".join(nic_errors))
3894 # hypervisor list/parameters
3895 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3896 if self.op.hvparams:
3897 for hv_name, hv_dict in self.op.hvparams.items():
3898 if hv_name not in self.new_hvparams:
3899 self.new_hvparams[hv_name] = hv_dict
3901 self.new_hvparams[hv_name].update(hv_dict)
3903 # disk template parameters
3904 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
3905 if self.op.diskparams:
3906 for dt_name, dt_params in self.op.diskparams.items():
3907 if dt_name not in self.new_diskparams:
3908 self.new_diskparams[dt_name] = dt_params
3910 self.new_diskparams[dt_name].update(dt_params)
3912 # os hypervisor parameters
3913 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3915 for os_name, hvs in self.op.os_hvp.items():
3916 if os_name not in self.new_os_hvp:
3917 self.new_os_hvp[os_name] = hvs
3919 for hv_name, hv_dict in hvs.items():
3920 if hv_name not in self.new_os_hvp[os_name]:
3921 self.new_os_hvp[os_name][hv_name] = hv_dict
3923 self.new_os_hvp[os_name][hv_name].update(hv_dict)
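# For illustration (hypothetical values): self.op.os_hvp maps an OS name to
# per-hypervisor overrides, e.g. {"debian-image": {"kvm": {"kernel_path": ""}}};
# existing entries in cluster.os_hvp are updated in place rather than replaced.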
3926 self.new_osp = objects.FillDict(cluster.osparams, {})
3927 if self.op.osparams:
3928 for os_name, osp in self.op.osparams.items():
3929 if os_name not in self.new_osp:
3930 self.new_osp[os_name] = {}
3932 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3935 if not self.new_osp[os_name]:
3936 # we removed all parameters
3937 del self.new_osp[os_name]
3939 # check the parameter validity (remote check)
3940 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3941 os_name, self.new_osp[os_name])
3943 # changes to the hypervisor list
3944 if self.op.enabled_hypervisors is not None:
3945 self.hv_list = self.op.enabled_hypervisors
3946 for hv in self.hv_list:
3947 # if the hypervisor doesn't already exist in the cluster
3948 # hvparams, we initialize it to empty, and then (in both
3949 # cases) we make sure to fill the defaults, as we might not
3950 # have a complete defaults list if the hypervisor wasn't
# enabled before
3952 if hv not in new_hvp:
new_hvp[hv] = {}
3954 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3955 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3957 self.hv_list = cluster.enabled_hypervisors
3959 if self.op.hvparams or self.op.enabled_hypervisors is not None:
3960 # either the enabled list has changed, or the parameters have, validate
3961 for hv_name, hv_params in self.new_hvparams.items():
3962 if ((self.op.hvparams and hv_name in self.op.hvparams) or
3963 (self.op.enabled_hypervisors and
3964 hv_name in self.op.enabled_hypervisors)):
3965 # either this is a new hypervisor, or its parameters have changed
3966 hv_class = hypervisor.GetHypervisor(hv_name)
3967 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3968 hv_class.CheckParameterSyntax(hv_params)
3969 _CheckHVParams(self, node_list, hv_name, hv_params)
3972 # no need to check any newly-enabled hypervisors, since the
3973 # defaults have already been checked in the above code-block
3974 for os_name, os_hvp in self.new_os_hvp.items():
3975 for hv_name, hv_params in os_hvp.items():
3976 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3977 # we need to fill in the new os_hvp on top of the actual hvparams
3978 cluster_defaults = self.new_hvparams.get(hv_name, {})
3979 new_osp = objects.FillDict(cluster_defaults, hv_params)
3980 hv_class = hypervisor.GetHypervisor(hv_name)
3981 hv_class.CheckParameterSyntax(new_osp)
3982 _CheckHVParams(self, node_list, hv_name, new_osp)
3984 if self.op.default_iallocator:
3985 alloc_script = utils.FindFile(self.op.default_iallocator,
3986 constants.IALLOCATOR_SEARCH_PATH,
3988 if alloc_script is None:
3989 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3990 " specified" % self.op.default_iallocator,
3993 def Exec(self, feedback_fn):
3994 """Change the parameters of the cluster.
3997 if self.op.vg_name is not None:
3998 new_volume = self.op.vg_name
4001 if new_volume != self.cfg.GetVGName():
4002 self.cfg.SetVGName(new_volume)
4004 feedback_fn("Cluster LVM configuration already in desired"
4005 " state, not changing")
4006 if self.op.drbd_helper is not None:
4007 new_helper = self.op.drbd_helper
4010 if new_helper != self.cfg.GetDRBDHelper():
4011 self.cfg.SetDRBDHelper(new_helper)
4013 feedback_fn("Cluster DRBD helper already in desired state,"
4015 if self.op.hvparams:
4016 self.cluster.hvparams = self.new_hvparams
if self.op.os_hvp:
4018 self.cluster.os_hvp = self.new_os_hvp
4019 if self.op.enabled_hypervisors is not None:
4020 self.cluster.hvparams = self.new_hvparams
4021 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4022 if self.op.beparams:
4023 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4024 if self.op.nicparams:
4025 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
if self.op.ipolicy:
4027 self.cluster.ipolicy = self.new_ipolicy
4028 if self.op.osparams:
4029 self.cluster.osparams = self.new_osp
4030 if self.op.ndparams:
4031 self.cluster.ndparams = self.new_ndparams
4032 if self.op.diskparams:
4033 self.cluster.diskparams = self.new_diskparams
4034 if self.op.hv_state:
4035 self.cluster.hv_state_static = self.new_hv_state
4036 if self.op.disk_state:
4037 self.cluster.disk_state_static = self.new_disk_state
4039 if self.op.candidate_pool_size is not None:
4040 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4041 # we need to update the pool size here, otherwise the save will fail
4042 _AdjustCandidatePool(self, [])
4044 if self.op.maintain_node_health is not None:
4045 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4046 feedback_fn("Note: CONFD was disabled at build time, node health"
4047 " maintenance is not useful (still enabling it)")
4048 self.cluster.maintain_node_health = self.op.maintain_node_health
4050 if self.op.prealloc_wipe_disks is not None:
4051 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4053 if self.op.add_uids is not None:
4054 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4056 if self.op.remove_uids is not None:
4057 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4059 if self.op.uid_pool is not None:
4060 self.cluster.uid_pool = self.op.uid_pool
4062 if self.op.default_iallocator is not None:
4063 self.cluster.default_iallocator = self.op.default_iallocator
4065 if self.op.reserved_lvs is not None:
4066 self.cluster.reserved_lvs = self.op.reserved_lvs
4068 if self.op.use_external_mip_script is not None:
4069 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4071 def helper_os(aname, mods, desc):
4073 lst = getattr(self.cluster, aname)
4074 for key, val in mods:
4075 if key == constants.DDM_ADD:
if val in lst:
4077 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
else:
lst.append(val)
4080 elif key == constants.DDM_REMOVE:
if val in lst:
lst.remove(val)
else:
4084 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
else:
4086 raise errors.ProgrammerError("Invalid modification '%s'" % key)
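# Example of the modification format handled by helper_os (hypothetical OS
# name): self.op.hidden_os is a list of (action, value) pairs such as
# [(constants.DDM_ADD, "debian-image")], applied to cluster.hidden_os with a
# warning when an addition already exists or a removal is missing.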
4088 if self.op.hidden_os:
4089 helper_os("hidden_os", self.op.hidden_os, "hidden")
4091 if self.op.blacklisted_os:
4092 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4094 if self.op.master_netdev:
4095 master_params = self.cfg.GetMasterNetworkParameters()
4096 ems = self.cfg.GetUseExternalMipScript()
4097 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4098 self.cluster.master_netdev)
4099 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4101 result.Raise("Could not disable the master ip")
4102 feedback_fn("Changing master_netdev from %s to %s" %
4103 (master_params.netdev, self.op.master_netdev))
4104 self.cluster.master_netdev = self.op.master_netdev
4106 if self.op.master_netmask:
4107 master_params = self.cfg.GetMasterNetworkParameters()
4108 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4109 result = self.rpc.call_node_change_master_netmask(master_params.name,
4110 master_params.netmask,
4111 self.op.master_netmask,
4113 master_params.netdev)
4115 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4118 self.cluster.master_netmask = self.op.master_netmask
4120 self.cfg.Update(self.cluster, feedback_fn)
4122 if self.op.master_netdev:
4123 master_params = self.cfg.GetMasterNetworkParameters()
4124 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4125 self.op.master_netdev)
4126 ems = self.cfg.GetUseExternalMipScript()
4127 result = self.rpc.call_node_activate_master_ip(master_params.name,
4130 self.LogWarning("Could not re-enable the master ip on"
4131 " the master, please restart manually: %s",
4135 def _UploadHelper(lu, nodes, fname):
4136 """Helper for uploading a file and showing warnings.
4139 if os.path.exists(fname):
4140 result = lu.rpc.call_upload_file(nodes, fname)
4141 for to_node, to_result in result.items():
4142 msg = to_result.fail_msg
4144 msg = ("Copy of file %s to node %s failed: %s" %
4145 (fname, to_node, msg))
4146 lu.proc.LogWarning(msg)
4149 def _ComputeAncillaryFiles(cluster, redist):
4150 """Compute files external to Ganeti which need to be consistent.
4152 @type redist: boolean
4153 @param redist: Whether to include files which need to be redistributed
4156 # Compute files for all nodes
files_all = set([
4158 constants.SSH_KNOWN_HOSTS_FILE,
4159 constants.CONFD_HMAC_KEY,
4160 constants.CLUSTER_DOMAIN_SECRET_FILE,
4161 constants.SPICE_CERT_FILE,
4162 constants.SPICE_CACERT_FILE,
4163 constants.RAPI_USERS_FILE,
])
4167 files_all.update(constants.ALL_CERT_FILES)
4168 files_all.update(ssconf.SimpleStore().GetFileList())
4170 # we need to ship at least the RAPI certificate
4171 files_all.add(constants.RAPI_CERT_FILE)
4173 if cluster.modify_etc_hosts:
4174 files_all.add(constants.ETC_HOSTS)
4176 # Files which are optional; these must:
4177 # - be present in one of the other categories as well
4178 # - either exist or not exist on all nodes of that category (master
# candidates, VM-capable nodes, or all nodes)
files_opt = set([
4180 constants.RAPI_USERS_FILE,
])
4183 # Files which should only be on master candidates
files_mc = set()
if not redist:
4187 files_mc.add(constants.CLUSTER_CONF_FILE)
4189 # FIXME: this should also be replicated but Ganeti doesn't support files_mc
4191 files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)
4193 # Files which should only be on VM-capable nodes
4194 files_vm = set(filename
4195 for hv_name in cluster.enabled_hypervisors
4196 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4198 files_opt |= set(filename
4199 for hv_name in cluster.enabled_hypervisors
4200 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4202 # Filenames in each category must be unique
4203 all_files_set = files_all | files_mc | files_vm
4204 assert (len(all_files_set) ==
4205 sum(map(len, [files_all, files_mc, files_vm]))), \
4206 "Found file listed in more than one file list"
4208 # Optional files must be present in one other category
4209 assert all_files_set.issuperset(files_opt), \
4210 "Optional file not in a different required list"
4212 return (files_all, files_opt, files_mc, files_vm)
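# Typical use (see _RedistributeAncillaryFiles below): callers unpack the
# result as (files_all, files_opt, files_mc, files_vm); note that e.g.
# constants.RAPI_USERS_FILE appears in both files_all and files_opt because it
# may legitimately be absent on every node.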
4215 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4216 """Distribute additional files which are part of the cluster configuration.
4218 ConfigWriter takes care of distributing the config and ssconf files, but
4219 there are more files which should be distributed to all nodes. This function
4220 makes sure those are copied.
4222 @param lu: calling logical unit
4223 @param additional_nodes: list of nodes not in the config to distribute to
4224 @type additional_vm: boolean
4225 @param additional_vm: whether the additional nodes are vm-capable or not
4228 # Gather target nodes
4229 cluster = lu.cfg.GetClusterInfo()
4230 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4232 online_nodes = lu.cfg.GetOnlineNodeList()
4233 vm_nodes = lu.cfg.GetVmCapableNodeList()
4235 if additional_nodes is not None:
4236 online_nodes.extend(additional_nodes)
4238 vm_nodes.extend(additional_nodes)
4240 # Never distribute to master node
4241 for nodelist in [online_nodes, vm_nodes]:
4242 if master_info.name in nodelist:
4243 nodelist.remove(master_info.name)
4246 (files_all, _, files_mc, files_vm) = \
4247 _ComputeAncillaryFiles(cluster, True)
4249 # Never re-distribute configuration file from here
4250 assert not (constants.CLUSTER_CONF_FILE in files_all or
4251 constants.CLUSTER_CONF_FILE in files_vm)
4252 assert not files_mc, "Master candidates not handled in this function"
filemap = [
4255 (online_nodes, files_all),
4256 (vm_nodes, files_vm),
]
4260 for (node_list, files) in filemap:
for fname in files:
4262 _UploadHelper(lu, node_list, fname)
4265 class LUClusterRedistConf(NoHooksLU):
4266 """Force the redistribution of cluster configuration.
4268 This is a very simple LU.
4273 def ExpandNames(self):
4274 self.needed_locks = {
4275 locking.LEVEL_NODE: locking.ALL_SET,
4277 self.share_locks[locking.LEVEL_NODE] = 1
4279 def Exec(self, feedback_fn):
4280 """Redistribute the configuration.
4283 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4284 _RedistributeAncillaryFiles(self)
4287 class LUClusterActivateMasterIp(NoHooksLU):
4288 """Activate the master IP on the master node.
4291 def Exec(self, feedback_fn):
4292 """Activate the master IP.
4295 master_params = self.cfg.GetMasterNetworkParameters()
4296 ems = self.cfg.GetUseExternalMipScript()
4297 result = self.rpc.call_node_activate_master_ip(master_params.name,
4299 result.Raise("Could not activate the master IP")
4302 class LUClusterDeactivateMasterIp(NoHooksLU):
4303 """Deactivate the master IP on the master node.
4306 def Exec(self, feedback_fn):
4307 """Deactivate the master IP.
4310 master_params = self.cfg.GetMasterNetworkParameters()
4311 ems = self.cfg.GetUseExternalMipScript()
4312 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4314 result.Raise("Could not deactivate the master IP")
4317 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4318 """Sleep and poll for an instance's disk to sync.
4321 if not instance.disks or disks is not None and not disks:
4324 disks = _ExpandCheckDisks(instance, disks)
4327 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4329 node = instance.primary_node
4332 lu.cfg.SetDiskID(dev, node)
4334 # TODO: Convert to utils.Retry
4337 degr_retries = 10 # in seconds, as we sleep 1 second each time
4341 cumul_degraded = False
4342 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
4343 msg = rstats.fail_msg
4345 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4348 raise errors.RemoteError("Can't contact node %s for mirror data,"
4349 " aborting." % node)
4352 rstats = rstats.payload
4354 for i, mstat in enumerate(rstats):
4356 lu.LogWarning("Can't compute data for node %s/%s",
4357 node, disks[i].iv_name)
4360 cumul_degraded = (cumul_degraded or
4361 (mstat.is_degraded and mstat.sync_percent is None))
4362 if mstat.sync_percent is not None:
4364 if mstat.estimated_time is not None:
4365 rem_time = ("%s remaining (estimated)" %
4366 utils.FormatSeconds(mstat.estimated_time))
4367 max_time = mstat.estimated_time
4369 rem_time = "no time estimate"
4370 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4371 (disks[i].iv_name, mstat.sync_percent, rem_time))
4373 # if we're done but degraded, let's do a few small retries, to
4374 # make sure we see a stable and not transient situation; therefore
4375 # we force restart of the loop
4376 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4377 logging.info("Degraded disks found, %d retries left", degr_retries)
4385 time.sleep(min(60, max_time))
4388 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4389 return not cumul_degraded
4392 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
4393 """Check that mirrors are not degraded.
4395 The ldisk parameter, if True, will change the test from the
4396 is_degraded attribute (which represents overall non-ok status for
4397 the device(s)) to the ldisk (representing the local storage status).
4400 lu.cfg.SetDiskID(dev, node)
4404 if on_primary or dev.AssembleOnSecondary():
4405 rstats = lu.rpc.call_blockdev_find(node, dev)
4406 msg = rstats.fail_msg
4408 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4410 elif not rstats.payload:
4411 lu.LogWarning("Can't find disk on node %s", node)
4415 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4417 result = result and not rstats.payload.is_degraded
4420 for child in dev.children:
4421 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
return result
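# Usage sketch (an assumption based on how similar checks are made elsewhere,
# not a literal excerpt): before relying on a DRBD secondary one would call
# _CheckDiskConsistency(lu, dev, secondary_node, False, ldisk=True) and require
# a True result, i.e. ldisk_status == constants.LDS_OKAY on that node.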
4426 class LUOobCommand(NoHooksLU):
4427 """Logical unit for OOB handling.
4431 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4433 def ExpandNames(self):
4434 """Gather locks we need.
4437 if self.op.node_names:
4438 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4439 lock_names = self.op.node_names
4441 lock_names = locking.ALL_SET
4443 self.needed_locks = {
4444 locking.LEVEL_NODE: lock_names,
4447 def CheckPrereq(self):
4448 """Check prerequisites.
4451 - the node exists in the configuration
4454 Any errors are signaled by raising errors.OpPrereqError.
4458 self.master_node = self.cfg.GetMasterNode()
4460 assert self.op.power_delay >= 0.0
4462 if self.op.node_names:
4463 if (self.op.command in self._SKIP_MASTER and
4464 self.master_node in self.op.node_names):
4465 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4466 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4468 if master_oob_handler:
4469 additional_text = ("run '%s %s %s' if you want to operate on the"
4470 " master regardless") % (master_oob_handler,
4474 additional_text = "it does not support out-of-band operations"
4476 raise errors.OpPrereqError(("Operating on the master node %s is not"
4477 " allowed for %s; %s") %
4478 (self.master_node, self.op.command,
4479 additional_text), errors.ECODE_INVAL)
4481 self.op.node_names = self.cfg.GetNodeList()
4482 if self.op.command in self._SKIP_MASTER:
4483 self.op.node_names.remove(self.master_node)
4485 if self.op.command in self._SKIP_MASTER:
4486 assert self.master_node not in self.op.node_names
4488 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4490 raise errors.OpPrereqError("Node %s not found" % node_name,
4493 self.nodes.append(node)
4495 if (not self.op.ignore_status and
4496 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4497 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4498 " not marked offline") % node_name,
4501 def Exec(self, feedback_fn):
4502 """Execute OOB and return result if we expect any.
4505 master_node = self.master_node
4508 for idx, node in enumerate(utils.NiceSort(self.nodes,
4509 key=lambda node: node.name)):
4510 node_entry = [(constants.RS_NORMAL, node.name)]
4511 ret.append(node_entry)
4513 oob_program = _SupportsOob(self.cfg, node)
4516 node_entry.append((constants.RS_UNAVAIL, None))
4519 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4520 self.op.command, oob_program, node.name)
4521 result = self.rpc.call_run_oob(master_node, oob_program,
4522 self.op.command, node.name,
4526 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4527 node.name, result.fail_msg)
4528 node_entry.append((constants.RS_NODATA, None))
4531 self._CheckPayload(result)
4532 except errors.OpExecError, err:
4533 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4535 node_entry.append((constants.RS_NODATA, None))
4537 if self.op.command == constants.OOB_HEALTH:
4538 # For health we should log important events
4539 for item, status in result.payload:
4540 if status in [constants.OOB_STATUS_WARNING,
4541 constants.OOB_STATUS_CRITICAL]:
4542 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4543 item, node.name, status)
4545 if self.op.command == constants.OOB_POWER_ON:
4547 elif self.op.command == constants.OOB_POWER_OFF:
4548 node.powered = False
4549 elif self.op.command == constants.OOB_POWER_STATUS:
4550 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4551 if powered != node.powered:
4552 logging.warning(("Recorded power state (%s) of node '%s' does not"
4553 " match actual power state (%s)"), node.powered,
4556 # For configuration changing commands we should update the node
4557 if self.op.command in (constants.OOB_POWER_ON,
4558 constants.OOB_POWER_OFF):
4559 self.cfg.Update(node, feedback_fn)
4561 node_entry.append((constants.RS_NORMAL, result.payload))
4563 if (self.op.command == constants.OOB_POWER_ON and
4564 idx < len(self.nodes) - 1):
4565 time.sleep(self.op.power_delay)
return ret
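# Note on pacing (derived from the loop above): with e.g. power_delay=2.0 and
# three target nodes, a ~2 second pause follows the first and second power-on
# but not the last, since the sleep is skipped when idx == len(self.nodes) - 1.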
4569 def _CheckPayload(self, result):
4570 """Checks if the payload is valid.
4572 @param result: RPC result
4573 @raises errors.OpExecError: If payload is not valid
4577 if self.op.command == constants.OOB_HEALTH:
4578 if not isinstance(result.payload, list):
4579 errs.append("command 'health' is expected to return a list but got %s" %
4580 type(result.payload))
4582 for item, status in result.payload:
4583 if status not in constants.OOB_STATUSES:
4584 errs.append("health item '%s' has invalid status '%s'" %
4587 if self.op.command == constants.OOB_POWER_STATUS:
4588 if not isinstance(result.payload, dict):
4589 errs.append("power-status is expected to return a dict but got %s" %
4590 type(result.payload))
4592 if self.op.command in [
4593 constants.OOB_POWER_ON,
4594 constants.OOB_POWER_OFF,
4595 constants.OOB_POWER_CYCLE,
4597 if result.payload is not None:
4598 errs.append("%s is expected to not return payload but got '%s'" %
4599 (self.op.command, result.payload))
4602 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4603 utils.CommaJoin(errs))
4606 class _OsQuery(_QueryBase):
4607 FIELDS = query.OS_FIELDS
4609 def ExpandNames(self, lu):
4610 # Lock all nodes in shared mode
4611 # Temporary removal of locks, should be reverted later
4612 # TODO: reintroduce locks when they are lighter-weight
4613 lu.needed_locks = {}
4614 #self.share_locks[locking.LEVEL_NODE] = 1
4615 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4617 # The following variables interact with _QueryBase._GetNames
4619 self.wanted = self.names
4621 self.wanted = locking.ALL_SET
4623 self.do_locking = self.use_locking
4625 def DeclareLocks(self, lu, level):
4629 def _DiagnoseByOS(rlist):
4630 """Remaps a per-node return list into an a per-os per-node dictionary
4632 @param rlist: a map with node names as keys and OS objects as values
4635 @return: a dictionary with osnames as keys and as value another
4636 map, with nodes as keys and tuples of (path, status, diagnose,
4637 variants, parameters, api_versions) as values, eg::
4639 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4640 (/srv/..., False, "invalid api")],
4641 "node2": [(/srv/..., True, "", [], [])]}
4646 # we build here the list of nodes that didn't fail the RPC (at RPC
4647 # level), so that nodes with a non-responding node daemon don't
4648 # make all OSes invalid
4649 good_nodes = [node_name for node_name in rlist
4650 if not rlist[node_name].fail_msg]
4651 for node_name, nr in rlist.items():
4652 if nr.fail_msg or not nr.payload:
4654 for (name, path, status, diagnose, variants,
4655 params, api_versions) in nr.payload:
4656 if name not in all_os:
4657 # build a list of nodes for this os containing empty lists
4658 # for each node in node_list
4660 for nname in good_nodes:
4661 all_os[name][nname] = []
4662 # convert params from [name, help] to (name, help)
4663 params = [tuple(v) for v in params]
4664 all_os[name][node_name].append((path, status, diagnose,
4665 variants, params, api_versions))
return all_os
4668 def _GetQueryData(self, lu):
4669 """Computes the list of nodes and their attributes.
4672 # Locking is not used
4673 assert not (compat.any(lu.glm.is_owned(level)
4674 for level in locking.LEVELS
4675 if level != locking.LEVEL_CLUSTER) or
4676 self.do_locking or self.use_locking)
4678 valid_nodes = [node.name
4679 for node in lu.cfg.GetAllNodesInfo().values()
4680 if not node.offline and node.vm_capable]
4681 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4682 cluster = lu.cfg.GetClusterInfo()
4686 for (os_name, os_data) in pol.items():
4687 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4688 hidden=(os_name in cluster.hidden_os),
4689 blacklisted=(os_name in cluster.blacklisted_os))
4693 api_versions = set()
4695 for idx, osl in enumerate(os_data.values()):
4696 info.valid = bool(info.valid and osl and osl[0][1])
4700 (node_variants, node_params, node_api) = osl[0][3:6]
4703 variants.update(node_variants)
4704 parameters.update(node_params)
4705 api_versions.update(node_api)
4707 # Filter out inconsistent values
4708 variants.intersection_update(node_variants)
4709 parameters.intersection_update(node_params)
4710 api_versions.intersection_update(node_api)
4712 info.variants = list(variants)
4713 info.parameters = list(parameters)
4714 info.api_versions = list(api_versions)
4716 data[os_name] = info
4718 # Prepare data in requested order
4719 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4723 class LUOsDiagnose(NoHooksLU):
4724 """Logical unit for OS diagnose/query.
4730 def _BuildFilter(fields, names):
4731 """Builds a filter for querying OSes.
4734 name_filter = qlang.MakeSimpleFilter("name", names)
4736 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4737 # respective field is not requested
4738 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4739 for fname in ["hidden", "blacklisted"]
4740 if fname not in fields]
4741 if "valid" not in fields:
4742 status_filter.append([qlang.OP_TRUE, "valid"])
4745 status_filter.insert(0, qlang.OP_AND)
4747 status_filter = None
4749 if name_filter and status_filter:
4750 return [qlang.OP_AND, name_filter, status_filter]
4754 return status_filter
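# Illustration (hypothetical request): for fields=["name"] and no name filter,
# the filter built above is roughly
# [qlang.OP_AND, [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
#  [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]], [qlang.OP_TRUE, "valid"]]
# so hidden, blacklisted and invalid OSes are omitted unless those fields are
# explicitly requested.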
4756 def CheckArguments(self):
4757 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4758 self.op.output_fields, False)
4760 def ExpandNames(self):
4761 self.oq.ExpandNames(self)
4763 def Exec(self, feedback_fn):
4764 return self.oq.OldStyleQuery(self)
4767 class LUNodeRemove(LogicalUnit):
4768 """Logical unit for removing a node.
4771 HPATH = "node-remove"
4772 HTYPE = constants.HTYPE_NODE
4774 def BuildHooksEnv(self):
4777 This doesn't run on the target node in the pre phase as a failed
4778 node would then be impossible to remove.
4782 "OP_TARGET": self.op.node_name,
4783 "NODE_NAME": self.op.node_name,
4786 def BuildHooksNodes(self):
4787 """Build hooks nodes.
4790 all_nodes = self.cfg.GetNodeList()
4792 all_nodes.remove(self.op.node_name)
4794 logging.warning("Node '%s', which is about to be removed, was not found"
4795 " in the list of all nodes", self.op.node_name)
4796 return (all_nodes, all_nodes)
4798 def CheckPrereq(self):
4799 """Check prerequisites.
4802 - the node exists in the configuration
4803 - it does not have primary or secondary instances
4804 - it's not the master
4806 Any errors are signaled by raising errors.OpPrereqError.
4809 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4810 node = self.cfg.GetNodeInfo(self.op.node_name)
4811 assert node is not None
4813 masternode = self.cfg.GetMasterNode()
4814 if node.name == masternode:
4815 raise errors.OpPrereqError("Node is the master node, failover to another"
4816 " node is required", errors.ECODE_INVAL)
4818 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4819 if node.name in instance.all_nodes:
4820 raise errors.OpPrereqError("Instance %s is still running on the node,"
4821 " please remove first" % instance_name,
4823 self.op.node_name = node.name
4826 def Exec(self, feedback_fn):
4827 """Removes the node from the cluster.
4831 logging.info("Stopping the node daemon and removing configs from node %s",
4834 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4836 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
4839 # Promote nodes to master candidate as needed
4840 _AdjustCandidatePool(self, exceptions=[node.name])
4841 self.context.RemoveNode(node.name)
4843 # Run post hooks on the node before it's removed
4844 _RunPostHook(self, node.name)
4846 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4847 msg = result.fail_msg
4849 self.LogWarning("Errors encountered on the remote node while leaving"
4850 " the cluster: %s", msg)
4852 # Remove node from our /etc/hosts
4853 if self.cfg.GetClusterInfo().modify_etc_hosts:
4854 master_node = self.cfg.GetMasterNode()
4855 result = self.rpc.call_etc_hosts_modify(master_node,
4856 constants.ETC_HOSTS_REMOVE,
4858 result.Raise("Can't update hosts file with new host data")
4859 _RedistributeAncillaryFiles(self)
4862 class _NodeQuery(_QueryBase):
4863 FIELDS = query.NODE_FIELDS
4865 def ExpandNames(self, lu):
4866 lu.needed_locks = {}
4867 lu.share_locks = _ShareAll()
4870 self.wanted = _GetWantedNodes(lu, self.names)
4872 self.wanted = locking.ALL_SET
4874 self.do_locking = (self.use_locking and
4875 query.NQ_LIVE in self.requested_data)
4878 # If any non-static field is requested we need to lock the nodes
4879 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4881 def DeclareLocks(self, lu, level):
4884 def _GetQueryData(self, lu):
4885 """Computes the list of nodes and their attributes.
4888 all_info = lu.cfg.GetAllNodesInfo()
4890 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4892 # Gather data as requested
4893 if query.NQ_LIVE in self.requested_data:
4894 # filter out non-vm_capable nodes
4895 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4897 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
4898 [lu.cfg.GetHypervisorType()])
4899 live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
4900 for (name, nresult) in node_data.items()
4901 if not nresult.fail_msg and nresult.payload)
4905 if query.NQ_INST in self.requested_data:
4906 node_to_primary = dict([(name, set()) for name in nodenames])
4907 node_to_secondary = dict([(name, set()) for name in nodenames])
4909 inst_data = lu.cfg.GetAllInstancesInfo()
4911 for inst in inst_data.values():
4912 if inst.primary_node in node_to_primary:
4913 node_to_primary[inst.primary_node].add(inst.name)
4914 for secnode in inst.secondary_nodes:
4915 if secnode in node_to_secondary:
4916 node_to_secondary[secnode].add(inst.name)
4918 node_to_primary = None
4919 node_to_secondary = None
4921 if query.NQ_OOB in self.requested_data:
4922 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4923 for name, node in all_info.iteritems())
4927 if query.NQ_GROUP in self.requested_data:
4928 groups = lu.cfg.GetAllNodeGroupsInfo()
4932 return query.NodeQueryData([all_info[name] for name in nodenames],
4933 live_data, lu.cfg.GetMasterNode(),
4934 node_to_primary, node_to_secondary, groups,
4935 oob_support, lu.cfg.GetClusterInfo())
4938 class LUNodeQuery(NoHooksLU):
4939 """Logical unit for querying nodes.
4942 # pylint: disable=W0142
4945 def CheckArguments(self):
4946 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4947 self.op.output_fields, self.op.use_locking)
4949 def ExpandNames(self):
4950 self.nq.ExpandNames(self)
4952 def DeclareLocks(self, level):
4953 self.nq.DeclareLocks(self, level)
4955 def Exec(self, feedback_fn):
4956 return self.nq.OldStyleQuery(self)
4959 class LUNodeQueryvols(NoHooksLU):
4960 """Logical unit for getting volumes on node(s).
4964 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4965 _FIELDS_STATIC = utils.FieldSet("node")
4967 def CheckArguments(self):
4968 _CheckOutputFields(static=self._FIELDS_STATIC,
4969 dynamic=self._FIELDS_DYNAMIC,
4970 selected=self.op.output_fields)
4972 def ExpandNames(self):
4973 self.share_locks = _ShareAll()
4974 self.needed_locks = {}
4976 if not self.op.nodes:
4977 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4979 self.needed_locks[locking.LEVEL_NODE] = \
4980 _GetWantedNodes(self, self.op.nodes)
4982 def Exec(self, feedback_fn):
4983 """Computes the list of nodes and their attributes.
4986 nodenames = self.owned_locks(locking.LEVEL_NODE)
4987 volumes = self.rpc.call_node_volumes(nodenames)
4989 ilist = self.cfg.GetAllInstancesInfo()
4990 vol2inst = _MapInstanceDisksToNodes(ilist.values())
4993 for node in nodenames:
4994 nresult = volumes[node]
4997 msg = nresult.fail_msg
4999 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5002 node_vols = sorted(nresult.payload,
5003 key=operator.itemgetter("dev"))
5005 for vol in node_vols:
5007 for field in self.op.output_fields:
5010 elif field == "phys":
5014 elif field == "name":
5016 elif field == "size":
5017 val = int(float(vol["size"]))
5018 elif field == "instance":
5019 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5021 raise errors.ParameterError(field)
5022 node_output.append(str(val))
5024 output.append(node_output)
return output
5029 class LUNodeQueryStorage(NoHooksLU):
5030 """Logical unit for getting information on storage units on node(s).
5033 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5036 def CheckArguments(self):
5037 _CheckOutputFields(static=self._FIELDS_STATIC,
5038 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5039 selected=self.op.output_fields)
5041 def ExpandNames(self):
5042 self.share_locks = _ShareAll()
5043 self.needed_locks = {}
5046 self.needed_locks[locking.LEVEL_NODE] = \
5047 _GetWantedNodes(self, self.op.nodes)
5049 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5051 def Exec(self, feedback_fn):
5052 """Computes the list of nodes and their attributes.
5055 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5057 # Always get name to sort by
5058 if constants.SF_NAME in self.op.output_fields:
5059 fields = self.op.output_fields[:]
5061 fields = [constants.SF_NAME] + self.op.output_fields
5063 # Never ask for node or type as it's only known to the LU
5064 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5065 while extra in fields:
5066 fields.remove(extra)
5068 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5069 name_idx = field_idx[constants.SF_NAME]
5071 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5072 data = self.rpc.call_storage_list(self.nodes,
5073 self.op.storage_type, st_args,
5074 self.op.name, fields)
5078 for node in utils.NiceSort(self.nodes):
5079 nresult = data[node]
5083 msg = nresult.fail_msg
5085 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5088 rows = dict([(row[name_idx], row) for row in nresult.payload])
5090 for name in utils.NiceSort(rows.keys()):
5095 for field in self.op.output_fields:
5096 if field == constants.SF_NODE:
5098 elif field == constants.SF_TYPE:
5099 val = self.op.storage_type
5100 elif field in field_idx:
5101 val = row[field_idx[field]]
5103 raise errors.ParameterError(field)
5112 class _InstanceQuery(_QueryBase):
5113 FIELDS = query.INSTANCE_FIELDS
5115 def ExpandNames(self, lu):
5116 lu.needed_locks = {}
5117 lu.share_locks = _ShareAll()
5120 self.wanted = _GetWantedInstances(lu, self.names)
5122 self.wanted = locking.ALL_SET
5124 self.do_locking = (self.use_locking and
5125 query.IQ_LIVE in self.requested_data)
5127 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5128 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5129 lu.needed_locks[locking.LEVEL_NODE] = []
5130 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5132 self.do_grouplocks = (self.do_locking and
5133 query.IQ_NODES in self.requested_data)
5135 def DeclareLocks(self, lu, level):
5137 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5138 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5140 # Lock all groups used by instances optimistically; this requires going
5141 # via the node before it's locked, requiring verification later on
5142 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5144 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5145 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5146 elif level == locking.LEVEL_NODE:
5147 lu._LockInstancesNodes() # pylint: disable=W0212
5150 def _CheckGroupLocks(lu):
5151 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5152 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5154 # Check if node groups for locked instances are still correct
5155 for instance_name in owned_instances:
5156 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5158 def _GetQueryData(self, lu):
5159 """Computes the list of instances and their attributes.
5162 if self.do_grouplocks:
5163 self._CheckGroupLocks(lu)
5165 cluster = lu.cfg.GetClusterInfo()
5166 all_info = lu.cfg.GetAllInstancesInfo()
5168 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5170 instance_list = [all_info[name] for name in instance_names]
5171 nodes = frozenset(itertools.chain(*(inst.all_nodes
5172 for inst in instance_list)))
5173 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5176 wrongnode_inst = set()
5178 # Gather data as requested
5179 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5181 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5183 result = node_data[name]
5185 # offline nodes will be in both lists
5186 assert result.fail_msg
5187 offline_nodes.append(name)
5189 bad_nodes.append(name)
5190 elif result.payload:
5191 for inst in result.payload:
5192 if inst in all_info:
5193 if all_info[inst].primary_node == name:
5194 live_data.update(result.payload)
5196 wrongnode_inst.add(inst)
5198 # orphan instance; we don't list it here as we don't
5199 # handle this case yet in the output of instance listing
5200 logging.warning("Orphan instance '%s' found on node %s",
5202 # else no instance is alive
5206 if query.IQ_DISKUSAGE in self.requested_data:
5207 disk_usage = dict((inst.name,
5208 _ComputeDiskSize(inst.disk_template,
5209 [{constants.IDISK_SIZE: disk.size}
5210 for disk in inst.disks]))
5211 for inst in instance_list)
5215 if query.IQ_CONSOLE in self.requested_data:
5217 for inst in instance_list:
5218 if inst.name in live_data:
5219 # Instance is running
5220 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5222 consinfo[inst.name] = None
5223 assert set(consinfo.keys()) == set(instance_names)
5227 if query.IQ_NODES in self.requested_data:
5228 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5230 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5231 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5232 for uuid in set(map(operator.attrgetter("group"),
5238 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5239 disk_usage, offline_nodes, bad_nodes,
5240 live_data, wrongnode_inst, consinfo,
5244 class LUQuery(NoHooksLU):
5245 """Query for resources/items of a certain kind.
5248 # pylint: disable=W0142
5251 def CheckArguments(self):
5252 qcls = _GetQueryImplementation(self.op.what)
5254 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5256 def ExpandNames(self):
5257 self.impl.ExpandNames(self)
5259 def DeclareLocks(self, level):
5260 self.impl.DeclareLocks(self, level)
5262 def Exec(self, feedback_fn):
5263 return self.impl.NewStyleQuery(self)
5266 class LUQueryFields(NoHooksLU):
5267 """Query for resources/items of a certain kind.
5270 # pylint: disable=W0142
5273 def CheckArguments(self):
5274 self.qcls = _GetQueryImplementation(self.op.what)
5276 def ExpandNames(self):
5277 self.needed_locks = {}
5279 def Exec(self, feedback_fn):
5280 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5283 class LUNodeModifyStorage(NoHooksLU):
5284 """Logical unit for modifying a storage volume on a node.
5289 def CheckArguments(self):
5290 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5292 storage_type = self.op.storage_type
5295 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5297 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5298 " modified" % storage_type,
5301 diff = set(self.op.changes.keys()) - modifiable
5303 raise errors.OpPrereqError("The following fields can not be modified for"
5304 " storage units of type '%s': %r" %
5305 (storage_type, list(diff)),
5308 def ExpandNames(self):
5309 self.needed_locks = {
5310 locking.LEVEL_NODE: self.op.node_name,
5313 def Exec(self, feedback_fn):
5314 """Computes the list of nodes and their attributes.
5317 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5318 result = self.rpc.call_storage_modify(self.op.node_name,
5319 self.op.storage_type, st_args,
5320 self.op.name, self.op.changes)
5321 result.Raise("Failed to modify storage unit '%s' on %s" %
5322 (self.op.name, self.op.node_name))
5325 class LUNodeAdd(LogicalUnit):
5326 """Logical unit for adding node to the cluster.
5330 HTYPE = constants.HTYPE_NODE
5331 _NFLAGS = ["master_capable", "vm_capable"]
5333 def CheckArguments(self):
5334 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5335 # validate/normalize the node name
5336 self.hostname = netutils.GetHostname(name=self.op.node_name,
5337 family=self.primary_ip_family)
5338 self.op.node_name = self.hostname.name
5340 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5341 raise errors.OpPrereqError("Cannot readd the master node",
5344 if self.op.readd and self.op.group:
5345 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5346 " being readded", errors.ECODE_INVAL)
5348 def BuildHooksEnv(self):
5351 This will run on all nodes before, and on all nodes + the new node after.
5355 "OP_TARGET": self.op.node_name,
5356 "NODE_NAME": self.op.node_name,
5357 "NODE_PIP": self.op.primary_ip,
5358 "NODE_SIP": self.op.secondary_ip,
5359 "MASTER_CAPABLE": str(self.op.master_capable),
5360 "VM_CAPABLE": str(self.op.vm_capable),
5363 def BuildHooksNodes(self):
5364 """Build hooks nodes.
5367 # Exclude added node
5368 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5369 post_nodes = pre_nodes + [self.op.node_name, ]
5371 return (pre_nodes, post_nodes)
5373 def CheckPrereq(self):
5374 """Check prerequisites.
5377 - the new node is not already in the config
5379 - its parameters (single/dual homed) match the cluster
5381 Any errors are signaled by raising errors.OpPrereqError.
5385 hostname = self.hostname
5386 node = hostname.name
5387 primary_ip = self.op.primary_ip = hostname.ip
5388 if self.op.secondary_ip is None:
5389 if self.primary_ip_family == netutils.IP6Address.family:
5390 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5391 " IPv4 address must be given as secondary",
5393 self.op.secondary_ip = primary_ip
5395 secondary_ip = self.op.secondary_ip
5396 if not netutils.IP4Address.IsValid(secondary_ip):
5397 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5398 " address" % secondary_ip, errors.ECODE_INVAL)
5400 node_list = cfg.GetNodeList()
5401 if not self.op.readd and node in node_list:
5402 raise errors.OpPrereqError("Node %s is already in the configuration" %
5403 node, errors.ECODE_EXISTS)
5404 elif self.op.readd and node not in node_list:
5405 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5408 self.changed_primary_ip = False
5410 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5411 if self.op.readd and node == existing_node_name:
5412 if existing_node.secondary_ip != secondary_ip:
5413 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5414 " address configuration as before",
5416 if existing_node.primary_ip != primary_ip:
5417 self.changed_primary_ip = True
5421 if (existing_node.primary_ip == primary_ip or
5422 existing_node.secondary_ip == primary_ip or
5423 existing_node.primary_ip == secondary_ip or
5424 existing_node.secondary_ip == secondary_ip):
5425 raise errors.OpPrereqError("New node ip address(es) conflict with"
5426 " existing node %s" % existing_node.name,
5427 errors.ECODE_NOTUNIQUE)
5429 # After this 'if' block, None is no longer a valid value for the
5430 # _capable op attributes
5432 old_node = self.cfg.GetNodeInfo(node)
5433 assert old_node is not None, "Can't retrieve locked node %s" % node
5434 for attr in self._NFLAGS:
5435 if getattr(self.op, attr) is None:
5436 setattr(self.op, attr, getattr(old_node, attr))
5438 for attr in self._NFLAGS:
5439 if getattr(self.op, attr) is None:
5440 setattr(self.op, attr, True)
5442 if self.op.readd and not self.op.vm_capable:
5443 pri, sec = cfg.GetNodeInstances(node)
5445 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5446 " flag set to false, but it already holds"
5447 " instances" % node,
5450 # check that the type of the node (single versus dual homed) is the
5451 # same as for the master
5452 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5453 master_singlehomed = myself.secondary_ip == myself.primary_ip
5454 newbie_singlehomed = secondary_ip == primary_ip
5455 if master_singlehomed != newbie_singlehomed:
5456 if master_singlehomed:
5457 raise errors.OpPrereqError("The master has no secondary ip but the"
5458 " new node has one",
5461 raise errors.OpPrereqError("The master has a secondary ip but the"
5462 " new node doesn't have one",
5465 # checks reachability
5466 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5467 raise errors.OpPrereqError("Node not reachable by ping",
5468 errors.ECODE_ENVIRON)
5470 if not newbie_singlehomed:
5471 # check reachability from my secondary ip to newbie's secondary ip
5472 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5473 source=myself.secondary_ip):
5474 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5475 " based ping to node daemon port",
5476 errors.ECODE_ENVIRON)
5483 if self.op.master_capable:
5484 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5486 self.master_candidate = False
5489 self.new_node = old_node
5491 node_group = cfg.LookupNodeGroup(self.op.group)
5492 self.new_node = objects.Node(name=node,
5493 primary_ip=primary_ip,
5494 secondary_ip=secondary_ip,
5495 master_candidate=self.master_candidate,
5496 offline=False, drained=False,
5499 if self.op.ndparams:
5500 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5502 if self.op.hv_state:
5503 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5505 if self.op.disk_state:
5506 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5508 def Exec(self, feedback_fn):
5509 """Adds the new node to the cluster.
5512 new_node = self.new_node
5513 node = new_node.name
5515 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5518 # We are adding a new node, so we assume it is powered
5519 new_node.powered = True
5521 # for re-adds, reset the offline/drained/master-candidate flags;
5522 # we need to reset here, otherwise offline would prevent RPC calls
5523 # later in the procedure; this also means that if the re-add
5524 # fails, we are left with a non-offlined, broken node
5526 new_node.drained = new_node.offline = False # pylint: disable=W0201
5527 self.LogInfo("Readding a node, the offline/drained flags were reset")
5528 # if we demote the node, we do cleanup later in the procedure
5529 new_node.master_candidate = self.master_candidate
5530 if self.changed_primary_ip:
5531 new_node.primary_ip = self.op.primary_ip
5533 # copy the master/vm_capable flags
5534 for attr in self._NFLAGS:
5535 setattr(new_node, attr, getattr(self.op, attr))
5537 # notify the user about any possible mc promotion
5538 if new_node.master_candidate:
5539 self.LogInfo("Node will be a master candidate")
5541 if self.op.ndparams:
5542 new_node.ndparams = self.op.ndparams
5544 new_node.ndparams = {}
5546 if self.op.hv_state:
5547 new_node.hv_state_static = self.new_hv_state
5549 if self.op.disk_state:
5550 new_node.disk_state_static = self.new_disk_state
5552 # check connectivity
5553 result = self.rpc.call_version([node])[node]
5554 result.Raise("Can't get version information from node %s" % node)
5555 if constants.PROTOCOL_VERSION == result.payload:
5556 logging.info("Communication to node %s fine, sw version %s match",
5557 node, result.payload)
5559 raise errors.OpExecError("Version mismatch master version %s,"
5560 " node version %s" %
5561 (constants.PROTOCOL_VERSION, result.payload))
5563 # Add node to our /etc/hosts, and add key to known_hosts
5564 if self.cfg.GetClusterInfo().modify_etc_hosts:
5565 master_node = self.cfg.GetMasterNode()
5566 result = self.rpc.call_etc_hosts_modify(master_node,
5567 constants.ETC_HOSTS_ADD,
5570 result.Raise("Can't update hosts file with new host data")
5572 if new_node.secondary_ip != new_node.primary_ip:
5573 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5576 node_verify_list = [self.cfg.GetMasterNode()]
5577 node_verify_param = {
5578 constants.NV_NODELIST: ([node], {}),
5579 # TODO: do a node-net-test as well?
5582 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5583 self.cfg.GetClusterName())
5584 for verifier in node_verify_list:
5585 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5586 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5588 for failed in nl_payload:
5589 feedback_fn("ssh/hostname verification failed"
5590 " (checking from %s): %s" %
5591 (verifier, nl_payload[failed]))
5592 raise errors.OpExecError("ssh/hostname verification failed")
5595 _RedistributeAncillaryFiles(self)
5596 self.context.ReaddNode(new_node)
5597 # make sure we redistribute the config
5598 self.cfg.Update(new_node, feedback_fn)
5599 # and make sure the new node will not have old files around
5600 if not new_node.master_candidate:
5601 result = self.rpc.call_node_demote_from_mc(new_node.name)
5602 msg = result.fail_msg
5604 self.LogWarning("Node failed to demote itself from master"
5605 " candidate status: %s" % msg)
5607 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5608 additional_vm=self.op.vm_capable)
5609 self.context.AddNode(new_node, self.proc.GetECId())
5612 class LUNodeSetParams(LogicalUnit):
5613 """Modifies the parameters of a node.
5615 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5616 to the node role (as _ROLE_*)
5617 @cvar _R2F: a dictionary from node role to tuples of flags
5618 @cvar _FLAGS: a list of attribute names corresponding to the flags
5621 HPATH = "node-modify"
5622 HTYPE = constants.HTYPE_NODE
5624 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5626 (True, False, False): _ROLE_CANDIDATE,
5627 (False, True, False): _ROLE_DRAINED,
5628 (False, False, True): _ROLE_OFFLINE,
5629 (False, False, False): _ROLE_REGULAR,
5631 _R2F = dict((v, k) for k, v in _F2R.items())
5632 _FLAGS = ["master_candidate", "drained", "offline"]
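# For illustration: _F2R maps the (master_candidate, drained, offline) flag
# tuple to a single role, e.g. (True, False, False) -> _ROLE_CANDIDATE, and
# _R2F is the inverse mapping used when writing a new role back onto the
# attributes named in _FLAGS.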
5634 def CheckArguments(self):
5635 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5636 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5637 self.op.master_capable, self.op.vm_capable,
5638 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5640 if all_mods.count(None) == len(all_mods):
5641 raise errors.OpPrereqError("Please pass at least one modification",
5643 if all_mods.count(True) > 1:
5644 raise errors.OpPrereqError("Can't set the node into more than one"
5645 " state at the same time",
5648 # Boolean value that tells us whether we might be demoting from MC
5649 self.might_demote = (self.op.master_candidate == False or
5650 self.op.offline == True or
5651 self.op.drained == True or
5652 self.op.master_capable == False)
5654 if self.op.secondary_ip:
5655 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5656 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5657 " address" % self.op.secondary_ip,
5660 self.lock_all = self.op.auto_promote and self.might_demote
5661 self.lock_instances = self.op.secondary_ip is not None
5663 def _InstanceFilter(self, instance):
5664 """Filter for getting affected instances.
5667 return (instance.disk_template in constants.DTS_INT_MIRROR and
5668 self.op.node_name in instance.all_nodes)
5670 def ExpandNames(self):
5672 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5674 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5676 # Since modifying a node can have severe effects on currently running
5677 # operations, the resource lock is at least acquired in shared mode
5678 self.needed_locks[locking.LEVEL_NODE_RES] = \
5679 self.needed_locks[locking.LEVEL_NODE]
5681 # Get node resource and instance locks in shared mode; they are not used
5682 # for anything but read-only access
5683 self.share_locks[locking.LEVEL_NODE_RES] = 1
5684 self.share_locks[locking.LEVEL_INSTANCE] = 1
5686 if self.lock_instances:
5687 self.needed_locks[locking.LEVEL_INSTANCE] = \
5688 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5690 def BuildHooksEnv(self):
5693 This runs on the master node.
5697 "OP_TARGET": self.op.node_name,
5698 "MASTER_CANDIDATE": str(self.op.master_candidate),
5699 "OFFLINE": str(self.op.offline),
5700 "DRAINED": str(self.op.drained),
5701 "MASTER_CAPABLE": str(self.op.master_capable),
5702 "VM_CAPABLE": str(self.op.vm_capable),
5705 def BuildHooksNodes(self):
5706 """Build hooks nodes.
5709 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5712 def CheckPrereq(self):
5713 """Check prerequisites.
5715 This only checks the instance list against the existing names.
5718 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5720 if self.lock_instances:
5721 affected_instances = \
5722 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5724 # Verify instance locks
5725 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5726 wanted_instances = frozenset(affected_instances.keys())
5727 if wanted_instances - owned_instances:
5728 raise errors.OpPrereqError("Instances affected by changing node %s's"
5729 " secondary IP address have changed since"
5730 " locks were acquired, wanted '%s', have"
5731 " '%s'; retry the operation" %
5733 utils.CommaJoin(wanted_instances),
5734 utils.CommaJoin(owned_instances)),
5737 affected_instances = None
5739 if (self.op.master_candidate is not None or
5740 self.op.drained is not None or
5741 self.op.offline is not None):
5742 # we can't change the master's node flags
5743 if self.op.node_name == self.cfg.GetMasterNode():
5744 raise errors.OpPrereqError("The master role can be changed"
5745 " only via master-failover",
5748 if self.op.master_candidate and not node.master_capable:
5749 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5750 " it a master candidate" % node.name,
5753 if self.op.vm_capable == False:
5754 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5756 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5757 " the vm_capable flag" % node.name,
5760 if node.master_candidate and self.might_demote and not self.lock_all:
5761 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5762 # check if after removing the current node, we're missing master
5764 (mc_remaining, mc_should, _) = \
5765 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5766 if mc_remaining < mc_should:
5767 raise errors.OpPrereqError("Not enough master candidates, please"
5768 " pass auto promote option to allow"
5769 " promotion", errors.ECODE_STATE)
5771 self.old_flags = old_flags = (node.master_candidate,
5772 node.drained, node.offline)
5773 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5774 self.old_role = old_role = self._F2R[old_flags]
5776 # Check for ineffective changes
5777 for attr in self._FLAGS:
5778 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5779 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5780 setattr(self.op, attr, None)
5782 # Past this point, any flag change to False means a transition
5783 # away from the respective state, as only real changes are kept
5785 # TODO: We might query the real power state if it supports OOB
5786 if _SupportsOob(self.cfg, node):
5787 if self.op.offline is False and not (node.powered or
5788 self.op.powered == True):
5789 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5790 " offline status can be reset") %
5792 elif self.op.powered is not None:
5793 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5794 " as it does not support out-of-band"
5795 " handling") % self.op.node_name)
5797 # If we're being de-offlined/un-drained, we'll promote ourselves to MC if needed
5798 if (self.op.drained == False or self.op.offline == False or
5799 (self.op.master_capable and not node.master_capable)):
5800 if _DecideSelfPromotion(self):
5801 self.op.master_candidate = True
5802 self.LogInfo("Auto-promoting node to master candidate")
5804 # If we're no longer master capable, we'll demote ourselves from MC
5805 if self.op.master_capable == False and node.master_candidate:
5806 self.LogInfo("Demoting from master candidate")
5807 self.op.master_candidate = False
5810 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5811 if self.op.master_candidate:
5812 new_role = self._ROLE_CANDIDATE
5813 elif self.op.drained:
5814 new_role = self._ROLE_DRAINED
5815 elif self.op.offline:
5816 new_role = self._ROLE_OFFLINE
5817 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5818 # False is still in new flags, which means we're un-setting (the
5819 # only) True flag
5820 new_role = self._ROLE_REGULAR
5821 else: # no new flags, nothing, keep old role
5822 new_role = old_role
5824 self.new_role = new_role
5826 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5827 # Trying to transition out of offline status
5828 # TODO: Use standard RPC runner, but make sure it works when the node is
5829 # still marked offline
5830 result = rpc.BootstrapRunner().call_version([node.name])[node.name]
5832 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5833 " to report its version: %s" %
5834 (node.name, result.fail_msg),
5837 self.LogWarning("Transitioning node from offline to online state"
5838 " without using re-add. Please make sure the node"
5841 if self.op.secondary_ip:
5842 # Ok even without locking, because this can't be changed by any LU
5843 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5844 master_singlehomed = master.secondary_ip == master.primary_ip
5845 if master_singlehomed and self.op.secondary_ip:
5846 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5847 " homed cluster", errors.ECODE_INVAL)
5849 assert not (frozenset(affected_instances) -
5850 self.owned_locks(locking.LEVEL_INSTANCE))
5853 if affected_instances:
5854 raise errors.OpPrereqError("Cannot change secondary IP address:"
5855 " offline node has instances (%s)"
5856 " configured to use it" %
5857 utils.CommaJoin(affected_instances.keys()))
5859 # On online nodes, check that no instances are running, and that
5860 # the node has the new ip and we can reach it.
5861 for instance in affected_instances.values():
5862 _CheckInstanceState(self, instance, INSTANCE_DOWN,
5863 msg="cannot change secondary ip")
5865 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5866 if master.name != node.name:
5867 # check reachability from master secondary ip to new secondary ip
5868 if not netutils.TcpPing(self.op.secondary_ip,
5869 constants.DEFAULT_NODED_PORT,
5870 source=master.secondary_ip):
5871 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5872 " based ping to node daemon port",
5873 errors.ECODE_ENVIRON)
5875 if self.op.ndparams:
5876 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5877 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5878 self.new_ndparams = new_ndparams
5880 if self.op.hv_state:
5881 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
5882 self.node.hv_state_static)
5884 if self.op.disk_state:
5885 self.new_disk_state = \
5886 _MergeAndVerifyDiskState(self.op.disk_state,
5887 self.node.disk_state_static)
5889 def Exec(self, feedback_fn):
5894 old_role = self.old_role
5895 new_role = self.new_role
5899 if self.op.ndparams:
5900 node.ndparams = self.new_ndparams
5902 if self.op.powered is not None:
5903 node.powered = self.op.powered
5905 if self.op.hv_state:
5906 node.hv_state_static = self.new_hv_state
5908 if self.op.disk_state:
5909 node.disk_state_static = self.new_disk_state
5911 for attr in ["master_capable", "vm_capable"]:
5912 val = getattr(self.op, attr)
5914 setattr(node, attr, val)
5915 result.append((attr, str(val)))
5917 if new_role != old_role:
5918 # Tell the node to demote itself, if no longer MC and not offline
5919 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5920 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5922 self.LogWarning("Node failed to demote itself: %s", msg)
5924 new_flags = self._R2F[new_role]
5925 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5927 result.append((desc, str(nf)))
5928 (node.master_candidate, node.drained, node.offline) = new_flags
5930 # we locked all nodes, we adjust the CP before updating this node
5932 _AdjustCandidatePool(self, [node.name])
5934 if self.op.secondary_ip:
5935 node.secondary_ip = self.op.secondary_ip
5936 result.append(("secondary_ip", self.op.secondary_ip))
5938 # this will trigger configuration file update, if needed
5939 self.cfg.Update(node, feedback_fn)
5941 # this will trigger job queue propagation or cleanup if the mc
5942 # flag changed
5943 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5944 self.context.ReaddNode(node)
5949 class LUNodePowercycle(NoHooksLU):
5950 """Powercycles a node.
5955 def CheckArguments(self):
5956 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5957 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5958 raise errors.OpPrereqError("The node is the master and the force"
5959 " parameter was not set",
5962 def ExpandNames(self):
5963 """Locking for PowercycleNode.
5965 This is a last-resort option and shouldn't block on other
5966 jobs. Therefore, we grab no locks.
5969 self.needed_locks = {}
5971 def Exec(self, feedback_fn):
5975 result = self.rpc.call_node_powercycle(self.op.node_name,
5976 self.cfg.GetHypervisorType())
5977 result.Raise("Failed to schedule the reboot")
5978 return result.payload
5981 class LUClusterQuery(NoHooksLU):
5982 """Query cluster configuration.
5987 def ExpandNames(self):
5988 self.needed_locks = {}
5990 def Exec(self, feedback_fn):
5991 """Return cluster config.
5994 cluster = self.cfg.GetClusterInfo()
5996 os_hvp = {}
5997 # Filter just for enabled hypervisors
5998 for os_name, hv_dict in cluster.os_hvp.items():
5999 os_hvp[os_name] = {}
6000 for hv_name, hv_params in hv_dict.items():
6001 if hv_name in cluster.enabled_hypervisors:
6002 os_hvp[os_name][hv_name] = hv_params
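# Illustrative note (not part of the original code): after the loop above,
# os_hvp maps OS names to per-hypervisor parameter dicts restricted to the
# enabled hypervisors, e.g. (hypothetical values):
#   {"debian-image": {"kvm": {"kernel_path": "..."}}}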
6004 # Convert ip_family to ip_version
6005 primary_ip_version = constants.IP4_VERSION
6006 if cluster.primary_ip_family == netutils.IP6Address.family:
6007 primary_ip_version = constants.IP6_VERSION
6010 "software_version": constants.RELEASE_VERSION,
6011 "protocol_version": constants.PROTOCOL_VERSION,
6012 "config_version": constants.CONFIG_VERSION,
6013 "os_api_version": max(constants.OS_API_VERSIONS),
6014 "export_version": constants.EXPORT_VERSION,
6015 "architecture": (platform.architecture()[0], platform.machine()),
6016 "name": cluster.cluster_name,
6017 "master": cluster.master_node,
6018 "default_hypervisor": cluster.primary_hypervisor,
6019 "enabled_hypervisors": cluster.enabled_hypervisors,
6020 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6021 for hypervisor_name in cluster.enabled_hypervisors]),
6023 "beparams": cluster.beparams,
6024 "osparams": cluster.osparams,
6025 "ipolicy": cluster.ipolicy,
6026 "nicparams": cluster.nicparams,
6027 "ndparams": cluster.ndparams,
6028 "candidate_pool_size": cluster.candidate_pool_size,
6029 "master_netdev": cluster.master_netdev,
6030 "master_netmask": cluster.master_netmask,
6031 "use_external_mip_script": cluster.use_external_mip_script,
6032 "volume_group_name": cluster.volume_group_name,
6033 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6034 "file_storage_dir": cluster.file_storage_dir,
6035 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6036 "maintain_node_health": cluster.maintain_node_health,
6037 "ctime": cluster.ctime,
6038 "mtime": cluster.mtime,
6039 "uuid": cluster.uuid,
6040 "tags": list(cluster.GetTags()),
6041 "uid_pool": cluster.uid_pool,
6042 "default_iallocator": cluster.default_iallocator,
6043 "reserved_lvs": cluster.reserved_lvs,
6044 "primary_ip_version": primary_ip_version,
6045 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6046 "hidden_os": cluster.hidden_os,
6047 "blacklisted_os": cluster.blacklisted_os,
6053 class LUClusterConfigQuery(NoHooksLU):
6054 """Return configuration values.
6058 _FIELDS_DYNAMIC = utils.FieldSet()
6059 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
6060 "watcher_pause", "volume_group_name")
6062 def CheckArguments(self):
6063 _CheckOutputFields(static=self._FIELDS_STATIC,
6064 dynamic=self._FIELDS_DYNAMIC,
6065 selected=self.op.output_fields)
6067 def ExpandNames(self):
6068 self.needed_locks = {}
6070 def Exec(self, feedback_fn):
6071 """Dump a representation of the cluster config to the standard output.
6075 for field in self.op.output_fields:
6076 if field == "cluster_name":
6077 entry = self.cfg.GetClusterName()
6078 elif field == "master_node":
6079 entry = self.cfg.GetMasterNode()
6080 elif field == "drain_flag":
6081 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
6082 elif field == "watcher_pause":
6083 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
6084 elif field == "volume_group_name":
6085 entry = self.cfg.GetVGName()
6087 raise errors.ParameterError(field)
6088 values.append(entry)
6092 class LUInstanceActivateDisks(NoHooksLU):
6093 """Bring up an instance's disks.
6098 def ExpandNames(self):
6099 self._ExpandAndLockInstance()
6100 self.needed_locks[locking.LEVEL_NODE] = []
6101 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6103 def DeclareLocks(self, level):
6104 if level == locking.LEVEL_NODE:
6105 self._LockInstancesNodes()
6107 def CheckPrereq(self):
6108 """Check prerequisites.
6110 This checks that the instance is in the cluster.
6113 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6114 assert self.instance is not None, \
6115 "Cannot retrieve locked instance %s" % self.op.instance_name
6116 _CheckNodeOnline(self, self.instance.primary_node)
6118 def Exec(self, feedback_fn):
6119 """Activate the disks.
6122 disks_ok, disks_info = \
6123 _AssembleInstanceDisks(self, self.instance,
6124 ignore_size=self.op.ignore_size)
6126 raise errors.OpExecError("Cannot activate block devices")
6131 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6133 """Prepare the block devices for an instance.
6135 This sets up the block devices on all nodes.
6137 @type lu: L{LogicalUnit}
6138 @param lu: the logical unit on whose behalf we execute
6139 @type instance: L{objects.Instance}
6140 @param instance: the instance for whose disks we assemble
6141 @type disks: list of L{objects.Disk} or None
6142 @param disks: which disks to assemble (or all, if None)
6143 @type ignore_secondaries: boolean
6144 @param ignore_secondaries: if true, errors on secondary nodes
6145 won't result in an error return from the function
6146 @type ignore_size: boolean
6147 @param ignore_size: if true, the current known size of the disk
6148 will not be used during the disk activation, useful for cases
6149 when the size is wrong
6150 @return: False if the operation failed, otherwise a list of
6151 (host, instance_visible_name, node_visible_name)
6152 with the mapping from node devices to instance devices
6157 iname = instance.name
6158 disks = _ExpandCheckDisks(instance, disks)
6160 # With the two-pass mechanism we try to reduce the window of
6161 # opportunity for the race condition of switching DRBD to primary
6162 # before handshaking occurred, but we do not eliminate it
6164 # The proper fix would be to wait (with some limits) until the
6165 # connection has been made and drbd transitions from WFConnection
6166 # into any other network-connected state (Connected, SyncTarget,
6169 # 1st pass, assemble on all nodes in secondary mode
6170 for idx, inst_disk in enumerate(disks):
6171 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6173 node_disk = node_disk.Copy()
6174 node_disk.UnsetSize()
6175 lu.cfg.SetDiskID(node_disk, node)
6176 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
6177 msg = result.fail_msg
6179 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6180 " (is_primary=False, pass=1): %s",
6181 inst_disk.iv_name, node, msg)
6182 if not ignore_secondaries:
6185 # FIXME: race condition on drbd migration to primary
6187 # 2nd pass, do only the primary node
6188 for idx, inst_disk in enumerate(disks):
6191 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6192 if node != instance.primary_node:
6195 node_disk = node_disk.Copy()
6196 node_disk.UnsetSize()
6197 lu.cfg.SetDiskID(node_disk, node)
6198 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
6199 msg = result.fail_msg
6201 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6202 " (is_primary=True, pass=2): %s",
6203 inst_disk.iv_name, node, msg)
6206 dev_path = result.payload
6208 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6210 # leave the disks configured for the primary node
6211 # this is a workaround that would be fixed better by
6212 # improving the logical/physical id handling
6214 lu.cfg.SetDiskID(disk, instance.primary_node)
6216 return disks_ok, device_info
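# Illustrative note (not part of the original code): callers typically unpack
# the return value of _AssembleInstanceDisks as shown below; device_info only
# describes devices on the primary node (hypothetical values):
#   disks_ok, device_info = _AssembleInstanceDisks(lu, instance)
#   # device_info == [("node1.example.com", "disk/0", "/dev/drbd0"), ...]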
6219 def _StartInstanceDisks(lu, instance, force):
6220 """Start the disks of an instance.
6223 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6224 ignore_secondaries=force)
6226 _ShutdownInstanceDisks(lu, instance)
6227 if force is not None and not force:
6228 lu.proc.LogWarning("", hint="If the message above refers to a"
6230 " you can retry the operation using '--force'.")
6231 raise errors.OpExecError("Disk consistency error")
6234 class LUInstanceDeactivateDisks(NoHooksLU):
6235 """Shutdown an instance's disks.
6240 def ExpandNames(self):
6241 self._ExpandAndLockInstance()
6242 self.needed_locks[locking.LEVEL_NODE] = []
6243 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6245 def DeclareLocks(self, level):
6246 if level == locking.LEVEL_NODE:
6247 self._LockInstancesNodes()
6249 def CheckPrereq(self):
6250 """Check prerequisites.
6252 This checks that the instance is in the cluster.
6255 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6256 assert self.instance is not None, \
6257 "Cannot retrieve locked instance %s" % self.op.instance_name
6259 def Exec(self, feedback_fn):
6260 """Deactivate the disks
6263 instance = self.instance
6265 _ShutdownInstanceDisks(self, instance)
6267 _SafeShutdownInstanceDisks(self, instance)
6270 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6271 """Shutdown block devices of an instance.
6273 This function checks that the instance is not running before calling
6274 _ShutdownInstanceDisks.
6277 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6278 _ShutdownInstanceDisks(lu, instance, disks=disks)
6281 def _ExpandCheckDisks(instance, disks):
6282 """Return the instance disks selected by the disks list
6284 @type disks: list of L{objects.Disk} or None
6285 @param disks: selected disks
6286 @rtype: list of L{objects.Disk}
6287 @return: selected instance disks to act on
6291 return instance.disks
6293 if not set(disks).issubset(instance.disks):
6294 raise errors.ProgrammerError("Can only act on disks belonging to the"
6299 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6300 """Shutdown block devices of an instance.
6302 This does the shutdown on all nodes of the instance.
6304 If the ignore_primary is false, errors on the primary node are
6309 disks = _ExpandCheckDisks(instance, disks)
6312 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6313 lu.cfg.SetDiskID(top_disk, node)
6314 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
6315 msg = result.fail_msg
6317 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6318 disk.iv_name, node, msg)
6319 if ((node == instance.primary_node and not ignore_primary) or
6320 (node != instance.primary_node and not result.offline)):
6325 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6326 """Checks if a node has enough free memory.
6328 This function checks if a given node has the needed amount of free
6329 memory. In case the node has less memory or we cannot get the
6330 information from the node, this function raises an OpPrereqError
6333 @type lu: C{LogicalUnit}
6334 @param lu: a logical unit from which we get configuration data
6336 @param node: the node to check
6337 @type reason: C{str}
6338 @param reason: string to use in the error message
6339 @type requested: C{int}
6340 @param requested: the amount of memory in MiB to check for
6341 @type hypervisor_name: C{str}
6342 @param hypervisor_name: the hypervisor to ask for memory stats
6343 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6344 we cannot check the node
6347 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6348 nodeinfo[node].Raise("Can't get data from node %s" % node,
6349 prereq=True, ecode=errors.ECODE_ENVIRON)
6350 (_, _, (hv_info, )) = nodeinfo[node].payload
6352 free_mem = hv_info.get("memory_free", None)
6353 if not isinstance(free_mem, int):
6354 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6355 " was '%s'" % (node, free_mem),
6356 errors.ECODE_ENVIRON)
6357 if requested > free_mem:
6358 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6359 " needed %s MiB, available %s MiB" %
6360 (node, reason, requested, free_mem),
6364 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6365 """Checks if nodes have enough free disk space in the all VGs.
6367 This function check if all given nodes have the needed amount of
6368 free disk. In case any node has less disk or we cannot get the
6369 information from the node, this function raise an OpPrereqError
6372 @type lu: C{LogicalUnit}
6373 @param lu: a logical unit from which we get configuration data
6374 @type nodenames: C{list}
6375 @param nodenames: the list of node names to check
6376 @type req_sizes: C{dict}
6377 @param req_sizes: the hash of vg and corresponding amount of disk in
6379 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6380 or we cannot check the node
6383 for vg, req_size in req_sizes.items():
6384 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
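# A minimal sketch (not part of the original module; names are hypothetical)
# showing the expected shape of req_sizes: a plain mapping from VG name to the
# total amount of disk requested on it, in MiB.
def _ExampleDiskSizesPerVG(disk_specs):
  """Sums requested disk sizes (MiB) per volume group.

  @param disk_specs: iterable of (vg_name, size_mib) tuples
  @return: dict suitable as a req_sizes argument

  """
  req_sizes = {}
  for vg, size in disk_specs:
    req_sizes[vg] = req_sizes.get(vg, 0) + size
  return req_sizes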
6387 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6388 """Checks if nodes have enough free disk space in the specified VG.
6390 This function checks if all given nodes have the needed amount of
6391 free disk. In case any node has less disk or we cannot get the
6392 information from the node, this function raises an OpPrereqError
6395 @type lu: C{LogicalUnit}
6396 @param lu: a logical unit from which we get configuration data
6397 @type nodenames: C{list}
6398 @param nodenames: the list of node names to check
6400 @param vg: the volume group to check
6401 @type requested: C{int}
6402 @param requested: the amount of disk in MiB to check for
6403 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6404 or we cannot check the node
6407 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6408 for node in nodenames:
6409 info = nodeinfo[node]
6410 info.Raise("Cannot get current information from node %s" % node,
6411 prereq=True, ecode=errors.ECODE_ENVIRON)
6412 (_, (vg_info, ), _) = info.payload
6413 vg_free = vg_info.get("vg_free", None)
6414 if not isinstance(vg_free, int):
6415 raise errors.OpPrereqError("Can't compute free disk space on node"
6416 " %s for vg %s, result was '%s'" %
6417 (node, vg, vg_free), errors.ECODE_ENVIRON)
6418 if requested > vg_free:
6419 raise errors.OpPrereqError("Not enough disk space on target node %s"
6420 " vg %s: required %d MiB, available %d MiB" %
6421 (node, vg, requested, vg_free),
6425 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6426 """Checks if nodes have enough physical CPUs
6428 This function checks if all given nodes have the needed number of
6429 physical CPUs. In case any node has fewer CPUs or we cannot get the
6430 information from the node, this function raises an OpPrereqError
6433 @type lu: C{LogicalUnit}
6434 @param lu: a logical unit from which we get configuration data
6435 @type nodenames: C{list}
6436 @param nodenames: the list of node names to check
6437 @type requested: C{int}
6438 @param requested: the minimum acceptable number of physical CPUs
6439 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6440 or we cannot check the node
6443 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6444 for node in nodenames:
6445 info = nodeinfo[node]
6446 info.Raise("Cannot get current information from node %s" % node,
6447 prereq=True, ecode=errors.ECODE_ENVIRON)
6448 (_, _, (hv_info, )) = info.payload
6449 num_cpus = hv_info.get("cpu_total", None)
6450 if not isinstance(num_cpus, int):
6451 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6452 " on node %s, result was '%s'" %
6453 (node, num_cpus), errors.ECODE_ENVIRON)
6454 if requested > num_cpus:
6455 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6456 "required" % (node, num_cpus, requested),
6460 class LUInstanceStartup(LogicalUnit):
6461 """Starts an instance.
6464 HPATH = "instance-start"
6465 HTYPE = constants.HTYPE_INSTANCE
6468 def CheckArguments(self):
6470 if self.op.beparams:
6471 # fill the beparams dict
6472 objects.UpgradeBeParams(self.op.beparams)
6473 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6475 def ExpandNames(self):
6476 self._ExpandAndLockInstance()
6478 def BuildHooksEnv(self):
6481 This runs on master, primary and secondary nodes of the instance.
6485 "FORCE": self.op.force,
6488 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6492 def BuildHooksNodes(self):
6493 """Build hooks nodes.
6496 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6499 def CheckPrereq(self):
6500 """Check prerequisites.
6502 This checks that the instance is in the cluster.
6505 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6506 assert self.instance is not None, \
6507 "Cannot retrieve locked instance %s" % self.op.instance_name
6510 if self.op.hvparams:
6511 # check hypervisor parameter syntax (locally)
6512 cluster = self.cfg.GetClusterInfo()
6513 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6514 filled_hvp = cluster.FillHV(instance)
6515 filled_hvp.update(self.op.hvparams)
6516 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6517 hv_type.CheckParameterSyntax(filled_hvp)
6518 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
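# Illustrative note (not part of the original code): the effective hypervisor
# parameters checked above are layered as
#   cluster defaults -> instance hvparams (cluster.FillHV) -> self.op.hvparams
# with the opcode-supplied overrides applied last via dict.update().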
6520 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6522 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6524 if self.primary_offline and self.op.ignore_offline_nodes:
6525 self.proc.LogWarning("Ignoring offline primary node")
6527 if self.op.hvparams or self.op.beparams:
6528 self.proc.LogWarning("Overridden parameters are ignored")
6530 _CheckNodeOnline(self, instance.primary_node)
6532 bep = self.cfg.GetClusterInfo().FillBE(instance)
6534 # check bridges existence
6535 _CheckInstanceBridgesExist(self, instance)
6537 remote_info = self.rpc.call_instance_info(instance.primary_node,
6539 instance.hypervisor)
6540 remote_info.Raise("Error checking node %s" % instance.primary_node,
6541 prereq=True, ecode=errors.ECODE_ENVIRON)
6542 if not remote_info.payload: # not running already
6543 _CheckNodeFreeMemory(self, instance.primary_node,
6544 "starting instance %s" % instance.name,
6545 bep[constants.BE_MAXMEM], instance.hypervisor)
6547 def Exec(self, feedback_fn):
6548 """Start the instance.
6551 instance = self.instance
6552 force = self.op.force
6554 if not self.op.no_remember:
6555 self.cfg.MarkInstanceUp(instance.name)
6557 if self.primary_offline:
6558 assert self.op.ignore_offline_nodes
6559 self.proc.LogInfo("Primary node offline, marked instance as started")
6561 node_current = instance.primary_node
6563 _StartInstanceDisks(self, instance, force)
6566 self.rpc.call_instance_start(node_current,
6567 (instance, self.op.hvparams,
6569 self.op.startup_paused)
6570 msg = result.fail_msg
6572 _ShutdownInstanceDisks(self, instance)
6573 raise errors.OpExecError("Could not start instance: %s" % msg)
6576 class LUInstanceReboot(LogicalUnit):
6577 """Reboot an instance.
6580 HPATH = "instance-reboot"
6581 HTYPE = constants.HTYPE_INSTANCE
6584 def ExpandNames(self):
6585 self._ExpandAndLockInstance()
6587 def BuildHooksEnv(self):
6590 This runs on master, primary and secondary nodes of the instance.
6594 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6595 "REBOOT_TYPE": self.op.reboot_type,
6596 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6599 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6603 def BuildHooksNodes(self):
6604 """Build hooks nodes.
6607 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6610 def CheckPrereq(self):
6611 """Check prerequisites.
6613 This checks that the instance is in the cluster.
6616 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6617 assert self.instance is not None, \
6618 "Cannot retrieve locked instance %s" % self.op.instance_name
6619 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6620 _CheckNodeOnline(self, instance.primary_node)
6622 # check bridges existence
6623 _CheckInstanceBridgesExist(self, instance)
6625 def Exec(self, feedback_fn):
6626 """Reboot the instance.
6629 instance = self.instance
6630 ignore_secondaries = self.op.ignore_secondaries
6631 reboot_type = self.op.reboot_type
6633 remote_info = self.rpc.call_instance_info(instance.primary_node,
6635 instance.hypervisor)
6636 remote_info.Raise("Error checking node %s" % instance.primary_node)
6637 instance_running = bool(remote_info.payload)
6639 node_current = instance.primary_node
6641 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6642 constants.INSTANCE_REBOOT_HARD]:
6643 for disk in instance.disks:
6644 self.cfg.SetDiskID(disk, node_current)
6645 result = self.rpc.call_instance_reboot(node_current, instance,
6647 self.op.shutdown_timeout)
6648 result.Raise("Could not reboot instance")
6650 if instance_running:
6651 result = self.rpc.call_instance_shutdown(node_current, instance,
6652 self.op.shutdown_timeout)
6653 result.Raise("Could not shutdown instance for full reboot")
6654 _ShutdownInstanceDisks(self, instance)
6656 self.LogInfo("Instance %s was already stopped, starting now",
6658 _StartInstanceDisks(self, instance, ignore_secondaries)
6659 result = self.rpc.call_instance_start(node_current,
6660 (instance, None, None), False)
6661 msg = result.fail_msg
6663 _ShutdownInstanceDisks(self, instance)
6664 raise errors.OpExecError("Could not start instance for"
6665 " full reboot: %s" % msg)
6667 self.cfg.MarkInstanceUp(instance.name)
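# Illustrative note (not part of the original code): the reboot path above
# boils down to two cases, roughly:
#   - soft/hard reboot of a running instance -> call_instance_reboot on the
#     primary node
#   - full reboot (or instance not running) -> shutdown + deactivate disks,
#     then reactivate disks + call_instance_start
# and the instance is marked up in the configuration afterwards.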
6670 class LUInstanceShutdown(LogicalUnit):
6671 """Shutdown an instance.
6674 HPATH = "instance-stop"
6675 HTYPE = constants.HTYPE_INSTANCE
6678 def ExpandNames(self):
6679 self._ExpandAndLockInstance()
6681 def BuildHooksEnv(self):
6684 This runs on master, primary and secondary nodes of the instance.
6687 env = _BuildInstanceHookEnvByObject(self, self.instance)
6688 env["TIMEOUT"] = self.op.timeout
6691 def BuildHooksNodes(self):
6692 """Build hooks nodes.
6695 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6698 def CheckPrereq(self):
6699 """Check prerequisites.
6701 This checks that the instance is in the cluster.
6704 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6705 assert self.instance is not None, \
6706 "Cannot retrieve locked instance %s" % self.op.instance_name
6708 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6710 self.primary_offline = \
6711 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6713 if self.primary_offline and self.op.ignore_offline_nodes:
6714 self.proc.LogWarning("Ignoring offline primary node")
6716 _CheckNodeOnline(self, self.instance.primary_node)
6718 def Exec(self, feedback_fn):
6719 """Shutdown the instance.
6722 instance = self.instance
6723 node_current = instance.primary_node
6724 timeout = self.op.timeout
6726 if not self.op.no_remember:
6727 self.cfg.MarkInstanceDown(instance.name)
6729 if self.primary_offline:
6730 assert self.op.ignore_offline_nodes
6731 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6733 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6734 msg = result.fail_msg
6736 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6738 _ShutdownInstanceDisks(self, instance)
6741 class LUInstanceReinstall(LogicalUnit):
6742 """Reinstall an instance.
6745 HPATH = "instance-reinstall"
6746 HTYPE = constants.HTYPE_INSTANCE
6749 def ExpandNames(self):
6750 self._ExpandAndLockInstance()
6752 def BuildHooksEnv(self):
6755 This runs on master, primary and secondary nodes of the instance.
6758 return _BuildInstanceHookEnvByObject(self, self.instance)
6760 def BuildHooksNodes(self):
6761 """Build hooks nodes.
6764 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6767 def CheckPrereq(self):
6768 """Check prerequisites.
6770 This checks that the instance is in the cluster and is not running.
6773 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6774 assert instance is not None, \
6775 "Cannot retrieve locked instance %s" % self.op.instance_name
6776 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6777 " offline, cannot reinstall")
6778 for node in instance.secondary_nodes:
6779 _CheckNodeOnline(self, node, "Instance secondary node offline,"
6780 " cannot reinstall")
6782 if instance.disk_template == constants.DT_DISKLESS:
6783 raise errors.OpPrereqError("Instance '%s' has no disks" %
6784 self.op.instance_name,
6786 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
6788 if self.op.os_type is not None:
6790 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6791 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6792 instance_os = self.op.os_type
6794 instance_os = instance.os
6796 nodelist = list(instance.all_nodes)
6798 if self.op.osparams:
6799 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6800 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6801 self.os_inst = i_osdict # the new dict (without defaults)
6805 self.instance = instance
6807 def Exec(self, feedback_fn):
6808 """Reinstall the instance.
6811 inst = self.instance
6813 if self.op.os_type is not None:
6814 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6815 inst.os = self.op.os_type
6816 # Write to configuration
6817 self.cfg.Update(inst, feedback_fn)
6819 _StartInstanceDisks(self, inst, None)
6821 feedback_fn("Running the instance OS create scripts...")
6822 # FIXME: pass debug option from opcode to backend
6823 result = self.rpc.call_instance_os_add(inst.primary_node,
6824 (inst, self.os_inst), True,
6825 self.op.debug_level)
6826 result.Raise("Could not install OS for instance %s on node %s" %
6827 (inst.name, inst.primary_node))
6829 _ShutdownInstanceDisks(self, inst)
6832 class LUInstanceRecreateDisks(LogicalUnit):
6833 """Recreate an instance's missing disks.
6836 HPATH = "instance-recreate-disks"
6837 HTYPE = constants.HTYPE_INSTANCE
6840 def CheckArguments(self):
6841 # normalise the disk list
6842 self.op.disks = sorted(frozenset(self.op.disks))
6844 def ExpandNames(self):
6845 self._ExpandAndLockInstance()
6846 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6848 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6849 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6851 self.needed_locks[locking.LEVEL_NODE] = []
6853 def DeclareLocks(self, level):
6854 if level == locking.LEVEL_NODE:
6855 # if we replace the nodes, we only need to lock the old primary,
6856 # otherwise we need to lock all nodes for disk re-creation
6857 primary_only = bool(self.op.nodes)
6858 self._LockInstancesNodes(primary_only=primary_only)
6859 elif level == locking.LEVEL_NODE_RES:
6861 self.needed_locks[locking.LEVEL_NODE_RES] = \
6862 self.needed_locks[locking.LEVEL_NODE][:]
6864 def BuildHooksEnv(self):
6867 This runs on master, primary and secondary nodes of the instance.
6870 return _BuildInstanceHookEnvByObject(self, self.instance)
6872 def BuildHooksNodes(self):
6873 """Build hooks nodes.
6876 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6879 def CheckPrereq(self):
6880 """Check prerequisites.
6882 This checks that the instance is in the cluster and is not running.
6885 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6886 assert instance is not None, \
6887 "Cannot retrieve locked instance %s" % self.op.instance_name
6889 if len(self.op.nodes) != len(instance.all_nodes):
6890 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6891 " %d replacement nodes were specified" %
6892 (instance.name, len(instance.all_nodes),
6893 len(self.op.nodes)),
6895 assert instance.disk_template != constants.DT_DRBD8 or \
6896 len(self.op.nodes) == 2
6897 assert instance.disk_template != constants.DT_PLAIN or \
6898 len(self.op.nodes) == 1
6899 primary_node = self.op.nodes[0]
6901 primary_node = instance.primary_node
6902 _CheckNodeOnline(self, primary_node)
6904 if instance.disk_template == constants.DT_DISKLESS:
6905 raise errors.OpPrereqError("Instance '%s' has no disks" %
6906 self.op.instance_name, errors.ECODE_INVAL)
6907 # if we replace nodes *and* the old primary is offline, we don't
6909 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
6910 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
6911 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6912 if not (self.op.nodes and old_pnode.offline):
6913 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
6914 msg="cannot recreate disks")
6916 if not self.op.disks:
6917 self.op.disks = range(len(instance.disks))
6919 for idx in self.op.disks:
6920 if idx >= len(instance.disks):
6921 raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6923 if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6924 raise errors.OpPrereqError("Can't recreate disks partially and"
6925 " change the nodes at the same time",
6927 self.instance = instance
6929 def Exec(self, feedback_fn):
6930 """Recreate the disks.
6933 instance = self.instance
6935 assert (self.owned_locks(locking.LEVEL_NODE) ==
6936 self.owned_locks(locking.LEVEL_NODE_RES))
6939 mods = [] # keeps track of needed logical_id changes
6941 for idx, disk in enumerate(instance.disks):
6942 if idx not in self.op.disks: # disk idx has not been passed in
6945 # update secondaries for disks, if needed
6947 if disk.dev_type == constants.LD_DRBD8:
6948 # need to update the nodes and minors
6949 assert len(self.op.nodes) == 2
6950 assert len(disk.logical_id) == 6 # otherwise disk internals
6952 (_, _, old_port, _, _, old_secret) = disk.logical_id
6953 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6954 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6955 new_minors[0], new_minors[1], old_secret)
6956 assert len(disk.logical_id) == len(new_id)
6957 mods.append((idx, new_id))
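# Illustrative note (not part of the original code): a DRBD8 logical_id is the
# 6-tuple (node_a, node_b, port, minor_a, minor_b, secret); the rebuilt new_id
# above keeps port and secret but swaps in the new node pair and the freshly
# allocated minors, e.g. (hypothetical values):
#   ("node1", "node2", 11000, 0, 1, "s3cr3t") -> ("node3", "node4", 11000, 2, 0, "s3cr3t")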
6959 # now that we have passed all asserts above, we can apply the mods
6960 # in a single run (to avoid partial changes)
6961 for idx, new_id in mods:
6962 instance.disks[idx].logical_id = new_id
6964 # change primary node, if needed
6966 instance.primary_node = self.op.nodes[0]
6967 self.LogWarning("Changing the instance's nodes, you will have to"
6968 " remove any disks left on the older nodes manually")
6971 self.cfg.Update(instance, feedback_fn)
6973 _CreateDisks(self, instance, to_skip=to_skip)
6976 class LUInstanceRename(LogicalUnit):
6977 """Rename an instance.
6980 HPATH = "instance-rename"
6981 HTYPE = constants.HTYPE_INSTANCE
6983 def CheckArguments(self):
6987 if self.op.ip_check and not self.op.name_check:
6988 # TODO: make the ip check more flexible and not depend on the name check
6989 raise errors.OpPrereqError("IP address check requires a name check",
6992 def BuildHooksEnv(self):
6995 This runs on master, primary and secondary nodes of the instance.
6998 env = _BuildInstanceHookEnvByObject(self, self.instance)
6999 env["INSTANCE_NEW_NAME"] = self.op.new_name
7002 def BuildHooksNodes(self):
7003 """Build hooks nodes.
7006 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7009 def CheckPrereq(self):
7010 """Check prerequisites.
7012 This checks that the instance is in the cluster and is not running.
7015 self.op.instance_name = _ExpandInstanceName(self.cfg,
7016 self.op.instance_name)
7017 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7018 assert instance is not None
7019 _CheckNodeOnline(self, instance.primary_node)
7020 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7021 msg="cannot rename")
7022 self.instance = instance
7024 new_name = self.op.new_name
7025 if self.op.name_check:
7026 hostname = netutils.GetHostname(name=new_name)
7027 if hostname.name != new_name:
7028 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
7030 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
7031 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
7032 " same as given hostname '%s'") %
7033 (hostname.name, self.op.new_name),
7035 new_name = self.op.new_name = hostname.name
7036 if (self.op.ip_check and
7037 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7038 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7039 (hostname.ip, new_name),
7040 errors.ECODE_NOTUNIQUE)
7042 instance_list = self.cfg.GetInstanceList()
7043 if new_name in instance_list and new_name != instance.name:
7044 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7045 new_name, errors.ECODE_EXISTS)
7047 def Exec(self, feedback_fn):
7048 """Rename the instance.
7051 inst = self.instance
7052 old_name = inst.name
7054 rename_file_storage = False
7055 if (inst.disk_template in constants.DTS_FILEBASED and
7056 self.op.new_name != inst.name):
7057 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7058 rename_file_storage = True
7060 self.cfg.RenameInstance(inst.name, self.op.new_name)
7061 # Change the instance lock. This is definitely safe while we hold the BGL.
7062 # Otherwise the new lock would have to be added in acquired mode.
7064 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7065 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7067 # re-read the instance from the configuration after rename
7068 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7070 if rename_file_storage:
7071 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7072 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7073 old_file_storage_dir,
7074 new_file_storage_dir)
7075 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7076 " (but the instance has been renamed in Ganeti)" %
7077 (inst.primary_node, old_file_storage_dir,
7078 new_file_storage_dir))
7080 _StartInstanceDisks(self, inst, None)
7082 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7083 old_name, self.op.debug_level)
7084 msg = result.fail_msg
7086 msg = ("Could not run OS rename script for instance %s on node %s"
7087 " (but the instance has been renamed in Ganeti): %s" %
7088 (inst.name, inst.primary_node, msg))
7089 self.proc.LogWarning(msg)
7091 _ShutdownInstanceDisks(self, inst)
7096 class LUInstanceRemove(LogicalUnit):
7097 """Remove an instance.
7100 HPATH = "instance-remove"
7101 HTYPE = constants.HTYPE_INSTANCE
7104 def ExpandNames(self):
7105 self._ExpandAndLockInstance()
7106 self.needed_locks[locking.LEVEL_NODE] = []
7107 self.needed_locks[locking.LEVEL_NODE_RES] = []
7108 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7110 def DeclareLocks(self, level):
7111 if level == locking.LEVEL_NODE:
7112 self._LockInstancesNodes()
7113 elif level == locking.LEVEL_NODE_RES:
7115 self.needed_locks[locking.LEVEL_NODE_RES] = \
7116 self.needed_locks[locking.LEVEL_NODE][:]
7118 def BuildHooksEnv(self):
7121 This runs on master, primary and secondary nodes of the instance.
7124 env = _BuildInstanceHookEnvByObject(self, self.instance)
7125 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7128 def BuildHooksNodes(self):
7129 """Build hooks nodes.
7132 nl = [self.cfg.GetMasterNode()]
7133 nl_post = list(self.instance.all_nodes) + nl
7134 return (nl, nl_post)
7136 def CheckPrereq(self):
7137 """Check prerequisites.
7139 This checks that the instance is in the cluster.
7142 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7143 assert self.instance is not None, \
7144 "Cannot retrieve locked instance %s" % self.op.instance_name
7146 def Exec(self, feedback_fn):
7147 """Remove the instance.
7150 instance = self.instance
7151 logging.info("Shutting down instance %s on node %s",
7152 instance.name, instance.primary_node)
7154 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7155 self.op.shutdown_timeout)
7156 msg = result.fail_msg
7158 if self.op.ignore_failures:
7159 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7161 raise errors.OpExecError("Could not shutdown instance %s on"
7163 (instance.name, instance.primary_node, msg))
7165 assert (self.owned_locks(locking.LEVEL_NODE) ==
7166 self.owned_locks(locking.LEVEL_NODE_RES))
7167 assert not (set(instance.all_nodes) -
7168 self.owned_locks(locking.LEVEL_NODE)), \
7169 "Not owning correct locks"
7171 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7174 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7175 """Utility function to remove an instance.
7178 logging.info("Removing block devices for instance %s", instance.name)
7180 if not _RemoveDisks(lu, instance):
7181 if not ignore_failures:
7182 raise errors.OpExecError("Can't remove instance's disks")
7183 feedback_fn("Warning: can't remove instance's disks")
7185 logging.info("Removing instance %s out of cluster config", instance.name)
7187 lu.cfg.RemoveInstance(instance.name)
7189 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7190 "Instance lock removal conflict"
7192 # Remove lock for the instance
7193 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7196 class LUInstanceQuery(NoHooksLU):
7197 """Logical unit for querying instances.
7200 # pylint: disable=W0142
7203 def CheckArguments(self):
7204 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7205 self.op.output_fields, self.op.use_locking)
7207 def ExpandNames(self):
7208 self.iq.ExpandNames(self)
7210 def DeclareLocks(self, level):
7211 self.iq.DeclareLocks(self, level)
7213 def Exec(self, feedback_fn):
7214 return self.iq.OldStyleQuery(self)
7217 class LUInstanceFailover(LogicalUnit):
7218 """Failover an instance.
7221 HPATH = "instance-failover"
7222 HTYPE = constants.HTYPE_INSTANCE
7225 def CheckArguments(self):
7226 """Check the arguments.
7229 self.iallocator = getattr(self.op, "iallocator", None)
7230 self.target_node = getattr(self.op, "target_node", None)
7232 def ExpandNames(self):
7233 self._ExpandAndLockInstance()
7235 if self.op.target_node is not None:
7236 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7238 self.needed_locks[locking.LEVEL_NODE] = []
7239 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7241 ignore_consistency = self.op.ignore_consistency
7242 shutdown_timeout = self.op.shutdown_timeout
7243 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7246 ignore_consistency=ignore_consistency,
7247 shutdown_timeout=shutdown_timeout)
7248 self.tasklets = [self._migrater]
7250 def DeclareLocks(self, level):
7251 if level == locking.LEVEL_NODE:
7252 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7253 if instance.disk_template in constants.DTS_EXT_MIRROR:
7254 if self.op.target_node is None:
7255 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7257 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7258 self.op.target_node]
7259 del self.recalculate_locks[locking.LEVEL_NODE]
7261 self._LockInstancesNodes()
7263 def BuildHooksEnv(self):
7266 This runs on master, primary and secondary nodes of the instance.
7269 instance = self._migrater.instance
7270 source_node = instance.primary_node
7271 target_node = self.op.target_node
7273 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7274 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7275 "OLD_PRIMARY": source_node,
7276 "NEW_PRIMARY": target_node,
7279 if instance.disk_template in constants.DTS_INT_MIRROR:
7280 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7281 env["NEW_SECONDARY"] = source_node
7283 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7285 env.update(_BuildInstanceHookEnvByObject(self, instance))
7289 def BuildHooksNodes(self):
7290 """Build hooks nodes.
7293 instance = self._migrater.instance
7294 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7295 return (nl, nl + [instance.primary_node])
7298 class LUInstanceMigrate(LogicalUnit):
7299 """Migrate an instance.
7301 This is migration without shutting down, compared to the failover,
7302 which is done with shutdown.
7305 HPATH = "instance-migrate"
7306 HTYPE = constants.HTYPE_INSTANCE
7309 def ExpandNames(self):
7310 self._ExpandAndLockInstance()
7312 if self.op.target_node is not None:
7313 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7315 self.needed_locks[locking.LEVEL_NODE] = []
7316 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7318 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7319 cleanup=self.op.cleanup,
7321 fallback=self.op.allow_failover)
7322 self.tasklets = [self._migrater]
7324 def DeclareLocks(self, level):
7325 if level == locking.LEVEL_NODE:
7326 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7327 if instance.disk_template in constants.DTS_EXT_MIRROR:
7328 if self.op.target_node is None:
7329 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7331 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7332 self.op.target_node]
7333 del self.recalculate_locks[locking.LEVEL_NODE]
7335 self._LockInstancesNodes()
7337 def BuildHooksEnv(self):
7340 This runs on master, primary and secondary nodes of the instance.
7343 instance = self._migrater.instance
7344 source_node = instance.primary_node
7345 target_node = self.op.target_node
7346 env = _BuildInstanceHookEnvByObject(self, instance)
7348 "MIGRATE_LIVE": self._migrater.live,
7349 "MIGRATE_CLEANUP": self.op.cleanup,
7350 "OLD_PRIMARY": source_node,
7351 "NEW_PRIMARY": target_node,
7354 if instance.disk_template in constants.DTS_INT_MIRROR:
7355 env["OLD_SECONDARY"] = target_node
7356 env["NEW_SECONDARY"] = source_node
7358 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7362 def BuildHooksNodes(self):
7363 """Build hooks nodes.
7366 instance = self._migrater.instance
7367 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7368 return (nl, nl + [instance.primary_node])
7371 class LUInstanceMove(LogicalUnit):
7372 """Move an instance by data-copying.
7375 HPATH = "instance-move"
7376 HTYPE = constants.HTYPE_INSTANCE
7379 def ExpandNames(self):
7380 self._ExpandAndLockInstance()
7381 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7382 self.op.target_node = target_node
7383 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7384 self.needed_locks[locking.LEVEL_NODE_RES] = []
7385 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7387 def DeclareLocks(self, level):
7388 if level == locking.LEVEL_NODE:
7389 self._LockInstancesNodes(primary_only=True)
7390 elif level == locking.LEVEL_NODE_RES:
7392 self.needed_locks[locking.LEVEL_NODE_RES] = \
7393 self.needed_locks[locking.LEVEL_NODE][:]
7395 def BuildHooksEnv(self):
7398 This runs on master, primary and secondary nodes of the instance.
7402 "TARGET_NODE": self.op.target_node,
7403 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7405 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7408 def BuildHooksNodes(self):
7409 """Build hooks nodes.
7413 self.cfg.GetMasterNode(),
7414 self.instance.primary_node,
7415 self.op.target_node,
7419 def CheckPrereq(self):
7420 """Check prerequisites.
7422 This checks that the instance is in the cluster.
7425 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7426 assert self.instance is not None, \
7427 "Cannot retrieve locked instance %s" % self.op.instance_name
7429 node = self.cfg.GetNodeInfo(self.op.target_node)
7430 assert node is not None, \
7431 "Cannot retrieve locked node %s" % self.op.target_node
7433 self.target_node = target_node = node.name
7435 if target_node == instance.primary_node:
7436 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7437 (instance.name, target_node),
7440 bep = self.cfg.GetClusterInfo().FillBE(instance)
7442 for idx, dsk in enumerate(instance.disks):
7443 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7444 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7445 " cannot copy" % idx, errors.ECODE_STATE)
7447 _CheckNodeOnline(self, target_node)
7448 _CheckNodeNotDrained(self, target_node)
7449 _CheckNodeVmCapable(self, target_node)
7451 if instance.admin_state == constants.ADMINST_UP:
7452 # check memory requirements on the secondary node
7453 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7454 instance.name, bep[constants.BE_MAXMEM],
7455 instance.hypervisor)
7457 self.LogInfo("Not checking memory on the secondary node as"
7458 " instance will not be started")
7460 # check bridge existence
7461 _CheckInstanceBridgesExist(self, instance, node=target_node)
7463 def Exec(self, feedback_fn):
7464 """Move an instance.
7466 The move is done by shutting it down on its present node, copying
7467 the data over (slow) and starting it on the new node.
7470 instance = self.instance
7472 source_node = instance.primary_node
7473 target_node = self.target_node
7475 self.LogInfo("Shutting down instance %s on source node %s",
7476 instance.name, source_node)
7478 assert (self.owned_locks(locking.LEVEL_NODE) ==
7479 self.owned_locks(locking.LEVEL_NODE_RES))
7481 result = self.rpc.call_instance_shutdown(source_node, instance,
7482 self.op.shutdown_timeout)
7483 msg = result.fail_msg
7485 if self.op.ignore_consistency:
7486 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7487 " Proceeding anyway. Please make sure node"
7488 " %s is down. Error details: %s",
7489 instance.name, source_node, source_node, msg)
7491 raise errors.OpExecError("Could not shutdown instance %s on"
7493 (instance.name, source_node, msg))
7495 # create the target disks
7497 _CreateDisks(self, instance, target_node=target_node)
7498 except errors.OpExecError:
7499 self.LogWarning("Device creation failed, reverting...")
7501 _RemoveDisks(self, instance, target_node=target_node)
7503 self.cfg.ReleaseDRBDMinors(instance.name)
7506 cluster_name = self.cfg.GetClusterInfo().cluster_name
7509 # activate, get path, copy the data over
7510 for idx, disk in enumerate(instance.disks):
7511 self.LogInfo("Copying data for disk %d", idx)
7512 result = self.rpc.call_blockdev_assemble(target_node, disk,
7513 instance.name, True, idx)
7515 self.LogWarning("Can't assemble newly created disk %d: %s",
7516 idx, result.fail_msg)
7517 errs.append(result.fail_msg)
7519 dev_path = result.payload
7520 result = self.rpc.call_blockdev_export(source_node, disk,
7521 target_node, dev_path,
7524 self.LogWarning("Can't copy data over for disk %d: %s",
7525 idx, result.fail_msg)
7526 errs.append(result.fail_msg)
7530 self.LogWarning("Some disks failed to copy, aborting")
7532 _RemoveDisks(self, instance, target_node=target_node)
7534 self.cfg.ReleaseDRBDMinors(instance.name)
7535 raise errors.OpExecError("Errors during disk copy: %s" %
7538 instance.primary_node = target_node
7539 self.cfg.Update(instance, feedback_fn)
7541 self.LogInfo("Removing the disks on the original node")
7542 _RemoveDisks(self, instance, target_node=source_node)
7544 # Only start the instance if it's marked as up
7545 if instance.admin_state == constants.ADMINST_UP:
7546 self.LogInfo("Starting instance %s on node %s",
7547 instance.name, target_node)
7549 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7550 ignore_secondaries=True)
7551 if not disks_ok:
7552 _ShutdownInstanceDisks(self, instance)
7553 raise errors.OpExecError("Can't activate the instance's disks")
7555 result = self.rpc.call_instance_start(target_node,
7556 (instance, None, None), False)
7557 msg = result.fail_msg
7558 if msg:
7559 _ShutdownInstanceDisks(self, instance)
7560 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7561 (instance.name, target_node, msg))
7564 class LUNodeMigrate(LogicalUnit):
7565 """Migrate all instances from a node.
7568 HPATH = "node-migrate"
7569 HTYPE = constants.HTYPE_NODE
7572 def CheckArguments(self):
7575 def ExpandNames(self):
7576 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7578 self.share_locks = _ShareAll()
7579 self.needed_locks = {
7580 locking.LEVEL_NODE: [self.op.node_name],
7583 def BuildHooksEnv(self):
7584 """Build hooks env.
7586 This runs on the master, the primary and all the secondaries.
7589 return {
7590 "NODE_NAME": self.op.node_name,
7593 def BuildHooksNodes(self):
7594 """Build hooks nodes.
7597 nl = [self.cfg.GetMasterNode()]
7598 return (nl, nl)
7600 def CheckPrereq(self):
7603 def Exec(self, feedback_fn):
7604 # Prepare jobs for migration instances
7605 jobs = [
7606 [opcodes.OpInstanceMigrate(instance_name=inst.name,
7607 mode=self.op.mode,
7608 live=self.op.live,
7609 iallocator=self.op.iallocator,
7610 target_node=self.op.target_node)]
7611 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7612 ]
7614 # TODO: Run iallocator in this opcode and pass correct placement options to
7615 # OpInstanceMigrate. Since other jobs can modify the cluster between
7616 # running the iallocator and the actual migration, a good consistency model
7617 # will have to be found.
7619 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7620 frozenset([self.op.node_name]))
7622 return ResultWithJobs(jobs)
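# Editor's note: a minimal sketch of how the per-instance jobs above end up
# as separate jobs. The instance names are made up; only ResultWithJobs and
# opcodes.OpInstanceMigrate from this module are assumed.
def _ExampleNodeMigrateJobs():
  """Build a job list shaped like the one LUNodeMigrate.Exec returns.

  """
  instance_names = ["inst1.example.com", "inst2.example.com"]  # assumed
  jobs = [[opcodes.OpInstanceMigrate(instance_name=name)]
          for name in instance_names]
  # Each inner list is one job; mcpu.Processor._ProcessResult submits them
  # and records the resulting job IDs in this opcode's result.
  return ResultWithJobs(jobs)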
7625 class TLMigrateInstance(Tasklet):
7626 """Tasklet class for instance migration.
7629 @ivar live: whether the migration will be done live or non-live;
7630 this variable is initialized only after CheckPrereq has run
7631 @type cleanup: boolean
7632 @ivar cleanup: Whether we clean up from a failed migration
7633 @type iallocator: string
7634 @ivar iallocator: The iallocator used to determine target_node
7635 @type target_node: string
7636 @ivar target_node: If given, the target_node to reallocate the instance to
7637 @type failover: boolean
7638 @ivar failover: Whether operation results in failover or migration
7639 @type fallback: boolean
7640 @ivar fallback: Whether fallback to failover is allowed if migration is not
7641 possible
7642 @type ignore_consistency: boolean
7643 @ivar ignore_consistency: Whether we should ignore consistency between source
7644 and target node
7645 @type shutdown_timeout: int
7646 @ivar shutdown_timeout: In case of failover, timeout of the shutdown
7651 _MIGRATION_POLL_INTERVAL = 1 # seconds
7652 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7654 def __init__(self, lu, instance_name, cleanup=False,
7655 failover=False, fallback=False,
7656 ignore_consistency=False,
7657 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
7658 """Initializes this class.
7661 Tasklet.__init__(self, lu)
7664 self.instance_name = instance_name
7665 self.cleanup = cleanup
7666 self.live = False # will be overridden later
7667 self.failover = failover
7668 self.fallback = fallback
7669 self.ignore_consistency = ignore_consistency
7670 self.shutdown_timeout = shutdown_timeout
7672 def CheckPrereq(self):
7673 """Check prerequisites.
7675 This checks that the instance is in the cluster.
7678 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7679 instance = self.cfg.GetInstanceInfo(instance_name)
7680 assert instance is not None
7681 self.instance = instance
7683 if (not self.cleanup and
7684 not instance.admin_state == constants.ADMINST_UP and
7685 not self.failover and self.fallback):
7686 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7687 " switching to failover")
7688 self.failover = True
7690 if instance.disk_template not in constants.DTS_MIRRORED:
7691 if self.failover:
7692 text = "failovers"
7693 else:
7694 text = "migrations"
7695 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7696 " %s" % (instance.disk_template, text),
7699 if instance.disk_template in constants.DTS_EXT_MIRROR:
7700 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7702 if self.lu.op.iallocator:
7703 self._RunAllocator()
7704 else:
7705 # We set self.target_node as it is required by
7706 # BuildHooksEnv
7707 self.target_node = self.lu.op.target_node
7709 # self.target_node is already populated, either directly or by the
7710 # iallocator run
7711 target_node = self.target_node
7712 if self.target_node == instance.primary_node:
7713 raise errors.OpPrereqError("Cannot migrate instance %s"
7714 " to its primary (%s)" %
7715 (instance.name, instance.primary_node))
7717 if len(self.lu.tasklets) == 1:
7718 # It is safe to release locks only when we're the only tasklet
7719 # in the LU
7720 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7721 keep=[instance.primary_node, self.target_node])
7723 else:
7724 secondary_nodes = instance.secondary_nodes
7725 if not secondary_nodes:
7726 raise errors.ConfigurationError("No secondary node but using"
7727 " %s disk template" %
7728 instance.disk_template)
7729 target_node = secondary_nodes[0]
7730 if self.lu.op.iallocator or (self.lu.op.target_node and
7731 self.lu.op.target_node != target_node):
7732 if self.failover:
7733 text = "failed over"
7734 else:
7735 text = "migrated"
7736 raise errors.OpPrereqError("Instances with disk template %s cannot"
7737 " be %s to arbitrary nodes"
7738 " (neither an iallocator nor a target"
7739 " node can be passed)" %
7740 (instance.disk_template, text),
7741 errors.ECODE_INVAL)
7743 i_be = self.cfg.GetClusterInfo().FillBE(instance)
7745 # check memory requirements on the secondary node
7746 if not self.failover or instance.admin_state == constants.ADMINST_UP:
7747 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7748 instance.name, i_be[constants.BE_MAXMEM],
7749 instance.hypervisor)
7750 else:
7751 self.lu.LogInfo("Not checking memory on the secondary node as"
7752 " instance will not be started")
7754 # check if failover must be forced instead of migration
7755 if (not self.cleanup and not self.failover and
7756 i_be[constants.BE_ALWAYS_FAILOVER]):
7757 if self.fallback:
7758 self.lu.LogInfo("Instance configured to always failover; fallback"
7759 " to failover")
7760 self.failover = True
7761 else:
7762 raise errors.OpPrereqError("This instance has been configured to"
7763 " always failover, please allow failover",
7764 errors.ECODE_STATE)
7766 # check bridge existence
7767 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7769 if not self.cleanup:
7770 _CheckNodeNotDrained(self.lu, target_node)
7771 if not self.failover:
7772 result = self.rpc.call_instance_migratable(instance.primary_node,
7773 instance)
7774 if result.fail_msg and self.fallback:
7775 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7776 " failover")
7777 self.failover = True
7778 else:
7779 result.Raise("Can't migrate, please use failover",
7780 prereq=True, ecode=errors.ECODE_STATE)
7782 assert not (self.failover and self.cleanup)
7784 if not self.failover:
7785 if self.lu.op.live is not None and self.lu.op.mode is not None:
7786 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7787 " parameters are accepted",
7788 errors.ECODE_INVAL)
7789 if self.lu.op.live is not None:
7790 if self.lu.op.live:
7791 self.lu.op.mode = constants.HT_MIGRATION_LIVE
7792 else:
7793 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7794 # reset the 'live' parameter to None so that repeated
7795 # invocations of CheckPrereq do not raise an exception
7796 self.lu.op.live = None
7797 elif self.lu.op.mode is None:
7798 # read the default value from the hypervisor
7799 i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7800 skip_globals=False)
7801 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7803 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7804 else:
7805 # Failover is never live
7806 self.live = False
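# Editor's note: a condensed sketch of the live/mode selection above, assuming
# an opcode that only carries the legacy boolean "live" parameter:
#
#   if op.live is not None and op.mode is not None:
#     raise errors.OpPrereqError(...)      # the two parameters are exclusive
#   if op.live is not None:
#     op.mode = (constants.HT_MIGRATION_LIVE if op.live
#                else constants.HT_MIGRATION_NONLIVE)
#     op.live = None                       # keep CheckPrereq re-runnable
#   elif op.mode is None:
#     op.mode = i_hv[constants.HV_MIGRATION_MODE]   # hypervisor default
#   self.live = (op.mode == constants.HT_MIGRATION_LIVE)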
7808 def _RunAllocator(self):
7809 """Run the allocator based on input opcode.
7812 ial = IAllocator(self.cfg, self.rpc,
7813 mode=constants.IALLOCATOR_MODE_RELOC,
7814 name=self.instance_name,
7815 # TODO See why hail breaks with a single node below
7816 relocate_from=[self.instance.primary_node,
7817 self.instance.primary_node],
7820 ial.Run(self.lu.op.iallocator)
7822 if not ial.success:
7823 raise errors.OpPrereqError("Can't compute nodes using"
7824 " iallocator '%s': %s" %
7825 (self.lu.op.iallocator, ial.info),
7826 errors.ECODE_NORES)
7827 if len(ial.result) != ial.required_nodes:
7828 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7829 " of nodes (%s), required %s" %
7830 (self.lu.op.iallocator, len(ial.result),
7831 ial.required_nodes), errors.ECODE_FAULT)
7832 self.target_node = ial.result[0]
7833 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7834 self.instance_name, self.lu.op.iallocator,
7835 utils.CommaJoin(ial.result))
7837 def _WaitUntilSync(self):
7838 """Poll with custom rpc for disk sync.
7840 This uses our own step-based rpc call.
7843 self.feedback_fn("* wait until resync is done")
7844 all_done = False
7845 while not all_done:
7846 all_done = True
7847 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7848 self.nodes_ip,
7849 self.instance.disks)
7850 min_percent = 100
7851 for node, nres in result.items():
7852 nres.Raise("Cannot resync disks on node %s" % node)
7853 node_done, node_percent = nres.payload
7854 all_done = all_done and node_done
7855 if node_percent is not None:
7856 min_percent = min(min_percent, node_percent)
7857 if not all_done:
7858 if min_percent < 100:
7859 self.feedback_fn(" - progress: %.1f%%" % min_percent)
7860 time.sleep(2)
7862 def _EnsureSecondary(self, node):
7863 """Demote a node to secondary.
7866 self.feedback_fn("* switching node %s to secondary mode" % node)
7868 for dev in self.instance.disks:
7869 self.cfg.SetDiskID(dev, node)
7871 result = self.rpc.call_blockdev_close(node, self.instance.name,
7872 self.instance.disks)
7873 result.Raise("Cannot change disk to secondary on node %s" % node)
7875 def _GoStandalone(self):
7876 """Disconnect from the network.
7879 self.feedback_fn("* changing into standalone mode")
7880 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7881 self.instance.disks)
7882 for node, nres in result.items():
7883 nres.Raise("Cannot disconnect disks node %s" % node)
7885 def _GoReconnect(self, multimaster):
7886 """Reconnect to the network.
7892 msg = "single-master"
7893 self.feedback_fn("* changing disks into %s mode" % msg)
7894 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7895 self.instance.disks,
7896 self.instance.name, multimaster)
7897 for node, nres in result.items():
7898 nres.Raise("Cannot change disks config on node %s" % node)
7900 def _ExecCleanup(self):
7901 """Try to cleanup after a failed migration.
7903 The cleanup is done by:
7904 - check that the instance is running only on one node
7905 (and update the config if needed)
7906 - change disks on its secondary node to secondary
7907 - wait until disks are fully synchronized
7908 - disconnect from the network
7909 - change disks into single-master mode
7910 - wait again until disks are fully synchronized
7913 instance = self.instance
7914 target_node = self.target_node
7915 source_node = self.source_node
7917 # check running on only one node
7918 self.feedback_fn("* checking where the instance actually runs"
7919 " (if this hangs, the hypervisor might be in"
7921 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7922 for node, result in ins_l.items():
7923 result.Raise("Can't contact node %s" % node)
7925 runningon_source = instance.name in ins_l[source_node].payload
7926 runningon_target = instance.name in ins_l[target_node].payload
7928 if runningon_source and runningon_target:
7929 raise errors.OpExecError("Instance seems to be running on two nodes,"
7930 " or the hypervisor is confused; you will have"
7931 " to ensure manually that it runs only on one"
7932 " and restart this operation")
7934 if not (runningon_source or runningon_target):
7935 raise errors.OpExecError("Instance does not seem to be running at all;"
7936 " in this case it's safer to repair by"
7937 " running 'gnt-instance stop' to ensure disk"
7938 " shutdown, and then restarting it")
7940 if runningon_target:
7941 # the migration has actually succeeded, we need to update the config
7942 self.feedback_fn("* instance running on secondary node (%s),"
7943 " updating config" % target_node)
7944 instance.primary_node = target_node
7945 self.cfg.Update(instance, self.feedback_fn)
7946 demoted_node = source_node
7947 else:
7948 self.feedback_fn("* instance confirmed to be running on its"
7949 " primary node (%s)" % source_node)
7950 demoted_node = target_node
7952 if instance.disk_template in constants.DTS_INT_MIRROR:
7953 self._EnsureSecondary(demoted_node)
7954 try:
7955 self._WaitUntilSync()
7956 except errors.OpExecError:
7957 # we ignore here errors, since if the device is standalone, it
7958 # won't be able to sync
7959 pass
7960 self._GoStandalone()
7961 self._GoReconnect(False)
7962 self._WaitUntilSync()
7964 self.feedback_fn("* done")
7966 def _RevertDiskStatus(self):
7967 """Try to revert the disk status after a failed migration.
7970 target_node = self.target_node
7971 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7972 return
7974 try:
7975 self._EnsureSecondary(target_node)
7976 self._GoStandalone()
7977 self._GoReconnect(False)
7978 self._WaitUntilSync()
7979 except errors.OpExecError, err:
7980 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7981 " please try to recover the instance manually;"
7982 " error '%s'" % str(err))
7984 def _AbortMigration(self):
7985 """Call the hypervisor code to abort a started migration.
7988 instance = self.instance
7989 target_node = self.target_node
7990 source_node = self.source_node
7991 migration_info = self.migration_info
7993 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
7994 instance,
7995 migration_info,
7996 False)
7997 abort_msg = abort_result.fail_msg
7998 if abort_msg:
7999 logging.error("Aborting migration failed on target node %s: %s",
8000 target_node, abort_msg)
8001 # Don't raise an exception here, as we still have to try to revert the
8002 # disk status, even if this step failed.
8004 abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
8005 instance, False, self.live)
8006 abort_msg = abort_result.fail_msg
8007 if abort_msg:
8008 logging.error("Aborting migration failed on source node %s: %s",
8009 source_node, abort_msg)
8011 def _ExecMigration(self):
8012 """Migrate an instance.
8014 The migrate is done by:
8015 - change the disks into dual-master mode
8016 - wait until disks are fully synchronized again
8017 - migrate the instance
8018 - change disks on the new secondary node (the old primary) to secondary
8019 - wait until disks are fully synchronized
8020 - change disks into single-master mode
8023 instance = self.instance
8024 target_node = self.target_node
8025 source_node = self.source_node
8027 # Check for hypervisor version mismatch and warn the user.
8028 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8029 None, [self.instance.hypervisor])
8030 for ninfo in nodeinfo.values():
8031 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8032 ninfo.node)
8033 (_, _, (src_info, )) = nodeinfo[source_node].payload
8034 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8036 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8037 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8038 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8039 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8040 if src_version != dst_version:
8041 self.feedback_fn("* warning: hypervisor version mismatch between"
8042 " source (%s) and target (%s) node" %
8043 (src_version, dst_version))
8045 self.feedback_fn("* checking disk consistency between source and target")
8046 for dev in instance.disks:
8047 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
8048 raise errors.OpExecError("Disk %s is degraded or not fully"
8049 " synchronized on target node,"
8050 " aborting migration" % dev.iv_name)
8052 # First get the migration information from the remote node
8053 result = self.rpc.call_migration_info(source_node, instance)
8054 msg = result.fail_msg
8055 if msg:
8056 log_err = ("Failed fetching source migration information from %s: %s" %
8057 (source_node, msg))
8058 logging.error(log_err)
8059 raise errors.OpExecError(log_err)
8061 self.migration_info = migration_info = result.payload
8063 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8064 # Then switch the disks to master/master mode
8065 self._EnsureSecondary(target_node)
8066 self._GoStandalone()
8067 self._GoReconnect(True)
8068 self._WaitUntilSync()
8070 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8071 result = self.rpc.call_accept_instance(target_node,
8072 instance,
8073 migration_info,
8074 self.nodes_ip[target_node])
8076 msg = result.fail_msg
8077 if msg:
8078 logging.error("Instance pre-migration failed, trying to revert"
8079 " disk status: %s", msg)
8080 self.feedback_fn("Pre-migration failed, aborting")
8081 self._AbortMigration()
8082 self._RevertDiskStatus()
8083 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8084 (instance.name, msg))
8086 self.feedback_fn("* migrating instance to %s" % target_node)
8087 result = self.rpc.call_instance_migrate(source_node, instance,
8088 self.nodes_ip[target_node],
8089 self.live)
8090 msg = result.fail_msg
8091 if msg:
8092 logging.error("Instance migration failed, trying to revert"
8093 " disk status: %s", msg)
8094 self.feedback_fn("Migration failed, aborting")
8095 self._AbortMigration()
8096 self._RevertDiskStatus()
8097 raise errors.OpExecError("Could not migrate instance %s: %s" %
8098 (instance.name, msg))
8100 self.feedback_fn("* starting memory transfer")
8101 last_feedback = time.time()
8102 while True:
8103 result = self.rpc.call_instance_get_migration_status(source_node,
8104 instance)
8105 msg = result.fail_msg
8106 ms = result.payload # MigrationStatus instance
8107 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8108 logging.error("Instance migration failed, trying to revert"
8109 " disk status: %s", msg)
8110 self.feedback_fn("Migration failed, aborting")
8111 self._AbortMigration()
8112 self._RevertDiskStatus()
8113 raise errors.OpExecError("Could not migrate instance %s: %s" %
8114 (instance.name, msg))
8116 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8117 self.feedback_fn("* memory transfer complete")
8118 break
8120 if (utils.TimeoutExpired(last_feedback,
8121 self._MIGRATION_FEEDBACK_INTERVAL) and
8122 ms.transferred_ram is not None):
8123 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8124 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8125 last_feedback = time.time()
8127 time.sleep(self._MIGRATION_POLL_INTERVAL)
8129 result = self.rpc.call_instance_finalize_migration_src(source_node,
8130 instance,
8131 True,
8132 self.live)
8133 msg = result.fail_msg
8134 if msg:
8135 logging.error("Instance migration succeeded, but finalization failed"
8136 " on the source node: %s", msg)
8137 raise errors.OpExecError("Could not finalize instance migration: %s" %
8138 msg)
8140 instance.primary_node = target_node
8142 # distribute new instance config to the other nodes
8143 self.cfg.Update(instance, self.feedback_fn)
8145 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8146 instance,
8147 migration_info,
8148 True)
8149 msg = result.fail_msg
8150 if msg:
8151 logging.error("Instance migration succeeded, but finalization failed"
8152 " on the target node: %s", msg)
8153 raise errors.OpExecError("Could not finalize instance migration: %s" %
8154 msg)
8156 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8157 self._EnsureSecondary(source_node)
8158 self._WaitUntilSync()
8159 self._GoStandalone()
8160 self._GoReconnect(False)
8161 self._WaitUntilSync()
8163 self.feedback_fn("* done")
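# Editor's note: the memory transfer loop above boils down to the following
# polling pattern (get_status, abort_and_revert and report_progress are
# placeholders for the RPC, abort and feedback calls used above):
#
#   while True:
#     status = get_status()           # call_instance_get_migration_status
#     if status.status in constants.HV_MIGRATION_FAILED_STATUSES:
#       abort_and_revert()
#     if status.status != constants.HV_MIGRATION_ACTIVE:
#       break                         # transfer finished
#     report_progress(status.transferred_ram, status.total_ram)
#     time.sleep(self._MIGRATION_POLL_INTERVAL)
#
# Progress is only emitted every _MIGRATION_FEEDBACK_INTERVAL seconds.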
8165 def _ExecFailover(self):
8166 """Failover an instance.
8168 The failover is done by shutting it down on its present node and
8169 starting it on the secondary.
8172 instance = self.instance
8173 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8175 source_node = instance.primary_node
8176 target_node = self.target_node
8178 if instance.admin_state == constants.ADMINST_UP:
8179 self.feedback_fn("* checking disk consistency between source and target")
8180 for dev in instance.disks:
8181 # for drbd, these are drbd over lvm
8182 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
8183 if primary_node.offline:
8184 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8185 " target node %s" %
8186 (primary_node.name, dev.iv_name, target_node))
8187 elif not self.ignore_consistency:
8188 raise errors.OpExecError("Disk %s is degraded on target node,"
8189 " aborting failover" % dev.iv_name)
8191 self.feedback_fn("* not checking disk consistency as instance is not"
8194 self.feedback_fn("* shutting down instance on source node")
8195 logging.info("Shutting down instance %s on node %s",
8196 instance.name, source_node)
8198 result = self.rpc.call_instance_shutdown(source_node, instance,
8199 self.shutdown_timeout)
8200 msg = result.fail_msg
8201 if msg:
8202 if self.ignore_consistency or primary_node.offline:
8203 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8204 " proceeding anyway; please make sure node"
8205 " %s is down; error details: %s",
8206 instance.name, source_node, source_node, msg)
8207 else:
8208 raise errors.OpExecError("Could not shutdown instance %s on"
8209 " node %s: %s" %
8210 (instance.name, source_node, msg))
8212 self.feedback_fn("* deactivating the instance's disks on source node")
8213 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8214 raise errors.OpExecError("Can't shut down the instance's disks")
8216 instance.primary_node = target_node
8217 # distribute new instance config to the other nodes
8218 self.cfg.Update(instance, self.feedback_fn)
8220 # Only start the instance if it's marked as up
8221 if instance.admin_state == constants.ADMINST_UP:
8222 self.feedback_fn("* activating the instance's disks on target node %s" %
8223 target_node)
8224 logging.info("Starting instance %s on node %s",
8225 instance.name, target_node)
8227 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8228 ignore_secondaries=True)
8229 if not disks_ok:
8230 _ShutdownInstanceDisks(self.lu, instance)
8231 raise errors.OpExecError("Can't activate the instance's disks")
8233 self.feedback_fn("* starting the instance on the target node %s" %
8234 target_node)
8235 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8237 msg = result.fail_msg
8238 if msg:
8239 _ShutdownInstanceDisks(self.lu, instance)
8240 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8241 (instance.name, target_node, msg))
8243 def Exec(self, feedback_fn):
8244 """Perform the migration.
8247 self.feedback_fn = feedback_fn
8248 self.source_node = self.instance.primary_node
8250 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8251 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8252 self.target_node = self.instance.secondary_nodes[0]
8253 # Otherwise self.target_node has been populated either
8254 # directly, or through an iallocator.
8256 self.all_nodes = [self.source_node, self.target_node]
8257 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8258 in self.cfg.GetMultiNodeInfo(self.all_nodes))
8260 if self.failover:
8261 feedback_fn("Failover instance %s" % self.instance.name)
8262 self._ExecFailover()
8263 else:
8264 feedback_fn("Migrating instance %s" % self.instance.name)
8266 if self.cleanup:
8267 return self._ExecCleanup()
8268 else:
8269 return self._ExecMigration()
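# Editor's note: a minimal sketch of how an instance LU is expected to wire
# this tasklet into itself (the keyword arguments shown are a subset of the
# constructor above; the helper itself is only illustrative):
def _ExampleWireMigrationTasklet(lu, instance_name, do_cleanup=False):
  """Attach a TLMigrateInstance tasklet to the given LU.

  """
  tasklet = TLMigrateInstance(lu, instance_name, cleanup=do_cleanup,
                              failover=False, fallback=True)
  lu.tasklets = [tasklet]
  return tasklet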
8272 def _CreateBlockDev(lu, node, instance, device, force_create,
8273 info, force_open):
8274 """Create a tree of block devices on a given node.
8276 If this device type has to be created on secondaries, create it and
8279 If not, just recurse to children keeping the same 'force' value.
8281 @param lu: the lu on whose behalf we execute
8282 @param node: the node on which to create the device
8283 @type instance: L{objects.Instance}
8284 @param instance: the instance which owns the device
8285 @type device: L{objects.Disk}
8286 @param device: the device to create
8287 @type force_create: boolean
8288 @param force_create: whether to force creation of this device; this
8289 will be changed to True whenever we find a device which has
8290 the CreateOnSecondary() attribute
8291 @param info: the extra 'metadata' we should attach to the device
8292 (this will be represented as a LVM tag)
8293 @type force_open: boolean
8294 @param force_open: this parameter will be passed to the
8295 L{backend.BlockdevCreate} function where it specifies
8296 whether we run on primary or not, and it affects both
8297 the child assembly and the device's own Open() execution
8300 if device.CreateOnSecondary():
8301 force_create = True
8303 if device.children:
8304 for child in device.children:
8305 _CreateBlockDev(lu, node, instance, child, force_create,
8306 info, force_open)
8308 if not force_create:
8309 return
8311 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
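# Editor's note: a minimal sketch of a caller of the helper above. The
# recursion visits the children of the device first; force_create flips to
# True at every level whose CreateOnSecondary() is true (DRBD8 devices, for
# instance), so such devices get created on secondary nodes as well.
def _ExampleCreateDiskTree(lu, instance, disk):
  """Create one disk tree on the instance's primary node (editor's sketch).

  """
  info = _GetInstanceInfoText(instance)
  _CreateBlockDev(lu, instance.primary_node, instance, disk,
                  force_create=True, info=info, force_open=True)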
8314 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8315 """Create a single block device on a given node.
8317 This will not recurse over children of the device, so they must be
8318 created in advance.
8320 @param lu: the lu on whose behalf we execute
8321 @param node: the node on which to create the device
8322 @type instance: L{objects.Instance}
8323 @param instance: the instance which owns the device
8324 @type device: L{objects.Disk}
8325 @param device: the device to create
8326 @param info: the extra 'metadata' we should attach to the device
8327 (this will be represented as a LVM tag)
8328 @type force_open: boolean
8329 @param force_open: this parameter will be passed to the
8330 L{backend.BlockdevCreate} function where it specifies
8331 whether we run on primary or not, and it affects both
8332 the child assembly and the device's own Open() execution
8335 lu.cfg.SetDiskID(device, node)
8336 result = lu.rpc.call_blockdev_create(node, device, device.size,
8337 instance.name, force_open, info)
8338 result.Raise("Can't create block device %s on"
8339 " node %s for instance %s" % (device, node, instance.name))
8340 if device.physical_id is None:
8341 device.physical_id = result.payload
8344 def _GenerateUniqueNames(lu, exts):
8345 """Generate a suitable LV name.
8347 This will generate a logical volume name for the given instance.
8350 results = []
8351 for val in exts:
8352 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8353 results.append("%s%s" % (new_id, val))
8355 return results
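# Editor's note: a small usage sketch; the suffixes mirror the ones used by
# _GenerateDiskTemplate below for the drbd template (the IDs themselves come
# from the cluster's unique-ID generator).
def _ExampleDrbdLvNames(lu, disk_index):
  """Return (data, meta) LV names in the style used for DRBD8 disks.

  """
  (prefix,) = _GenerateUniqueNames(lu, [".disk%d" % disk_index])
  return (prefix + "_data", prefix + "_meta")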
8357 def _ComputeLDParams(disk_template, disk_params):
8358 """Computes Logical Disk parameters from Disk Template parameters.
8360 @type disk_template: string
8361 @param disk_template: disk template, one of L{constants.DISK_TEMPLATES}
8362 @type disk_params: dict
8363 @param disk_params: disk template parameters; dict(template_name -> parameters)
8365 @return: a list of dicts, one for each node of the disk hierarchy. Each dict
8366 contains the LD parameters of the node. The tree is flattened in-order.
8369 if disk_template not in constants.DISK_TEMPLATES:
8370 raise errors.ProgrammerError("Unknown disk template %s" % disk_template)
8373 dt_params = disk_params[disk_template]
8374 if disk_template == constants.DT_DRBD8:
8376 constants.LDP_RESYNC_RATE: dt_params[constants.DRBD_RESYNC_RATE],
8377 constants.LDP_BARRIERS: dt_params[constants.DRBD_DISK_BARRIERS],
8378 constants.LDP_NO_META_FLUSH: dt_params[constants.DRBD_META_BARRIERS],
8379 constants.LDP_DEFAULT_METAVG: dt_params[constants.DRBD_DEFAULT_METAVG],
8380 constants.LDP_DISK_CUSTOM: dt_params[constants.DRBD_DISK_CUSTOM],
8381 constants.LDP_NET_CUSTOM: dt_params[constants.DRBD_NET_CUSTOM],
8382 constants.LDP_DYNAMIC_RESYNC: dt_params[constants.DRBD_DYNAMIC_RESYNC],
8383 constants.LDP_PLAN_AHEAD: dt_params[constants.DRBD_PLAN_AHEAD],
8384 constants.LDP_FILL_TARGET: dt_params[constants.DRBD_FILL_TARGET],
8385 constants.LDP_DELAY_TARGET: dt_params[constants.DRBD_DELAY_TARGET],
8386 constants.LDP_MAX_RATE: dt_params[constants.DRBD_MAX_RATE],
8387 constants.LDP_MIN_RATE: dt_params[constants.DRBD_MIN_RATE],
8391 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_DRBD8],
8394 result.append(drbd_params)
8398 constants.LDP_STRIPES: dt_params[constants.DRBD_DATA_STRIPES],
8401 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8403 result.append(data_params)
8407 constants.LDP_STRIPES: dt_params[constants.DRBD_META_STRIPES],
8410 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8412 result.append(meta_params)
8414 elif (disk_template == constants.DT_FILE or
8415 disk_template == constants.DT_SHARED_FILE):
8416 result.append(constants.DISK_LD_DEFAULTS[constants.LD_FILE])
8418 elif disk_template == constants.DT_PLAIN:
8420 constants.LDP_STRIPES: dt_params[constants.LV_STRIPES],
8423 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8425 result.append(params)
8427 elif disk_template == constants.DT_BLOCK:
8428 result.append(constants.DISK_LD_DEFAULTS[constants.LD_BLOCKDEV])
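# Editor's note: for the drbd template the flattened result has one dict per
# node of the disk tree, in order: the DRBD8 device itself, the data LV and
# the metadata LV. A usage sketch, assuming the cluster object exposes its
# per-template disk parameters as "diskparams":
def _ExampleDrbd8LdParams(lu):
  """Split the LD parameters computed for the drbd disk template.

  """
  disk_params = lu.cfg.GetClusterInfo().diskparams  # assumed attribute
  drbd_params, data_params, meta_params = \
    _ComputeLDParams(constants.DT_DRBD8, disk_params)
  return drbd_params, data_params, meta_params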
8433 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8434 iv_name, p_minor, s_minor, drbd_params, data_params,
8435 meta_params):
8436 """Generate a drbd8 device complete with its children.
8439 assert len(vgnames) == len(names) == 2
8440 port = lu.cfg.AllocatePort()
8441 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8443 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8444 logical_id=(vgnames[0], names[0]),
8445 params=data_params)
8446 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8447 logical_id=(vgnames[1], names[1]),
8448 params=meta_params)
8449 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8450 logical_id=(primary, secondary, port,
8451 p_minor, s_minor,
8452 shared_secret),
8453 children=[dev_data, dev_meta],
8454 iv_name=iv_name, params=drbd_params)
8455 return drbd_dev
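# Editor's note: the resulting object tree is a single LD_DRBD8 device with
# the data LV and the DRBD_META_SIZE metadata LV as its two children:
#
#   drbd_dev  (LD_DRBD8, size=size, logical_id=(primary, secondary, port,
#                                               p_minor, s_minor, secret))
#     +- dev_data  (LD_LV, size=size,           logical_id=(vgnames[0], names[0]))
#     +- dev_meta  (LD_LV, size=DRBD_META_SIZE, logical_id=(vgnames[1], names[1]))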
8458 def _GenerateDiskTemplate(lu, template_name,
8459 instance_name, primary_node,
8460 secondary_nodes, disk_info,
8461 file_storage_dir, file_driver,
8462 base_index, feedback_fn, disk_params):
8463 """Generate the entire disk layout for a given template type.
8466 #TODO: compute space requirements
8468 vgname = lu.cfg.GetVGName()
8469 disk_count = len(disk_info)
8470 disks = []
8471 ld_params = _ComputeLDParams(template_name, disk_params)
8472 if template_name == constants.DT_DISKLESS:
8473 pass
8474 elif template_name == constants.DT_PLAIN:
8475 if len(secondary_nodes) != 0:
8476 raise errors.ProgrammerError("Wrong template configuration")
8478 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8479 for i in range(disk_count)])
8480 for idx, disk in enumerate(disk_info):
8481 disk_index = idx + base_index
8482 vg = disk.get(constants.IDISK_VG, vgname)
8483 feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
8484 disk_dev = objects.Disk(dev_type=constants.LD_LV,
8485 size=disk[constants.IDISK_SIZE],
8486 logical_id=(vg, names[idx]),
8487 iv_name="disk/%d" % disk_index,
8488 mode=disk[constants.IDISK_MODE],
8489 params=ld_params[0])
8490 disks.append(disk_dev)
8491 elif template_name == constants.DT_DRBD8:
8492 drbd_params, data_params, meta_params = ld_params
8493 if len(secondary_nodes) != 1:
8494 raise errors.ProgrammerError("Wrong template configuration")
8495 remote_node = secondary_nodes[0]
8496 minors = lu.cfg.AllocateDRBDMinor(
8497 [primary_node, remote_node] * len(disk_info), instance_name)
8499 names = []
8500 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8501 for i in range(disk_count)]):
8502 names.append(lv_prefix + "_data")
8503 names.append(lv_prefix + "_meta")
8504 for idx, disk in enumerate(disk_info):
8505 disk_index = idx + base_index
8506 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8507 data_vg = disk.get(constants.IDISK_VG, vgname)
8508 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8509 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8510 disk[constants.IDISK_SIZE],
8511 [data_vg, meta_vg],
8512 names[idx * 2:idx * 2 + 2],
8513 "disk/%d" % disk_index,
8514 minors[idx * 2], minors[idx * 2 + 1],
8515 drbd_params, data_params, meta_params)
8516 disk_dev.mode = disk[constants.IDISK_MODE]
8517 disks.append(disk_dev)
8518 elif template_name == constants.DT_FILE:
8519 if len(secondary_nodes) != 0:
8520 raise errors.ProgrammerError("Wrong template configuration")
8522 opcodes.RequireFileStorage()
8524 for idx, disk in enumerate(disk_info):
8525 disk_index = idx + base_index
8526 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8527 size=disk[constants.IDISK_SIZE],
8528 iv_name="disk/%d" % disk_index,
8529 logical_id=(file_driver,
8530 "%s/disk%d" % (file_storage_dir,
8532 mode=disk[constants.IDISK_MODE],
8533 params=ld_params[0])
8534 disks.append(disk_dev)
8535 elif template_name == constants.DT_SHARED_FILE:
8536 if len(secondary_nodes) != 0:
8537 raise errors.ProgrammerError("Wrong template configuration")
8539 opcodes.RequireSharedFileStorage()
8541 for idx, disk in enumerate(disk_info):
8542 disk_index = idx + base_index
8543 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8544 size=disk[constants.IDISK_SIZE],
8545 iv_name="disk/%d" % disk_index,
8546 logical_id=(file_driver,
8547 "%s/disk%d" % (file_storage_dir,
8549 mode=disk[constants.IDISK_MODE],
8550 params=ld_params[0])
8551 disks.append(disk_dev)
8552 elif template_name == constants.DT_BLOCK:
8553 if len(secondary_nodes) != 0:
8554 raise errors.ProgrammerError("Wrong template configuration")
8556 for idx, disk in enumerate(disk_info):
8557 disk_index = idx + base_index
8558 disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
8559 size=disk[constants.IDISK_SIZE],
8560 logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
8561 disk[constants.IDISK_ADOPT]),
8562 iv_name="disk/%d" % disk_index,
8563 mode=disk[constants.IDISK_MODE],
8564 params=ld_params[0])
8565 disks.append(disk_dev)
8567 else:
8568 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
8570 return disks
8572 def _GetInstanceInfoText(instance):
8573 """Compute that text that should be added to the disk's metadata.
8576 return "originstname+%s" % instance.name
8579 def _CalcEta(time_taken, written, total_size):
8580 """Calculates the ETA based on size written and total size.
8582 @param time_taken: The time taken so far
8583 @param written: amount written so far
8584 @param total_size: The total size of data to be written
8585 @return: The remaining time in seconds
8588 avg_time = time_taken / float(written)
8589 return (total_size - written) * avg_time
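# Editor's example: with 256 MiB written out of 1024 MiB after 60 seconds the
# average cost is 60/256 seconds per MiB, so the remaining 768 MiB take
# 768 * 60 / 256 = 180 seconds:
def _ExampleCalcEta():
  """Worked example for _CalcEta (arbitrary values).

  """
  return _CalcEta(60.0, 256, 1024)  # -> 180.0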
8592 def _WipeDisks(lu, instance):
8593 """Wipes instance disks.
8595 @type lu: L{LogicalUnit}
8596 @param lu: the logical unit on whose behalf we execute
8597 @type instance: L{objects.Instance}
8598 @param instance: the instance whose disks we should wipe
8599 @return: the success of the wipe
8602 node = instance.primary_node
8604 for device in instance.disks:
8605 lu.cfg.SetDiskID(device, node)
8607 logging.info("Pause sync of instance %s disks", instance.name)
8608 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
8610 for idx, success in enumerate(result.payload):
8611 if not success:
8612 logging.warn("pause-sync of instance %s for disks %d failed",
8613 instance.name, idx)
8615 try:
8616 for idx, device in enumerate(instance.disks):
8617 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
8618 # MAX_WIPE_CHUNK at max
8619 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8620 constants.MIN_WIPE_CHUNK_PERCENT)
8621 # we _must_ make this an int, otherwise rounding errors will
8622 # occur
8623 wipe_chunk_size = int(wipe_chunk_size)
8625 lu.LogInfo("* Wiping disk %d", idx)
8626 logging.info("Wiping disk %d for instance %s, node %s using"
8627 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8632 start_time = time.time()
8634 while offset < size:
8635 wipe_size = min(wipe_chunk_size, size - offset)
8636 logging.debug("Wiping disk %d, offset %s, chunk %s",
8637 idx, offset, wipe_size)
8638 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
8639 result.Raise("Could not wipe disk %d at offset %d for size %d" %
8640 (idx, offset, wipe_size))
8641 now = time.time()
8642 offset += wipe_size
8643 if now - last_output >= 60:
8644 eta = _CalcEta(now - start_time, offset, size)
8645 lu.LogInfo(" - done: %.1f%% ETA: %s" %
8646 (offset / float(size) * 100, utils.FormatSeconds(eta)))
8647 last_output = now
8648 finally:
8649 logging.info("Resume sync of instance %s disks", instance.name)
8651 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
8653 for idx, success in enumerate(result.payload):
8654 if not success:
8655 lu.LogWarning("Resume sync of disk %d failed, please have a"
8656 " look at the status and troubleshoot the issue", idx)
8657 logging.warn("resume-sync of instance %s for disks %d failed",
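# Editor's note: a sketch of the chunk-size rule used above; the wipe chunk
# is MIN_WIPE_CHUNK_PERCENT of the disk size, capped at MAX_WIPE_CHUNK (both
# defined in constants.py), e.g. a 100 GiB disk with a 10% minimum and a
# 1 GiB cap is wiped in 1 GiB chunks.
def _ExampleWipeChunkSize(disk_size):
  """Return the chunk size (in MiB) _WipeDisks would use for a disk.

  """
  return int(min(constants.MAX_WIPE_CHUNK,
                 disk_size / 100.0 * constants.MIN_WIPE_CHUNK_PERCENT))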
8661 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8662 """Create all disks for an instance.
8664 This abstracts away some work from AddInstance.
8666 @type lu: L{LogicalUnit}
8667 @param lu: the logical unit on whose behalf we execute
8668 @type instance: L{objects.Instance}
8669 @param instance: the instance whose disks we should create
8670 @type to_skip: list
8671 @param to_skip: list of indices to skip
8672 @type target_node: string
8673 @param target_node: if passed, overrides the target node for creation
8675 @return: the success of the creation
8678 info = _GetInstanceInfoText(instance)
8679 if target_node is None:
8680 pnode = instance.primary_node
8681 all_nodes = instance.all_nodes
8682 else:
8683 pnode = target_node
8684 all_nodes = [pnode]
8686 if instance.disk_template in constants.DTS_FILEBASED:
8687 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8688 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8690 result.Raise("Failed to create directory '%s' on"
8691 " node %s" % (file_storage_dir, pnode))
8693 # Note: this needs to be kept in sync with adding of disks in
8694 # LUInstanceSetParams
8695 for idx, device in enumerate(instance.disks):
8696 if to_skip and idx in to_skip:
8697 continue
8698 logging.info("Creating volume %s for instance %s",
8699 device.iv_name, instance.name)
8701 for node in all_nodes:
8702 f_create = node == pnode
8703 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
8706 def _RemoveDisks(lu, instance, target_node=None):
8707 """Remove all disks for an instance.
8709 This abstracts away some work from `AddInstance()` and
8710 `RemoveInstance()`. Note that in case some of the devices couldn't
8711 be removed, the removal will continue with the other ones (compare
8712 with `_CreateDisks()`).
8714 @type lu: L{LogicalUnit}
8715 @param lu: the logical unit on whose behalf we execute
8716 @type instance: L{objects.Instance}
8717 @param instance: the instance whose disks we should remove
8718 @type target_node: string
8719 @param target_node: used to override the node on which to remove the disks
8721 @return: the success of the removal
8724 logging.info("Removing block devices for instance %s", instance.name)
8726 all_result = True
8727 for device in instance.disks:
8728 if target_node:
8729 edata = [(target_node, device)]
8730 else:
8731 edata = device.ComputeNodeTree(instance.primary_node)
8732 for node, disk in edata:
8733 lu.cfg.SetDiskID(disk, node)
8734 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
8735 if msg:
8736 lu.LogWarning("Could not remove block device %s on node %s,"
8737 " continuing anyway: %s", device.iv_name, node, msg)
8738 all_result = False
8740 # if this is a DRBD disk, return its port to the pool
8741 if device.dev_type in constants.LDS_DRBD:
8742 tcp_port = device.logical_id[2]
8743 lu.cfg.AddTcpUdpPort(tcp_port)
8745 if instance.disk_template == constants.DT_FILE:
8746 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8747 if target_node:
8748 tgt = target_node
8749 else:
8750 tgt = instance.primary_node
8751 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
8752 if result.fail_msg:
8753 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
8754 file_storage_dir, instance.primary_node, result.fail_msg)
8755 all_result = False
8757 return all_result
8760 def _ComputeDiskSizePerVG(disk_template, disks):
8761 """Compute disk size requirements in the volume group
8764 def _compute(disks, payload):
8765 """Universal algorithm.
8768 vgs = {}
8769 for disk in disks:
8770 vgs[disk[constants.IDISK_VG]] = \
8771 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
8773 return vgs
8775 # Required free disk space as a function of disk and swap space
8776 req_size_dict = {
8777 constants.DT_DISKLESS: {},
8778 constants.DT_PLAIN: _compute(disks, 0),
8779 # 128 MB are added for drbd metadata for each disk
8780 constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
8781 constants.DT_FILE: {},
8782 constants.DT_SHARED_FILE: {},
8783 }
8785 if disk_template not in req_size_dict:
8786 raise errors.ProgrammerError("Disk template '%s' size requirement"
8787 " is unknown" % disk_template)
8789 return req_size_dict[disk_template]
8792 def _ComputeDiskSize(disk_template, disks):
8793 """Compute disk size requirements in the volume group
8796 # Required free disk space as a function of disk and swap space
8797 req_size_dict = {
8798 constants.DT_DISKLESS: None,
8799 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
8800 # 128 MB are added for drbd metadata for each disk
8801 constants.DT_DRBD8:
8802 sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
8803 constants.DT_FILE: None,
8804 constants.DT_SHARED_FILE: 0,
8805 constants.DT_BLOCK: 0,
8806 }
8808 if disk_template not in req_size_dict:
8809 raise errors.ProgrammerError("Disk template '%s' size requirement"
8810 " is unknown" % disk_template)
8812 return req_size_dict[disk_template]
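# Editor's example: for two drbd disks of 10240 MiB and 2048 MiB the
# requirement is (10240 + 128) + (2048 + 128) = 12544 MiB, the extra
# DRBD_META_SIZE MiB per disk being the DRBD metadata volume:
def _ExampleDrbd8DiskSize():
  """Worked example for _ComputeDiskSize with the drbd template.

  """
  disks = [{constants.IDISK_SIZE: 10240}, {constants.IDISK_SIZE: 2048}]
  return _ComputeDiskSize(constants.DT_DRBD8, disks)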
8815 def _FilterVmNodes(lu, nodenames):
8816 """Filters out non-vm_capable nodes from a list.
8818 @type lu: L{LogicalUnit}
8819 @param lu: the logical unit for which we check
8820 @type nodenames: list
8821 @param nodenames: the list of nodes on which we should check
8823 @return: the list of vm-capable nodes
8826 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
8827 return [name for name in nodenames if name not in vm_nodes]
8830 def _CheckHVParams(lu, nodenames, hvname, hvparams):
8831 """Hypervisor parameter validation.
8833 This function abstract the hypervisor parameter validation to be
8834 used in both instance create and instance modify.
8836 @type lu: L{LogicalUnit}
8837 @param lu: the logical unit for which we check
8838 @type nodenames: list
8839 @param nodenames: the list of nodes on which we should check
8840 @type hvname: string
8841 @param hvname: the name of the hypervisor we should use
8842 @type hvparams: dict
8843 @param hvparams: the parameters which we need to check
8844 @raise errors.OpPrereqError: if the parameters are not valid
8847 nodenames = _FilterVmNodes(lu, nodenames)
8849 cluster = lu.cfg.GetClusterInfo()
8850 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
8852 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
8853 for node in nodenames:
8854 info = hvinfo[node]
8855 if info.offline:
8856 continue
8857 info.Raise("Hypervisor parameter validation failed on node %s" % node)
8860 def _CheckOSParams(lu, required, nodenames, osname, osparams):
8861 """OS parameters validation.
8863 @type lu: L{LogicalUnit}
8864 @param lu: the logical unit for which we check
8865 @type required: boolean
8866 @param required: whether the validation should fail if the OS is not
8867 found
8868 @type nodenames: list
8869 @param nodenames: the list of nodes on which we should check
8870 @type osname: string
8871 @param osname: the name of the OS we should use
8872 @type osparams: dict
8873 @param osparams: the parameters which we need to check
8874 @raise errors.OpPrereqError: if the parameters are not valid
8877 nodenames = _FilterVmNodes(lu, nodenames)
8878 result = lu.rpc.call_os_validate(nodenames, required, osname,
8879 [constants.OS_VALIDATE_PARAMETERS],
8880 osparams)
8881 for node, nres in result.items():
8882 # we don't check for offline cases since this should be run only
8883 # against the master node and/or an instance's nodes
8884 nres.Raise("OS Parameters validation failed on node %s" % node)
8885 if not nres.payload:
8886 lu.LogInfo("OS %s not found on node %s, validation skipped",
8887 osname, node)
8890 class LUInstanceCreate(LogicalUnit):
8891 """Create an instance.
8894 HPATH = "instance-add"
8895 HTYPE = constants.HTYPE_INSTANCE
8898 def CheckArguments(self):
8902 # do not require name_check to ease forward/backward compatibility
8904 if self.op.no_install and self.op.start:
8905 self.LogInfo("No-installation mode selected, disabling startup")
8906 self.op.start = False
8907 # validate/normalize the instance name
8908 self.op.instance_name = \
8909 netutils.Hostname.GetNormalizedName(self.op.instance_name)
8911 if self.op.ip_check and not self.op.name_check:
8912 # TODO: make the ip check more flexible and not depend on the name check
8913 raise errors.OpPrereqError("Cannot do IP address check without a name"
8914 " check", errors.ECODE_INVAL)
8916 # check nics' parameter names
8917 for nic in self.op.nics:
8918 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8920 # check disks. parameter names and consistent adopt/no-adopt strategy
8921 has_adopt = has_no_adopt = False
8922 for disk in self.op.disks:
8923 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8924 if constants.IDISK_ADOPT in disk:
8925 has_adopt = True
8926 else:
8927 has_no_adopt = True
8928 if has_adopt and has_no_adopt:
8929 raise errors.OpPrereqError("Either all disks are adopted or none is",
8930 errors.ECODE_INVAL)
8931 if has_adopt:
8932 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8933 raise errors.OpPrereqError("Disk adoption is not supported for the"
8934 " '%s' disk template" %
8935 self.op.disk_template,
8936 errors.ECODE_INVAL)
8937 if self.op.iallocator is not None:
8938 raise errors.OpPrereqError("Disk adoption not allowed with an"
8939 " iallocator script", errors.ECODE_INVAL)
8940 if self.op.mode == constants.INSTANCE_IMPORT:
8941 raise errors.OpPrereqError("Disk adoption not allowed for"
8942 " instance import", errors.ECODE_INVAL)
8944 if self.op.disk_template in constants.DTS_MUST_ADOPT:
8945 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8946 " but no 'adopt' parameter given" %
8947 self.op.disk_template,
8948 errors.ECODE_INVAL)
8950 self.adopt_disks = has_adopt
8952 # instance name verification
8953 if self.op.name_check:
8954 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8955 self.op.instance_name = self.hostname1.name
8956 # used in CheckPrereq for ip ping check
8957 self.check_ip = self.hostname1.ip
8958 else:
8959 self.check_ip = None
8961 # file storage checks
8962 if (self.op.file_driver and
8963 not self.op.file_driver in constants.FILE_DRIVER):
8964 raise errors.OpPrereqError("Invalid file driver name '%s'" %
8965 self.op.file_driver, errors.ECODE_INVAL)
8967 if self.op.disk_template == constants.DT_FILE:
8968 opcodes.RequireFileStorage()
8969 elif self.op.disk_template == constants.DT_SHARED_FILE:
8970 opcodes.RequireSharedFileStorage()
8972 ### Node/iallocator related checks
8973 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8975 if self.op.pnode is not None:
8976 if self.op.disk_template in constants.DTS_INT_MIRROR:
8977 if self.op.snode is None:
8978 raise errors.OpPrereqError("The networked disk templates need"
8979 " a mirror node", errors.ECODE_INVAL)
8981 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8983 self.op.snode = None
8985 self._cds = _GetClusterDomainSecret()
8987 if self.op.mode == constants.INSTANCE_IMPORT:
8988 # On import force_variant must be True, because if we forced it at
8989 # initial install, our only chance when importing it back is that it
8990 # works again!
8991 self.op.force_variant = True
8993 if self.op.no_install:
8994 self.LogInfo("No-installation mode has no effect during import")
8996 elif self.op.mode == constants.INSTANCE_CREATE:
8997 if self.op.os_type is None:
8998 raise errors.OpPrereqError("No guest OS specified",
8999 errors.ECODE_INVAL)
9000 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9001 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9002 " installation" % self.op.os_type,
9004 if self.op.disk_template is None:
9005 raise errors.OpPrereqError("No disk template specified",
9006 errors.ECODE_INVAL)
9008 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9009 # Check handshake to ensure both clusters have the same domain secret
9010 src_handshake = self.op.source_handshake
9011 if not src_handshake:
9012 raise errors.OpPrereqError("Missing source handshake",
9013 errors.ECODE_INVAL)
9015 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9016 src_handshake)
9017 if errmsg:
9018 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9019 errors.ECODE_INVAL)
9021 # Load and check source CA
9022 self.source_x509_ca_pem = self.op.source_x509_ca
9023 if not self.source_x509_ca_pem:
9024 raise errors.OpPrereqError("Missing source X509 CA",
9025 errors.ECODE_INVAL)
9027 try:
9028 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9029 self._cds)
9030 except OpenSSL.crypto.Error, err:
9031 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9032 (err, ), errors.ECODE_INVAL)
9034 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9035 if errcode is not None:
9036 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9037 errors.ECODE_INVAL)
9039 self.source_x509_ca = cert
9041 src_instance_name = self.op.source_instance_name
9042 if not src_instance_name:
9043 raise errors.OpPrereqError("Missing source instance name",
9044 errors.ECODE_INVAL)
9046 self.source_instance_name = \
9047 netutils.GetHostname(name=src_instance_name).name
9049 else:
9050 raise errors.OpPrereqError("Invalid instance creation mode %r" %
9051 self.op.mode, errors.ECODE_INVAL)
9053 def ExpandNames(self):
9054 """ExpandNames for CreateInstance.
9056 Figure out the right locks for instance creation.
9059 self.needed_locks = {}
9061 instance_name = self.op.instance_name
9062 # this is just a preventive check, but someone might still add this
9063 # instance in the meantime, and creation will fail at lock-add time
9064 if instance_name in self.cfg.GetInstanceList():
9065 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9066 instance_name, errors.ECODE_EXISTS)
9068 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9070 if self.op.iallocator:
9071 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9072 # specifying a group on instance creation and then selecting nodes from
9073 # that group
9074 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9075 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9076 else:
9077 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9078 nodelist = [self.op.pnode]
9079 if self.op.snode is not None:
9080 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9081 nodelist.append(self.op.snode)
9082 self.needed_locks[locking.LEVEL_NODE] = nodelist
9083 # Lock resources of instance's primary and secondary nodes (copy to
9084 # prevent accidental modification)
9085 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9087 # in case of import lock the source node too
9088 if self.op.mode == constants.INSTANCE_IMPORT:
9089 src_node = self.op.src_node
9090 src_path = self.op.src_path
9092 if src_path is None:
9093 self.op.src_path = src_path = self.op.instance_name
9095 if src_node is None:
9096 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9097 self.op.src_node = None
9098 if os.path.isabs(src_path):
9099 raise errors.OpPrereqError("Importing an instance from a path"
9100 " requires a source node option",
9103 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9104 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9105 self.needed_locks[locking.LEVEL_NODE].append(src_node)
9106 if not os.path.isabs(src_path):
9107 self.op.src_path = src_path = \
9108 utils.PathJoin(constants.EXPORT_DIR, src_path)
9110 def _RunAllocator(self):
9111 """Run the allocator based on input opcode.
9114 nics = [n.ToDict() for n in self.nics]
9115 ial = IAllocator(self.cfg, self.rpc,
9116 mode=constants.IALLOCATOR_MODE_ALLOC,
9117 name=self.op.instance_name,
9118 disk_template=self.op.disk_template,
9119 tags=self.op.tags,
9120 os=self.op.os_type,
9121 vcpus=self.be_full[constants.BE_VCPUS],
9122 memory=self.be_full[constants.BE_MAXMEM],
9123 disks=self.disks,
9124 nics=nics,
9125 hypervisor=self.op.hypervisor,
9126 )
9128 ial.Run(self.op.iallocator)
9130 if not ial.success:
9131 raise errors.OpPrereqError("Can't compute nodes using"
9132 " iallocator '%s': %s" %
9133 (self.op.iallocator, ial.info),
9134 errors.ECODE_NORES)
9135 if len(ial.result) != ial.required_nodes:
9136 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9137 " of nodes (%s), required %s" %
9138 (self.op.iallocator, len(ial.result),
9139 ial.required_nodes), errors.ECODE_FAULT)
9140 self.op.pnode = ial.result[0]
9141 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9142 self.op.instance_name, self.op.iallocator,
9143 utils.CommaJoin(ial.result))
9144 if ial.required_nodes == 2:
9145 self.op.snode = ial.result[1]
9147 def BuildHooksEnv(self):
9150 This runs on master, primary and secondary nodes of the instance.
9153 env = {
9154 "ADD_MODE": self.op.mode,
9156 if self.op.mode == constants.INSTANCE_IMPORT:
9157 env["SRC_NODE"] = self.op.src_node
9158 env["SRC_PATH"] = self.op.src_path
9159 env["SRC_IMAGES"] = self.src_images
9161 env.update(_BuildInstanceHookEnv(
9162 name=self.op.instance_name,
9163 primary_node=self.op.pnode,
9164 secondary_nodes=self.secondaries,
9165 status=self.op.start,
9166 os_type=self.op.os_type,
9167 minmem=self.be_full[constants.BE_MINMEM],
9168 maxmem=self.be_full[constants.BE_MAXMEM],
9169 vcpus=self.be_full[constants.BE_VCPUS],
9170 nics=_NICListToTuple(self, self.nics),
9171 disk_template=self.op.disk_template,
9172 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9173 for d in self.disks],
9174 bep=self.be_full,
9175 hvp=self.hv_full,
9176 hypervisor_name=self.op.hypervisor,
9177 tags=self.op.tags,
9178 ))
9180 return env
9182 def BuildHooksNodes(self):
9183 """Build hooks nodes.
9186 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9187 return (nl, nl)
9189 def _ReadExportInfo(self):
9190 """Reads the export information from disk.
9192 It will override the opcode source node and path with the actual
9193 information, if these two were not specified before.
9195 @return: the export information
9198 assert self.op.mode == constants.INSTANCE_IMPORT
9200 src_node = self.op.src_node
9201 src_path = self.op.src_path
9203 if src_node is None:
9204 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9205 exp_list = self.rpc.call_export_list(locked_nodes)
9206 found = False
9207 for node in exp_list:
9208 if exp_list[node].fail_msg:
9209 continue
9210 if src_path in exp_list[node].payload:
9211 found = True
9212 self.op.src_node = src_node = node
9213 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
9214 src_path)
9215 break
9216 if not found:
9217 raise errors.OpPrereqError("No export found for relative path %s" %
9218 src_path, errors.ECODE_INVAL)
9220 _CheckNodeOnline(self, src_node)
9221 result = self.rpc.call_export_info(src_node, src_path)
9222 result.Raise("No export or invalid export found in dir %s" % src_path)
9224 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9225 if not export_info.has_section(constants.INISECT_EXP):
9226 raise errors.ProgrammerError("Corrupted export config",
9227 errors.ECODE_ENVIRON)
9229 ei_version = export_info.get(constants.INISECT_EXP, "version")
9230 if (int(ei_version) != constants.EXPORT_VERSION):
9231 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9232 (ei_version, constants.EXPORT_VERSION),
9233 errors.ECODE_ENVIRON)
9235 return export_info
9236 def _ReadExportParams(self, einfo):
9237 """Use export parameters as defaults.
9239 In case the opcode doesn't specify (as in override) some instance
9240 parameters, then try to use them from the export information, if
9241 they declare them.
9244 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9246 if self.op.disk_template is None:
9247 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9248 self.op.disk_template = einfo.get(constants.INISECT_INS,
9249 "disk_template")
9250 if self.op.disk_template not in constants.DISK_TEMPLATES:
9251 raise errors.OpPrereqError("Disk template specified in configuration"
9252 " file is not one of the allowed values:"
9253 " %s" % " ".join(constants.DISK_TEMPLATES))
9255 raise errors.OpPrereqError("No disk template specified and the export"
9256 " is missing the disk_template information",
9259 if not self.op.disks:
9260 disks = []
9261 # TODO: import the disk iv_name too
9262 for idx in range(constants.MAX_DISKS):
9263 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9264 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9265 disks.append({constants.IDISK_SIZE: disk_sz})
9266 self.op.disks = disks
9267 if not disks and self.op.disk_template != constants.DT_DISKLESS:
9268 raise errors.OpPrereqError("No disk info specified and the export"
9269 " is missing the disk information",
9272 if not self.op.nics:
9273 nics = []
9274 for idx in range(constants.MAX_NICS):
9275 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9276 ndict = {}
9277 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9278 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9279 ndict[name] = v
9280 nics.append(ndict)
9281 else:
9282 break
9283 self.op.nics = nics
9285 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9286 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9288 if (self.op.hypervisor is None and
9289 einfo.has_option(constants.INISECT_INS, "hypervisor")):
9290 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9292 if einfo.has_section(constants.INISECT_HYP):
9293 # use the export parameters but do not override the ones
9294 # specified by the user
9295 for name, value in einfo.items(constants.INISECT_HYP):
9296 if name not in self.op.hvparams:
9297 self.op.hvparams[name] = value
9299 if einfo.has_section(constants.INISECT_BEP):
9300 # use the parameters, without overriding
9301 for name, value in einfo.items(constants.INISECT_BEP):
9302 if name not in self.op.beparams:
9303 self.op.beparams[name] = value
9304 # Compatibility for the old "memory" be param
9305 if name == constants.BE_MEMORY:
9306 if constants.BE_MAXMEM not in self.op.beparams:
9307 self.op.beparams[constants.BE_MAXMEM] = value
9308 if constants.BE_MINMEM not in self.op.beparams:
9309 self.op.beparams[constants.BE_MINMEM] = value
9311 # try to read the parameters old style, from the main section
9312 for name in constants.BES_PARAMETERS:
9313 if (name not in self.op.beparams and
9314 einfo.has_option(constants.INISECT_INS, name)):
9315 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9317 if einfo.has_section(constants.INISECT_OSP):
9318 # use the parameters, without overriding
9319 for name, value in einfo.items(constants.INISECT_OSP):
9320 if name not in self.op.osparams:
9321 self.op.osparams[name] = value
9323 def _RevertToDefaults(self, cluster):
9324 """Revert the instance parameters to the default values.
9328 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9329 for name in self.op.hvparams.keys():
9330 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9331 del self.op.hvparams[name]
9333 be_defs = cluster.SimpleFillBE({})
9334 for name in self.op.beparams.keys():
9335 if name in be_defs and be_defs[name] == self.op.beparams[name]:
9336 del self.op.beparams[name]
9338 nic_defs = cluster.SimpleFillNIC({})
9339 for nic in self.op.nics:
9340 for name in constants.NICS_PARAMETERS:
if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
del nic[name]
9344 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9345 for name in self.op.osparams.keys():
9346 if name in os_defs and os_defs[name] == self.op.osparams[name]:
9347 del self.op.osparams[name]
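# _RevertToDefaults above is the inverse of the SimpleFill* calls: any value
# that currently equals the cluster default is dropped from the opcode again,
# so an instance created with identify_defaults keeps following future
# changes of the cluster defaults instead of freezing today's values.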
9349 def _CalculateFileStorageDir(self):
9350 """Calculate final instance file storage dir.
9353 # file storage dir calculation/check
9354 self.instance_file_storage_dir = None
9355 if self.op.disk_template in constants.DTS_FILEBASED:
# build the full file storage dir path
joinargs = []
if self.op.disk_template == constants.DT_SHARED_FILE:
get_fsd_fn = self.cfg.GetSharedFileStorageDir
else:
get_fsd_fn = self.cfg.GetFileStorageDir
9364 cfg_storagedir = get_fsd_fn()
9365 if not cfg_storagedir:
9366 raise errors.OpPrereqError("Cluster file storage dir not defined")
9367 joinargs.append(cfg_storagedir)
9369 if self.op.file_storage_dir is not None:
9370 joinargs.append(self.op.file_storage_dir)
9372 joinargs.append(self.op.instance_name)
9374 # pylint: disable=W0142
9375 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
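# The resulting path is roughly
#   <cluster file storage dir>[/<self.op.file_storage_dir>]/<instance name>
# e.g. /srv/ganeti/file-storage/inst1.example.com (illustrative value; the
# base directory comes from the cluster configuration queried above).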
9377 def CheckPrereq(self):
9378 """Check prerequisites.
9381 self._CalculateFileStorageDir()
9383 if self.op.mode == constants.INSTANCE_IMPORT:
9384 export_info = self._ReadExportInfo()
9385 self._ReadExportParams(export_info)
9387 if (not self.cfg.GetVGName() and
9388 self.op.disk_template not in constants.DTS_NOT_LVM):
9389 raise errors.OpPrereqError("Cluster does not support lvm-based"
9390 " instances", errors.ECODE_STATE)
9392 if (self.op.hypervisor is None or
9393 self.op.hypervisor == constants.VALUE_AUTO):
9394 self.op.hypervisor = self.cfg.GetHypervisorType()
9396 cluster = self.cfg.GetClusterInfo()
9397 enabled_hvs = cluster.enabled_hypervisors
9398 if self.op.hypervisor not in enabled_hvs:
9399 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9400 " cluster (%s)" % (self.op.hypervisor,
9401 ",".join(enabled_hvs)),
9404 # Check tag validity
9405 for tag in self.op.tags:
9406 objects.TaggableObject.ValidateTag(tag)
9408 # check hypervisor parameter syntax (locally)
9409 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
self.op.hvparams)
9412 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9413 hv_type.CheckParameterSyntax(filled_hvp)
9414 self.hv_full = filled_hvp
9415 # check that we don't specify global parameters on an instance
9416 _CheckGlobalHvParams(self.op.hvparams)
9418 # fill and remember the beparams dict
9419 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9420 for param, value in self.op.beparams.iteritems():
9421 if value == constants.VALUE_AUTO:
9422 self.op.beparams[param] = default_beparams[param]
9423 objects.UpgradeBeParams(self.op.beparams)
9424 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9425 self.be_full = cluster.SimpleFillBE(self.op.beparams)
9427 # build os parameters
9428 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9430 # now that hvp/bep are in final format, let's reset to defaults,
9432 if self.op.identify_defaults:
9433 self._RevertToDefaults(cluster)
self.nics = []
for idx, nic in enumerate(self.op.nics):
9438 nic_mode_req = nic.get(constants.INIC_MODE, None)
9439 nic_mode = nic_mode_req
9440 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9441 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9443 # in routed mode, for the first nic, the default ip is 'auto'
9444 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9445 default_ip_mode = constants.VALUE_AUTO
9447 default_ip_mode = constants.VALUE_NONE
9449 # ip validity checks
9450 ip = nic.get(constants.INIC_IP, default_ip_mode)
if ip is None or ip.lower() == constants.VALUE_NONE:
nic_ip = None
9453 elif ip.lower() == constants.VALUE_AUTO:
9454 if not self.op.name_check:
9455 raise errors.OpPrereqError("IP address set to auto but name checks"
9456 " have been skipped",
9458 nic_ip = self.hostname1.ip
9460 if not netutils.IPAddress.IsValid(ip):
9461 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9465 # TODO: check the ip address for uniqueness
9466 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
raise errors.OpPrereqError("Routed nic mode requires an ip address",
errors.ECODE_INVAL)
9470 # MAC address verification
9471 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9472 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9473 mac = utils.NormalizeAndValidateMac(mac)
try:
self.cfg.ReserveMAC(mac, self.proc.GetECId())
9477 except errors.ReservationError:
9478 raise errors.OpPrereqError("MAC address %s already in use"
9479 " in cluster" % mac,
9480 errors.ECODE_NOTUNIQUE)
9482 # Build nic parameters
9483 link = nic.get(constants.INIC_LINK, None)
9484 if link == constants.VALUE_AUTO:
link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
nicparams = {}
if nic_mode_req:
nicparams[constants.NIC_MODE] = nic_mode
if link:
nicparams[constants.NIC_LINK] = link
9492 check_params = cluster.SimpleFillNIC(nicparams)
9493 objects.NIC.CheckParameterSyntax(check_params)
9494 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9496 # disk checks/pre-build
9497 default_vg = self.cfg.GetVGName()
self.disks = []
for disk in self.op.disks:
9500 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9501 if mode not in constants.DISK_ACCESS_SET:
9502 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9503 mode, errors.ECODE_INVAL)
size = disk.get(constants.IDISK_SIZE, None)
if size is None:
raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
try:
size = int(size)
except (TypeError, ValueError):
raise errors.OpPrereqError("Invalid disk size '%s'" % size,
errors.ECODE_INVAL)
data_vg = disk.get(constants.IDISK_VG, default_vg)
new_disk = {
constants.IDISK_SIZE: size,
constants.IDISK_MODE: mode,
constants.IDISK_VG: data_vg,
}
9519 if constants.IDISK_METAVG in disk:
9520 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9521 if constants.IDISK_ADOPT in disk:
9522 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9523 self.disks.append(new_disk)
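# At this point every entry in self.disks is still a plain parameter dict,
# e.g. (illustrative values) {IDISK_SIZE: 10240, IDISK_MODE: "rw",
# IDISK_VG: "xenvg"} plus the optional metavg/adopt keys copied above; the
# actual Disk objects are only built later in Exec via _GenerateDiskTemplate.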
if self.op.mode == constants.INSTANCE_IMPORT:
disk_images = []
for idx in range(len(self.disks)):
9528 option = "disk%d_dump" % idx
9529 if export_info.has_option(constants.INISECT_INS, option):
9530 # FIXME: are the old os-es, disk sizes, etc. useful?
9531 export_name = export_info.get(constants.INISECT_INS, option)
9532 image = utils.PathJoin(self.op.src_path, export_name)
disk_images.append(image)
else:
disk_images.append(False)
9537 self.src_images = disk_images
9539 old_name = export_info.get(constants.INISECT_INS, "name")
9540 if self.op.instance_name == old_name:
9541 for idx, nic in enumerate(self.nics):
9542 if nic.mac == constants.VALUE_AUTO:
9543 nic_mac_ini = "nic%d_mac" % idx
9544 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9546 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9548 # ip ping checks (we use the same ip that was resolved in ExpandNames)
9549 if self.op.ip_check:
9550 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9551 raise errors.OpPrereqError("IP %s of instance %s already in use" %
9552 (self.check_ip, self.op.instance_name),
9553 errors.ECODE_NOTUNIQUE)
9555 #### mac address generation
9556 # By generating here the mac address both the allocator and the hooks get
9557 # the real final mac address rather than the 'auto' or 'generate' value.
9558 # There is a race condition between the generation and the instance object
9559 # creation, which means that we know the mac is valid now, but we're not
9560 # sure it will be when we actually add the instance. If things go bad
9561 # adding the instance will abort because of a duplicate mac, and the
9562 # creation job will fail.
9563 for nic in self.nics:
9564 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9565 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9569 if self.op.iallocator is not None:
9570 self._RunAllocator()
9572 # Release all unneeded node locks
9573 _ReleaseLocks(self, locking.LEVEL_NODE,
keep=filter(None, [self.op.pnode, self.op.snode,
self.op.src_node]))
9577 #### node related checks
9579 # check primary node
9580 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9581 assert self.pnode is not None, \
9582 "Cannot retrieve locked node %s" % self.op.pnode
if pnode.offline:
raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
pnode.name, errors.ECODE_STATE)
if pnode.drained:
raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
pnode.name, errors.ECODE_STATE)
9589 if not pnode.vm_capable:
9590 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9591 " '%s'" % pnode.name, errors.ECODE_STATE)
9593 self.secondaries = []
9595 # mirror node verification
9596 if self.op.disk_template in constants.DTS_INT_MIRROR:
9597 if self.op.snode == pnode.name:
9598 raise errors.OpPrereqError("The secondary node cannot be the"
9599 " primary node", errors.ECODE_INVAL)
9600 _CheckNodeOnline(self, self.op.snode)
9601 _CheckNodeNotDrained(self, self.op.snode)
9602 _CheckNodeVmCapable(self, self.op.snode)
9603 self.secondaries.append(self.op.snode)
9605 snode = self.cfg.GetNodeInfo(self.op.snode)
9606 if pnode.group != snode.group:
self.LogWarning("The primary and secondary nodes are in two"
" different node groups; the disk parameters"
" from the first disk's node group will be"
" used")
9612 nodenames = [pnode.name] + self.secondaries
9614 # disk parameters (not customizable at instance or node level)
9615 # just use the primary node parameters, ignoring the secondary.
9616 self.diskparams = self.cfg.GetNodeGroup(pnode.group).diskparams
9618 if not self.adopt_disks:
9619 # Check lv size requirements, if not adopting
9620 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9621 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9623 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9624 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9625 disk[constants.IDISK_ADOPT])
9626 for disk in self.disks])
9627 if len(all_lvs) != len(self.disks):
raise errors.OpPrereqError("Duplicate volume names given for adoption",
errors.ECODE_INVAL)
for lv_name in all_lvs:
try:
# FIXME: lv_name here is "vg/lv", need to ensure that other calls
# to ReserveLV use the same syntax
9634 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9635 except errors.ReservationError:
9636 raise errors.OpPrereqError("LV named %s used by another instance" %
9637 lv_name, errors.ECODE_NOTUNIQUE)
9639 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9640 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9642 node_lvs = self.rpc.call_lv_list([pnode.name],
9643 vg_names.payload.keys())[pnode.name]
9644 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9645 node_lvs = node_lvs.payload
delta = all_lvs.difference(node_lvs.keys())
if delta:
raise errors.OpPrereqError("Missing logical volume(s): %s" %
utils.CommaJoin(delta),
errors.ECODE_INVAL)
online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
if online_lvs:
raise errors.OpPrereqError("Online logical volumes found, cannot"
" adopt: %s" % utils.CommaJoin(online_lvs),
errors.ECODE_STATE)
9657 # update the size of disk based on what is found
9658 for dsk in self.disks:
9659 dsk[constants.IDISK_SIZE] = \
9660 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9661 dsk[constants.IDISK_ADOPT])][0]))
9663 elif self.op.disk_template == constants.DT_BLOCK:
9664 # Normalize and de-duplicate device paths
9665 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9666 for disk in self.disks])
9667 if len(all_disks) != len(self.disks):
raise errors.OpPrereqError("Duplicate disk names given for adoption",
errors.ECODE_INVAL)
baddisks = [d for d in all_disks
if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
if baddisks:
raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
" cannot be adopted" %
(", ".join(baddisks),
constants.ADOPTABLE_BLOCKDEV_ROOT),
errors.ECODE_INVAL)
9679 node_disks = self.rpc.call_bdev_sizes([pnode.name],
9680 list(all_disks))[pnode.name]
node_disks.Raise("Cannot get block device information from node %s" %
pnode.name)
node_disks = node_disks.payload
delta = all_disks.difference(node_disks.keys())
if delta:
raise errors.OpPrereqError("Missing block device(s): %s" %
utils.CommaJoin(delta),
errors.ECODE_INVAL)
9689 for dsk in self.disks:
9690 dsk[constants.IDISK_SIZE] = \
9691 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
9693 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
9695 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
9696 # check OS parameters (remotely)
9697 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
9699 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
9701 # memory check on primary node
9702 #TODO(dynmem): use MINMEM for checking
if self.op.start:
_CheckNodeFreeMemory(self, self.pnode.name,
"creating instance %s" % self.op.instance_name,
self.be_full[constants.BE_MAXMEM],
self.op.hypervisor)
9709 self.dry_run_result = list(nodenames)
9711 def Exec(self, feedback_fn):
9712 """Create and add the instance to the cluster.
9715 instance = self.op.instance_name
9716 pnode_name = self.pnode.name
9718 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
9719 self.owned_locks(locking.LEVEL_NODE)), \
9720 "Node locks differ from node resource locks"
9722 ht_kind = self.op.hypervisor
if ht_kind in constants.HTS_REQ_PORT:
network_port = self.cfg.AllocatePort()
else:
network_port = None
9728 disks = _GenerateDiskTemplate(self,
9729 self.op.disk_template,
9730 instance, pnode_name,
9733 self.instance_file_storage_dir,
9734 self.op.file_driver,
9739 iobj = objects.Instance(name=instance, os=self.op.os_type,
9740 primary_node=pnode_name,
9741 nics=self.nics, disks=disks,
9742 disk_template=self.op.disk_template,
9743 admin_state=constants.ADMINST_DOWN,
9744 network_port=network_port,
9745 beparams=self.op.beparams,
9746 hvparams=self.op.hvparams,
9747 hypervisor=self.op.hypervisor,
osparams=self.op.osparams,
)
for tag in self.op.tags:
iobj.AddTag(tag)
9755 if self.adopt_disks:
9756 if self.op.disk_template == constants.DT_PLAIN:
9757 # rename LVs to the newly-generated names; we need to construct
9758 # 'fake' LV disks with the old data, plus the new unique_id
9759 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
rename_to = []
for t_dsk, a_dsk in zip(tmp_disks, self.disks):
9762 rename_to.append(t_dsk.logical_id)
9763 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
9764 self.cfg.SetDiskID(t_dsk, pnode_name)
9765 result = self.rpc.call_blockdev_rename(pnode_name,
9766 zip(tmp_disks, rename_to))
result.Raise("Failed to rename adopted LVs")
feedback_fn("* creating instance disks...")
try:
_CreateDisks(self, iobj)
except errors.OpExecError:
self.LogWarning("Device creation failed, reverting...")
try:
_RemoveDisks(self, iobj)
finally:
self.cfg.ReleaseDRBDMinors(instance)
raise
9780 feedback_fn("adding instance %s to cluster config" % instance)
9782 self.cfg.AddInstance(iobj, self.proc.GetECId())
9784 # Declare that we don't want to remove the instance lock anymore, as we've
9785 # added the instance to the config
9786 del self.remove_locks[locking.LEVEL_INSTANCE]
9788 if self.op.mode == constants.INSTANCE_IMPORT:
9789 # Release unused nodes
9790 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
else:
_ReleaseLocks(self, locking.LEVEL_NODE)
disk_abort = False
if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
feedback_fn("* wiping instance disks...")
try:
_WipeDisks(self, iobj)
9800 except errors.OpExecError, err:
9801 logging.exception("Wiping disks failed")
self.LogWarning("Wiping instance disks failed (%s)", err)
disk_abort = True
if disk_abort:
# Something is already wrong with the disks, don't do anything else
pass
elif self.op.wait_for_sync:
9809 disk_abort = not _WaitForSync(self, iobj)
elif iobj.disk_template in constants.DTS_INT_MIRROR:
# make sure the disks are not degraded (still sync-ing is ok)
feedback_fn("* checking mirrors status")
disk_abort = not _WaitForSync(self, iobj, oneshot=True)
else:
disk_abort = False

if disk_abort:
_RemoveDisks(self, iobj)
9819 self.cfg.RemoveInstance(iobj.name)
9820 # Make sure the instance lock gets removed
9821 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
raise errors.OpExecError("There are some degraded disks for"
" this instance")
9825 # Release all node resource locks
9826 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
9828 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
9829 if self.op.mode == constants.INSTANCE_CREATE:
9830 if not self.op.no_install:
9831 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
9832 not self.op.wait_for_sync)
9834 feedback_fn("* pausing disk sync to install instance OS")
9835 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
for idx, success in enumerate(result.payload):
if not success:
logging.warn("pause-sync of instance %s for disk %d failed",
instance, idx)
9842 feedback_fn("* running the instance OS create scripts...")
9843 # FIXME: pass debug option from opcode to backend
os_add_result = \
self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
9846 self.op.debug_level)
9848 feedback_fn("* resuming disk sync")
9849 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
for idx, success in enumerate(result.payload):
if not success:
logging.warn("resume-sync of instance %s for disk %d failed",
instance, idx)
9856 os_add_result.Raise("Could not add os for instance %s"
9857 " on node %s" % (instance, pnode_name))
9859 elif self.op.mode == constants.INSTANCE_IMPORT:
feedback_fn("* running the instance OS import scripts...")
transfers = []
for idx, image in enumerate(self.src_images):
if not image:
continue
# FIXME: pass debug option from opcode to backend
9869 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
9870 constants.IEIO_FILE, (image, ),
9871 constants.IEIO_SCRIPT,
(iobj.disks[idx], idx),
None)
transfers.append(dt)
import_result = \
masterd.instance.TransferInstanceData(self, feedback_fn,
9878 self.op.src_node, pnode_name,
self.pnode.secondary_ip,
iobj, transfers)
9881 if not compat.all(import_result):
9882 self.LogWarning("Some disks for instance %s on node %s were not"
9883 " imported successfully" % (instance, pnode_name))
9885 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9886 feedback_fn("* preparing remote import...")
9887 # The source cluster will stop the instance before attempting to make a
9888 # connection. In some cases stopping an instance can take a long time,
9889 # hence the shutdown timeout is added to the connection timeout.
9890 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
9891 self.op.source_shutdown_timeout)
9892 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9894 assert iobj.primary_node == self.pnode.name
disk_results = \
masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
9897 self.source_x509_ca,
9898 self._cds, timeouts)
9899 if not compat.all(disk_results):
9900 # TODO: Should the instance still be started, even if some disks
9901 # failed to import (valid for local imports, too)?
9902 self.LogWarning("Some disks for instance %s on node %s were not"
9903 " imported successfully" % (instance, pnode_name))
9905 # Run rename script on newly imported instance
9906 assert iobj.name == instance
9907 feedback_fn("Running rename script for %s" % instance)
9908 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
9909 self.source_instance_name,
9910 self.op.debug_level)
if result.fail_msg:
self.LogWarning("Failed to run rename script for %s on node"
9913 " %s: %s" % (instance, pnode_name, result.fail_msg))
else:
# also checked in the prereq part
raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
% self.op.mode)
9920 assert not self.owned_locks(locking.LEVEL_NODE_RES)
if self.op.start:
iobj.admin_state = constants.ADMINST_UP
9924 self.cfg.Update(iobj, feedback_fn)
9925 logging.info("Starting instance %s on node %s", instance, pnode_name)
9926 feedback_fn("* starting instance...")
result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
False)
9929 result.Raise("Could not start instance")
9931 return list(iobj.all_nodes)
9934 class LUInstanceConsole(NoHooksLU):
9935 """Connect to an instance's console.
This is somewhat special in that it returns the command line that
you need to run on the master node in order to connect to the console.
9944 def ExpandNames(self):
9945 self.share_locks = _ShareAll()
9946 self._ExpandAndLockInstance()
9948 def CheckPrereq(self):
9949 """Check prerequisites.
9951 This checks that the instance is in the cluster.
9954 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9955 assert self.instance is not None, \
9956 "Cannot retrieve locked instance %s" % self.op.instance_name
9957 _CheckNodeOnline(self, self.instance.primary_node)
9959 def Exec(self, feedback_fn):
9960 """Connect to the console of an instance
9963 instance = self.instance
9964 node = instance.primary_node
9966 node_insts = self.rpc.call_instance_list([node],
9967 [instance.hypervisor])[node]
9968 node_insts.Raise("Can't get node information from %s" % node)
9970 if instance.name not in node_insts.payload:
9971 if instance.admin_state == constants.ADMINST_UP:
9972 state = constants.INSTST_ERRORDOWN
9973 elif instance.admin_state == constants.ADMINST_DOWN:
9974 state = constants.INSTST_ADMINDOWN
else:
state = constants.INSTST_ADMINOFFLINE
9977 raise errors.OpExecError("Instance %s is not running (state %s)" %
9978 (instance.name, state))
9980 logging.debug("Connecting to console of %s on %s", instance.name, node)
9982 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
9985 def _GetInstanceConsole(cluster, instance):
9986 """Returns console information for an instance.
9988 @type cluster: L{objects.Cluster}
9989 @type instance: L{objects.Instance}
9993 hyper = hypervisor.GetHypervisor(instance.hypervisor)
9994 # beparams and hvparams are passed separately, to avoid editing the
9995 # instance and then saving the defaults in the instance itself.
9996 hvparams = cluster.FillHV(instance)
9997 beparams = cluster.FillBE(instance)
9998 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10000 assert console.instance == instance.name
10001 assert console.Validate()
10003 return console.ToDict()
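# The dict returned here is the serialized form of an objects.InstanceConsole;
# roughly (the exact fields depend on the hypervisor) something like
#   {"instance": "inst1.example.com", "kind": "ssh", "host": "node1",
#    "user": "root", "command": [...]}
# which the client side turns back into an object and validates before use.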
10006 class LUInstanceReplaceDisks(LogicalUnit):
10007 """Replace the disks of an instance.
10010 HPATH = "mirrors-replace"
10011 HTYPE = constants.HTYPE_INSTANCE
10014 def CheckArguments(self):
10015 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
10016 self.op.iallocator)
10018 def ExpandNames(self):
10019 self._ExpandAndLockInstance()
10021 assert locking.LEVEL_NODE not in self.needed_locks
10022 assert locking.LEVEL_NODE_RES not in self.needed_locks
10023 assert locking.LEVEL_NODEGROUP not in self.needed_locks
10025 assert self.op.iallocator is None or self.op.remote_node is None, \
10026 "Conflicting options"
10028 if self.op.remote_node is not None:
10029 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10031 # Warning: do not remove the locking of the new secondary here
10032 # unless DRBD8.AddChildren is changed to work in parallel;
10033 # currently it doesn't since parallel invocations of
10034 # FindUnusedMinor will conflict
10035 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10036 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
else:
self.needed_locks[locking.LEVEL_NODE] = []
10039 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10041 if self.op.iallocator is not None:
10042 # iallocator will select a new node in the same group
10043 self.needed_locks[locking.LEVEL_NODEGROUP] = []
10045 self.needed_locks[locking.LEVEL_NODE_RES] = []
10047 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10048 self.op.iallocator, self.op.remote_node,
10049 self.op.disks, False, self.op.early_release)
10051 self.tasklets = [self.replacer]
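# With the tasklet registered, most of this LU's CheckPrereq/Exec work is
# delegated to TLReplaceDisks below; the LU itself only keeps the locking
# strategy and the hooks environment.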
10053 def DeclareLocks(self, level):
10054 if level == locking.LEVEL_NODEGROUP:
10055 assert self.op.remote_node is None
10056 assert self.op.iallocator is not None
10057 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10059 self.share_locks[locking.LEVEL_NODEGROUP] = 1
10060 # Lock all groups used by instance optimistically; this requires going
10061 # via the node before it's locked, requiring verification later on
10062 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10063 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10065 elif level == locking.LEVEL_NODE:
10066 if self.op.iallocator is not None:
10067 assert self.op.remote_node is None
10068 assert not self.needed_locks[locking.LEVEL_NODE]
10070 # Lock member nodes of all locked groups
10071 self.needed_locks[locking.LEVEL_NODE] = [node_name
10072 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10073 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
else:
self._LockInstancesNodes()
10076 elif level == locking.LEVEL_NODE_RES:
10078 self.needed_locks[locking.LEVEL_NODE_RES] = \
10079 self.needed_locks[locking.LEVEL_NODE]
10081 def BuildHooksEnv(self):
10082 """Build hooks env.
10084 This runs on the master, the primary and all the secondaries.
instance = self.replacer.instance
env = {
"MODE": self.op.mode,
"NEW_SECONDARY": self.op.remote_node,
"OLD_SECONDARY": instance.secondary_nodes[0],
}
env.update(_BuildInstanceHookEnvByObject(self, instance))
return env
10096 def BuildHooksNodes(self):
10097 """Build hooks nodes.
instance = self.replacer.instance
nl = [
self.cfg.GetMasterNode(),
instance.primary_node,
]
if self.op.remote_node is not None:
nl.append(self.op.remote_node)
return nl, nl
10109 def CheckPrereq(self):
10110 """Check prerequisites.
10113 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
10114 self.op.iallocator is None)
10116 # Verify if node group locks are still correct
10117 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
if owned_groups:
_CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
10121 return LogicalUnit.CheckPrereq(self)
10124 class TLReplaceDisks(Tasklet):
10125 """Replaces disks for an instance.
10127 Note: Locking is not within the scope of this class.
10130 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10131 disks, delay_iallocator, early_release):
10132 """Initializes this class.
10135 Tasklet.__init__(self, lu)
self.instance_name = instance_name
self.mode = mode
self.iallocator_name = iallocator_name
self.remote_node = remote_node
self.disks = disks
self.delay_iallocator = delay_iallocator
self.early_release = early_release
10147 self.instance = None
10148 self.new_node = None
10149 self.target_node = None
10150 self.other_node = None
10151 self.remote_node_info = None
10152 self.node_secondary_ip = None
10155 def CheckArguments(mode, remote_node, iallocator):
10156 """Helper function for users of this class.
10159 # check for valid parameter combination
10160 if mode == constants.REPLACE_DISK_CHG:
10161 if remote_node is None and iallocator is None:
10162 raise errors.OpPrereqError("When changing the secondary either an"
10163 " iallocator script must be used or the"
10164 " new node given", errors.ECODE_INVAL)
10166 if remote_node is not None and iallocator is not None:
10167 raise errors.OpPrereqError("Give either the iallocator or the new"
10168 " secondary, not both", errors.ECODE_INVAL)
10170 elif remote_node is not None or iallocator is not None:
10171 # Not replacing the secondary
10172 raise errors.OpPrereqError("The iallocator and new node options can"
10173 " only be used when changing the"
10174 " secondary node", errors.ECODE_INVAL)
10177 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10178 """Compute a new secondary node using an IAllocator.
10181 ial = IAllocator(lu.cfg, lu.rpc,
10182 mode=constants.IALLOCATOR_MODE_RELOC,
10183 name=instance_name,
10184 relocate_from=list(relocate_from))
10186 ial.Run(iallocator_name)
10188 if not ial.success:
10189 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10190 " %s" % (iallocator_name, ial.info),
10191 errors.ECODE_NORES)
10193 if len(ial.result) != ial.required_nodes:
10194 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10195 " of nodes (%s), required %s" %
10197 len(ial.result), ial.required_nodes),
10198 errors.ECODE_FAULT)
10200 remote_node_name = ial.result[0]
10202 lu.LogInfo("Selected new secondary for instance '%s': %s",
10203 instance_name, remote_node_name)
10205 return remote_node_name
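# The relocation request sent to the iallocator script is essentially a
# single-node query; as a sketch of the relevant request fields (not the full
# protocol):
#   {"type": "relocate", "name": "inst1.example.com",
#    "relocate_from": ["old-secondary.example.com"], "required_nodes": 1}
# and ial.result is expected to contain exactly the one chosen node name.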
10207 def _FindFaultyDisks(self, node_name):
10208 """Wrapper for L{_FindFaultyInstanceDisks}.
return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
node_name, True)
10214 def _CheckDisksActivated(self, instance):
10215 """Checks if the instance disks are activated.
10217 @param instance: The instance to check disks
10218 @return: True if they are activated, False otherwise
10221 nodes = instance.all_nodes
for idx, dev in enumerate(instance.disks):
for node in nodes:
self.lu.LogInfo("Checking disk/%d on %s", idx, node)
self.cfg.SetDiskID(dev, node)
result = self.rpc.call_blockdev_find(node, dev)
if result.offline:
continue
elif result.fail_msg or not result.payload:
return False
return True
10237 def CheckPrereq(self):
10238 """Check prerequisites.
10240 This checks that the instance is in the cluster.
10243 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10244 assert instance is not None, \
10245 "Cannot retrieve locked instance %s" % self.instance_name
10247 if instance.disk_template != constants.DT_DRBD8:
10248 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10249 " instances", errors.ECODE_INVAL)
10251 if len(instance.secondary_nodes) != 1:
10252 raise errors.OpPrereqError("The instance has a strange layout,"
10253 " expected one secondary but found %d" %
10254 len(instance.secondary_nodes),
10255 errors.ECODE_FAULT)
10257 if not self.delay_iallocator:
10258 self._CheckPrereq2()
10260 def _CheckPrereq2(self):
10261 """Check prerequisites, second part.
10263 This function should always be part of CheckPrereq. It was separated and is
10264 now called from Exec because during node evacuation iallocator was only
10265 called with an unmodified cluster model, not taking planned changes into
10269 instance = self.instance
10270 secondary_node = instance.secondary_nodes[0]
10272 if self.iallocator_name is None:
remote_node = self.remote_node
else:
remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10276 instance.name, instance.secondary_nodes)
10278 if remote_node is None:
self.remote_node_info = None
else:
assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10282 "Remote node '%s' is not locked" % remote_node
10284 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10285 assert self.remote_node_info is not None, \
10286 "Cannot retrieve locked node %s" % remote_node
10288 if remote_node == self.instance.primary_node:
10289 raise errors.OpPrereqError("The specified node is the primary node of"
10290 " the instance", errors.ECODE_INVAL)
10292 if remote_node == secondary_node:
10293 raise errors.OpPrereqError("The specified node is already the"
10294 " secondary node of the instance",
10295 errors.ECODE_INVAL)
10297 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10298 constants.REPLACE_DISK_CHG):
10299 raise errors.OpPrereqError("Cannot specify disks to be replaced",
10300 errors.ECODE_INVAL)
10302 if self.mode == constants.REPLACE_DISK_AUTO:
10303 if not self._CheckDisksActivated(instance):
10304 raise errors.OpPrereqError("Please run activate-disks on instance %s"
10305 " first" % self.instance_name,
10306 errors.ECODE_STATE)
10307 faulty_primary = self._FindFaultyDisks(instance.primary_node)
10308 faulty_secondary = self._FindFaultyDisks(secondary_node)
10310 if faulty_primary and faulty_secondary:
10311 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10312 " one node and can not be repaired"
10313 " automatically" % self.instance_name,
10314 errors.ECODE_STATE)
if faulty_primary:
self.disks = faulty_primary
10318 self.target_node = instance.primary_node
10319 self.other_node = secondary_node
10320 check_nodes = [self.target_node, self.other_node]
10321 elif faulty_secondary:
10322 self.disks = faulty_secondary
10323 self.target_node = secondary_node
10324 self.other_node = instance.primary_node
10325 check_nodes = [self.target_node, self.other_node]
10331 # Non-automatic modes
10332 if self.mode == constants.REPLACE_DISK_PRI:
10333 self.target_node = instance.primary_node
10334 self.other_node = secondary_node
10335 check_nodes = [self.target_node, self.other_node]
10337 elif self.mode == constants.REPLACE_DISK_SEC:
10338 self.target_node = secondary_node
10339 self.other_node = instance.primary_node
10340 check_nodes = [self.target_node, self.other_node]
10342 elif self.mode == constants.REPLACE_DISK_CHG:
10343 self.new_node = remote_node
10344 self.other_node = instance.primary_node
10345 self.target_node = secondary_node
10346 check_nodes = [self.new_node, self.other_node]
10348 _CheckNodeNotDrained(self.lu, remote_node)
10349 _CheckNodeVmCapable(self.lu, remote_node)
10351 old_node_info = self.cfg.GetNodeInfo(secondary_node)
10352 assert old_node_info is not None
10353 if old_node_info.offline and not self.early_release:
10354 # doesn't make sense to delay the release
10355 self.early_release = True
10356 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10357 " early-release mode", secondary_node)
raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
self.mode)
10363 # If not specified all disks should be replaced
if not self.disks:
self.disks = range(len(self.instance.disks))
10367 # TODO: compute disk parameters
10368 primary_node_info = self.cfg.GetNodeInfo(instance.primary_node)
10369 secondary_node_info = self.cfg.GetNodeInfo(secondary_node)
10370 if primary_node_info.group != secondary_node_info.group:
10371 self.lu.LogInfo("The instance primary and secondary nodes are in two"
10372 " different node groups; the disk parameters of the"
10373 " primary node's group will be applied.")
10375 self.diskparams = self.cfg.GetNodeGroup(primary_node_info.group).diskparams
10377 for node in check_nodes:
10378 _CheckNodeOnline(self.lu, node)
touched_nodes = frozenset(node_name for node_name in [self.new_node,
self.other_node,
self.target_node]
if node_name is not None)
10385 # Release unneeded node and node resource locks
10386 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10387 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10389 # Release any owned node group
10390 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10391 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10393 # Check whether disks are valid
10394 for disk_idx in self.disks:
10395 instance.FindDisk(disk_idx)
10397 # Get secondary node IP addresses
10398 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10399 in self.cfg.GetMultiNodeInfo(touched_nodes))
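# node_secondary_ip maps node name -> secondary (replication) IP address for
# every node still involved in the operation; the DRBD disconnect/attach RPCs
# in the Exec path below use these addresses instead of the primary network.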
10401 def Exec(self, feedback_fn):
10402 """Execute disk replacement.
10404 This dispatches the disk replacement to the appropriate handler.
10407 if self.delay_iallocator:
10408 self._CheckPrereq2()
10411 # Verify owned locks before starting operation
10412 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10413 assert set(owned_nodes) == set(self.node_secondary_ip), \
10414 ("Incorrect node locks, owning %s, expected %s" %
10415 (owned_nodes, self.node_secondary_ip.keys()))
10416 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10417 self.lu.owned_locks(locking.LEVEL_NODE_RES))
10419 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10420 assert list(owned_instances) == [self.instance_name], \
10421 "Instance '%s' not locked" % self.instance_name
10423 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10424 "Should not own any node group lock at this point"
10427 feedback_fn("No disks need replacement")
10430 feedback_fn("Replacing disk(s) %s for %s" %
10431 (utils.CommaJoin(self.disks), self.instance.name))
10433 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10435 # Activate the instance disks if we're replacing them on a down instance
if activate_disks:
_StartInstanceDisks(self.lu, self.instance, True)
try:
# Should we replace the secondary node?
10441 if self.new_node is not None:
10442 fn = self._ExecDrbd8Secondary
else:
fn = self._ExecDrbd8DiskOnly
result = fn(feedback_fn)
finally:
# Deactivate the instance disks if we're replacing them on a
# down instance
if activate_disks:
_SafeShutdownInstanceDisks(self.lu, self.instance)
10453 assert not self.lu.owned_locks(locking.LEVEL_NODE)
10456 # Verify owned locks
10457 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10458 nodes = frozenset(self.node_secondary_ip)
10459 assert ((self.early_release and not owned_nodes) or
10460 (not self.early_release and not (set(owned_nodes) - nodes))), \
10461 ("Not owning the correct locks, early_release=%s, owned=%r,"
10462 " nodes=%r" % (self.early_release, owned_nodes, nodes))
10466 def _CheckVolumeGroup(self, nodes):
10467 self.lu.LogInfo("Checking volume groups")
10469 vgname = self.cfg.GetVGName()
10471 # Make sure volume group exists on all involved nodes
results = self.rpc.call_vg_list(nodes)
if not results:
raise errors.OpExecError("Can't list volume groups on the nodes")
for node in nodes:
res = results[node]
res.Raise("Error checking node %s" % node)
if vgname not in res.payload:
raise errors.OpExecError("Volume group '%s' not found on node %s" %
(vgname, node))
10483 def _CheckDisksExistence(self, nodes):
10484 # Check disk existence
10485 for idx, dev in enumerate(self.instance.disks):
if idx not in self.disks:
continue
for node in nodes:
self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
self.cfg.SetDiskID(dev, node)
result = self.rpc.call_blockdev_find(node, dev)
msg = result.fail_msg
if msg or not result.payload:
if not msg:
msg = "disk not found"
raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
(idx, node, msg))
10502 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10503 for idx, dev in enumerate(self.instance.disks):
if idx not in self.disks:
continue
self.lu.LogInfo("Checking disk/%d consistency on node %s" %
(idx, node_name))
if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
ldisk=ldisk):
10512 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10513 " replace disks for instance %s" %
10514 (node_name, self.instance.name))
10516 def _CreateNewStorage(self, node_name):
10517 """Create new storage on the primary or secondary node.
10519 This is only used for same-node replaces, not for changing the
10520 secondary node, hence we don't want to modify the existing disk.
"""
iv_names = {}
for idx, dev in enumerate(self.instance.disks):
if idx not in self.disks:
continue
self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10531 self.cfg.SetDiskID(dev, node_name)
10533 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10534 names = _GenerateUniqueNames(self.lu, lv_names)
10536 _, data_p, meta_p = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
10538 vg_data = dev.children[0].logical_id[0]
10539 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10540 logical_id=(vg_data, names[0]), params=data_p)
10541 vg_meta = dev.children[1].logical_id[0]
10542 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10543 logical_id=(vg_meta, names[1]), params=meta_p)
10545 new_lvs = [lv_data, lv_meta]
10546 old_lvs = [child.Copy() for child in dev.children]
10547 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10549 # we pass force_create=True to force the LVM creation
10550 for new_lv in new_lvs:
_CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
_GetInstanceInfoText(self.instance), False)
return iv_names
10556 def _CheckDevices(self, node_name, iv_names):
10557 for name, (dev, _, _) in iv_names.iteritems():
10558 self.cfg.SetDiskID(dev, node_name)
10560 result = self.rpc.call_blockdev_find(node_name, dev)
10562 msg = result.fail_msg
if msg or not result.payload:
if not msg:
msg = "disk not found"
raise errors.OpExecError("Can't find DRBD device %s: %s" %
(name, msg))
10569 if result.payload.is_degraded:
10570 raise errors.OpExecError("DRBD device %s is degraded!" % name)
10572 def _RemoveOldStorage(self, node_name, iv_names):
10573 for name, (_, old_lvs, _) in iv_names.iteritems():
self.lu.LogInfo("Remove logical volumes for %s" % name)
for lv in old_lvs:
self.cfg.SetDiskID(lv, node_name)
msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
if msg:
self.lu.LogWarning("Can't remove old LV: %s" % msg,
10582 hint="remove unused LVs manually")
10584 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10585 """Replace a disk on the primary or secondary for DRBD 8.
10587 The algorithm for replace is quite complicated:
10589 1. for each disk to be replaced:
10591 1. create new LVs on the target node with unique names
10592 1. detach old LVs from the drbd device
10593 1. rename old LVs to name_replaced.<time_t>
10594 1. rename new LVs to old LVs
10595 1. attach the new LVs (with the old names now) to the drbd device
10597 1. wait for sync across all devices
10599 1. for each modified disk:
1. remove old LVs (which have the name name_replaced.<time_t>)
Failures are not very well handled.
"""
steps_total = 6
# Step: check device activation
10609 self.lu.LogStep(1, steps_total, "Check device existence")
10610 self._CheckDisksExistence([self.other_node, self.target_node])
10611 self._CheckVolumeGroup([self.target_node, self.other_node])
10613 # Step: check other node consistency
10614 self.lu.LogStep(2, steps_total, "Check peer consistency")
10615 self._CheckDisksConsistency(self.other_node,
10616 self.other_node == self.instance.primary_node,
10619 # Step: create new storage
10620 self.lu.LogStep(3, steps_total, "Allocate new storage")
10621 iv_names = self._CreateNewStorage(self.target_node)
10623 # Step: for each lv, detach+rename*2+attach
10624 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10625 for dev, old_lvs, new_lvs in iv_names.itervalues():
10626 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
old_lvs)
10630 result.Raise("Can't detach drbd from local storage on node"
10631 " %s for device %s" % (self.target_node, dev.iv_name))
10633 #cfg.Update(instance)
10635 # ok, we created the new LVs, so now we know we have the needed
10636 # storage; as such, we proceed on the target node to rename
10637 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
10638 # using the assumption that logical_id == physical_id (which in
10639 # turn is the unique_id on that node)
10641 # FIXME(iustin): use a better name for the replaced LVs
10642 temp_suffix = int(time.time())
10643 ren_fn = lambda d, suff: (d.physical_id[0],
10644 d.physical_id[1] + "_replaced-%s" % suff)
10646 # Build the rename list based on what LVs exist on the node
10647 rename_old_to_new = []
10648 for to_ren in old_lvs:
10649 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
10650 if not result.fail_msg and result.payload:
10652 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
10654 self.lu.LogInfo("Renaming the old LVs on the target node")
result = self.rpc.call_blockdev_rename(self.target_node,
rename_old_to_new)
10657 result.Raise("Can't rename old LVs on node %s" % self.target_node)
10659 # Now we rename the new LVs to the old LVs
10660 self.lu.LogInfo("Renaming the new LVs on the target node")
10661 rename_new_to_old = [(new, old.physical_id)
10662 for old, new in zip(old_lvs, new_lvs)]
result = self.rpc.call_blockdev_rename(self.target_node,
rename_new_to_old)
10665 result.Raise("Can't rename new LVs on node %s" % self.target_node)
10667 # Intermediate steps of in memory modifications
10668 for old, new in zip(old_lvs, new_lvs):
10669 new.logical_id = old.logical_id
10670 self.cfg.SetDiskID(new, self.target_node)
10672 # We need to modify old_lvs so that removal later removes the
10673 # right LVs, not the newly added ones; note that old_lvs is a
10675 for disk in old_lvs:
10676 disk.logical_id = ren_fn(disk, temp_suffix)
10677 self.cfg.SetDiskID(disk, self.target_node)
10679 # Now that the new lvs have the old name, we can add them to the device
10680 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
new_lvs)
msg = result.fail_msg
if msg:
for new_lv in new_lvs:
msg2 = self.rpc.call_blockdev_remove(self.target_node,
new_lv).fail_msg
if msg2:
self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
hint=("cleanup manually the unused logical"
" volumes"))
10692 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
10694 cstep = itertools.count(5)
10696 if self.early_release:
10697 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10698 self._RemoveOldStorage(self.target_node, iv_names)
10699 # TODO: Check if releasing locks early still makes sense
10700 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10702 # Release all resource locks except those used by the instance
10703 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10704 keep=self.node_secondary_ip.keys())
10706 # Release all node locks while waiting for sync
10707 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10709 # TODO: Can the instance lock be downgraded here? Take the optional disk
10710 # shutdown in the caller into consideration.
10713 # This can fail as the old devices are degraded and _WaitForSync
10714 # does a combined result over all disks, so we don't check its return value
10715 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10716 _WaitForSync(self.lu, self.instance)
10718 # Check all devices manually
10719 self._CheckDevices(self.instance.primary_node, iv_names)
10721 # Step: remove old storage
10722 if not self.early_release:
10723 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10724 self._RemoveOldStorage(self.target_node, iv_names)
10726 def _ExecDrbd8Secondary(self, feedback_fn):
10727 """Replace the secondary node for DRBD 8.
10729 The algorithm for replace is quite complicated:
10730 - for all disks of the instance:
10731 - create new LVs on the new node with same names
10732 - shutdown the drbd device on the old secondary
10733 - disconnect the drbd network on the primary
10734 - create the drbd device on the new secondary
10735 - network attach the drbd on the primary, using an artifice:
10736 the drbd code for Attach() will connect to the network if it
10737 finds a device which is connected to the good local disks but
10738 not network enabled
10739 - wait for sync across all devices
10740 - remove all disks from the old secondary
Failures are not very well handled.
"""
steps_total = 6
pnode = self.instance.primary_node
10749 # Step: check device activation
10750 self.lu.LogStep(1, steps_total, "Check device existence")
10751 self._CheckDisksExistence([self.instance.primary_node])
10752 self._CheckVolumeGroup([self.instance.primary_node])
10754 # Step: check other node consistency
10755 self.lu.LogStep(2, steps_total, "Check peer consistency")
10756 self._CheckDisksConsistency(self.instance.primary_node, True, True)
10758 # Step: create new storage
10759 self.lu.LogStep(3, steps_total, "Allocate new storage")
10760 for idx, dev in enumerate(self.instance.disks):
10761 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
10762 (self.new_node, idx))
10763 # we pass force_create=True to force LVM creation
10764 for new_lv in dev.children:
10765 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
10766 _GetInstanceInfoText(self.instance), False)
# Step 4: drbd minors and drbd setup changes
10769 # after this, we must manually remove the drbd minors on both the
10770 # error and the success paths
10771 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10772 minors = self.cfg.AllocateDRBDMinor([self.new_node
10773 for dev in self.instance.disks],
10774 self.instance.name)
10775 logging.debug("Allocated minors %r", minors)
iv_names = {}
for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
10779 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
10780 (self.new_node, idx))
10781 # create new devices on new_node; note that we create two IDs:
10782 # one without port, so the drbd will be activated without
10783 # networking information on the new node at this stage, and one
10784 # with network, for the latter activation in step 4
10785 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
if self.instance.primary_node == o_node1:
p_minor = o_minor1
else:
assert self.instance.primary_node == o_node2, "Three-node instance?"
p_minor = o_minor2
new_alone_id = (self.instance.primary_node, self.new_node, None,
10793 p_minor, new_minor, o_secret)
10794 new_net_id = (self.instance.primary_node, self.new_node, o_port,
10795 p_minor, new_minor, o_secret)
10797 iv_names[idx] = (dev, dev.children, new_net_id)
logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
new_net_id)
drbd_params, _, _ = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
logical_id=new_alone_id,
children=dev.children,
size=dev.size,
params=drbd_params)
try:
_CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
10808 _GetInstanceInfoText(self.instance), False)
10809 except errors.GenericError:
self.cfg.ReleaseDRBDMinors(self.instance.name)
raise
10813 # We have new devices, shutdown the drbd on the old secondary
10814 for idx, dev in enumerate(self.instance.disks):
10815 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
10816 self.cfg.SetDiskID(dev, self.target_node)
msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
if msg:
self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
" node: %s" % (idx, msg),
10821 hint=("Please cleanup this device manually as"
10822 " soon as possible"))
10824 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
10825 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
10826 self.instance.disks)[pnode]
msg = result.fail_msg
if msg:
# detaches didn't succeed (unlikely)
10831 self.cfg.ReleaseDRBDMinors(self.instance.name)
10832 raise errors.OpExecError("Can't detach the disks from the network on"
10833 " old node: %s" % (msg,))
10835 # if we managed to detach at least one, we update all the disks of
10836 # the instance to point to the new secondary
10837 self.lu.LogInfo("Updating instance configuration")
10838 for dev, _, new_logical_id in iv_names.itervalues():
10839 dev.logical_id = new_logical_id
10840 self.cfg.SetDiskID(dev, self.instance.primary_node)
10842 self.cfg.Update(self.instance, feedback_fn)
10844 # Release all node locks (the configuration has been updated)
10845 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10847 # and now perform the drbd attach
10848 self.lu.LogInfo("Attaching primary drbds to new secondary"
10849 " (standalone => connected)")
result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
self.new_node],
10852 self.node_secondary_ip,
10853 self.instance.disks,
self.instance.name,
False)
for to_node, to_result in result.items():
msg = to_result.fail_msg
if msg:
self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
to_node, msg,
hint=("please do a gnt-instance info to see the"
10862 " status of disks"))
10864 cstep = itertools.count(5)
10866 if self.early_release:
10867 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10868 self._RemoveOldStorage(self.target_node, iv_names)
10869 # TODO: Check if releasing locks early still makes sense
10870 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10872 # Release all resource locks except those used by the instance
10873 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10874 keep=self.node_secondary_ip.keys())
10876 # TODO: Can the instance lock be downgraded here? Take the optional disk
10877 # shutdown in the caller into consideration.
10880 # This can fail as the old devices are degraded and _WaitForSync
10881 # does a combined result over all disks, so we don't check its return value
10882 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10883 _WaitForSync(self.lu, self.instance)
10885 # Check all devices manually
10886 self._CheckDevices(self.instance.primary_node, iv_names)
10888 # Step: remove old storage
10889 if not self.early_release:
10890 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10891 self._RemoveOldStorage(self.target_node, iv_names)
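# Note on the secondary-replacement path above: the DRBD minors reserved via
# AllocateDRBDMinor only become permanent once cfg.Update() has been called;
# every early error path therefore calls ReleaseDRBDMinors() so that a failed
# replacement does not leak minor numbers on the new node.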
10894 class LURepairNodeStorage(NoHooksLU):
10895 """Repairs the volume group on a node.
10900 def CheckArguments(self):
10901 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10903 storage_type = self.op.storage_type
10905 if (constants.SO_FIX_CONSISTENCY not in
10906 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
10907 raise errors.OpPrereqError("Storage units of type '%s' can not be"
10908 " repaired" % storage_type,
10909 errors.ECODE_INVAL)
10911 def ExpandNames(self):
10912 self.needed_locks = {
locking.LEVEL_NODE: [self.op.node_name],
}
10916 def _CheckFaultyDisks(self, instance, node_name):
10917 """Ensure faulty disks abort the opcode or at least warn."""
10919 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
10921 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
10922 " node '%s'" % (instance.name, node_name),
10923 errors.ECODE_STATE)
10924 except errors.OpPrereqError, err:
10925 if self.op.ignore_consistency:
10926 self.proc.LogWarning(str(err.args[0]))
10930 def CheckPrereq(self):
10931 """Check prerequisites.
10934 # Check whether any instance on this node has faulty disks
10935 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
if inst.admin_state != constants.ADMINST_UP:
continue
10938 check_nodes = set(inst.all_nodes)
10939 check_nodes.discard(self.op.node_name)
10940 for inst_node_name in check_nodes:
10941 self._CheckFaultyDisks(inst, inst_node_name)
10943 def Exec(self, feedback_fn):
10944 feedback_fn("Repairing storage unit '%s' on %s ..." %
10945 (self.op.name, self.op.node_name))
10947 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
10948 result = self.rpc.call_storage_execute(self.op.node_name,
self.op.storage_type, st_args,
self.op.name,
10951 constants.SO_FIX_CONSISTENCY)
10952 result.Raise("Failed to repair storage unit '%s' on %s" %
10953 (self.op.name, self.op.node_name))
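# For reference, this LU typically backs a command along the lines of
#   gnt-node repair-storage node1.example.com lvm-vg xenvg
# (illustrative invocation) which asks the node to run the fix-consistency
# operation for the given storage unit.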
10956 class LUNodeEvacuate(NoHooksLU):
10957 """Evacuates instances off a list of nodes.
10962 _MODE2IALLOCATOR = {
10963 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
10964 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
}
10967 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
10968 assert (frozenset(_MODE2IALLOCATOR.values()) ==
10969 constants.IALLOCATOR_NEVAC_MODES)
10971 def CheckArguments(self):
10972 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10974 def ExpandNames(self):
10975 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10977 if self.op.remote_node is not None:
10978 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10979 assert self.op.remote_node
10981 if self.op.remote_node == self.op.node_name:
10982 raise errors.OpPrereqError("Can not use evacuated node as a new"
10983 " secondary node", errors.ECODE_INVAL)
10985 if self.op.mode != constants.NODE_EVAC_SEC:
10986 raise errors.OpPrereqError("Without the use of an iallocator only"
10987 " secondary instances can be evacuated",
10988 errors.ECODE_INVAL)
10991 self.share_locks = _ShareAll()
10992 self.needed_locks = {
10993 locking.LEVEL_INSTANCE: [],
10994 locking.LEVEL_NODEGROUP: [],
10995 locking.LEVEL_NODE: [],
10998 # Determine nodes (via group) optimistically, needs verification once locks
10999 # have been acquired
11000 self.lock_nodes = self._DetermineNodes()
11002 def _DetermineNodes(self):
11003 """Gets the list of nodes to operate on.
11006 if self.op.remote_node is None:
11007 # Iallocator will choose any node(s) in the same group
11008 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11010 group_nodes = frozenset([self.op.remote_node])
11012 # Determine nodes to be locked
11013 return set([self.op.node_name]) | group_nodes
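    # Worked example (hypothetical node names): evacuating "node1" whose group
    # also contains "node2" and "node3" yields {"node1", "node2", "node3"};
    # with an explicit remote node "node9" the result is just
    # {"node1", "node9"}.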
11015 def _DetermineInstances(self):
11016 """Builds list of instances to operate on.
11019 assert self.op.mode in constants.NODE_EVAC_MODES
11021 if self.op.mode == constants.NODE_EVAC_PRI:
11022 # Primary instances only
11023 inst_fn = _GetNodePrimaryInstances
11024 assert self.op.remote_node is None, \
11025 "Evacuating primary instances requires iallocator"
11026 elif self.op.mode == constants.NODE_EVAC_SEC:
11027 # Secondary instances only
11028 inst_fn = _GetNodeSecondaryInstances
11031 assert self.op.mode == constants.NODE_EVAC_ALL
11032 inst_fn = _GetNodeInstances
11033 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11035 raise errors.OpPrereqError("Due to an issue with the iallocator"
11036 " interface it is not possible to evacuate"
11037 " all instances at once; specify explicitly"
11038 " whether to evacuate primary or secondary"
11040 errors.ECODE_INVAL)
11042 return inst_fn(self.cfg, self.op.node_name)
11044 def DeclareLocks(self, level):
11045 if level == locking.LEVEL_INSTANCE:
11046 # Lock instances optimistically, needs verification once node and group
11047 # locks have been acquired
11048 self.needed_locks[locking.LEVEL_INSTANCE] = \
11049 set(i.name for i in self._DetermineInstances())
11051 elif level == locking.LEVEL_NODEGROUP:
11052 # Lock node groups for all potential target nodes optimistically, needs
11053 # verification once nodes have been acquired
11054 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11055 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11057 elif level == locking.LEVEL_NODE:
11058 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11060 def CheckPrereq(self):
11062 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11063 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11064 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11066 need_nodes = self._DetermineNodes()
11068 if not owned_nodes.issuperset(need_nodes):
11069 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11070 " locks were acquired, current nodes are"
11071                                  " '%s', used to be '%s'; retry the"
11073 (self.op.node_name,
11074 utils.CommaJoin(need_nodes),
11075 utils.CommaJoin(owned_nodes)),
11076 errors.ECODE_STATE)
11078 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11079 if owned_groups != wanted_groups:
11080 raise errors.OpExecError("Node groups changed since locks were acquired,"
11081 " current groups are '%s', used to be '%s';"
11082 " retry the operation" %
11083 (utils.CommaJoin(wanted_groups),
11084 utils.CommaJoin(owned_groups)))
11086 # Determine affected instances
11087 self.instances = self._DetermineInstances()
11088 self.instance_names = [i.name for i in self.instances]
11090 if set(self.instance_names) != owned_instances:
11091 raise errors.OpExecError("Instances on node '%s' changed since locks"
11092 " were acquired, current instances are '%s',"
11093 " used to be '%s'; retry the operation" %
11094 (self.op.node_name,
11095 utils.CommaJoin(self.instance_names),
11096 utils.CommaJoin(owned_instances)))
11098 if self.instance_names:
11099 self.LogInfo("Evacuating instances from node '%s': %s",
11101 utils.CommaJoin(utils.NiceSort(self.instance_names)))
11103 self.LogInfo("No instances to evacuate from node '%s'",
11106 if self.op.remote_node is not None:
11107 for i in self.instances:
11108 if i.primary_node == self.op.remote_node:
11109 raise errors.OpPrereqError("Node %s is the primary node of"
11110 " instance %s, cannot use it as"
11112 (self.op.remote_node, i.name),
11113 errors.ECODE_INVAL)
11115 def Exec(self, feedback_fn):
11116 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
11118 if not self.instance_names:
11119 # No instances to evacuate
11122 elif self.op.iallocator is not None:
11123 # TODO: Implement relocation to other group
11124 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
11125 evac_mode=self._MODE2IALLOCATOR[self.op.mode],
11126 instances=list(self.instance_names))
11128 ial.Run(self.op.iallocator)
11130 if not ial.success:
11131 raise errors.OpPrereqError("Can't compute node evacuation using"
11132 " iallocator '%s': %s" %
11133 (self.op.iallocator, ial.info),
11134 errors.ECODE_NORES)
11136 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
11138 elif self.op.remote_node is not None:
11139 assert self.op.mode == constants.NODE_EVAC_SEC
11141 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
11142 remote_node=self.op.remote_node,
11144 mode=constants.REPLACE_DISK_CHG,
11145 early_release=self.op.early_release)]
11146 for instance_name in self.instance_names
11150 raise errors.ProgrammerError("No iallocator or remote node")
11152 return ResultWithJobs(jobs)
11155 def _SetOpEarlyRelease(early_release, op):
11156 """Sets C{early_release} flag on opcodes if available.
11160 op.early_release = early_release
11161 except AttributeError:
11162 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
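# Minimal sketch of how this helper is applied in _LoadNodeEvacResult below:
#   op = opcodes.OpInstanceReplaceDisks(...)
#   _SetOpEarlyRelease(True, op)    # sets op.early_release = True
# Opcodes lacking the attribute are left untouched (the AttributeError branch);
# the assert merely documents that OpInstanceReplaceDisks is never among them.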
11167 def _NodeEvacDest(use_nodes, group, nodes):
11168 """Returns group or nodes depending on caller's choice.
11172 return utils.CommaJoin(nodes)
11177 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11178 """Unpacks the result of change-group and node-evacuate iallocator requests.
11180 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11181 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11183 @type lu: L{LogicalUnit}
11184 @param lu: Logical unit instance
11185 @type alloc_result: tuple/list
11186 @param alloc_result: Result from iallocator
11187 @type early_release: bool
11188 @param early_release: Whether to release locks early if possible
11189 @type use_nodes: bool
11190 @param use_nodes: Whether to display node names instead of groups
11193 (moved, failed, jobs) = alloc_result
11196 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11197 for (name, reason) in failed)
11198 lu.LogWarning("Unable to evacuate instances %s", failreason)
11199 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11202 lu.LogInfo("Instances to be moved: %s",
11203 utils.CommaJoin("%s (to %s)" %
11204 (name, _NodeEvacDest(use_nodes, group, nodes))
11205 for (name, group, nodes) in moved))
11207 return [map(compat.partial(_SetOpEarlyRelease, early_release),
11208 map(opcodes.OpCode.LoadOpCode, ops))
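# Shape of alloc_result as consumed above (inferred from the unpacking, not a
# formal specification):
#   moved  = [(instance_name, target_group, [node_name, ...]), ...]
#   failed = [(instance_name, failure_reason), ...]
#   jobs   = [[serialized_opcode, ...], ...]   # one inner list per job
# Each serialized opcode is re-created via opcodes.OpCode.LoadOpCode and, where
# supported, gets the caller's early_release flag applied before being returned.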
11212 class LUInstanceGrowDisk(LogicalUnit):
11213 """Grow a disk of an instance.
11216 HPATH = "disk-grow"
11217 HTYPE = constants.HTYPE_INSTANCE
11220 def ExpandNames(self):
11221 self._ExpandAndLockInstance()
11222 self.needed_locks[locking.LEVEL_NODE] = []
11223 self.needed_locks[locking.LEVEL_NODE_RES] = []
11224 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
11226 def DeclareLocks(self, level):
11227 if level == locking.LEVEL_NODE:
11228 self._LockInstancesNodes()
11229 elif level == locking.LEVEL_NODE_RES:
11231 self.needed_locks[locking.LEVEL_NODE_RES] = \
11232 self.needed_locks[locking.LEVEL_NODE][:]
11234 def BuildHooksEnv(self):
11235 """Build hooks env.
11237 This runs on the master, the primary and all the secondaries.
11241 "DISK": self.op.disk,
11242 "AMOUNT": self.op.amount,
11244 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11247 def BuildHooksNodes(self):
11248 """Build hooks nodes.
11251 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11254 def CheckPrereq(self):
11255 """Check prerequisites.
11257 This checks that the instance is in the cluster.
11260 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11261 assert instance is not None, \
11262 "Cannot retrieve locked instance %s" % self.op.instance_name
11263 nodenames = list(instance.all_nodes)
11264 for node in nodenames:
11265 _CheckNodeOnline(self, node)
11267 self.instance = instance
11269 if instance.disk_template not in constants.DTS_GROWABLE:
11270 raise errors.OpPrereqError("Instance's disk layout does not support"
11271 " growing", errors.ECODE_INVAL)
11273 self.disk = instance.FindDisk(self.op.disk)
11275 if instance.disk_template not in (constants.DT_FILE,
11276 constants.DT_SHARED_FILE):
11277 # TODO: check the free disk space for file, when that feature will be
11279 _CheckNodesFreeDiskPerVG(self, nodenames,
11280 self.disk.ComputeGrowth(self.op.amount))
11282 def Exec(self, feedback_fn):
11283 """Execute disk grow.
11286 instance = self.instance
11289 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11290 assert (self.owned_locks(locking.LEVEL_NODE) ==
11291 self.owned_locks(locking.LEVEL_NODE_RES))
11293 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
11295 raise errors.OpExecError("Cannot activate block device to grow")
11297 feedback_fn("Growing disk %s of instance '%s' by %s" %
11298 (self.op.disk, instance.name,
11299 utils.FormatUnit(self.op.amount, "h")))
11301 # First run all grow ops in dry-run mode
11302 for node in instance.all_nodes:
11303 self.cfg.SetDiskID(disk, node)
11304 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
11305 result.Raise("Grow request failed to node %s" % node)
11307 # We know that (as far as we can test) operations across different
11308     # nodes will succeed; time to run it for real
11309 for node in instance.all_nodes:
11310 self.cfg.SetDiskID(disk, node)
11311 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
11312 result.Raise("Grow request failed to node %s" % node)
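    # The two loops above form a simple two-phase scheme: every node first
    # validates the grow with the dry-run flag set, and only if all of them
    # succeed is the same RPC repeated for real to resize the block devices.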
11314 # TODO: Rewrite code to work properly
11315 # DRBD goes into sync mode for a short amount of time after executing the
11316 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
11317 # calling "resize" in sync mode fails. Sleeping for a short amount of
11318 # time is a work-around.
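    # (Upstream, an unconditional short sleep follows this comment as the
    # actual work-around; the exact duration is assumed here.)
    #   time.sleep(5)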
11321 disk.RecordGrow(self.op.amount)
11322 self.cfg.Update(instance, feedback_fn)
11324 # Changes have been recorded, release node lock
11325 _ReleaseLocks(self, locking.LEVEL_NODE)
11327 # Downgrade lock while waiting for sync
11328 self.glm.downgrade(locking.LEVEL_INSTANCE)
11330 if self.op.wait_for_sync:
11331 disk_abort = not _WaitForSync(self, instance, disks=[disk])
11333 self.proc.LogWarning("Disk sync-ing has not returned a good"
11334 " status; please check the instance")
11335 if instance.admin_state != constants.ADMINST_UP:
11336 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
11337 elif instance.admin_state != constants.ADMINST_UP:
11338       self.proc.LogWarning("Not shutting down the disk even though the instance"
11339                            " is not supposed to be running, because no wait for"
11340                            " sync mode was requested")
11342 assert self.owned_locks(locking.LEVEL_NODE_RES)
11343 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11346 class LUInstanceQueryData(NoHooksLU):
11347 """Query runtime instance data.
11352 def ExpandNames(self):
11353 self.needed_locks = {}
11355 # Use locking if requested or when non-static information is wanted
11356 if not (self.op.static or self.op.use_locking):
11357 self.LogWarning("Non-static data requested, locks need to be acquired")
11358 self.op.use_locking = True
11360 if self.op.instances or not self.op.use_locking:
11361 # Expand instance names right here
11362 self.wanted_names = _GetWantedInstances(self, self.op.instances)
11364 # Will use acquired locks
11365 self.wanted_names = None
11367 if self.op.use_locking:
11368 self.share_locks = _ShareAll()
11370 if self.wanted_names is None:
11371 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
11373 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
11375 self.needed_locks[locking.LEVEL_NODE] = []
11376 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11378 def DeclareLocks(self, level):
11379 if self.op.use_locking and level == locking.LEVEL_NODE:
11380 self._LockInstancesNodes()
11382 def CheckPrereq(self):
11383 """Check prerequisites.
11385 This only checks the optional instance list against the existing names.
11388 if self.wanted_names is None:
11389 assert self.op.use_locking, "Locking was not used"
11390 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
11392 self.wanted_instances = \
11393 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
11395 def _ComputeBlockdevStatus(self, node, instance_name, dev):
11396 """Returns the status of a block device
11399 if self.op.static or not node:
11402 self.cfg.SetDiskID(dev, node)
11404 result = self.rpc.call_blockdev_find(node, dev)
11408 result.Raise("Can't compute disk status for %s" % instance_name)
11410 status = result.payload
11414 return (status.dev_path, status.major, status.minor,
11415 status.sync_percent, status.estimated_time,
11416 status.is_degraded, status.ldisk_status)
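    # Example of the tuple built above for a healthy, fully synced device
    # (values purely illustrative, and LDS_OKAY is an assumed name for the
    # "ok" logical-disk status constant):
    #   ("/dev/drbd0", 147, 0, 100.0, None, False, constants.LDS_OKAY)
    # i.e. (dev_path, major, minor, sync_percent, estimated_time, is_degraded,
    #       ldisk_status)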
11418 def _ComputeDiskStatus(self, instance, snode, dev):
11419 """Compute block device status.
11422 if dev.dev_type in constants.LDS_DRBD:
11423 # we change the snode then (otherwise we use the one passed in)
11424 if dev.logical_id[0] == instance.primary_node:
11425 snode = dev.logical_id[1]
11427 snode = dev.logical_id[0]
11429 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
11430 instance.name, dev)
11431 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
11434 dev_children = map(compat.partial(self._ComputeDiskStatus,
11441 "iv_name": dev.iv_name,
11442 "dev_type": dev.dev_type,
11443 "logical_id": dev.logical_id,
11444 "physical_id": dev.physical_id,
11445 "pstatus": dev_pstatus,
11446 "sstatus": dev_sstatus,
11447 "children": dev_children,
11452 def Exec(self, feedback_fn):
11453 """Gather and return data"""
11456 cluster = self.cfg.GetClusterInfo()
11458 pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
11459 for i in self.wanted_instances)
11460 for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
11461 if self.op.static or pnode.offline:
11462 remote_state = None
11464 self.LogWarning("Primary node %s is marked offline, returning static"
11465 " information only for instance %s" %
11466 (pnode.name, instance.name))
11468 remote_info = self.rpc.call_instance_info(instance.primary_node,
11470 instance.hypervisor)
11471 remote_info.Raise("Error checking node %s" % instance.primary_node)
11472 remote_info = remote_info.payload
11473 if remote_info and "state" in remote_info:
11474 remote_state = "up"
11476 if instance.admin_state == constants.ADMINST_UP:
11477 remote_state = "down"
11479 remote_state = instance.admin_state
11481 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
11484 result[instance.name] = {
11485 "name": instance.name,
11486 "config_state": instance.admin_state,
11487 "run_state": remote_state,
11488 "pnode": instance.primary_node,
11489 "snodes": instance.secondary_nodes,
11491 # this happens to be the same format used for hooks
11492 "nics": _NICListToTuple(self, instance.nics),
11493 "disk_template": instance.disk_template,
11495 "hypervisor": instance.hypervisor,
11496 "network_port": instance.network_port,
11497 "hv_instance": instance.hvparams,
11498 "hv_actual": cluster.FillHV(instance, skip_globals=True),
11499 "be_instance": instance.beparams,
11500 "be_actual": cluster.FillBE(instance),
11501 "os_instance": instance.osparams,
11502 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
11503 "serial_no": instance.serial_no,
11504 "mtime": instance.mtime,
11505 "ctime": instance.ctime,
11506 "uuid": instance.uuid,
11512 class LUInstanceSetParams(LogicalUnit):
11513   """Modifies an instance's parameters.
11516 HPATH = "instance-modify"
11517 HTYPE = constants.HTYPE_INSTANCE
11520 def CheckArguments(self):
11521 if not (self.op.nics or self.op.disks or self.op.disk_template or
11522 self.op.hvparams or self.op.beparams or self.op.os_name or
11523 self.op.online_inst or self.op.offline_inst):
11524 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
11526 if self.op.hvparams:
11527 _CheckGlobalHvParams(self.op.hvparams)
11531 for disk_op, disk_dict in self.op.disks:
11532 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
11533 if disk_op == constants.DDM_REMOVE:
11534 disk_addremove += 1
11536 elif disk_op == constants.DDM_ADD:
11537 disk_addremove += 1
11539 if not isinstance(disk_op, int):
11540 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
11541 if not isinstance(disk_dict, dict):
11542 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
11543 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11545 if disk_op == constants.DDM_ADD:
11546 mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
11547 if mode not in constants.DISK_ACCESS_SET:
11548 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
11549 errors.ECODE_INVAL)
11550 size = disk_dict.get(constants.IDISK_SIZE, None)
11552 raise errors.OpPrereqError("Required disk parameter size missing",
11553 errors.ECODE_INVAL)
11556 except (TypeError, ValueError), err:
11557 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
11558 str(err), errors.ECODE_INVAL)
11559 disk_dict[constants.IDISK_SIZE] = size
11561 # modification of disk
11562 if constants.IDISK_SIZE in disk_dict:
11563 raise errors.OpPrereqError("Disk size change not possible, use"
11564 " grow-disk", errors.ECODE_INVAL)
11566 if disk_addremove > 1:
11567 raise errors.OpPrereqError("Only one disk add or remove operation"
11568 " supported at a time", errors.ECODE_INVAL)
11570 if self.op.disks and self.op.disk_template is not None:
11571 raise errors.OpPrereqError("Disk template conversion and other disk"
11572 " changes not supported at the same time",
11573 errors.ECODE_INVAL)
11575 if (self.op.disk_template and
11576 self.op.disk_template in constants.DTS_INT_MIRROR and
11577 self.op.remote_node is None):
11578 raise errors.OpPrereqError("Changing the disk template to a mirrored"
11579 " one requires specifying a secondary node",
11580 errors.ECODE_INVAL)
11584 for nic_op, nic_dict in self.op.nics:
11585 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
11586 if nic_op == constants.DDM_REMOVE:
11589 elif nic_op == constants.DDM_ADD:
11592 if not isinstance(nic_op, int):
11593 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
11594 if not isinstance(nic_dict, dict):
11595 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
11596 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11598 # nic_dict should be a dict
11599 nic_ip = nic_dict.get(constants.INIC_IP, None)
11600 if nic_ip is not None:
11601 if nic_ip.lower() == constants.VALUE_NONE:
11602 nic_dict[constants.INIC_IP] = None
11604 if not netutils.IPAddress.IsValid(nic_ip):
11605 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
11606 errors.ECODE_INVAL)
11608 nic_bridge = nic_dict.get("bridge", None)
11609 nic_link = nic_dict.get(constants.INIC_LINK, None)
11610 if nic_bridge and nic_link:
11611 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
11612 " at the same time", errors.ECODE_INVAL)
11613 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
11614 nic_dict["bridge"] = None
11615 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
11616 nic_dict[constants.INIC_LINK] = None
11618 if nic_op == constants.DDM_ADD:
11619 nic_mac = nic_dict.get(constants.INIC_MAC, None)
11620 if nic_mac is None:
11621 nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
11623 if constants.INIC_MAC in nic_dict:
11624 nic_mac = nic_dict[constants.INIC_MAC]
11625 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11626 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
11628 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
11629 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
11630 " modifying an existing nic",
11631 errors.ECODE_INVAL)
11633 if nic_addremove > 1:
11634 raise errors.OpPrereqError("Only one NIC add or remove operation"
11635 " supported at a time", errors.ECODE_INVAL)
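    # Illustration of the (op, value) pairs validated above (values assumed):
    #   self.op.nics  = [(constants.DDM_ADD, {constants.INIC_IP: "192.0.2.10"}),
    #                    (0, {constants.INIC_LINK: "br0"})]   # modify NIC 0
    #   self.op.disks = [(constants.DDM_REMOVE, {})]          # drop last disk
    # The first element is DDM_ADD, DDM_REMOVE or the integer index of an
    # existing NIC/disk to modify; the dict carries the new parameters.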
11637 def ExpandNames(self):
11638 self._ExpandAndLockInstance()
11639 # Can't even acquire node locks in shared mode as upcoming changes in
11640 # Ganeti 2.6 will start to modify the node object on disk conversion
11641 self.needed_locks[locking.LEVEL_NODE] = []
11642 self.needed_locks[locking.LEVEL_NODE_RES] = []
11643 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11645 def DeclareLocks(self, level):
11646 if level == locking.LEVEL_NODE:
11647 self._LockInstancesNodes()
11648 if self.op.disk_template and self.op.remote_node:
11649 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11650 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
11651 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
11653 self.needed_locks[locking.LEVEL_NODE_RES] = \
11654 self.needed_locks[locking.LEVEL_NODE][:]
11656 def BuildHooksEnv(self):
11657 """Build hooks env.
11659 This runs on the master, primary and secondaries.
11663 if constants.BE_MINMEM in self.be_new:
11664 args["minmem"] = self.be_new[constants.BE_MINMEM]
11665 if constants.BE_MAXMEM in self.be_new:
11666 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
11667 if constants.BE_VCPUS in self.be_new:
11668 args["vcpus"] = self.be_new[constants.BE_VCPUS]
11669 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
11670 # information at all.
11673 nic_override = dict(self.op.nics)
11674 for idx, nic in enumerate(self.instance.nics):
11675 if idx in nic_override:
11676 this_nic_override = nic_override[idx]
11678 this_nic_override = {}
11679 if constants.INIC_IP in this_nic_override:
11680 ip = this_nic_override[constants.INIC_IP]
11683 if constants.INIC_MAC in this_nic_override:
11684 mac = this_nic_override[constants.INIC_MAC]
11687 if idx in self.nic_pnew:
11688 nicparams = self.nic_pnew[idx]
11690 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
11691 mode = nicparams[constants.NIC_MODE]
11692 link = nicparams[constants.NIC_LINK]
11693 args["nics"].append((ip, mac, mode, link))
11694 if constants.DDM_ADD in nic_override:
11695 ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
11696 mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
11697 nicparams = self.nic_pnew[constants.DDM_ADD]
11698 mode = nicparams[constants.NIC_MODE]
11699 link = nicparams[constants.NIC_LINK]
11700 args["nics"].append((ip, mac, mode, link))
11701 elif constants.DDM_REMOVE in nic_override:
11702 del args["nics"][-1]
11704 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
11705 if self.op.disk_template:
11706 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
11710 def BuildHooksNodes(self):
11711 """Build hooks nodes.
11714 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11717 def CheckPrereq(self):
11718 """Check prerequisites.
11720 This only checks the instance list against the existing names.
11723 # checking the new params on the primary/secondary nodes
11725 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11726 cluster = self.cluster = self.cfg.GetClusterInfo()
11727 assert self.instance is not None, \
11728 "Cannot retrieve locked instance %s" % self.op.instance_name
11729 pnode = instance.primary_node
11730 nodelist = list(instance.all_nodes)
11731 pnode_info = self.cfg.GetNodeInfo(pnode)
11732 self.diskparams = self.cfg.GetNodeGroup(pnode_info.group).diskparams
11735 if self.op.os_name and not self.op.force:
11736 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
11737 self.op.force_variant)
11738 instance_os = self.op.os_name
11740 instance_os = instance.os
11742 if self.op.disk_template:
11743 if instance.disk_template == self.op.disk_template:
11744 raise errors.OpPrereqError("Instance already has disk template %s" %
11745 instance.disk_template, errors.ECODE_INVAL)
11747 if (instance.disk_template,
11748 self.op.disk_template) not in self._DISK_CONVERSIONS:
11749 raise errors.OpPrereqError("Unsupported disk template conversion from"
11750 " %s to %s" % (instance.disk_template,
11751 self.op.disk_template),
11752 errors.ECODE_INVAL)
11753 _CheckInstanceState(self, instance, INSTANCE_DOWN,
11754 msg="cannot change disk template")
11755 if self.op.disk_template in constants.DTS_INT_MIRROR:
11756 if self.op.remote_node == pnode:
11757 raise errors.OpPrereqError("Given new secondary node %s is the same"
11758 " as the primary node of the instance" %
11759 self.op.remote_node, errors.ECODE_STATE)
11760 _CheckNodeOnline(self, self.op.remote_node)
11761 _CheckNodeNotDrained(self, self.op.remote_node)
11762 # FIXME: here we assume that the old instance type is DT_PLAIN
11763 assert instance.disk_template == constants.DT_PLAIN
11764 disks = [{constants.IDISK_SIZE: d.size,
11765 constants.IDISK_VG: d.logical_id[0]}
11766 for d in instance.disks]
11767 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
11768 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
11770 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
11771 if pnode_info.group != snode_info.group:
11772 self.LogWarning("The primary and secondary nodes are in two"
11773 " different node groups; the disk parameters"
11774 " from the first disk's node group will be"
11777 # hvparams processing
11778 if self.op.hvparams:
11779 hv_type = instance.hypervisor
11780 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
11781 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
11782 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
11785 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
11786 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
11787 self.hv_proposed = self.hv_new = hv_new # the new actual values
11788 self.hv_inst = i_hvdict # the new dict (without defaults)
11790 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
11792 self.hv_new = self.hv_inst = {}
11794 # beparams processing
11795 if self.op.beparams:
11796 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
11798 objects.UpgradeBeParams(i_bedict)
11799 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
11800 be_new = cluster.SimpleFillBE(i_bedict)
11801 self.be_proposed = self.be_new = be_new # the new actual values
11802 self.be_inst = i_bedict # the new dict (without defaults)
11804 self.be_new = self.be_inst = {}
11805 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
11806 be_old = cluster.FillBE(instance)
11808     # CPU param validation -- checking every time a parameter is
11809 # changed to cover all cases where either CPU mask or vcpus have
11811 if (constants.BE_VCPUS in self.be_proposed and
11812 constants.HV_CPU_MASK in self.hv_proposed):
11814 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
11815       # Verify the mask is consistent with the number of vCPUs. We can skip
11816       # this test if there is only one entry in the CPU mask, which means the
11817       # same mask is applied to all vCPUs.
11818 if (len(cpu_list) > 1 and
11819 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
11820 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
11822 (self.be_proposed[constants.BE_VCPUS],
11823 self.hv_proposed[constants.HV_CPU_MASK]),
11824 errors.ECODE_INVAL)
11826 # Only perform this test if a new CPU mask is given
11827 if constants.HV_CPU_MASK in self.hv_new:
11828 # Calculate the largest CPU number requested
11829 max_requested_cpu = max(map(max, cpu_list))
11830 # Check that all of the instance's nodes have enough physical CPUs to
11831 # satisfy the requested CPU mask
11832 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
11833 max_requested_cpu + 1, instance.hypervisor)
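    # Example (mask syntax assumed): with BE_VCPUS = 2, a per-vCPU mask such as
    # "0-1:2-3" parses into two entries and passes, whereas "0:1:2" (three
    # entries) is rejected; a single-entry mask like "0-3" applies to all vCPUs
    # and always passes.  The physical-CPU check above would then require every
    # node to have at least 4 CPUs (max requested CPU 3, plus one).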
11835 # osparams processing
11836 if self.op.osparams:
11837 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
11838 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
11839 self.os_inst = i_osdict # the new dict (without defaults)
11845 #TODO(dynmem): do the appropriate check involving MINMEM
11846 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
11847 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
11848 mem_check_list = [pnode]
11849 if be_new[constants.BE_AUTO_BALANCE]:
11850 # either we changed auto_balance to yes or it was from before
11851 mem_check_list.extend(instance.secondary_nodes)
11852 instance_info = self.rpc.call_instance_info(pnode, instance.name,
11853 instance.hypervisor)
11854 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
11855 [instance.hypervisor])
11856 pninfo = nodeinfo[pnode]
11857 msg = pninfo.fail_msg
11859 # Assume the primary node is unreachable and go ahead
11860 self.warn.append("Can't get info from primary node %s: %s" %
11863 (_, _, (pnhvinfo, )) = pninfo.payload
11864 if not isinstance(pnhvinfo.get("memory_free", None), int):
11865 self.warn.append("Node data from primary node %s doesn't contain"
11866 " free memory information" % pnode)
11867 elif instance_info.fail_msg:
11868 self.warn.append("Can't get instance runtime information: %s" %
11869 instance_info.fail_msg)
11871 if instance_info.payload:
11872 current_mem = int(instance_info.payload["memory"])
11874 # Assume instance not running
11875 # (there is a slight race condition here, but it's not very
11876 # probable, and we have no other way to check)
11877 # TODO: Describe race condition
11879 #TODO(dynmem): do the appropriate check involving MINMEM
11880 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
11881 pnhvinfo["memory_free"])
11883 raise errors.OpPrereqError("This change will prevent the instance"
11884 " from starting, due to %d MB of memory"
11885 " missing on its primary node" %
11887 errors.ECODE_NORES)
11889 if be_new[constants.BE_AUTO_BALANCE]:
11890 for node, nres in nodeinfo.items():
11891 if node not in instance.secondary_nodes:
11893 nres.Raise("Can't get info from secondary node %s" % node,
11894 prereq=True, ecode=errors.ECODE_STATE)
11895 (_, _, (nhvinfo, )) = nres.payload
11896 if not isinstance(nhvinfo.get("memory_free", None), int):
11897 raise errors.OpPrereqError("Secondary node %s didn't return free"
11898 " memory information" % node,
11899 errors.ECODE_STATE)
11900 #TODO(dynmem): do the appropriate check involving MINMEM
11901 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
11902 raise errors.OpPrereqError("This change will prevent the instance"
11903 " from failover to its secondary node"
11904 " %s, due to not enough memory" % node,
11905 errors.ECODE_STATE)
11909 self.nic_pinst = {}
11910 for nic_op, nic_dict in self.op.nics:
11911 if nic_op == constants.DDM_REMOVE:
11912 if not instance.nics:
11913 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
11914 errors.ECODE_INVAL)
11916 if nic_op != constants.DDM_ADD:
11918 if not instance.nics:
11919 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
11920 " no NICs" % nic_op,
11921 errors.ECODE_INVAL)
11922 if nic_op < 0 or nic_op >= len(instance.nics):
11923 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
11925 (nic_op, len(instance.nics) - 1),
11926 errors.ECODE_INVAL)
11927 old_nic_params = instance.nics[nic_op].nicparams
11928 old_nic_ip = instance.nics[nic_op].ip
11930 old_nic_params = {}
11933 update_params_dict = dict([(key, nic_dict[key])
11934 for key in constants.NICS_PARAMETERS
11935 if key in nic_dict])
11937 if "bridge" in nic_dict:
11938 update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
11940 new_nic_params = _GetUpdatedParams(old_nic_params,
11941 update_params_dict)
11942 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
11943 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
11944 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
11945 self.nic_pinst[nic_op] = new_nic_params
11946 self.nic_pnew[nic_op] = new_filled_nic_params
11947 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
11949 if new_nic_mode == constants.NIC_MODE_BRIDGED:
11950 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
11951 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
11953 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
11955 self.warn.append(msg)
11957 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
11958 if new_nic_mode == constants.NIC_MODE_ROUTED:
11959 if constants.INIC_IP in nic_dict:
11960 nic_ip = nic_dict[constants.INIC_IP]
11962 nic_ip = old_nic_ip
11964 raise errors.OpPrereqError("Cannot set the nic ip to None"
11965 " on a routed nic", errors.ECODE_INVAL)
11966 if constants.INIC_MAC in nic_dict:
11967 nic_mac = nic_dict[constants.INIC_MAC]
11968 if nic_mac is None:
11969 raise errors.OpPrereqError("Cannot set the nic mac to None",
11970 errors.ECODE_INVAL)
11971 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11972 # otherwise generate the mac
11973 nic_dict[constants.INIC_MAC] = \
11974 self.cfg.GenerateMAC(self.proc.GetECId())
11976 # or validate/reserve the current one
11978 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
11979 except errors.ReservationError:
11980 raise errors.OpPrereqError("MAC address %s already in use"
11981 " in cluster" % nic_mac,
11982 errors.ECODE_NOTUNIQUE)
11985 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
11986 raise errors.OpPrereqError("Disk operations not supported for"
11987 " diskless instances",
11988 errors.ECODE_INVAL)
11989 for disk_op, _ in self.op.disks:
11990 if disk_op == constants.DDM_REMOVE:
11991 if len(instance.disks) == 1:
11992 raise errors.OpPrereqError("Cannot remove the last disk of"
11993 " an instance", errors.ECODE_INVAL)
11994 _CheckInstanceState(self, instance, INSTANCE_DOWN,
11995 msg="cannot remove disks")
11997 if (disk_op == constants.DDM_ADD and
11998 len(instance.disks) >= constants.MAX_DISKS):
11999 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
12000 " add more" % constants.MAX_DISKS,
12001 errors.ECODE_STATE)
12002 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
12004 if disk_op < 0 or disk_op >= len(instance.disks):
12005 raise errors.OpPrereqError("Invalid disk index %s, valid values"
12007 (disk_op, len(instance.disks)),
12008 errors.ECODE_INVAL)
12010 # disabling the instance
12011 if self.op.offline_inst:
12012 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12013 msg="cannot change instance state to offline")
12015 # enabling the instance
12016 if self.op.online_inst:
12017 _CheckInstanceState(self, instance, INSTANCE_OFFLINE,
12018 msg="cannot make instance go online")
12020 def _ConvertPlainToDrbd(self, feedback_fn):
12021 """Converts an instance from plain to drbd.
12024 feedback_fn("Converting template to drbd")
12025 instance = self.instance
12026 pnode = instance.primary_node
12027 snode = self.op.remote_node
12029 assert instance.disk_template == constants.DT_PLAIN
12031 # create a fake disk info for _GenerateDiskTemplate
12032 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
12033 constants.IDISK_VG: d.logical_id[0]}
12034 for d in instance.disks]
12035 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
12036 instance.name, pnode, [snode],
12037 disk_info, None, None, 0, feedback_fn,
12039 info = _GetInstanceInfoText(instance)
12040     feedback_fn("Creating additional volumes...")
12041 # first, create the missing data and meta devices
12042 for disk in new_disks:
12043 # unfortunately this is... not too nice
12044 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
12046 for child in disk.children:
12047 _CreateSingleBlockDev(self, snode, instance, child, info, True)
12048 # at this stage, all new LVs have been created, we can rename the
12050 feedback_fn("Renaming original volumes...")
12051 rename_list = [(o, n.children[0].logical_id)
12052 for (o, n) in zip(instance.disks, new_disks)]
12053 result = self.rpc.call_blockdev_rename(pnode, rename_list)
12054 result.Raise("Failed to rename original LVs")
12056 feedback_fn("Initializing DRBD devices...")
12057 # all child devices are in place, we can now create the DRBD devices
12058 for disk in new_disks:
12059 for node in [pnode, snode]:
12060 f_create = node == pnode
12061 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
12063 # at this point, the instance has been modified
12064 instance.disk_template = constants.DT_DRBD8
12065 instance.disks = new_disks
12066 self.cfg.Update(instance, feedback_fn)
12068 # Release node locks while waiting for sync
12069 _ReleaseLocks(self, locking.LEVEL_NODE)
12071 # disks are created, waiting for sync
12072 disk_abort = not _WaitForSync(self, instance,
12073 oneshot=not self.op.wait_for_sync)
12075 raise errors.OpExecError("There are some degraded disks for"
12076 " this instance, please cleanup manually")
12078 # Node resource locks will be released by caller
12080 def _ConvertDrbdToPlain(self, feedback_fn):
12081 """Converts an instance from drbd to plain.
12084 instance = self.instance
12086 assert len(instance.secondary_nodes) == 1
12087 assert instance.disk_template == constants.DT_DRBD8
12089 pnode = instance.primary_node
12090 snode = instance.secondary_nodes[0]
12091 feedback_fn("Converting template to plain")
12093 old_disks = instance.disks
12094 new_disks = [d.children[0] for d in old_disks]
12096 # copy over size and mode
12097 for parent, child in zip(old_disks, new_disks):
12098 child.size = parent.size
12099 child.mode = parent.mode
12101 # update instance structure
12102 instance.disks = new_disks
12103 instance.disk_template = constants.DT_PLAIN
12104 self.cfg.Update(instance, feedback_fn)
12106 # Release locks in case removing disks takes a while
12107 _ReleaseLocks(self, locking.LEVEL_NODE)
12109 feedback_fn("Removing volumes on the secondary node...")
12110 for disk in old_disks:
12111 self.cfg.SetDiskID(disk, snode)
12112 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
12114 self.LogWarning("Could not remove block device %s on node %s,"
12115 " continuing anyway: %s", disk.iv_name, snode, msg)
12117 feedback_fn("Removing unneeded volumes on the primary node...")
12118 for idx, disk in enumerate(old_disks):
12119 meta = disk.children[1]
12120 self.cfg.SetDiskID(meta, pnode)
12121 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
12123 self.LogWarning("Could not remove metadata for disk %d on node %s,"
12124 " continuing anyway: %s", idx, pnode, msg)
12126 # this is a DRBD disk, return its port to the pool
12127 for disk in old_disks:
12128 tcp_port = disk.logical_id[2]
12129 self.cfg.AddTcpUdpPort(tcp_port)
12131 # Node resource locks will be released by caller
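  # Conceptual summary of the two conversions (as implemented above and in
  # _ConvertPlainToDrbd): plain->drbd reuses the existing LVs as the local data
  # halves of newly generated DRBD8 devices (renaming them to the generated
  # child names), while drbd->plain promotes each DRBD disk's first child (the
  # data LV) to a top-level disk and discards the metadata LV and the DRBD
  # TCP/UDP port, which is returned to the cluster pool.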
12133 def Exec(self, feedback_fn):
12134 """Modifies an instance.
12136 All parameters take effect only at the next restart of the instance.
12139 # Process here the warnings from CheckPrereq, as we don't have a
12140 # feedback_fn there.
12141 for warn in self.warn:
12142 feedback_fn("WARNING: %s" % warn)
12144 assert ((self.op.disk_template is None) ^
12145 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
12146 "Not owning any node resource locks"
12149 instance = self.instance
12151 for disk_op, disk_dict in self.op.disks:
12152 if disk_op == constants.DDM_REMOVE:
12153 # remove the last disk
12154 device = instance.disks.pop()
12155 device_idx = len(instance.disks)
12156 for node, disk in device.ComputeNodeTree(instance.primary_node):
12157 self.cfg.SetDiskID(disk, node)
12158 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
12160 self.LogWarning("Could not remove disk/%d on node %s: %s,"
12161 " continuing anyway", device_idx, node, msg)
12162 result.append(("disk/%d" % device_idx, "remove"))
12164 # if this is a DRBD disk, return its port to the pool
12165 if device.dev_type in constants.LDS_DRBD:
12166 tcp_port = device.logical_id[2]
12167 self.cfg.AddTcpUdpPort(tcp_port)
12168 elif disk_op == constants.DDM_ADD:
12170 if instance.disk_template in (constants.DT_FILE,
12171 constants.DT_SHARED_FILE):
12172 file_driver, file_path = instance.disks[0].logical_id
12173 file_path = os.path.dirname(file_path)
12175 file_driver = file_path = None
12176 disk_idx_base = len(instance.disks)
12177 new_disk = _GenerateDiskTemplate(self,
12178 instance.disk_template,
12179 instance.name, instance.primary_node,
12180 instance.secondary_nodes,
12186 self.diskparams)[0]
12187 instance.disks.append(new_disk)
12188 info = _GetInstanceInfoText(instance)
12190 logging.info("Creating volume %s for instance %s",
12191 new_disk.iv_name, instance.name)
12192 # Note: this needs to be kept in sync with _CreateDisks
12194 for node in instance.all_nodes:
12195 f_create = node == instance.primary_node
12197 _CreateBlockDev(self, node, instance, new_disk,
12198 f_create, info, f_create)
12199 except errors.OpExecError, err:
12200 self.LogWarning("Failed to create volume %s (%s) on"
12202 new_disk.iv_name, new_disk, node, err)
12203 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
12204 (new_disk.size, new_disk.mode)))
12206 # change a given disk
12207 instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
12208 result.append(("disk.mode/%d" % disk_op,
12209 disk_dict[constants.IDISK_MODE]))
12211 if self.op.disk_template:
12213 check_nodes = set(instance.all_nodes)
12214 if self.op.remote_node:
12215 check_nodes.add(self.op.remote_node)
12216 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
12217 owned = self.owned_locks(level)
12218 assert not (check_nodes - owned), \
12219 ("Not owning the correct locks, owning %r, expected at least %r" %
12220 (owned, check_nodes))
12222 r_shut = _ShutdownInstanceDisks(self, instance)
12224 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
12225 " proceed with disk template conversion")
12226 mode = (instance.disk_template, self.op.disk_template)
12228 self._DISK_CONVERSIONS[mode](self, feedback_fn)
12230 self.cfg.ReleaseDRBDMinors(instance.name)
12232 result.append(("disk_template", self.op.disk_template))
12234 assert instance.disk_template == self.op.disk_template, \
12235 ("Expected disk template '%s', found '%s'" %
12236 (self.op.disk_template, instance.disk_template))
12238 # Release node and resource locks if there are any (they might already have
12239 # been released during disk conversion)
12240 _ReleaseLocks(self, locking.LEVEL_NODE)
12241 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
12244 for nic_op, nic_dict in self.op.nics:
12245 if nic_op == constants.DDM_REMOVE:
12246 # remove the last nic
12247 del instance.nics[-1]
12248 result.append(("nic.%d" % len(instance.nics), "remove"))
12249 elif nic_op == constants.DDM_ADD:
12250         # mac and bridge should be set by now
12251 mac = nic_dict[constants.INIC_MAC]
12252 ip = nic_dict.get(constants.INIC_IP, None)
12253 nicparams = self.nic_pinst[constants.DDM_ADD]
12254 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
12255 instance.nics.append(new_nic)
12256 result.append(("nic.%d" % (len(instance.nics) - 1),
12257 "add:mac=%s,ip=%s,mode=%s,link=%s" %
12258 (new_nic.mac, new_nic.ip,
12259 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
12260 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
12263 for key in (constants.INIC_MAC, constants.INIC_IP):
12264 if key in nic_dict:
12265 setattr(instance.nics[nic_op], key, nic_dict[key])
12266 if nic_op in self.nic_pinst:
12267 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
12268 for key, val in nic_dict.iteritems():
12269 result.append(("nic.%s/%d" % (key, nic_op), val))
12272 if self.op.hvparams:
12273 instance.hvparams = self.hv_inst
12274 for key, val in self.op.hvparams.iteritems():
12275 result.append(("hv/%s" % key, val))
12278 if self.op.beparams:
12279 instance.beparams = self.be_inst
12280 for key, val in self.op.beparams.iteritems():
12281 result.append(("be/%s" % key, val))
12284 if self.op.os_name:
12285 instance.os = self.op.os_name
12288 if self.op.osparams:
12289 instance.osparams = self.os_inst
12290 for key, val in self.op.osparams.iteritems():
12291 result.append(("os/%s" % key, val))
12293 # online/offline instance
12294 if self.op.online_inst:
12295 self.cfg.MarkInstanceDown(instance.name)
12296 result.append(("admin_state", constants.ADMINST_DOWN))
12297 if self.op.offline_inst:
12298 self.cfg.MarkInstanceOffline(instance.name)
12299 result.append(("admin_state", constants.ADMINST_OFFLINE))
12301 self.cfg.Update(instance, feedback_fn)
12303 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
12304 self.owned_locks(locking.LEVEL_NODE)), \
12305 "All node locks should have been released by now"
12309 _DISK_CONVERSIONS = {
12310 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
12311 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
12315 class LUInstanceChangeGroup(LogicalUnit):
12316 HPATH = "instance-change-group"
12317 HTYPE = constants.HTYPE_INSTANCE
12320 def ExpandNames(self):
12321 self.share_locks = _ShareAll()
12322 self.needed_locks = {
12323 locking.LEVEL_NODEGROUP: [],
12324 locking.LEVEL_NODE: [],
12327 self._ExpandAndLockInstance()
12329 if self.op.target_groups:
12330 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12331 self.op.target_groups)
12333 self.req_target_uuids = None
12335 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12337 def DeclareLocks(self, level):
12338 if level == locking.LEVEL_NODEGROUP:
12339 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12341 if self.req_target_uuids:
12342 lock_groups = set(self.req_target_uuids)
12344 # Lock all groups used by instance optimistically; this requires going
12345 # via the node before it's locked, requiring verification later on
12346 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
12347 lock_groups.update(instance_groups)
12349 # No target groups, need to lock all of them
12350 lock_groups = locking.ALL_SET
12352 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12354 elif level == locking.LEVEL_NODE:
12355 if self.req_target_uuids:
12356 # Lock all nodes used by instances
12357 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12358 self._LockInstancesNodes()
12360 # Lock all nodes in all potential target groups
12361 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
12362 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
12363 member_nodes = [node_name
12364 for group in lock_groups
12365 for node_name in self.cfg.GetNodeGroup(group).members]
12366 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12368 # Lock all nodes as all groups are potential targets
12369 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12371 def CheckPrereq(self):
12372 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12373 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12374 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12376 assert (self.req_target_uuids is None or
12377 owned_groups.issuperset(self.req_target_uuids))
12378 assert owned_instances == set([self.op.instance_name])
12380 # Get instance information
12381 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12383 # Check if node groups for locked instance are still correct
12384 assert owned_nodes.issuperset(self.instance.all_nodes), \
12385 ("Instance %s's nodes changed while we kept the lock" %
12386 self.op.instance_name)
12388 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
12391 if self.req_target_uuids:
12392 # User requested specific target groups
12393 self.target_uuids = self.req_target_uuids
12395 # All groups except those used by the instance are potential targets
12396 self.target_uuids = owned_groups - inst_groups
12398 conflicting_groups = self.target_uuids & inst_groups
12399 if conflicting_groups:
12400 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
12401 " used by the instance '%s'" %
12402 (utils.CommaJoin(conflicting_groups),
12403 self.op.instance_name),
12404 errors.ECODE_INVAL)
12406 if not self.target_uuids:
12407 raise errors.OpPrereqError("There are no possible target groups",
12408 errors.ECODE_INVAL)
12410 def BuildHooksEnv(self):
12411 """Build hooks env.
12414 assert self.target_uuids
12417 "TARGET_GROUPS": " ".join(self.target_uuids),
12420 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12424 def BuildHooksNodes(self):
12425 """Build hooks nodes.
12428 mn = self.cfg.GetMasterNode()
12429 return ([mn], [mn])
12431 def Exec(self, feedback_fn):
12432 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12434 assert instances == [self.op.instance_name], "Instance not locked"
12436 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12437 instances=instances, target_groups=list(self.target_uuids))
12439 ial.Run(self.op.iallocator)
12441 if not ial.success:
12442 raise errors.OpPrereqError("Can't compute solution for changing group of"
12443 " instance '%s' using iallocator '%s': %s" %
12444 (self.op.instance_name, self.op.iallocator,
12446 errors.ECODE_NORES)
12448 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12450 self.LogInfo("Iallocator returned %s job(s) for changing group of"
12451 " instance '%s'", len(jobs), self.op.instance_name)
12453 return ResultWithJobs(jobs)
12456 class LUBackupQuery(NoHooksLU):
12457 """Query the exports list
12462 def ExpandNames(self):
12463 self.needed_locks = {}
12464 self.share_locks[locking.LEVEL_NODE] = 1
12465 if not self.op.nodes:
12466 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12468 self.needed_locks[locking.LEVEL_NODE] = \
12469 _GetWantedNodes(self, self.op.nodes)
12471 def Exec(self, feedback_fn):
12472 """Compute the list of all the exported system images.
12475 @return: a dictionary with the structure node->(export-list)
12476 where export-list is a list of the instances exported on
12480 self.nodes = self.owned_locks(locking.LEVEL_NODE)
12481 rpcresult = self.rpc.call_export_list(self.nodes)
12483 for node in rpcresult:
12484 if rpcresult[node].fail_msg:
12485 result[node] = False
12487 result[node] = rpcresult[node].payload
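    # Example of the returned mapping (node and instance names assumed):
    #   {"node1.example.com": ["inst1.example.com", "inst2.example.com"],
    #    "node2.example.com": False}   # False marks a node whose RPC failed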
12492 class LUBackupPrepare(NoHooksLU):
12493 """Prepares an instance for an export and returns useful information.
12498 def ExpandNames(self):
12499 self._ExpandAndLockInstance()
12501 def CheckPrereq(self):
12502 """Check prerequisites.
12505 instance_name = self.op.instance_name
12507 self.instance = self.cfg.GetInstanceInfo(instance_name)
12508 assert self.instance is not None, \
12509 "Cannot retrieve locked instance %s" % self.op.instance_name
12510 _CheckNodeOnline(self, self.instance.primary_node)
12512 self._cds = _GetClusterDomainSecret()
12514 def Exec(self, feedback_fn):
12515 """Prepares an instance for an export.
12518 instance = self.instance
12520 if self.op.mode == constants.EXPORT_MODE_REMOTE:
12521 salt = utils.GenerateSecret(8)
12523 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
12524 result = self.rpc.call_x509_cert_create(instance.primary_node,
12525 constants.RIE_CERT_VALIDITY)
12526 result.Raise("Can't create X509 key and certificate on %s" % result.node)
12528 (name, cert_pem) = result.payload
12530 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
12534 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
12535 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
12537 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
12543 class LUBackupExport(LogicalUnit):
12544 """Export an instance to an image in the cluster.
12547 HPATH = "instance-export"
12548 HTYPE = constants.HTYPE_INSTANCE
12551 def CheckArguments(self):
12552 """Check the arguments.
12555 self.x509_key_name = self.op.x509_key_name
12556 self.dest_x509_ca_pem = self.op.destination_x509_ca
12558 if self.op.mode == constants.EXPORT_MODE_REMOTE:
12559 if not self.x509_key_name:
12560 raise errors.OpPrereqError("Missing X509 key name for encryption",
12561 errors.ECODE_INVAL)
12563 if not self.dest_x509_ca_pem:
12564 raise errors.OpPrereqError("Missing destination X509 CA",
12565 errors.ECODE_INVAL)
12567 def ExpandNames(self):
12568 self._ExpandAndLockInstance()
12570 # Lock all nodes for local exports
12571 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12572 # FIXME: lock only instance primary and destination node
12574       # Sad but true, for now we have to lock all nodes, as we don't know where
12575 # the previous export might be, and in this LU we search for it and
12576 # remove it from its current node. In the future we could fix this by:
12577 # - making a tasklet to search (share-lock all), then create the
12578 # new one, then one to remove, after
12579 # - removing the removal operation altogether
12580 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12582 def DeclareLocks(self, level):
12583 """Last minute lock declaration."""
12584 # All nodes are locked anyway, so nothing to do here.
12586 def BuildHooksEnv(self):
12587 """Build hooks env.
12589 This will run on the master, primary node and target node.
12593 "EXPORT_MODE": self.op.mode,
12594 "EXPORT_NODE": self.op.target_node,
12595 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
12596 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
12597 # TODO: Generic function for boolean env variables
12598 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
12601 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12605 def BuildHooksNodes(self):
12606 """Build hooks nodes.
12609 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
12611 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12612 nl.append(self.op.target_node)
12616 def CheckPrereq(self):
12617 """Check prerequisites.
12619 This checks that the instance and node names are valid.
12622 instance_name = self.op.instance_name
12624 self.instance = self.cfg.GetInstanceInfo(instance_name)
12625 assert self.instance is not None, \
12626 "Cannot retrieve locked instance %s" % self.op.instance_name
12627 _CheckNodeOnline(self, self.instance.primary_node)
12629 if (self.op.remove_instance and
12630 self.instance.admin_state == constants.ADMINST_UP and
12631 not self.op.shutdown):
12632 raise errors.OpPrereqError("Can not remove instance without shutting it"
12635 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12636 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
12637 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
12638 assert self.dst_node is not None
12640 _CheckNodeOnline(self, self.dst_node.name)
12641 _CheckNodeNotDrained(self, self.dst_node.name)
12644 self.dest_disk_info = None
12645 self.dest_x509_ca = None
12647 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
12648 self.dst_node = None
12650 if len(self.op.target_node) != len(self.instance.disks):
12651 raise errors.OpPrereqError(("Received destination information for %s"
12652 " disks, but instance %s has %s disks") %
12653 (len(self.op.target_node), instance_name,
12654 len(self.instance.disks)),
12655 errors.ECODE_INVAL)
12657 cds = _GetClusterDomainSecret()
12659 # Check X509 key name
12661 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
12662 except (TypeError, ValueError), err:
12663 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
12665 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
12666 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
12667 errors.ECODE_INVAL)
12669 # Load and verify CA
12671 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
12672 except OpenSSL.crypto.Error, err:
12673 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
12674 (err, ), errors.ECODE_INVAL)
12676 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
12677 if errcode is not None:
12678 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
12679 (msg, ), errors.ECODE_INVAL)
12681 self.dest_x509_ca = cert
12683 # Verify target information
12685 for idx, disk_data in enumerate(self.op.target_node):
12687 (host, port, magic) = \
12688 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
12689 except errors.GenericError, err:
12690 raise errors.OpPrereqError("Target info for disk %s: %s" %
12691 (idx, err), errors.ECODE_INVAL)
12693 disk_info.append((host, port, magic))
12695 assert len(disk_info) == len(self.op.target_node)
12696 self.dest_disk_info = disk_info
12699 raise errors.ProgrammerError("Unhandled export mode %r" %
12702 # instance disk type verification
12703 # TODO: Implement export support for file-based disks
12704 for disk in self.instance.disks:
12705 if disk.dev_type == constants.LD_FILE:
12706 raise errors.OpPrereqError("Export not supported for instances with"
12707 " file-based disks", errors.ECODE_INVAL)
12709 def _CleanupExports(self, feedback_fn):
12710 """Removes exports of current instance from all other nodes.
12712 If an instance in a cluster with nodes A..D was exported to node C, its
12713 exports will be removed from the nodes A, B and D.
12716 assert self.op.mode != constants.EXPORT_MODE_REMOTE
12718 nodelist = self.cfg.GetNodeList()
12719 nodelist.remove(self.dst_node.name)
12721 # on one-node clusters nodelist will be empty after the removal; if we
12722 # proceeded anyway the backup would be removed, because OpBackupQuery
12723 # substitutes an empty list with the full cluster node list.
12724 iname = self.instance.name
12726 feedback_fn("Removing old exports for instance %s" % iname)
12727 exportlist = self.rpc.call_export_list(nodelist)
12728 for node in exportlist:
12729 if exportlist[node].fail_msg:
12731 if iname in exportlist[node].payload:
12732 msg = self.rpc.call_export_remove(node, iname).fail_msg
12734 self.LogWarning("Could not remove older export for instance %s"
12735 " on node %s: %s", iname, node, msg)
12737 def Exec(self, feedback_fn):
12738 """Export an instance to an image in the cluster.
12741 assert self.op.mode in constants.EXPORT_MODES
12743 instance = self.instance
12744 src_node = instance.primary_node
12746 if self.op.shutdown:
12747 # shutdown the instance, but not the disks
12748 feedback_fn("Shutting down instance %s" % instance.name)
12749 result = self.rpc.call_instance_shutdown(src_node, instance,
12750 self.op.shutdown_timeout)
12751 # TODO: Maybe ignore failures if ignore_remove_failures is set
12752 result.Raise("Could not shutdown instance %s on"
12753 " node %s" % (instance.name, src_node))
12755 # set the disks ID correctly since call_instance_start needs the
12756 # correct drbd minor to create the symlinks
12757 for disk in instance.disks:
12758 self.cfg.SetDiskID(disk, src_node)
12760 activate_disks = (instance.admin_state != constants.ADMINST_UP)
12763 # Activate the instance disks if we're exporting a stopped instance
12764 feedback_fn("Activating disks for %s" % instance.name)
12765 _StartInstanceDisks(self, instance, None)
12768 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn, instance)
12771 helper.CreateSnapshots()
12773 if (self.op.shutdown and
12774 instance.admin_state == constants.ADMINST_UP and
12775 not self.op.remove_instance):
12776 assert not activate_disks
12777 feedback_fn("Starting instance %s" % instance.name)
12778 result = self.rpc.call_instance_start(src_node,
12779 (instance, None, None), False)
12780 msg = result.fail_msg
12782 feedback_fn("Failed to start instance: %s" % msg)
12783 _ShutdownInstanceDisks(self, instance)
12784 raise errors.OpExecError("Could not start instance: %s" % msg)
12786 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12787 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
12788 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
12789 connect_timeout = constants.RIE_CONNECT_TIMEOUT
12790 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
12792 (key_name, _, _) = self.x509_key_name
12795 dest_ca_pem = OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM, self.dest_x509_ca)
12798 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
12799 key_name, dest_ca_pem,
12804 # Check for backwards compatibility
12805 assert len(dresults) == len(instance.disks)
12806 assert compat.all(isinstance(i, bool) for i in dresults), \
12807 "Not all results are boolean: %r" % dresults
12811 feedback_fn("Deactivating disks for %s" % instance.name)
12812 _ShutdownInstanceDisks(self, instance)
12814 if not (compat.all(dresults) and fin_resu):
12817 failures.append("export finalization")
12818 if not compat.all(dresults):
12819 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults) if not dsk)
12821 failures.append("disk export: disk(s) %s" % fdsk)
12823 raise errors.OpExecError("Export failed, errors in %s" %
12824 utils.CommaJoin(failures))
12826 # At this point the export was successful; we can clean up and finish
12828 # Remove instance if requested
12829 if self.op.remove_instance:
12830 feedback_fn("Removing instance %s" % instance.name)
12831 _RemoveInstance(self, feedback_fn, instance,
12832 self.op.ignore_remove_failures)
12834 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12835 self._CleanupExports(feedback_fn)
12837 return fin_resu, dresults
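# The (fin_resu, dresults) pair returned above combines the export
# finalization status with one boolean per exported disk, e.g.
# (True, [True, True]) for a fully successful export of a two-disk instance;
# partial failures have already been turned into an OpExecError before this
# point.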
12840 class LUBackupRemove(NoHooksLU):
12841 """Remove exports related to the named instance.
12846 def ExpandNames(self):
12847 self.needed_locks = {}
12848 # We need all nodes to be locked in order for RemoveExport to work, but we
12849 # don't need to lock the instance itself, as nothing will happen to it (and
12850 # we can remove exports also for a removed instance)
12851 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12853 def Exec(self, feedback_fn):
12854 """Remove any export.
12857 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
12858 # If the instance was not found we'll try with the name that was passed in.
12859 # This will only work if it was an FQDN, though.
12861 if not instance_name:
12863 instance_name = self.op.instance_name
12865 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
12866 exportlist = self.rpc.call_export_list(locked_nodes)
12868 for node in exportlist:
12869 msg = exportlist[node].fail_msg
12871 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
12873 if instance_name in exportlist[node].payload:
12875 result = self.rpc.call_export_remove(node, instance_name)
12876 msg = result.fail_msg
12878 logging.error("Could not remove export for instance %s"
12879 " on node %s: %s", instance_name, node, msg)
12881 if fqdn_warn and not found:
12882 feedback_fn("Export not found. If trying to remove an export belonging"
12883 " to a deleted instance please use its Fully Qualified"
12887 class LUGroupAdd(LogicalUnit):
12888 """Logical unit for creating node groups.
12891 HPATH = "group-add"
12892 HTYPE = constants.HTYPE_GROUP
12895 def ExpandNames(self):
12896 # We need the new group's UUID here so that we can create and acquire the
12897 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
12898 # that it should not check whether the UUID exists in the configuration.
12899 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
12900 self.needed_locks = {}
12901 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12903 def CheckPrereq(self):
12904 """Check prerequisites.
12906 This checks that the given group name is not an existing node group already.
12911 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12912 except errors.OpPrereqError:
12915 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
12916 " node group (UUID: %s)" %
12917 (self.op.group_name, existing_uuid),
12918 errors.ECODE_EXISTS)
12920 if self.op.ndparams:
12921 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12923 if self.op.hv_state:
12924 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
12926 self.new_hv_state = None
12928 if self.op.disk_state:
12929 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
12931 self.new_disk_state = None
12933 if self.op.diskparams:
12934 for templ in constants.DISK_TEMPLATES:
12935 if templ not in self.op.diskparams:
12936 self.op.diskparams[templ] = {}
12937 utils.ForceDictType(self.op.diskparams[templ], constants.DISK_DT_TYPES)
12939 self.op.diskparams = self.cfg.GetClusterInfo().diskparams
12941 if self.op.ipolicy:
12942 cluster = self.cfg.GetClusterInfo()
12943 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
12944 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy)
12946 def BuildHooksEnv(self):
12947 """Build hooks env.
12951 "GROUP_NAME": self.op.group_name,
12954 def BuildHooksNodes(self):
12955 """Build hooks nodes.
12958 mn = self.cfg.GetMasterNode()
12959 return ([mn], [mn])
12961 def Exec(self, feedback_fn):
12962 """Add the node group to the cluster.
12965 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
12966 uuid=self.group_uuid,
12967 alloc_policy=self.op.alloc_policy,
12968 ndparams=self.op.ndparams,
12969 diskparams=self.op.diskparams,
12970 ipolicy=self.op.ipolicy,
12971 hv_state_static=self.new_hv_state,
12972 disk_state_static=self.new_disk_state)
12974 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
12975 del self.remove_locks[locking.LEVEL_NODEGROUP]
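# Note: for any disk template not mentioned in op.diskparams, CheckPrereq
# stores an empty dict, so those templates keep falling back to the
# cluster-level disk parameters when the group parameters are filled in
# later.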
12978 class LUGroupAssignNodes(NoHooksLU):
12979 """Logical unit for assigning nodes to groups.
12984 def ExpandNames(self):
12985 # These raise errors.OpPrereqError on their own:
12986 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12987 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
12989 # We want to lock all the affected nodes and groups. We have readily
12990 # available the list of nodes, and the *destination* group. To gather the
12991 # list of "source" groups, we need to fetch node information later on.
12992 self.needed_locks = {
12993 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
12994 locking.LEVEL_NODE: self.op.nodes,
12997 def DeclareLocks(self, level):
12998 if level == locking.LEVEL_NODEGROUP:
12999 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
13001 # Try to get all affected nodes' groups without having the group or node
13002 # lock yet. Needs verification later in the code flow.
13003 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
13005 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
13007 def CheckPrereq(self):
13008 """Check prerequisites.
13011 assert self.needed_locks[locking.LEVEL_NODEGROUP]
13012 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
13013 frozenset(self.op.nodes))
13015 expected_locks = (set([self.group_uuid]) |
13016 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
13017 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
13018 if actual_locks != expected_locks:
13019 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
13020 " current groups are '%s', used to be '%s'" %
13021 (utils.CommaJoin(expected_locks),
13022 utils.CommaJoin(actual_locks)))
13024 self.node_data = self.cfg.GetAllNodesInfo()
13025 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13026 instance_data = self.cfg.GetAllInstancesInfo()
13028 if self.group is None:
13029 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13030 (self.op.group_name, self.group_uuid))
13032 (new_splits, previous_splits) = \
13033 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
13034 for node in self.op.nodes],
13035 self.node_data, instance_data)
13038 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
13040 if not self.op.force:
13041 raise errors.OpExecError("The following instances get split by this"
13042 " change and --force was not given: %s" %
13045 self.LogWarning("This operation will split the following instances: %s",
13048 if previous_splits:
13049 self.LogWarning("In addition, these already-split instances continue"
13050 " to be split across groups: %s",
13051 utils.CommaJoin(utils.NiceSort(previous_splits)))
13053 def Exec(self, feedback_fn):
13054 """Assign nodes to a new group.
13057 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
13059 self.cfg.AssignGroupNodes(mods)
13062 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
13063 """Check for split instances after a node assignment.
13065 This method considers a series of node assignments as an atomic operation,
13066 and returns information about split instances after applying the set of changes.
13069 In particular, it returns information about newly split instances, and about
13070 instances that were already split and remain so after the change.
13072 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are considered.
13075 @type changes: list of (node_name, new_group_uuid) pairs.
13076 @param changes: list of node assignments to consider.
13077 @param node_data: a dict with data for all nodes
13078 @param instance_data: a dict with all instances to consider
13079 @rtype: a two-tuple
13080 @return: a list of instances that were previously okay and become split as a
13081 consequence of this change, and a list of instances that were previously
13082 split and that this change does not fix.
13085 changed_nodes = dict((node, group) for node, group in changes
13086 if node_data[node].group != group)
13088 all_split_instances = set()
13089 previously_split_instances = set()
13091 def InstanceNodes(instance):
13092 return [instance.primary_node] + list(instance.secondary_nodes)
13094 for inst in instance_data.values():
13095 if inst.disk_template not in constants.DTS_INT_MIRROR:
13098 instance_nodes = InstanceNodes(inst)
13100 if len(set(node_data[node].group for node in instance_nodes)) > 1:
13101 previously_split_instances.add(inst.name)
13103 if len(set(changed_nodes.get(node, node_data[node].group)
13104 for node in instance_nodes)) > 1:
13105 all_split_instances.add(inst.name)
13107 return (list(all_split_instances - previously_split_instances),
13108 list(previously_split_instances & all_split_instances))
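# Illustrative example (hypothetical names): with node_data mapping
#   nodeA -> group1, nodeB -> group1, nodeC -> group2
# a DRBD instance "inst1" on (nodeA, nodeB) and changes = [("nodeB", "group2")]
# yields (["inst1"], []): the instance becomes newly split, since its nodes
# would then live in group1 and group2.  An instance that already spanned two
# groups before the change would instead appear in the second list.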
13111 class _GroupQuery(_QueryBase):
13112 FIELDS = query.GROUP_FIELDS
13114 def ExpandNames(self, lu):
13115 lu.needed_locks = {}
13117 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
13118 self._cluster = lu.cfg.GetClusterInfo()
13119 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
13122 self.wanted = [name_to_uuid[name]
13123 for name in utils.NiceSort(name_to_uuid.keys())]
13125 # Accept names to be either names or UUIDs.
13128 all_uuid = frozenset(self._all_groups.keys())
13130 for name in self.names:
13131 if name in all_uuid:
13132 self.wanted.append(name)
13133 elif name in name_to_uuid:
13134 self.wanted.append(name_to_uuid[name])
13136 missing.append(name)
13139 raise errors.OpPrereqError("Some groups do not exist: %s" %
13140 utils.CommaJoin(missing),
13141 errors.ECODE_NOENT)
13143 def DeclareLocks(self, lu, level):
13146 def _GetQueryData(self, lu):
13147 """Computes the list of node groups and their attributes.
13150 do_nodes = query.GQ_NODE in self.requested_data
13151 do_instances = query.GQ_INST in self.requested_data
13153 group_to_nodes = None
13154 group_to_instances = None
13156 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
13157 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
13158 # latter GetAllInstancesInfo() is not enough, for we have to go through
13159 # instance->node. Hence, we will need to process nodes even if we only need
13160 # instance information.
13161 if do_nodes or do_instances:
13162 all_nodes = lu.cfg.GetAllNodesInfo()
13163 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
13166 for node in all_nodes.values():
13167 if node.group in group_to_nodes:
13168 group_to_nodes[node.group].append(node.name)
13169 node_to_group[node.name] = node.group
13172 all_instances = lu.cfg.GetAllInstancesInfo()
13173 group_to_instances = dict((uuid, []) for uuid in self.wanted)
13175 for instance in all_instances.values():
13176 node = instance.primary_node
13177 if node in node_to_group:
13178 group_to_instances[node_to_group[node]].append(instance.name)
13181 # Do not pass on node information if it was not requested.
13182 group_to_nodes = None
13184 return query.GroupQueryData(self._cluster,
13185 [self._all_groups[uuid]
13186 for uuid in self.wanted],
13187 group_to_nodes, group_to_instances)
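# For illustration (made-up names), with two groups the GQ_NODE data could be
#   group_to_nodes = {"g1-uuid": ["node1", "node2"], "g2-uuid": ["node3"]}
# and the GQ_INST data
#   group_to_instances = {"g1-uuid": ["inst1"], "g2-uuid": []}
# where instances are attributed to the group of their primary node, exactly
# as built by the loops above.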
13190 class LUGroupQuery(NoHooksLU):
13191 """Logical unit for querying node groups.
13196 def CheckArguments(self):
13197 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
13198 self.op.output_fields, False)
13200 def ExpandNames(self):
13201 self.gq.ExpandNames(self)
13203 def DeclareLocks(self, level):
13204 self.gq.DeclareLocks(self, level)
13206 def Exec(self, feedback_fn):
13207 return self.gq.OldStyleQuery(self)
13210 class LUGroupSetParams(LogicalUnit):
13211 """Modifies the parameters of a node group.
13214 HPATH = "group-modify"
13215 HTYPE = constants.HTYPE_GROUP
13218 def CheckArguments(self):
13221 self.op.diskparams,
13222 self.op.alloc_policy,
13224 self.op.disk_state,
13228 if all_changes.count(None) == len(all_changes):
13229 raise errors.OpPrereqError("Please pass at least one modification",
13230 errors.ECODE_INVAL)
13232 def ExpandNames(self):
13233 # This raises errors.OpPrereqError on its own:
13234 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13236 self.needed_locks = {
13237 locking.LEVEL_NODEGROUP: [self.group_uuid],
13240 def CheckPrereq(self):
13241 """Check prerequisites.
13244 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13246 if self.group is None:
13247 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13248 (self.op.group_name, self.group_uuid))
13250 if self.op.ndparams:
13251 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
13252 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13253 self.new_ndparams = new_ndparams
13255 if self.op.diskparams:
13256 self.new_diskparams = dict()
13257 for templ in constants.DISK_TEMPLATES:
13258 if templ not in self.op.diskparams:
13259 self.op.diskparams[templ] = {}
13260 new_templ_params = _GetUpdatedParams(self.group.diskparams[templ],
13261 self.op.diskparams[templ])
13262 utils.ForceDictType(new_templ_params, constants.DISK_DT_TYPES)
13263 self.new_diskparams[templ] = new_templ_params
13265 if self.op.hv_state:
13266 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
13267 self.group.hv_state_static)
13269 if self.op.disk_state:
13270 self.new_disk_state = \
13271 _MergeAndVerifyDiskState(self.op.disk_state,
13272 self.group.disk_state_static)
13274 if self.op.ipolicy:
13276 for key, value in self.op.ipolicy.iteritems():
13277 g_ipolicy[key] = _GetUpdatedParams(self.group.ipolicy.get(key, {}),
13280 utils.ForceDictType(g_ipolicy[key], constants.ISPECS_PARAMETER_TYPES)
13281 self.new_ipolicy = g_ipolicy
13282 objects.InstancePolicy.CheckParameterSyntax(self.new_ipolicy)
13284 def BuildHooksEnv(self):
13285 """Build hooks env.
13289 "GROUP_NAME": self.op.group_name,
13290 "NEW_ALLOC_POLICY": self.op.alloc_policy,
13293 def BuildHooksNodes(self):
13294 """Build hooks nodes.
13297 mn = self.cfg.GetMasterNode()
13298 return ([mn], [mn])
13300 def Exec(self, feedback_fn):
13301 """Modifies the node group.
13306 if self.op.ndparams:
13307 self.group.ndparams = self.new_ndparams
13308 result.append(("ndparams", str(self.group.ndparams)))
13310 if self.op.diskparams:
13311 self.group.diskparams = self.new_diskparams
13312 result.append(("diskparams", str(self.group.diskparams)))
13314 if self.op.alloc_policy:
13315 self.group.alloc_policy = self.op.alloc_policy
13317 if self.op.hv_state:
13318 self.group.hv_state_static = self.new_hv_state
13320 if self.op.disk_state:
13321 self.group.disk_state_static = self.new_disk_state
13323 if self.op.ipolicy:
13324 self.group.ipolicy = self.new_ipolicy
13326 self.cfg.Update(self.group, feedback_fn)
13330 class LUGroupRemove(LogicalUnit):
13331 HPATH = "group-remove"
13332 HTYPE = constants.HTYPE_GROUP
13335 def ExpandNames(self):
13336 # This raises errors.OpPrereqError on its own:
13337 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13338 self.needed_locks = {
13339 locking.LEVEL_NODEGROUP: [self.group_uuid],
13342 def CheckPrereq(self):
13343 """Check prerequisites.
13345 This checks that the given group name exists as a node group, that it is
13346 empty (i.e., contains no nodes), and that it is not the last group of the cluster.
13350 # Verify that the group is empty.
13351 group_nodes = [node.name
13352 for node in self.cfg.GetAllNodesInfo().values()
13353 if node.group == self.group_uuid]
13356 raise errors.OpPrereqError("Group '%s' not empty, has the following"
13358 (self.op.group_name,
13359 utils.CommaJoin(utils.NiceSort(group_nodes))),
13360 errors.ECODE_STATE)
13362 # Verify the cluster would not be left group-less.
13363 if len(self.cfg.GetNodeGroupList()) == 1:
13364 raise errors.OpPrereqError("Group '%s' is the only group,"
13365 " cannot be removed" %
13366 self.op.group_name,
13367 errors.ECODE_STATE)
13369 def BuildHooksEnv(self):
13370 """Build hooks env.
13374 "GROUP_NAME": self.op.group_name,
13377 def BuildHooksNodes(self):
13378 """Build hooks nodes.
13381 mn = self.cfg.GetMasterNode()
13382 return ([mn], [mn])
13384 def Exec(self, feedback_fn):
13385 """Remove the node group.
13389 self.cfg.RemoveNodeGroup(self.group_uuid)
13390 except errors.ConfigurationError:
13391 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
13392 (self.op.group_name, self.group_uuid))
13394 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13397 class LUGroupRename(LogicalUnit):
13398 HPATH = "group-rename"
13399 HTYPE = constants.HTYPE_GROUP
13402 def ExpandNames(self):
13403 # This raises errors.OpPrereqError on its own:
13404 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13406 self.needed_locks = {
13407 locking.LEVEL_NODEGROUP: [self.group_uuid],
13410 def CheckPrereq(self):
13411 """Check prerequisites.
13413 Ensures requested new name is not yet used.
13417 new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
13418 except errors.OpPrereqError:
13421 raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
13422 " node group (UUID: %s)" %
13423 (self.op.new_name, new_name_uuid),
13424 errors.ECODE_EXISTS)
13426 def BuildHooksEnv(self):
13427 """Build hooks env.
13431 "OLD_NAME": self.op.group_name,
13432 "NEW_NAME": self.op.new_name,
13435 def BuildHooksNodes(self):
13436 """Build hooks nodes.
13439 mn = self.cfg.GetMasterNode()
13441 all_nodes = self.cfg.GetAllNodesInfo()
13442 all_nodes.pop(mn, None)
13445 run_nodes.extend(node.name for node in all_nodes.values()
13446 if node.group == self.group_uuid)
13448 return (run_nodes, run_nodes)
13450 def Exec(self, feedback_fn):
13451 """Rename the node group.
13454 group = self.cfg.GetNodeGroup(self.group_uuid)
13457 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13458 (self.op.group_name, self.group_uuid))
13460 group.name = self.op.new_name
13461 self.cfg.Update(group, feedback_fn)
13463 return self.op.new_name
13466 class LUGroupEvacuate(LogicalUnit):
13467 HPATH = "group-evacuate"
13468 HTYPE = constants.HTYPE_GROUP
13471 def ExpandNames(self):
13472 # This raises errors.OpPrereqError on its own:
13473 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13475 if self.op.target_groups:
13476 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
13477 self.op.target_groups)
13479 self.req_target_uuids = []
13481 if self.group_uuid in self.req_target_uuids:
13482 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
13483 " as a target group (targets are %s)" %
13485 utils.CommaJoin(self.req_target_uuids)),
13486 errors.ECODE_INVAL)
13488 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
13490 self.share_locks = _ShareAll()
13491 self.needed_locks = {
13492 locking.LEVEL_INSTANCE: [],
13493 locking.LEVEL_NODEGROUP: [],
13494 locking.LEVEL_NODE: [],
13497 def DeclareLocks(self, level):
13498 if level == locking.LEVEL_INSTANCE:
13499 assert not self.needed_locks[locking.LEVEL_INSTANCE]
13501 # Lock instances optimistically, needs verification once node and group
13502 # locks have been acquired
13503 self.needed_locks[locking.LEVEL_INSTANCE] = \
13504 self.cfg.GetNodeGroupInstances(self.group_uuid)
13506 elif level == locking.LEVEL_NODEGROUP:
13507 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13509 if self.req_target_uuids:
13510 lock_groups = set([self.group_uuid] + self.req_target_uuids)
13512 # Lock all groups used by instances optimistically; this requires going
13513 # via the node before it's locked, requiring verification later on
13514 lock_groups.update(group_uuid
13515 for instance_name in
13516 self.owned_locks(locking.LEVEL_INSTANCE)
13518 self.cfg.GetInstanceNodeGroups(instance_name))
13520 # No target groups, need to lock all of them
13521 lock_groups = locking.ALL_SET
13523 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13525 elif level == locking.LEVEL_NODE:
13526 # This will only lock the nodes in the group to be evacuated which
13527 # contain actual instances
13528 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13529 self._LockInstancesNodes()
13531 # Lock all nodes in group to be evacuated and target groups
13532 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13533 assert self.group_uuid in owned_groups
13534 member_nodes = [node_name
13535 for group in owned_groups
13536 for node_name in self.cfg.GetNodeGroup(group).members]
13537 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13539 def CheckPrereq(self):
13540 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13541 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13542 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13544 assert owned_groups.issuperset(self.req_target_uuids)
13545 assert self.group_uuid in owned_groups
13547 # Check if locked instances are still correct
13548 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
13550 # Get instance information
13551 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
13553 # Check if node groups for locked instances are still correct
13554 for instance_name in owned_instances:
13555 inst = self.instances[instance_name]
13556 assert owned_nodes.issuperset(inst.all_nodes), \
13557 "Instance %s's nodes changed while we kept the lock" % instance_name
13559 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name, owned_groups)
13562 assert self.group_uuid in inst_groups, \
13563 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
13565 if self.req_target_uuids:
13566 # User requested specific target groups
13567 self.target_uuids = self.req_target_uuids
13569 # All groups except the one to be evacuated are potential targets
13570 self.target_uuids = [group_uuid for group_uuid in owned_groups
13571 if group_uuid != self.group_uuid]
13573 if not self.target_uuids:
13574 raise errors.OpPrereqError("There are no possible target groups",
13575 errors.ECODE_INVAL)
13577 def BuildHooksEnv(self):
13578 """Build hooks env.
13582 "GROUP_NAME": self.op.group_name,
13583 "TARGET_GROUPS": " ".join(self.target_uuids),
13586 def BuildHooksNodes(self):
13587 """Build hooks nodes.
13590 mn = self.cfg.GetMasterNode()
13592 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
13594 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
13596 return (run_nodes, run_nodes)
13598 def Exec(self, feedback_fn):
13599 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13601 assert self.group_uuid not in self.target_uuids
13603 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
13604 instances=instances, target_groups=self.target_uuids)
13606 ial.Run(self.op.iallocator)
13608 if not ial.success:
13609 raise errors.OpPrereqError("Can't compute group evacuation using"
13610 " iallocator '%s': %s" %
13611 (self.op.iallocator, ial.info),
13612 errors.ECODE_NORES)
13614 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13616 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
13617 len(jobs), self.op.group_name)
13619 return ResultWithJobs(jobs)
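# The iallocator result consumed here is expected to match _NEVAC_RESULT
# defined further below: a (moved, failed, jobs) triple in which "jobs" is a
# list of opcode lists, one inner list per job to be submitted.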
13622 class TagsLU(NoHooksLU): # pylint: disable=W0223
13623 """Generic tags LU.
13625 This is an abstract class which is the parent of all the other tags LUs.
13628 def ExpandNames(self):
13629 self.group_uuid = None
13630 self.needed_locks = {}
13631 if self.op.kind == constants.TAG_NODE:
13632 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
13633 self.needed_locks[locking.LEVEL_NODE] = self.op.name
13634 elif self.op.kind == constants.TAG_INSTANCE:
13635 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
13636 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
13637 elif self.op.kind == constants.TAG_NODEGROUP:
13638 self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
13640 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
13641 # not possible to acquire the BGL based on opcode parameters)
13643 def CheckPrereq(self):
13644 """Check prerequisites.
13647 if self.op.kind == constants.TAG_CLUSTER:
13648 self.target = self.cfg.GetClusterInfo()
13649 elif self.op.kind == constants.TAG_NODE:
13650 self.target = self.cfg.GetNodeInfo(self.op.name)
13651 elif self.op.kind == constants.TAG_INSTANCE:
13652 self.target = self.cfg.GetInstanceInfo(self.op.name)
13653 elif self.op.kind == constants.TAG_NODEGROUP:
13654 self.target = self.cfg.GetNodeGroup(self.group_uuid)
13656 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
13657 str(self.op.kind), errors.ECODE_INVAL)
13660 class LUTagsGet(TagsLU):
13661 """Returns the tags of a given object.
13666 def ExpandNames(self):
13667 TagsLU.ExpandNames(self)
13669 # Share locks as this is only a read operation
13670 self.share_locks = _ShareAll()
13672 def Exec(self, feedback_fn):
13673 """Returns the tag list.
13676 return list(self.target.GetTags())
13679 class LUTagsSearch(NoHooksLU):
13680 """Searches the tags for a given pattern.
13685 def ExpandNames(self):
13686 self.needed_locks = {}
13688 def CheckPrereq(self):
13689 """Check prerequisites.
13691 This checks the pattern passed for validity by compiling it.
13695 self.re = re.compile(self.op.pattern)
13696 except re.error, err:
13697 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
13698 (self.op.pattern, err), errors.ECODE_INVAL)
13700 def Exec(self, feedback_fn):
13701 """Returns the tag list.
13705 tgts = [("/cluster", cfg.GetClusterInfo())]
13706 ilist = cfg.GetAllInstancesInfo().values()
13707 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
13708 nlist = cfg.GetAllNodesInfo().values()
13709 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
13710 tgts.extend(("/nodegroup/%s" % n.name, n)
13711 for n in cfg.GetAllNodeGroupsInfo().values())
13713 for path, target in tgts:
13714 for tag in target.GetTags():
13715 if self.re.search(tag):
13716 results.append((path, tag))
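# For example (hypothetical data), searching for the pattern "^db" could
# produce [("/instances/inst1.example.com", "db-primary"),
#          ("/nodes/node2.example.com", "dbserver")].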
13720 class LUTagsSet(TagsLU):
13721 """Sets a tag on a given object.
13726 def CheckPrereq(self):
13727 """Check prerequisites.
13729 This checks the type and length of the tag name and value.
13732 TagsLU.CheckPrereq(self)
13733 for tag in self.op.tags:
13734 objects.TaggableObject.ValidateTag(tag)
13736 def Exec(self, feedback_fn):
13741 for tag in self.op.tags:
13742 self.target.AddTag(tag)
13743 except errors.TagError, err:
13744 raise errors.OpExecError("Error while setting tag: %s" % str(err))
13745 self.cfg.Update(self.target, feedback_fn)
13748 class LUTagsDel(TagsLU):
13749 """Delete a list of tags from a given object.
13754 def CheckPrereq(self):
13755 """Check prerequisites.
13757 This checks that we have the given tag.
13760 TagsLU.CheckPrereq(self)
13761 for tag in self.op.tags:
13762 objects.TaggableObject.ValidateTag(tag)
13763 del_tags = frozenset(self.op.tags)
13764 cur_tags = self.target.GetTags()
13766 diff_tags = del_tags - cur_tags
13768 diff_names = ("'%s'" % i for i in sorted(diff_tags))
13769 raise errors.OpPrereqError("Tag(s) %s not found" %
13770 (utils.CommaJoin(diff_names), ),
13771 errors.ECODE_NOENT)
13773 def Exec(self, feedback_fn):
13774 """Remove the tag from the object.
13777 for tag in self.op.tags:
13778 self.target.RemoveTag(tag)
13779 self.cfg.Update(self.target, feedback_fn)
13782 class LUTestDelay(NoHooksLU):
13783 """Sleep for a specified amount of time.
13785 This LU sleeps on the master and/or nodes for a specified amount of time.
13791 def ExpandNames(self):
13792 """Expand names and set required locks.
13794 This expands the node list, if any.
13797 self.needed_locks = {}
13798 if self.op.on_nodes:
13799 # _GetWantedNodes can be used here, but is not always appropriate to use
13800 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
13801 # more information.
13802 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
13803 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
13805 def _TestDelay(self):
13806 """Do the actual sleep.
13809 if self.op.on_master:
13810 if not utils.TestDelay(self.op.duration):
13811 raise errors.OpExecError("Error during master delay test")
13812 if self.op.on_nodes:
13813 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
13814 for node, node_result in result.items():
13815 node_result.Raise("Failure during rpc call to node %s" % node)
13817 def Exec(self, feedback_fn):
13818 """Execute the test delay opcode, with the wanted repetitions.
13821 if self.op.repeat == 0:
13824 top_value = self.op.repeat - 1
13825 for i in range(self.op.repeat):
13826 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
13830 class LUTestJqueue(NoHooksLU):
13831 """Utility LU to test some aspects of the job queue.
13836 # Must be lower than default timeout for WaitForJobChange to see whether it
13837 # notices changed jobs
13838 _CLIENT_CONNECT_TIMEOUT = 20.0
13839 _CLIENT_CONFIRM_TIMEOUT = 60.0
13842 def _NotifyUsingSocket(cls, cb, errcls):
13843 """Opens a Unix socket and waits for another program to connect.
13846 @param cb: Callback to send socket name to client
13847 @type errcls: class
13848 @param errcls: Exception class to use for errors
13851 # Using a temporary directory as there's no easy way to create temporary
13852 # sockets without writing a custom loop around tempfile.mktemp and socket.bind().
13854 tmpdir = tempfile.mkdtemp()
13856 tmpsock = utils.PathJoin(tmpdir, "sock")
13858 logging.debug("Creating temporary socket at %s", tmpsock)
13859 sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
13864 # Send details to client
13867 # Wait for client to connect before continuing
13868 sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
13870 (conn, _) = sock.accept()
13871 except socket.error, err:
13872 raise errcls("Client didn't connect in time (%s)" % err)
13876 # Remove as soon as client is connected
13877 shutil.rmtree(tmpdir)
13879 # Wait for client to close
13882 # pylint: disable=E1101
13883 # Instance of '_socketobject' has no ... member
13884 conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
13886 except socket.error, err:
13887 raise errcls("Client failed to confirm notification (%s)" % err)
13891 def _SendNotification(self, test, arg, sockname):
13892 """Sends a notification to the client.
13895 @param test: Test name
13896 @param arg: Test argument (depends on test)
13897 @type sockname: string
13898 @param sockname: Socket path
13901 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
13903 def _Notify(self, prereq, test, arg):
13904 """Notifies the client of a test.
13907 @param prereq: Whether this is a prereq-phase test
13909 @param test: Test name
13910 @param arg: Test argument (depends on test)
13914 errcls = errors.OpPrereqError
13916 errcls = errors.OpExecError
13918 return self._NotifyUsingSocket(compat.partial(self._SendNotification,
13922 def CheckArguments(self):
13923 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
13924 self.expandnames_calls = 0
13926 def ExpandNames(self):
13927 checkargs_calls = getattr(self, "checkargs_calls", 0)
13928 if checkargs_calls < 1:
13929 raise errors.ProgrammerError("CheckArguments was not called")
13931 self.expandnames_calls += 1
13933 if self.op.notify_waitlock:
13934 self._Notify(True, constants.JQT_EXPANDNAMES, None)
13936 self.LogInfo("Expanding names")
13938 # Get lock on master node (just to get a lock, not for a particular reason)
13939 self.needed_locks = {
13940 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
13943 def Exec(self, feedback_fn):
13944 if self.expandnames_calls < 1:
13945 raise errors.ProgrammerError("ExpandNames was not called")
13947 if self.op.notify_exec:
13948 self._Notify(False, constants.JQT_EXEC, None)
13950 self.LogInfo("Executing")
13952 if self.op.log_messages:
13953 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
13954 for idx, msg in enumerate(self.op.log_messages):
13955 self.LogInfo("Sending log message %s", idx + 1)
13956 feedback_fn(constants.JQT_MSGPREFIX + msg)
13957 # Report how many test messages have been sent
13958 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
13961 raise errors.OpExecError("Opcode failure was requested")
13966 class IAllocator(object):
13967 """IAllocator framework.
13969 An IAllocator instance has four sets of attributes:
13970 - cfg that is needed to query the cluster
13971 - input data (all members of the mode's key specification in _MODE_DATA are required)
13972 - four buffer attributes (in_data, out_data, in_text, out_text), which represent the
13973 input (to the external script) in text and data structure format,
13974 and the output from it, again in two formats
13975 - the result variables from the script (success, info, result) for easy usage
13979 # pylint: disable=R0902
13980 # lots of instance attributes
13982 def __init__(self, cfg, rpc_runner, mode, **kwargs):
13984 self.rpc = rpc_runner
13985 # init buffer variables
13986 self.in_text = self.out_text = self.in_data = self.out_data = None
13987 # init all input fields so that pylint is happy
13989 self.memory = self.disks = self.disk_template = None
13990 self.os = self.tags = self.nics = self.vcpus = None
13991 self.hypervisor = None
13992 self.relocate_from = None
13994 self.instances = None
13995 self.evac_mode = None
13996 self.target_groups = []
13998 self.required_nodes = None
13999 # init result fields
14000 self.success = self.info = self.result = None
14003 (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
14005 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
14006 " IAllocator" % self.mode)
14008 keyset = [n for (n, _) in keydata]
14011 if key not in keyset:
14012 raise errors.ProgrammerError("Invalid input parameter '%s' to"
14013 " IAllocator" % key)
14014 setattr(self, key, kwargs[key])
14017 if key not in kwargs:
14018 raise errors.ProgrammerError("Missing input parameter '%s' to"
14019 " IAllocator" % key)
14020 self._BuildInputData(compat.partial(fn, self), keydata)
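# Each _MODE_DATA entry (defined below) is a (request-building method, key
# specification, result check) triple; the loops above verify that the
# keyword arguments passed to the constructor match the key specification
# exactly before the input data is built.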
14022 def _ComputeClusterData(self):
14023 """Compute the generic allocator input data.
14025 This is the data that is independent of the actual operation.
14029 cluster_info = cfg.GetClusterInfo()
14032 "version": constants.IALLOCATOR_VERSION,
14033 "cluster_name": cfg.GetClusterName(),
14034 "cluster_tags": list(cluster_info.GetTags()),
14035 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
14036 # we don't have job IDs
14038 ninfo = cfg.GetAllNodesInfo()
14039 iinfo = cfg.GetAllInstancesInfo().values()
14040 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
14043 node_list = [n.name for n in ninfo.values() if n.vm_capable]
14045 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
14046 hypervisor_name = self.hypervisor
14047 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
14048 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
14050 hypervisor_name = cluster_info.primary_hypervisor
14052 node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
14055 self.rpc.call_all_instances_info(node_list,
14056 cluster_info.enabled_hypervisors)
14058 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
14060 config_ndata = self._ComputeBasicNodeData(ninfo)
14061 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
14062 i_list, config_ndata)
14063 assert len(data["nodes"]) == len(ninfo), \
14064 "Incomplete node data computed"
14066 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
14068 self.in_data = data
14071 def _ComputeNodeGroupData(cfg):
14072 """Compute node groups data.
14075 ng = dict((guuid, {
14076 "name": gdata.name,
14077 "alloc_policy": gdata.alloc_policy,
14079 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
14084 def _ComputeBasicNodeData(node_cfg):
14085 """Compute global node data.
14088 @returns: a dict mapping each node name to its static (config-based) data dict
14091 # fill in static (config-based) values
14092 node_results = dict((ninfo.name, {
14093 "tags": list(ninfo.GetTags()),
14094 "primary_ip": ninfo.primary_ip,
14095 "secondary_ip": ninfo.secondary_ip,
14096 "offline": ninfo.offline,
14097 "drained": ninfo.drained,
14098 "master_candidate": ninfo.master_candidate,
14099 "group": ninfo.group,
14100 "master_capable": ninfo.master_capable,
14101 "vm_capable": ninfo.vm_capable,
14103 for ninfo in node_cfg.values())
14105 return node_results
14108 def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list, node_results):
14110 """Compute global node data.
14112 @param node_results: the basic node structures as filled from the config
14115 #TODO(dynmem): compute the right data on MAX and MIN memory
14116 # make a copy of the current dict
14117 node_results = dict(node_results)
14118 for nname, nresult in node_data.items():
14119 assert nname in node_results, "Missing basic data for node %s" % nname
14120 ninfo = node_cfg[nname]
14122 if not (ninfo.offline or ninfo.drained):
14123 nresult.Raise("Can't get data for node %s" % nname)
14124 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
14126 remote_info = _MakeLegacyNodeInfo(nresult.payload)
14128 for attr in ["memory_total", "memory_free", "memory_dom0",
14129 "vg_size", "vg_free", "cpu_total"]:
14130 if attr not in remote_info:
14131 raise errors.OpExecError("Node '%s' didn't return attribute"
14132 " '%s'" % (nname, attr))
14133 if not isinstance(remote_info[attr], int):
14134 raise errors.OpExecError("Node '%s' returned invalid value"
14136 (nname, attr, remote_info[attr]))
14137 # compute memory used by primary instances
14138 i_p_mem = i_p_up_mem = 0
14139 for iinfo, beinfo in i_list:
14140 if iinfo.primary_node == nname:
14141 i_p_mem += beinfo[constants.BE_MAXMEM]
14142 if iinfo.name not in node_iinfo[nname].payload:
14145 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
14146 i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
14147 remote_info["memory_free"] -= max(0, i_mem_diff)
14149 if iinfo.admin_state == constants.ADMINST_UP:
14150 i_p_up_mem += beinfo[constants.BE_MAXMEM]
14152 # compute memory used by instances
14154 "total_memory": remote_info["memory_total"],
14155 "reserved_memory": remote_info["memory_dom0"],
14156 "free_memory": remote_info["memory_free"],
14157 "total_disk": remote_info["vg_size"],
14158 "free_disk": remote_info["vg_free"],
14159 "total_cpus": remote_info["cpu_total"],
14160 "i_pri_memory": i_p_mem,
14161 "i_pri_up_memory": i_p_up_mem,
14163 pnr_dyn.update(node_results[nname])
14164 node_results[nname] = pnr_dyn
14166 return node_results
14169 def _ComputeInstanceData(cluster_info, i_list):
14170 """Compute global instance data.
14174 for iinfo, beinfo in i_list:
14176 for nic in iinfo.nics:
14177 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
14181 "mode": filled_params[constants.NIC_MODE],
14182 "link": filled_params[constants.NIC_LINK],
14184 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
14185 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
14186 nic_data.append(nic_dict)
14188 "tags": list(iinfo.GetTags()),
14189 "admin_state": iinfo.admin_state,
14190 "vcpus": beinfo[constants.BE_VCPUS],
14191 "memory": beinfo[constants.BE_MAXMEM],
14193 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
14195 "disks": [{constants.IDISK_SIZE: dsk.size,
14196 constants.IDISK_MODE: dsk.mode}
14197 for dsk in iinfo.disks],
14198 "disk_template": iinfo.disk_template,
14199 "hypervisor": iinfo.hypervisor,
14201 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
14203 instance_data[iinfo.name] = pir
14205 return instance_data
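# For illustration (made-up values), a single entry of the returned dict
# could look roughly like:
#   {"tags": [], "admin_state": "up", "vcpus": 1, "memory": 512,
#    "disks": [{"size": 1024, "mode": "rw"}], "disk_template": "drbd",
#    "hypervisor": "xen-pvm", "nodes": ["node1", "node2"],
#    "disk_space_total": 1152}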
14207 def _AddNewInstance(self):
14208 """Add new instance data to allocator structure.
14210 This in combination with _ComputeClusterData will create the
14211 correct structure needed as input for the allocator.
14213 The checks for the completeness of the opcode must have already been done.
14217 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
14219 if self.disk_template in constants.DTS_INT_MIRROR:
14220 self.required_nodes = 2
14222 self.required_nodes = 1
14226 "disk_template": self.disk_template,
14229 "vcpus": self.vcpus,
14230 "memory": self.memory,
14231 "disks": self.disks,
14232 "disk_space_total": disk_space,
14234 "required_nodes": self.required_nodes,
14235 "hypervisor": self.hypervisor,
14240 def _AddRelocateInstance(self):
14241 """Add relocate instance data to allocator structure.
14243 This in combination with _ComputeClusterData will create the
14244 correct structure needed as input for the allocator.
14246 The checks for the completeness of the opcode must have already been done.
14250 instance = self.cfg.GetInstanceInfo(self.name)
14251 if instance is None:
14252 raise errors.ProgrammerError("Unknown instance '%s' passed to"
14253 " IAllocator" % self.name)
14255 if instance.disk_template not in constants.DTS_MIRRORED:
14256 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
14257 errors.ECODE_INVAL)
14259 if instance.disk_template in constants.DTS_INT_MIRROR and \
14260 len(instance.secondary_nodes) != 1:
14261 raise errors.OpPrereqError("Instance has not exactly one secondary node",
14262 errors.ECODE_STATE)
14264 self.required_nodes = 1
14265 disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
14266 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
14270 "disk_space_total": disk_space,
14271 "required_nodes": self.required_nodes,
14272 "relocate_from": self.relocate_from,
14276 def _AddNodeEvacuate(self):
14277 """Get data for node-evacuate requests.
14281 "instances": self.instances,
14282 "evac_mode": self.evac_mode,
14285 def _AddChangeGroup(self):
14286 """Get data for node-evacuate requests.
14290 "instances": self.instances,
14291 "target_groups": self.target_groups,
14294 def _BuildInputData(self, fn, keydata):
14295 """Build input data structures.
14298 self._ComputeClusterData()
14301 request["type"] = self.mode
14302 for keyname, keytype in keydata:
14303 if keyname not in request:
14304 raise errors.ProgrammerError("Request parameter %s is missing" %
14306 val = request[keyname]
14307 if not keytype(val):
14308 raise errors.ProgrammerError("Request parameter %s doesn't pass"
14309 " validation, value %s, expected"
14310 " type %s" % (keyname, val, keytype))
14311 self.in_data["request"] = request
14313 self.in_text = serializer.Dump(self.in_data)
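# The serialized text therefore carries the cluster data plus a "request"
# section.  For an allocation request, that section would look roughly like
# this (values are examples only):
#   {"type": "allocate", "name": "inst1.example.com", "memory": 512,
#    "disks": [{"size": 1024, "mode": "rw"}], "disk_template": "plain",
#    "os": "debian-image", "tags": [], "nics": [], "vcpus": 1,
#    "hypervisor": "xen-pvm"}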
14315 _STRING_LIST = ht.TListOf(ht.TString)
14316 _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
14317 # pylint: disable=E1101
14318 # Class '...' has no 'OP_ID' member
14319 "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
14320 opcodes.OpInstanceMigrate.OP_ID,
14321 opcodes.OpInstanceReplaceDisks.OP_ID])
14325 ht.TListOf(ht.TAnd(ht.TIsLength(3),
14326 ht.TItems([ht.TNonEmptyString,
14327 ht.TNonEmptyString,
14328 ht.TListOf(ht.TNonEmptyString),
14331 ht.TListOf(ht.TAnd(ht.TIsLength(2),
14332 ht.TItems([ht.TNonEmptyString,
14335 _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
14336 ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
14339 constants.IALLOCATOR_MODE_ALLOC:
14342 ("name", ht.TString),
14343 ("memory", ht.TInt),
14344 ("disks", ht.TListOf(ht.TDict)),
14345 ("disk_template", ht.TString),
14346 ("os", ht.TString),
14347 ("tags", _STRING_LIST),
14348 ("nics", ht.TListOf(ht.TDict)),
14349 ("vcpus", ht.TInt),
14350 ("hypervisor", ht.TString),
14352 constants.IALLOCATOR_MODE_RELOC:
14353 (_AddRelocateInstance,
14354 [("name", ht.TString), ("relocate_from", _STRING_LIST)],
14356 constants.IALLOCATOR_MODE_NODE_EVAC:
14357 (_AddNodeEvacuate, [
14358 ("instances", _STRING_LIST),
14359 ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
14361 constants.IALLOCATOR_MODE_CHG_GROUP:
14362 (_AddChangeGroup, [
14363 ("instances", _STRING_LIST),
14364 ("target_groups", _STRING_LIST),
14368 def Run(self, name, validate=True, call_fn=None):
14369 """Run an instance allocator and return the results.
14372 if call_fn is None:
14373 call_fn = self.rpc.call_iallocator_runner
14375 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
14376 result.Raise("Failure while running the iallocator script")
14378 self.out_text = result.payload
14380 self._ValidateResult()
14382 def _ValidateResult(self):
14383 """Process the allocator results.
14385 This parses the allocator output and, if successful, saves the result in
14386 self.out_data and the other result attributes.
14390 rdict = serializer.Load(self.out_text)
14391 except Exception, err:
14392 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
14394 if not isinstance(rdict, dict):
14395 raise errors.OpExecError("Can't parse iallocator results: not a dict")
14397 # TODO: remove backwards compatibility in later versions
14398 if "nodes" in rdict and "result" not in rdict:
14399 rdict["result"] = rdict["nodes"]
14402 for key in "success", "info", "result":
14403 if key not in rdict:
14404 raise errors.OpExecError("Can't parse iallocator results:"
14405 " missing key '%s'" % key)
14406 setattr(self, key, rdict[key])
14408 if not self._result_check(self.result):
14409 raise errors.OpExecError("Iallocator returned invalid result,"
14410 " expected %s, got %s" %
14411 (self._result_check, self.result),
14412 errors.ECODE_INVAL)
14414 if self.mode == constants.IALLOCATOR_MODE_RELOC:
14415 assert self.relocate_from is not None
14416 assert self.required_nodes == 1
14418 node2group = dict((name, ndata["group"])
14419 for (name, ndata) in self.in_data["nodes"].items())
14421 fn = compat.partial(self._NodesToGroups, node2group,
14422 self.in_data["nodegroups"])
14424 instance = self.cfg.GetInstanceInfo(self.name)
14425 request_groups = fn(self.relocate_from + [instance.primary_node])
14426 result_groups = fn(rdict["result"] + [instance.primary_node])
14428 if self.success and not set(result_groups).issubset(request_groups):
14429 raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
14430 " differ from original groups (%s)" %
14431 (utils.CommaJoin(result_groups),
14432 utils.CommaJoin(request_groups)))
14434 elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
14435 assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
14437 self.out_data = rdict
14440 def _NodesToGroups(node2group, groups, nodes):
14441 """Returns a list of unique group names for a list of nodes.
14443 @type node2group: dict
14444 @param node2group: Map from node name to group UUID
14446 @param groups: Group information
14448 @param nodes: Node names
14455 group_uuid = node2group[node]
14457 # Ignore unknown node
14461 group = groups[group_uuid]
14463 # Can't find group, let's use UUID
14464 group_name = group_uuid
14466 group_name = group["name"]
14468 result.add(group_name)
14470 return sorted(result)
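# Worked example (hypothetical data):
#   node2group = {"node1": "uuid-a", "node2": "uuid-b"}
#   groups = {"uuid-a": {"name": "default"}}
#   _NodesToGroups(node2group, groups, ["node1", "node2", "ghost"])
# returns ["default", "uuid-b"]: "ghost" is ignored as an unknown node, and
# "uuid-b" is used verbatim because it has no entry in groups.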
14473 class LUTestAllocator(NoHooksLU):
14474 """Run allocator tests.
14476 This LU runs the allocator tests
14479 def CheckPrereq(self):
14480 """Check prerequisites.
14482 This checks the opcode parameters depending on the direction and mode of the test.
14485 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
14486 for attr in ["memory", "disks", "disk_template",
14487 "os", "tags", "nics", "vcpus"]:
14488 if not hasattr(self.op, attr):
14489 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
14490 attr, errors.ECODE_INVAL)
14491 iname = self.cfg.ExpandInstanceName(self.op.name)
14492 if iname is not None:
14493 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
14494 iname, errors.ECODE_EXISTS)
14495 if not isinstance(self.op.nics, list):
14496 raise errors.OpPrereqError("Invalid parameter 'nics'",
14497 errors.ECODE_INVAL)
14498 if not isinstance(self.op.disks, list):
14499 raise errors.OpPrereqError("Invalid parameter 'disks'",
14500 errors.ECODE_INVAL)
14501 for row in self.op.disks:
14502 if (not isinstance(row, dict) or
14503 constants.IDISK_SIZE not in row or
14504 not isinstance(row[constants.IDISK_SIZE], int) or
14505 constants.IDISK_MODE not in row or
14506 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
14507 raise errors.OpPrereqError("Invalid contents of the 'disks'"
14508 " parameter", errors.ECODE_INVAL)
14509 if self.op.hypervisor is None:
14510 self.op.hypervisor = self.cfg.GetHypervisorType()
14511 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
14512 fname = _ExpandInstanceName(self.cfg, self.op.name)
14513 self.op.name = fname
14514 self.relocate_from = \
14515 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
14516 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
14517 constants.IALLOCATOR_MODE_NODE_EVAC):
14518 if not self.op.instances:
14519 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
14520 self.op.instances = _GetWantedInstances(self, self.op.instances)
14522 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
14523 self.op.mode, errors.ECODE_INVAL)
14525 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
14526 if self.op.allocator is None:
14527 raise errors.OpPrereqError("Missing allocator name",
14528 errors.ECODE_INVAL)
14529 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
14530 raise errors.OpPrereqError("Wrong allocator test '%s'" %
14531 self.op.direction, errors.ECODE_INVAL)
14533 def Exec(self, feedback_fn):
14534 """Run the allocator test.
14537 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
14538 ial = IAllocator(self.cfg, self.rpc,
14541 memory=self.op.memory,
14542 disks=self.op.disks,
14543 disk_template=self.op.disk_template,
14547 vcpus=self.op.vcpus,
14548 hypervisor=self.op.hypervisor,
14550 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
14551 ial = IAllocator(self.cfg, self.rpc,
14554 relocate_from=list(self.relocate_from),
14556 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
14557 ial = IAllocator(self.cfg, self.rpc,
14559 instances=self.op.instances,
14560 target_groups=self.op.target_groups)
14561 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
14562 ial = IAllocator(self.cfg, self.rpc,
14564 instances=self.op.instances,
14565 evac_mode=self.op.evac_mode)
14567 raise errors.ProgrammerError("Uncatched mode %s in"
14568 " LUTestAllocator.Exec", self.op.mode)
14570 if self.op.direction == constants.IALLOCATOR_DIR_IN:
14571 result = ial.in_text
14573 ial.Run(self.op.allocator, validate=False)
14574 result = ial.out_text
14578 #: Query type implementations
14580 constants.QR_INSTANCE: _InstanceQuery,
14581 constants.QR_NODE: _NodeQuery,
14582 constants.QR_GROUP: _GroupQuery,
14583 constants.QR_OS: _OsQuery,
14586 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
14589 def _GetQueryImplementation(name):
14590 """Returns the implemtnation for a query type.
14592 @param name: Query type, must be one of L{constants.QR_VIA_OP}
14596 return _QUERY_IMPL[name]
14598 raise errors.OpPrereqError("Unknown query resource '%s'" % name,
14599 errors.ECODE_INVAL)