# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable=W0201,C0302

# W0201 since most LU attributes are defined in CheckPrereq or similar
# functions

# C0302: since we have waaaay too many lines in this module

import copy
import itertools
import logging
import re

import OpenSSL

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import opcodes

from ganeti import rpc

import ganeti.masterd.instance # pylint: disable=W0611


#: Size of DRBD meta block device
DRBD_META_SIZE = 128

INSTANCE_UP = [constants.ADMINST_UP]
INSTANCE_DOWN = [constants.ADMINST_DOWN]
INSTANCE_OFFLINE = [constants.ADMINST_OFFLINE]
INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
79 """Data container for LU results with jobs.
81 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
82 by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
83 contained in the C{jobs} attribute and include the job IDs in the opcode
87 def __init__(self, jobs, **kwargs):
88 """Initializes this class.
90 Additional return values can be specified as keyword arguments.
92 @type jobs: list of lists of L{opcode.OpCode}
93 @param jobs: A list of lists of opcode objects
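

# Illustrative sketch: an LU hands follow-up work to the job queue by
# returning a ResultWithJobs from Exec; mcpu then submits the jobs and adds
# their IDs to the opcode result. The opcode parameters below are
# hypothetical.
#
#   def Exec(self, feedback_fn):
#     jobs = [[opcodes.OpInstanceStartup(instance_name="inst1.example.com")]]
#     return ResultWithJobs(jobs)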


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc_runner):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.glm = context.glm
    # readability alias
    self.owned_locks = context.glm.list_owned
    self.context = context
    self.rpc = rpc_runner
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    # logging
    self.Log = processor.Log # pylint: disable=C0103
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
    self.LogStep = processor.LogStep # pylint: disable=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    self.CheckArguments()

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as a purely lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.

    """

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError
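
  # Illustrative sketch: a typical concurrent LU (hypothetical) locks its
  # instance in ExpandNames and defers node locking to DeclareLocks:
  #
  #   def ExpandNames(self):
  #     self._ExpandAndLockInstance()
  #     self.needed_locks[locking.LEVEL_NODE] = []
  #     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE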

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level.

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      pass

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    @rtype: dict
    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
      will extend the environment with additional variables. If no environment
      should be defined, an empty dictionary should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.

    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. No nodes should be returned as an
      empty list (and not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the "unused argument" and "could
    # be a function" warnings
    # pylint: disable=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False,
                          level=locking.LEVEL_NODE):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances
    @param level: Which lock level to use for locking nodes

    """
    assert level in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
      self.needed_locks[level] = wanted_nodes
    elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
      self.needed_locks[level].extend(wanted_nodes)
    else:
      raise errors.ProgrammerError("Unknown recalculation mode")

    del self.recalculate_locks[level]


class NoHooksLU(LogicalUnit): # pylint: disable=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.

    This just raises an error.

    """
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")

  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.

    """
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
435 """Tasklet base class.
437 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
438 they can mix legacy code with tasklets. Locking needs to be done in the LU,
439 tasklets know nothing about locks.
441 Subclasses must follow these rules:
442 - Implement CheckPrereq
446 def __init__(self, lu):
453 def CheckPrereq(self):
454 """Check prerequisites for this tasklets.
456 This method should check whether the prerequisites for the execution of
457 this tasklet are fulfilled. It can do internode communication, but it
458 should be idempotent - no cluster or system changes are allowed.
460 The method should raise errors.OpPrereqError in case something is not
461 fulfilled. Its return value is ignored.
463 This method should also update all parameters to their canonical form if it
464 hasn't been done before.
469 def Exec(self, feedback_fn):
470 """Execute the tasklet.
472 This method should implement the actual work. It should raise
473 errors.OpExecError for failures that are somewhat dealt with in code, or
477 raise NotImplementedError
481 """Base for query utility classes.
484 #: Attribute holding field definitions
487 def __init__(self, qfilter, fields, use_locking):
488 """Initializes this class.
491 self.use_locking = use_locking
493 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
495 self.requested_data = self.query.RequestedData()
496 self.names = self.query.RequestedNames()
498 # Sort only if no names were requested
499 self.sort_by_name = not self.names
501 self.do_locking = None
504 def _GetNames(self, lu, all_names, lock_level):
505 """Helper function to determine names asked for in the query.
509 names = lu.owned_locks(lock_level)
513 if self.wanted == locking.ALL_SET:
514 assert not self.names
515 # caller didn't specify names, so ordering is not important
516 return utils.NiceSort(names)
518 # caller specified names and we must keep the same order
520 assert not self.do_locking or lu.glm.is_owned(lock_level)
522 missing = set(self.wanted).difference(names)
524 raise errors.OpExecError("Some items were removed before retrieving"
525 " their data: %s" % missing)
527 # Return expanded names

  def ExpandNames(self, lu):
    """Expand names for this query.

    See L{LogicalUnit.ExpandNames}.

    """
    raise NotImplementedError()

  def DeclareLocks(self, lu, level):
    """Declare locks for this query.

    See L{LogicalUnit.DeclareLocks}.

    """
    raise NotImplementedError()

  def _GetQueryData(self, lu):
    """Collects all data for this query.

    @return: Query data object

    """
    raise NotImplementedError()

  def NewStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
                                  sort_by_name=self.sort_by_name)

  def OldStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return self.query.OldStyleQuery(self._GetQueryData(lu),
                                    sort_by_name=self.sort_by_name)
570 """Returns a dict declaring all lock levels shared.
573 return dict.fromkeys(locking.LEVELS, 1)


def _MakeLegacyNodeInfo(data):
  """Formats the data returned by L{rpc.RpcRunner.call_node_info}.

  Converts the data into a single dictionary. This is fine for most use cases,
  but some require information from more than one volume group or hypervisor.

  """
  (bootid, (vg_info, ), (hv_info, )) = data

  return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
    "bootid": bootid,
    })
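

# Illustrative sketch of the reshaping done above (the vg_info/hv_info keys
# are hypothetical; only "bootid" is added by this function):
#
#   data = ("boot-id", ({"vg_free": 10240}, ), ({"memory_free": 2048}, ))
#   _MakeLegacyNodeInfo(data)
#   # => {"vg_free": 10240, "memory_free": 2048, "bootid": "boot-id"}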


def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
  """Checks if the owned node groups are still correct for an instance.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type instance_name: string
  @param instance_name: Instance name
  @type owned_groups: set or frozenset
  @param owned_groups: List of currently owned node groups

  """
  inst_groups = cfg.GetInstanceNodeGroups(instance_name)

  if not owned_groups.issuperset(inst_groups):
    raise errors.OpPrereqError("Instance %s's node groups changed since"
                               " locks were acquired, current groups are"
                               " '%s', owning groups '%s'; retry the"
                               " operation" %
                               (instance_name,
                                utils.CommaJoin(inst_groups),
                                utils.CommaJoin(owned_groups)),
                               errors.ECODE_STATE)

  return inst_groups


def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
  """Checks if the instances in a node group are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type group_uuid: string
  @param group_uuid: Node group UUID
  @type owned_instances: set or frozenset
  @param owned_instances: List of currently owned instances

  """
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
  if owned_instances != wanted_instances:
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
                               " locks were acquired, wanted '%s', have '%s';"
                               " retry the operation" %
                               (group_uuid,
                                utils.CommaJoin(wanted_instances),
                                utils.CommaJoin(owned_instances)),
                               errors.ECODE_STATE)

  return wanted_instances


def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise

  """
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy
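

# Illustrative sketch of _GetUpdatedParams semantics (keys and values are
# hypothetical):
#
#   old = {"mem": 128, "vcpus": 2}
#   _GetUpdatedParams(old, {"mem": constants.VALUE_DEFAULT, "vcpus": 4})
#   # => {"vcpus": 4}; "default" deletes a key, other values override, and
#   # old_params itself is left untouched (deep copy)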


def _UpdateAndVerifySubDict(base, updates, type_check):
  """Updates and verifies a dict with sub dicts of the same type.

  @param base: The dict with the old data
  @param updates: The dict with the new data
  @param type_check: Dict suitable to ForceDictType to verify correct types
  @returns: A new dict with updated and verified values

  """
  def fn(old, value):
    new = _GetUpdatedParams(old, value)
    utils.ForceDictType(new, type_check)
    return new

  ret = copy.deepcopy(base)
  ret.update(dict((key, fn(base.get(key, {}), value))
                  for key, value in updates.items()))
  return ret


def _MergeAndVerifyHvState(op_input, obj_input):
  """Combines the hv state from an opcode with that of the object.

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict

  """
  if op_input:
    invalid_hvs = set(op_input) - constants.HYPER_TYPES
    if invalid_hvs:
      raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
                                 " %s" % utils.CommaJoin(invalid_hvs),
                                 errors.ECODE_INVAL)
    if obj_input is None:
      obj_input = {}
    type_check = constants.HVSTS_PARAMETER_TYPES
    return _UpdateAndVerifySubDict(obj_input, op_input, type_check)

  return None


def _MergeAndVerifyDiskState(op_input, obj_input):
  """Combines the disk state from an opcode with that of the object.

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict

  """
  if op_input:
    invalid_dst = set(op_input) - constants.DS_VALID_TYPES
    if invalid_dst:
      raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
                                 utils.CommaJoin(invalid_dst),
                                 errors.ECODE_INVAL)
    type_check = constants.DSS_PARAMETER_TYPES
    if obj_input is None:
      obj_input = {}
    return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
                                              type_check))
                for key, value in op_input.items())

  return None


def _ReleaseLocks(lu, level, names=None, keep=None):
  """Releases locks owned by an LU.

  @type lu: L{LogicalUnit}
  @param level: Lock level
  @type names: list or None
  @param names: Names of locks to release
  @type keep: list or None
  @param keep: Names of locks to retain

  """
  assert not (keep is not None and names is not None), \
    "Only one of the 'names' and the 'keep' parameters can be given"

  if names is not None:
    should_release = names.__contains__
  elif keep:
    should_release = lambda name: name not in keep
  else:
    should_release = None

  owned = lu.owned_locks(level)
  if not owned:
    # Not owning any lock at this level, do nothing
    pass

  elif should_release:
    retain = []
    release = []

    # Determine which locks to release
    for name in owned:
      if should_release(name):
        release.append(name)
      else:
        retain.append(name)

    assert len(lu.owned_locks(level)) == (len(retain) + len(release))

    # Release just some locks
    lu.glm.release(level, names=release)

    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
  else:
    # Release everything
    lu.glm.release(level)

    assert not lu.glm.is_owned(level), "No locks should be owned"
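

# Illustrative sketch: after narrowing its work to a single node, an LU can
# drop the other node locks it still owns (the node name is hypothetical):
#
#   _ReleaseLocks(lu, locking.LEVEL_NODE, keep=["node1.example.com"])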


def _MapInstanceDisksToNodes(instances):
  """Creates a map from (node, volume) to instance name.

  @type instances: list of L{objects.Instance}
  @rtype: dict; tuple of (node name, volume name) as key, instance name as value

  """
  return dict(((node, vol), inst.name)
              for inst in instances
              for (node, vols) in inst.MapLVsByNode().items()
              for vol in vols)


def _RunPostHook(lu, node_name):
  """Runs the post-hook for an opcode on a single node.

  """
  hm = lu.proc.BuildHooksManager(lu)
  try:
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
  except:
    # pylint: disable=W0702
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = static
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global parameters.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceState(lu, instance, req_states, msg=None):
  """Ensure that an instance is in one of the required states.

  @param lu: the LU on behalf of which we make the check
  @param instance: the instance to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the instance is not in the required state

  """
  if msg is None:
    msg = "can't use instance from outside %s states" % ", ".join(req_states)
  if instance.admin_state not in req_states:
    raise errors.OpPrereqError("Instance %s is marked to be %s, %s" %
                               (instance, instance.admin_state, msg),
                               errors.ECODE_STATE)

  if constants.ADMINST_UP not in req_states:
    pnode = instance.primary_node
    ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
    ins_l.Raise("Can't contact node %s for instance information" % pnode,
                prereq=True, ecode=errors.ECODE_ENVIRON)

    if instance.name in ins_l.payload:
      raise errors.OpPrereqError("Instance %s is running, %s" %
                                 (instance.name, msg), errors.ECODE_STATE)


def _CheckMinMaxSpecs(name, ipolicy, value):
  """Checks if value is in the desired range.

  @param name: name of the parameter for which we perform the check
  @param ipolicy: dictionary containing min, max and std values
  @param value: actual value that we want to use
  @return: None or element not meeting the criteria

  """
  if value in [None, constants.VALUE_AUTO]:
    return None
  max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
  min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
  if value > max_v or min_v > value:
    return ("%s value %s is not in range [%s, %s]" %
            (name, value, min_v, max_v))
  return None


def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
                                 nic_count, disk_sizes,
                                 _check_spec_fn=_CheckMinMaxSpecs):
  """Verifies ipolicy against provided specs.

  @type ipolicy: dict
  @param ipolicy: The ipolicy
  @type mem_size: int
  @param mem_size: The memory size
  @type cpu_count: int
  @param cpu_count: Used cpu cores
  @type disk_count: int
  @param disk_count: Number of disks used
  @type nic_count: int
  @param nic_count: Number of nics used
  @type disk_sizes: list of ints
  @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
  @param _check_spec_fn: The checking function (unittest only)
  @return: A list of violations, or an empty list if no violations are found

  """
  assert disk_count == len(disk_sizes)

  test_settings = [
    (constants.ISPEC_MEM_SIZE, mem_size),
    (constants.ISPEC_CPU_COUNT, cpu_count),
    (constants.ISPEC_DISK_COUNT, disk_count),
    (constants.ISPEC_NIC_COUNT, nic_count),
    ] + map((lambda d: (constants.ISPEC_DISK_SIZE, d)), disk_sizes)

  return filter(None,
                (_check_spec_fn(name, ipolicy, value)
                 for (name, value) in test_settings))
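

# Illustrative sketch (hypothetical policy values): with ISPECS_MIN/ISPECS_MAX
# memory-size bounds of 512/4096, a 128 MB spec yields one violation string
# while all other settings pass:
#
#   _ComputeIPolicySpecViolation(ipolicy, mem_size=128, cpu_count=1,
#                                disk_count=1, nic_count=1, disk_sizes=[1024])
#   # => a single message like "... value 128 is not in range [512, 4096]"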


def _ComputeIPolicyInstanceViolation(ipolicy, instance,
                                     _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance meets the specs of ipolicy.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance: L{objects.Instance}
  @param instance: The instance to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
  cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
  disk_count = len(instance.disks)
  disk_sizes = [disk.size for disk in instance.disks]
  nic_count = len(instance.nics)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes)


def _ComputeIPolicyInstanceSpecViolation(ipolicy, instance_spec,
                                         _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance specs meets the specs of ipolicy.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance_spec: dict
  @param instance_spec: The instance spec to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
  cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
  disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
  disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
  nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes)


def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
                                 target_group,
                                 _compute_fn=_ComputeIPolicyInstanceViolation):
  """Compute if instance meets the specs of the new target group.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param current_group: The current group of the instance
  @param target_group: The new group of the instance
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  if current_group == target_group:
    return []
  else:
    return _compute_fn(ipolicy, instance)


def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
                            _compute_fn=_ComputeIPolicyNodeViolation):
  """Checks that the target node is correct in terms of instance policy.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param node: The new node to relocate
  @param ignore: Ignore violations of the ipolicy
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  res = _compute_fn(ipolicy, instance, instance.primary_node.group, node.group)

  if res:
    msg = ("Instance does not meet target node group's (%s) instance"
           " policy: %s") % (node.group, utils.CommaJoin(res))
    if ignore:
      lu.LogWarning(msg)
    else:
      raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          minmem, maxmem, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name, tags):
  """Builds instance-related env variables for hooks.

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: string
  @param status: the desired status of the instance
  @type minmem: string
  @param minmem: the minimum memory size of the instance
  @type maxmem: string
  @param maxmem: the maximum memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @type tags: list
  @param tags: list of instance tags as strings
  @rtype: dict
  @return: the hook environment for this instance

  """
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": status,
    "INSTANCE_MINMEM": minmem,
    "INSTANCE_MAXMEM": maxmem,
    # TODO(2.7) remove deprecated "memory" value
    "INSTANCE_MEMORY": maxmem,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
    }
  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  if not tags:
    tags = []

  env["INSTANCE_TAGS"] = " ".join(tags)

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance-related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    "name": instance.name,
    "primary_node": instance.primary_node,
    "secondary_nodes": instance.secondary_nodes,
    "os_type": instance.os,
    "status": instance.admin_state,
    "maxmem": bep[constants.BE_MAXMEM],
    "minmem": bep[constants.BE_MINMEM],
    "vcpus": bep[constants.BE_VCPUS],
    "nics": _NICListToTuple(lu, instance.nics),
    "disk_template": instance.disk_template,
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
    "bep": bep,
    "hvp": hvp,
    "hypervisor_name": instance.hypervisor,
    "tags": instance.tags,
    }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max by one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should
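

# Worked example (hypothetical numbers): with candidate_pool_size = 10,
# 3 current candidates and 3 possible candidates, the node being added makes
# mc_should = min(3 + 1, 10) = 4; since mc_now (3) < 4, the new node should
# promote itself.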


def _CalculateGroupIPolicy(cluster, group):
  """Calculate instance policy for group.

  """
  return cluster.SimpleFillIPolicy(group.ipolicy)


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  variant = objects.OS.GetVariant(name)
  if not os_obj.supported_variants:
    if variant:
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                                 " passed)" % (os_obj.name, variant),
                                 errors.ECODE_INVAL)
    return
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
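

# Illustrative sketch: variants are encoded in the OS name after a "+", so
# for an OS declaring a (hypothetical) "squeeze" variant,
# "debootstrap+squeeze" passes, plain "debootstrap" fails with "OS name must
# include a variant", and "debootstrap+sid" fails as unsupported.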


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both an iallocator and a node",
                               errors.ECODE_INVAL)
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found;"
                                 " please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator")


def _GetDefaultIAllocator(cfg, iallocator):
  """Decides on which iallocator to use.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration object
  @type iallocator: string or None
  @param iallocator: Iallocator specified in opcode
  @rtype: string
  @return: Iallocator name

  """
  if not iallocator:
    # Use default iallocator
    iallocator = cfg.GetDefaultIAllocator()

  if not iallocator:
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
                               " opcode nor as a cluster-wide default",
                               errors.ECODE_INVAL)

  return iallocator


class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master_params = self.cfg.GetMasterNetworkParameters()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master_params.name)

    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
    if result.fail_msg:
      self.LogWarning("Error disabling the master IP address: %s",
                      result.fail_msg)

    return master_params.name


def _VerifyCertificate(filename):
  """Verifies a certificate for L{LUClusterVerifyConfig}.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable=W0703
    return (LUClusterVerifyConfig.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)


def _GetAllHypervisorParameters(cluster, instances):
  """Compute the set of all hypervisor parameters.

  @type cluster: L{objects.Cluster}
  @param cluster: the cluster object
  @type instances: list of L{objects.Instance}
  @param instances: additional instances from which to obtain parameters
  @rtype: list of (origin, hypervisor, parameters)
  @return: a list with all parameters found, indicating the hypervisor they
       apply to, and the origin (can be "cluster", "os X", or "instance Y")

  """
  hvp_data = []

  for hv_name in cluster.enabled_hypervisors:
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))

  for os_name, os_hvp in cluster.os_hvp.items():
    for hv_name, hv_params in os_hvp.items():
      if hv_params:
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
        hvp_data.append(("os %s" % os_name, hv_name, full_params))

  # TODO: collapse identical parameter values in a single one
  for instance in instances:
    if instance.hvparams:
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
                       cluster.FillHV(instance)))

  return hvp_data


class _VerifyErrors(object):
  """Mix-in for cluster/group verify LUs.

  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
  self.op and self._feedback_fn to be available.)

  """

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt, _ = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item is not None:
        item = " " + str(item)
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101

  def _ErrorIf(self, cond, ecode, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = (bool(cond)
            or self.op.debug_simulate_errors) # pylint: disable=E1101

    # If the error code is in the list of ignored errors, demote the error to
    # a warning
    (_, etxt, _) = ecode
    if etxt in self.op.ignore_errors: # pylint: disable=E1101
      kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING

    if cond:
      self._Error(ecode, *args, **kwargs)

    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond
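

# Illustrative usage sketch of _VerifyErrors._ErrorIf from within a verify
# LU's Exec (the condition, error code and message here are hypothetical):
#
#   test = node_result is None
#   self._ErrorIf(test, constants.CV_ENODERPC, node,
#                 "unable to verify node: no data returned")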


class LUClusterVerify(NoHooksLU):
  """Submits all jobs necessary to verify the cluster.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    jobs = []

    if self.op.group_name:
      groups = [self.op.group_name]
      depends_fn = lambda: None
    else:
      groups = self.cfg.GetNodeGroupList()

      # Verify global configuration
      jobs.append([
        opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
        ])

      # Always depend on global verification
      depends_fn = lambda: [(-len(jobs), [])]

    jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
                                              ignore_errors=self.op.ignore_errors,
                                              depends=depends_fn())]
                for group in groups)

    # Fix up all parameters
    for op in itertools.chain(*jobs): # pylint: disable=W0142
      op.debug_simulate_errors = self.op.debug_simulate_errors
      op.verbose = self.op.verbose
      op.error_codes = self.op.error_codes
      try:
        op.skip_checks = self.op.skip_checks
      except AttributeError:
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)

    return ResultWithJobs(jobs)


class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
  """Verifies the cluster config.

  """
  REQ_BGL = False

  def _VerifyHVP(self, hvp_data):
    """Verifies locally the syntax of the hypervisor parameters.

    """
    for item, hv_name, hv_params in hvp_data:
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
             (hv_name, item))
      try:
        hv_class = hypervisor.GetHypervisor(hv_name)
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
        hv_class.CheckParameterSyntax(hv_params)
      except errors.GenericError, err:
        self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))

  def ExpandNames(self):
    # Information can be safely retrieved as the BGL is acquired in exclusive
    # mode
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    """
    self.bad = False
    self._feedback_fn = feedback_fn

    feedback_fn("* Verifying cluster config")

    for msg in self.cfg.VerifyConfig():
      self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)

    feedback_fn("* Verifying cluster certificate files")

    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)

    feedback_fn("* Verifying hypervisor parameters")

    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
                                                self.all_inst_info.values()))

    feedback_fn("* Verifying all nodes belong to an existing group")

    # We do this verification here because, should this bogus circumstance
    # occur, it would never be caught by VerifyGroup, which only acts on
    # nodes/instances reachable from existing node groups.

    dangling_nodes = set(node.name for node in self.all_node_info.values()
                         if node.group not in self.all_group_info)

    dangling_instances = {}
    no_node_instances = []

    for inst in self.all_inst_info.values():
      if inst.primary_node in dangling_nodes:
        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
      elif inst.primary_node not in self.all_node_info:
        no_node_instances.append(inst.name)

    pretty_dangling = [
        "%s (%s)" %
        (node.name,
         utils.CommaJoin(dangling_instances.get(node.name,
                                                ["no instances"])))
        for node in dangling_nodes]

    self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
                  None,
                  "the following nodes (and their instances) belong to a non-"
                  "existing group: %s", utils.CommaJoin(pretty_dangling))

    self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
                  None,
                  "the following instances have a non-existing primary-node:"
                  " %s", utils.CommaJoin(no_node_instances))

    return not self.bad


class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
  """Verifies the status of a node group.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  _HOOKS_INDENT_RE = re.compile("^", re.M)

  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @type name: string
    @ivar name: the node name to which this object refers
    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dictionary of {primary-node: list of instances} for all
        instances for which this node is secondary (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)
    @type os_fail: boolean
    @ivar os_fail: whether the RPC call didn't return valid OS data
    @type oslist: list
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
    @type vm_capable: boolean
    @ivar vm_capable: whether the node can host instances

    """
    def __init__(self, offline=False, name=None, vm_capable=True):
      self.name = name
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.vm_capable = vm_capable
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
      self.os_fail = False
      self.oslist = {}
1946 def ExpandNames(self):
1947 # This raises errors.OpPrereqError on its own:
1948 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1950 # Get instances in node group; this is unsafe and needs verification later
1951 inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
1953 self.needed_locks = {
1954 locking.LEVEL_INSTANCE: inst_names,
1955 locking.LEVEL_NODEGROUP: [self.group_uuid],
1956 locking.LEVEL_NODE: [],
1959 self.share_locks = _ShareAll()
1961 def DeclareLocks(self, level):
1962 if level == locking.LEVEL_NODE:
1963 # Get members of node group; this is unsafe and needs verification later
1964 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1966 all_inst_info = self.cfg.GetAllInstancesInfo()
1968 # In Exec(), we warn about mirrored instances that have primary and
1969 # secondary living in separate node groups. To fully verify that
1970 # volumes for these instances are healthy, we will need to do an
      # extra call to their secondaries. We ensure here those nodes will
      # be locked.
1973 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
1974 # Important: access only the instances whose lock is owned
1975 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1976 nodes.update(all_inst_info[inst].secondary_nodes)
1978 self.needed_locks[locking.LEVEL_NODE] = nodes
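
  # Illustrative sketch (not part of the original module): the extra-lock
  # computation performed above, reduced to plain data. ``instances`` maps
  # instance name -> (disk_template, primary node, secondary nodes) and
  # ``node_groups`` maps node name -> group UUID; all names are hypothetical.
  @staticmethod
  def _ExampleExtraSecondaryNodes(instances, node_groups, mirrored_templates):
    """Return secondaries living in a different group than their primary."""
    extra = set()
    for (template, primary, secondaries) in instances.values():
      if template not in mirrored_templates:
        continue
      for snode in secondaries:
        if node_groups[snode] != node_groups[primary]:
          extra.add(snode)
    return extra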
1980 def CheckPrereq(self):
1981 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
1982 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
1984 group_nodes = set(self.group_info.members)
1985 group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
    unlocked_nodes = \
      group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1990 unlocked_instances = \
1991 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
    if unlocked_nodes:
      raise errors.OpPrereqError("Missing lock for nodes: %s" %
                                 utils.CommaJoin(unlocked_nodes))
1997 if unlocked_instances:
1998 raise errors.OpPrereqError("Missing lock for instances: %s" %
1999 utils.CommaJoin(unlocked_instances))
2001 self.all_node_info = self.cfg.GetAllNodesInfo()
2002 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2004 self.my_node_names = utils.NiceSort(group_nodes)
2005 self.my_inst_names = utils.NiceSort(group_instances)
2007 self.my_node_info = dict((name, self.all_node_info[name])
2008 for name in self.my_node_names)
2010 self.my_inst_info = dict((name, self.all_inst_info[name])
2011 for name in self.my_inst_names)
2013 # We detect here the nodes that will need the extra RPC calls for verifying
2014 # split LV volumes; they should be locked.
2015 extra_lv_nodes = set()
2017 for inst in self.my_inst_info.values():
2018 if inst.disk_template in constants.DTS_INT_MIRROR:
2019 group = self.my_node_info[inst.primary_node].group
2020 for nname in inst.secondary_nodes:
2021 if self.all_node_info[nname].group != group:
2022 extra_lv_nodes.add(nname)
2024 unlocked_lv_nodes = \
2025 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2027 if unlocked_lv_nodes:
      raise errors.OpPrereqError("Missing node locks for LV check: %s" %
                                 utils.CommaJoin(unlocked_lv_nodes))
2030 self.extra_lv_nodes = list(extra_lv_nodes)
2032 def _VerifyNode(self, ninfo, nresult):
2033 """Perform some basic validation on data returned from a node.
    - check the result data structure is well formed and has all the
      mandatory fields
    - check ganeti version
2039 @type ninfo: L{objects.Node}
2040 @param ninfo: the node to check
2041 @param nresult: the results from the node
2043 @return: whether overall this call was successful (and we can expect
      reasonable values in the response)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2050 # main result, nresult should be a non-empty dict
2051 test = not nresult or not isinstance(nresult, dict)
2052 _ErrorIf(test, constants.CV_ENODERPC, node,
2053 "unable to verify node: no data returned")
2057 # compares ganeti version
2058 local_version = constants.PROTOCOL_VERSION
2059 remote_version = nresult.get("version", None)
2060 test = not (remote_version and
2061 isinstance(remote_version, (list, tuple)) and
2062 len(remote_version) == 2)
2063 _ErrorIf(test, constants.CV_ENODERPC, node,
2064 "connection to node returned invalid data")
2068 test = local_version != remote_version[0]
2069 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2070 "incompatible protocol versions: master %s,"
2071 " node %s", local_version, remote_version[0])
2075 # node seems compatible, we can actually try to look into its results
2077 # full package version
2078 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2079 constants.CV_ENODEVERSION, node,
2080 "software version mismatch: master %s, node %s",
2081 constants.RELEASE_VERSION, remote_version[1],
2082 code=self.ETYPE_WARNING)
2084 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2085 if ninfo.vm_capable and isinstance(hyp_result, dict):
2086 for hv_name, hv_result in hyp_result.iteritems():
2087 test = hv_result is not None
2088 _ErrorIf(test, constants.CV_ENODEHV, node,
2089 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2091 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2092 if ninfo.vm_capable and isinstance(hvp_result, list):
2093 for item, hv_name, hv_result in hvp_result:
2094 _ErrorIf(True, constants.CV_ENODEHV, node,
2095 "hypervisor %s parameter verify failure (source %s): %s",
2096 hv_name, item, hv_result)
2098 test = nresult.get(constants.NV_NODESETUP,
2099 ["Missing NODESETUP results"])
    _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True
2105 def _VerifyNodeTime(self, ninfo, nresult,
2106 nvinfo_starttime, nvinfo_endtime):
2107 """Check the node time.
2109 @type ninfo: L{objects.Node}
2110 @param ninfo: the node to check
2111 @param nresult: the remote results for the node
2112 @param nvinfo_starttime: the start time of the RPC call
2113 @param nvinfo_endtime: the end time of the RPC call
    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, constants.CV_ENODETIME, node,
               "Node returned invalid time")
      return
2126 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2127 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2128 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

    _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)
2137 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2138 """Check the node LVM results.
2140 @type ninfo: L{objects.Node}
2141 @param ninfo: the node to check
2142 @param nresult: the remote results for the node
2143 @param vg_name: the configured VG name
    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = (vglist is None)
    _ErrorIf(test, constants.CV_ENODELVM, node,
             "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2162 pvlist = nresult.get(constants.NV_PVLIST, None)
2163 test = pvlist is None
2164 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
    # check that ':' is not present in PV names, since it's a
    # special character for lvcreate (denotes the range of PEs to
    # use on the PV)
    if not test:
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, constants.CV_ENODELVM, node,
                 "Invalid character ':' in PV '%s' of VG '%s'",
                 pvname, owner_vg)
2175 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2176 """Check the node bridges.
2178 @type ninfo: L{objects.Node}
2179 @param ninfo: the node to check
2180 @param nresult: the remote results for the node
2181 @param bridges: the expected list of bridges
    """
    if not bridges:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2190 missing = nresult.get(constants.NV_BRIDGES, None)
2191 test = not isinstance(missing, list)
2192 _ErrorIf(test, constants.CV_ENODENET, node,
2193 "did not return valid bridge information")
    if not test:
      _ErrorIf(bool(missing), constants.CV_ENODENET, node,
               "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2198 def _VerifyNodeUserScripts(self, ninfo, nresult):
2199 """Check the results of user scripts presence and executability on the node
2201 @type ninfo: L{objects.Node}
2202 @param ninfo: the node to check
2203 @param nresult: the remote results for the node
    """
    node = ninfo.name

    test = constants.NV_USERSCRIPTS not in nresult
2209 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2210 "did not return user scripts information")
    broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
    if not test:
      self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
                    "user scripts not present or not executable: %s" %
                    utils.CommaJoin(sorted(broken_scripts)))
2218 def _VerifyNodeNetwork(self, ninfo, nresult):
2219 """Check the node network connectivity results.
2221 @type ninfo: L{objects.Node}
2222 @param ninfo: the node to check
2223 @param nresult: the remote results for the node
    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2229 test = constants.NV_NODELIST not in nresult
2230 _ErrorIf(test, constants.CV_ENODESSH, node,
2231 "node hasn't returned node ssh connectivity data")
2233 if nresult[constants.NV_NODELIST]:
2234 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2235 _ErrorIf(True, constants.CV_ENODESSH, node,
2236 "ssh communication with node '%s': %s", a_node, a_msg)
2238 test = constants.NV_NODENETTEST not in nresult
2239 _ErrorIf(test, constants.CV_ENODENET, node,
2240 "node hasn't returned node tcp connectivity data")
2242 if nresult[constants.NV_NODENETTEST]:
      nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
      for anode in nlist:
        _ErrorIf(True, constants.CV_ENODENET, node,
                 "tcp communication with node '%s': %s",
                 anode, nresult[constants.NV_NODENETTEST][anode])
2249 test = constants.NV_MASTERIP not in nresult
2250 _ErrorIf(test, constants.CV_ENODENET, node,
2251 "node hasn't returned node master IP reachability data")
2253 if not nresult[constants.NV_MASTERIP]:
2254 if node == self.master_node:
        msg = "the master node cannot reach the master IP (not configured?)"
      else:
        msg = "cannot reach the master IP"
      _ErrorIf(True, constants.CV_ENODENET, node, msg)
2260 def _VerifyInstancePolicy(self, instance):
2261 """Verify instance specs against instance policy set on node group level.
    """
    cluster = self.cfg.GetClusterInfo()
2266 full_beparams = cluster.FillBE(instance)
2267 ipolicy = cluster.SimpleFillIPolicy(self.group_info.ipolicy)
2269 mem_size = full_beparams.get(constants.BE_MAXMEM, None)
2270 cpu_count = full_beparams.get(constants.BE_VCPUS, None)
2271 disk_count = len(instance.disks)
2272 disk_sizes = [disk.size for disk in instance.disks]
2273 nic_count = len(instance.nics)
    test_settings = [
      (constants.ISPEC_MEM_SIZE, mem_size),
2277 (constants.ISPEC_CPU_COUNT, cpu_count),
2278 (constants.ISPEC_DISK_COUNT, disk_count),
2279 (constants.ISPEC_NIC_COUNT, nic_count),
2280 ] + map((lambda d: (constants.ISPEC_DISK_SIZE, d)), disk_sizes)
2282 for (name, value) in test_settings:
2283 test_result = _CheckMinMaxSpecs(name, ipolicy, value)
2284 self._ErrorIf(test_result is not None,
                    constants.CV_EINSTANCEPOLICY, instance.name,
                    test_result)
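
  # Illustrative sketch (not part of the original module): a minimal
  # min/max comparison in the spirit of _CheckMinMaxSpecs, whose definition
  # is not part of this excerpt; the ipolicy layout used here ("min"/"max"
  # dicts keyed by spec name) is an assumption.
  @staticmethod
  def _ExampleMinMaxSpecCheck(name, ipolicy, value):
    """Return an error string if value violates the policy, else None."""
    if value is None:
      return None
    min_v = ipolicy.get("min", {}).get(name, 0)
    max_v = ipolicy.get("max", {}).get(name, value)
    if value < min_v:
      return "%s value %s is smaller than minimal %s" % (name, value, min_v)
    elif value > max_v:
      return "%s value %s is bigger than maximal %s" % (name, value, max_v)
    return None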
  def _VerifyInstance(self, instance, instanceconfig, node_image,
                      diskstatus):
2290 """Verify an instance.
2292 This function checks to see if the required block devices are
2293 available on the instance's node.
    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2297 node_current = instanceconfig.primary_node
2299 node_vol_should = {}
2300 instanceconfig.MapLVsByNode(node_vol_should)
2302 self._VerifyInstancePolicy(instanceconfig)
2304 for node in node_vol_should:
2305 n_img = node_image[node]
2306 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
2309 for volume in node_vol_should[node]:
2310 test = volume not in n_img.volumes
2311 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2312 "volume %s missing on node %s", volume, node)
2314 if instanceconfig.admin_state == constants.ADMINST_UP:
2315 pri_img = node_image[node_current]
2316 test = instance not in pri_img.instances and not pri_img.offline
2317 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2318 "instance not running on its primary node %s",
2321 diskdata = [(nname, success, status, idx)
2322 for (nname, disks) in diskstatus.items()
2323 for idx, (success, status) in enumerate(disks)]
2325 for nname, success, bdev_status, idx in diskdata:
      # the 'ghost node' construction in Exec() ensures that we have a
      # node here
2328 snode = node_image[nname]
2329 bad_snode = snode.ghost or snode.offline
2330 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2331 not success and not bad_snode,
2332 constants.CV_EINSTANCEFAULTYDISK, instance,
2333 "couldn't retrieve status for disk/%s on %s: %s",
2334 idx, nname, bdev_status)
2335 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2336 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2337 constants.CV_EINSTANCEFAULTYDISK, instance,
2338 "disk/%s on %s is faulty", idx, nname)
2340 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2341 """Verify if there are any unknown volumes in the cluster.
2343 The .os, .swap and backup volumes are ignored. All other volumes are
2344 reported as unknown.
2346 @type reserved: L{ganeti.utils.FieldSet}
2347 @param reserved: a FieldSet of reserved volume names
    """
    for node, n_img in node_image.items():
2351 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # skip non-healthy nodes
        continue
2354 for volume in n_img.volumes:
2355 test = ((node not in node_vol_should or
2356 volume not in node_vol_should[node]) and
2357 not reserved.Matches(volume))
2358 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2359 "volume %s is unknown", volume)
2361 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2362 """Verify N+1 Memory Resilience.
2364 Check that if one single node dies we can still start all the
2365 instances it was primary for.
    """
    cluster_info = self.cfg.GetClusterInfo()
2369 for node, n_img in node_image.items():
2370 # This code checks that every node which is now listed as
2371 # secondary has enough memory to host all instances it is
2372 # supposed to should a single other node in the cluster fail.
2373 # FIXME: not ready for failover to an arbitrary node
2374 # FIXME: does not support file-backed instances
2375 # WARNING: we currently take into account down instances as well
2376 # as up ones, considering that even if they're down someone
2377 # might want to start them even in the event of a node failure.
      if n_img.offline:
        # we're skipping offline nodes from the N+1 warning, since
        # most likely we don't have good memory information from them;
        # we already list instances living on such nodes, and that's
        # enough
        continue
2384 #TODO(dynmem): use MINMEM for checking
2385 #TODO(dynmem): also consider ballooning out other instances
2386 for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
2389 bep = cluster_info.FillBE(instance_cfg[instance])
2390 if bep[constants.BE_AUTO_BALANCE]:
2391 needed_mem += bep[constants.BE_MAXMEM]
2392 test = n_img.mfree < needed_mem
2393 self._ErrorIf(test, constants.CV_ENODEN1, node,
2394 "not enough memory to accomodate instance failovers"
2395 " should node %s fail (%dMiB needed, %dMiB available)",
2396 prinode, needed_mem, n_img.mfree)
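
  # Illustrative sketch (not part of the original module): the memory
  # arithmetic of the N+1 check above on plain data. ``sbp`` maps a primary
  # node name -> list of (maxmem, auto_balance) pairs for the instances a
  # candidate node is secondary for; names are hypothetical.
  @staticmethod
  def _ExampleNPlusOneShortfalls(sbp, mfree):
    """Return (primary, needed_mem) pairs that would not fit into mfree."""
    failing = []
    for (prinode, insts) in sbp.items():
      needed_mem = sum(maxmem for (maxmem, auto_balance) in insts
                       if auto_balance)
      if mfree < needed_mem:
        failing.append((prinode, needed_mem))
    return failing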
  @classmethod
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2400 (files_all, files_opt, files_mc, files_vm)):
2401 """Verifies file checksums collected from all nodes.
2403 @param errorif: Callback for reporting errors
2404 @param nodeinfo: List of L{objects.Node} objects
2405 @param master_node: Name of master node
2406 @param all_nvinfo: RPC results
    """
    # Define functions determining which nodes to consider for a file
    files2nodefn = [
      (files_all, None),
      (files_mc, lambda node: (node.master_candidate or
                               node.name == master_node)),
      (files_vm, lambda node: node.vm_capable),
      ]
2417 # Build mapping from filename to list of nodes which should have the file
    nodefiles = {}
    for (files, fn) in files2nodefn:
      if fn is None:
        filenodes = nodeinfo
      else:
        filenodes = filter(fn, nodeinfo)
2424 nodefiles.update((filename,
2425 frozenset(map(operator.attrgetter("name"), filenodes)))
2426 for filename in files)
2428 assert set(nodefiles) == (files_all | files_mc | files_vm)
2430 fileinfo = dict((filename, {}) for filename in nodefiles)
2431 ignore_nodes = set()
2433 for node in nodeinfo:
      if node.offline:
        ignore_nodes.add(node.name)
        continue
2438 nresult = all_nvinfo[node.name]
      if nresult.fail_msg or not nresult.payload:
        node_files = None
      else:
        node_files = nresult.payload.get(constants.NV_FILELIST, None)
2445 test = not (node_files and isinstance(node_files, dict))
2446 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2447 "Node did not return file checksum data")
      if test:
        ignore_nodes.add(node.name)
        continue
2452 # Build per-checksum mapping from filename to nodes having it
2453 for (filename, checksum) in node_files.items():
2454 assert filename in nodefiles
2455 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2457 for (filename, checksums) in fileinfo.items():
2458 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2460 # Nodes having the file
2461 with_file = frozenset(node_name
2462 for nodes in fileinfo[filename].values()
2463 for node_name in nodes) - ignore_nodes
2465 expected_nodes = nodefiles[filename] - ignore_nodes
2467 # Nodes missing file
2468 missing_file = expected_nodes - with_file
2470 if filename in files_opt:
2472 errorif(missing_file and missing_file != expected_nodes,
2473 constants.CV_ECLUSTERFILECHECK, None,
2474 "File %s is optional, but it must exist on all or no"
2475 " nodes (not found on %s)",
2476 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
      else:
        errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
                "File %s is missing from node(s) %s", filename,
                utils.CommaJoin(utils.NiceSort(missing_file)))

        # Warn if a node has a file it shouldn't
        unexpected = with_file - expected_nodes
        errorif(unexpected,
                constants.CV_ECLUSTERFILECHECK, None,
                "File %s should not exist on node(s) %s",
                filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2489 # See if there are multiple versions of the file
2490 test = len(checksums) > 1
2492 variants = ["variant %s on %s" %
2493 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2494 for (idx, (checksum, nodes)) in
2495 enumerate(sorted(checksums.items()))]
2499 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2500 "File %s found with %s different checksums (%s)",
2501 filename, len(checksums), "; ".join(variants))
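
  # Illustrative sketch (not part of the original module): classifying a
  # single file from the per-checksum mapping built above. ``checksums``
  # maps checksum -> set of node names having that variant; the arguments
  # are plain sets and all names are hypothetical.
  @staticmethod
  def _ExampleFileDiagnosis(checksums, expected_nodes, optional):
    """Return (missing, unexpected, has_multiple_variants) for one file."""
    with_file = set()
    for nodes in checksums.values():
      with_file.update(nodes)
    missing = expected_nodes - with_file
    if optional and (not missing or missing == expected_nodes):
      # optional files may be missing everywhere, just not on a subset
      missing = set()
    unexpected = with_file - expected_nodes
    return (missing, unexpected, len(checksums) > 1)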
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
                      drbd_map):
    """Verifies the node DRBD status.
2507 @type ninfo: L{objects.Node}
2508 @param ninfo: the node to check
2509 @param nresult: the remote results for the node
2510 @param instanceinfo: the dict of instances
2511 @param drbd_helper: the configured DRBD usermode helper
2512 @param drbd_map: the DRBD map as returned by
2513 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    if drbd_helper:
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
      test = (helper_result is None)
      _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
               "no drbd usermode helper returned")
      if helper_result:
        status, payload = helper_result
        test = not status
        _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
                 "drbd usermode helper check unsuccessful: %s", payload)
        test = status and (payload != drbd_helper)
        _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
                 "wrong drbd usermode helper: %s", payload)
    # compute the DRBD minors
    node_drbd = {}
2535 for minor, instance in drbd_map[node].items():
2536 test = instance not in instanceinfo
2537 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2538 "ghost instance '%s' in temporary DRBD map", instance)
2539 # ghost instance should not be running, but otherwise we
2540 # don't give double warnings (both ghost instance and
2541 # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
2546 node_drbd[minor] = (instance.name,
2547 instance.admin_state == constants.ADMINST_UP)
2549 # and now check them
2550 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2551 test = not isinstance(used_minors, (tuple, list))
2552 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2553 "cannot parse drbd status file: %s", str(used_minors))
    if test:
      # we cannot check drbd status
      return
2558 for minor, (iname, must_exist) in node_drbd.items():
2559 test = minor not in used_minors and must_exist
2560 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2561 "drbd minor %d of instance %s is not active", minor, iname)
2562 for minor in used_minors:
2563 test = minor not in node_drbd
2564 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2565 "unallocated drbd minor %d is in use", minor)
2567 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2568 """Builds the node OS structures.
2570 @type ninfo: L{objects.Node}
2571 @param ninfo: the node to check
2572 @param nresult: the remote results for the node
2573 @param nimg: the node image object
    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2579 remote_os = nresult.get(constants.NV_OSLIST, None)
2580 test = (not isinstance(remote_os, list) or
2581 not compat.all(isinstance(v, list) and len(v) == 7
2582 for v in remote_os))
    _ErrorIf(test, constants.CV_ENODEOS, node,
             "node hasn't returned valid OS data")

    nimg.os_fail = test
    if test:
      return

    os_dict = {}
2594 for (name, os_path, status, diagnose,
2595 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
      if name not in os_dict:
        os_dict[name] = []
2600 # parameters is a list of lists instead of list of tuples due to
2601 # JSON lacking a real tuple type, fix it:
2602 parameters = [tuple(v) for v in parameters]
2603 os_dict[name].append((os_path, status, diagnose,
2604 set(variants), set(parameters), set(api_ver)))
2606 nimg.oslist = os_dict
2608 def _VerifyNodeOS(self, ninfo, nimg, base):
2609 """Verifies the node OS list.
2611 @type ninfo: L{objects.Node}
2612 @param ninfo: the node to check
2613 @param nimg: the node image object
2614 @param base: the 'template' node we match against (e.g. from the master)
    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2620 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2622 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2623 for os_name, os_data in nimg.oslist.items():
2624 assert os_data, "Empty OS status for OS %s?!" % os_name
2625 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2626 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2627 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2628 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2629 "OS '%s' has multiple entries (first one shadows the rest): %s",
2630 os_name, utils.CommaJoin([v[0] for v in os_data]))
2631 # comparisons with the 'base' image
2632 test = os_name not in base.oslist
2633 _ErrorIf(test, constants.CV_ENODEOS, node,
2634 "Extra OS %s not present on reference node (%s)",
2638 assert base.oslist[os_name], "Base node has empty OS status?"
2639 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
      if not b_status:
        # base OS is invalid, skipping
        continue
2643 for kind, a, b in [("API version", f_api, b_api),
2644 ("variants list", f_var, b_var),
2645 ("parameters", beautify_params(f_param),
2646 beautify_params(b_param))]:
2647 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2648 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2649 kind, os_name, base.name,
2650 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2652 # check any missing OSes
2653 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2654 _ErrorIf(missing, constants.CV_ENODEOS, node,
2655 "OSes present on reference node %s but missing on this node: %s",
2656 base.name, utils.CommaJoin(missing))
2658 def _VerifyOob(self, ninfo, nresult):
2659 """Verifies out of band functionality of a node.
2661 @type ninfo: L{objects.Node}
2662 @param ninfo: the node to check
2663 @param nresult: the remote results for the node
    """
    node = ninfo.name

    # We just have to verify the paths on master and/or master candidates
2668 # as the oob helper is invoked on the master
2669 if ((ninfo.master_candidate or ninfo.master_capable) and
2670 constants.NV_OOB_PATHS in nresult):
2671 for path_result in nresult[constants.NV_OOB_PATHS]:
2672 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2674 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2675 """Verifies and updates the node volume data.
2677 This function will update a L{NodeImage}'s internal structures
2678 with data from the remote call.
2680 @type ninfo: L{objects.Node}
2681 @param ninfo: the node to check
2682 @param nresult: the remote results for the node
2683 @param nimg: the node image object
2684 @param vg_name: the configured VG name
    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    if vg_name is None:
      pass
    elif isinstance(lvdata, basestring):
2695 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2696 utils.SafeEncode(lvdata))
2697 elif not isinstance(lvdata, dict):
2698 _ErrorIf(True, constants.CV_ENODELVM, node,
2699 "rpc call to node failed (lvlist)")
    else:
      nimg.volumes = lvdata
      nimg.lvm_fail = False
2704 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2705 """Verifies and updates the node instance list.
2707 If the listing was successful, then updates this node's instance
2708 list. Otherwise, it marks the RPC call as failed for the instance
2711 @type ninfo: L{objects.Node}
2712 @param ninfo: the node to check
2713 @param nresult: the remote results for the node
2714 @param nimg: the node image object
2717 idata = nresult.get(constants.NV_INSTANCELIST, None)
2718 test = not isinstance(idata, list)
2719 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2720 "rpc call to node failed (instancelist): %s",
2721 utils.SafeEncode(str(idata)))
    if test:
      nimg.hyp_fail = True
    else:
      nimg.instances = idata
2727 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2728 """Verifies and computes a node information map
2730 @type ninfo: L{objects.Node}
2731 @param ninfo: the node to check
2732 @param nresult: the remote results for the node
2733 @param nimg: the node image object
2734 @param vg_name: the configured VG name
    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2740 # try to read free memory (from the hypervisor)
2741 hv_info = nresult.get(constants.NV_HVINFO, None)
2742 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2743 _ErrorIf(test, constants.CV_ENODEHV, node,
2744 "rpc call to node failed (hvinfo)")
    if not test:
      try:
        nimg.mfree = int(hv_info["memory_free"])
      except (ValueError, TypeError):
        _ErrorIf(True, constants.CV_ENODERPC, node,
                 "node returned invalid nodeinfo, check hypervisor")
2752 # FIXME: devise a free space model for file based instances as well
2753 if vg_name is not None:
2754 test = (constants.NV_VGLIST not in nresult or
2755 vg_name not in nresult[constants.NV_VGLIST])
2756 _ErrorIf(test, constants.CV_ENODELVM, node,
2757 "node didn't return data for the volume group '%s'"
2758 " - it is either missing or broken", vg_name)
      if not test:
        try:
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
        except (ValueError, TypeError):
          _ErrorIf(True, constants.CV_ENODERPC, node,
                   "node returned invalid LVM info, check LVM status")
2766 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2767 """Gets per-disk status information for all instances.
2769 @type nodelist: list of strings
2770 @param nodelist: Node names
2771 @type node_image: dict of (name, L{objects.Node})
2772 @param node_image: Node objects
2773 @type instanceinfo: dict of (name, L{objects.Instance})
2774 @param instanceinfo: Instance objects
    @rtype: {instance: {node: [(success, payload)]}}
2776 @return: a dictionary of per-instance dictionaries with nodes as
2777 keys and disk information as values; the disk information is a
2778 list of tuples (success, payload)
    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    node_disks = {}
    node_disks_devonly = {}
2785 diskless_instances = set()
2786 diskless = constants.DT_DISKLESS
2788 for nname in nodelist:
2789 node_instances = list(itertools.chain(node_image[nname].pinst,
2790 node_image[nname].sinst))
2791 diskless_instances.update(inst for inst in node_instances
2792 if instanceinfo[inst].disk_template == diskless)
2793 disks = [(inst, disk)
2794 for inst in node_instances
2795 for disk in instanceinfo[inst].disks]
      if not disks:
        # No need to collect data
        continue

      node_disks[nname] = disks
2803 # Creating copies as SetDiskID below will modify the objects and that can
2804 # lead to incorrect data returned from nodes
2805 devonly = [dev.Copy() for (_, dev) in disks]
      for dev in devonly:
        self.cfg.SetDiskID(dev, nname)
2810 node_disks_devonly[nname] = devonly
2812 assert len(node_disks) == len(node_disks_devonly)
2814 # Collect data from all nodes with disks
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
                                                          node_disks_devonly)

    assert len(result) == len(node_disks)

    instdisk = {}
2822 for (nname, nres) in result.items():
      disks = node_disks[nname]

      if nres.offline:
        # No data from this node
        data = len(disks) * [(False, "node offline")]
      else:
        msg = nres.fail_msg
        _ErrorIf(msg, constants.CV_ENODERPC, nname,
                 "while getting disk information: %s", msg)
        if msg:
          # No data from this node
          data = len(disks) * [(False, msg)]
        else:
          data = []
          for idx, i in enumerate(nres.payload):
            if isinstance(i, (tuple, list)) and len(i) == 2:
              data.append(i)
            else:
              logging.warning("Invalid result from node %s, entry %d: %s",
                              nname, idx, i)
              data.append((False, "Invalid result from the remote node"))
2845 for ((inst, _), status) in zip(disks, data):
2846 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2848 # Add empty entries for diskless instances.
2849 for inst in diskless_instances:
      assert inst not in instdisk
      instdisk[inst] = {}
2853 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2854 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2855 compat.all(isinstance(s, (tuple, list)) and
2856 len(s) == 2 for s in statuses)
2857 for inst, nnames in instdisk.items()
2858 for nname, statuses in nnames.items())
2859 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
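
  # For reference (not part of the original module), the returned structure
  # looks like this, with hypothetical names; every instance maps to a
  # per-node list holding one (success, payload) pair per disk:
  #
  #   {"inst1": {"node1": [(True, status1), (False, "node offline")]},
  #    "inst2": {"node2": [(True, status2)]}}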
  @staticmethod
  def _SshNodeSelector(group_uuid, all_nodes):
2865 """Create endless iterators for all potential SSH check hosts.
    """
    nodes = [node for node in all_nodes
             if (node.group != group_uuid and
                 not node.offline)]
    keyfunc = operator.attrgetter("group")
2873 return map(itertools.cycle,
2874 [sorted(map(operator.attrgetter("name"), names))
                for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
                                                  keyfunc)])
  @classmethod
  def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2880 """Choose which nodes should talk to which other nodes.
    We will make nodes contact all nodes in their group, and one node from
    every other node group.
2885 @warning: This algorithm has a known issue if one node group is much
2886 smaller than others (e.g. just one node). In such a case all other
2887 nodes will talk to the single node.
    """
    online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2891 sel = cls._SshNodeSelector(group_uuid, all_nodes)
2893 return (online_nodes,
2894 dict((name, sorted([i.next() for i in sel]))
2895 for name in online_nodes))
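
  # Illustrative example (not part of the original module) of what the two
  # helpers above produce for a hypothetical cluster with groups g1 (the
  # verified one, nodes n1/n2), g2 (nodes g2a/g2b) and g3 (node g3a):
  #
  #   online_nodes = ["n1", "n2"]
  #   per_node = {"n1": ["g2a", "g3a"],
  #               "n2": ["g2b", "g3a"]}
  #
  # i.e. every node additionally talks to one round-robin node from every
  # other group, so inter-group SSH connectivity is covered without making
  # each node contact the whole cluster.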
2897 def BuildHooksEnv(self):
2900 Cluster-Verify hooks just ran in the post phase and their failure makes
2901 the output be logged in the verify output and the verification to fail.
2905 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2908 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
               for node in self.my_node_info.values())

    return env
2913 def BuildHooksNodes(self):
2914 """Build hooks nodes.
    """
    return ([], self.my_node_names)
2919 def Exec(self, feedback_fn):
2920 """Verify integrity of the node group, performing various test on nodes.
    """
    # This method has too many local variables. pylint: disable=R0914
2924 feedback_fn("* Verifying group '%s'" % self.group_info.name)
2926 if not self.my_node_names:
2928 feedback_fn("* Empty node group, skipping verification")
2932 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2933 verbose = self.op.verbose
2934 self._feedback_fn = feedback_fn
2936 vg_name = self.cfg.GetVGName()
2937 drbd_helper = self.cfg.GetDRBDHelper()
2938 cluster = self.cfg.GetClusterInfo()
2939 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2940 hypervisors = cluster.enabled_hypervisors
2941 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2943 i_non_redundant = [] # Non redundant instances
2944 i_non_a_balanced = [] # Non auto-balanced instances
2945 i_offline = 0 # Count of offline instances
2946 n_offline = 0 # Count of offline nodes
2947 n_drained = 0 # Count of nodes being drained
2948 node_vol_should = {}
2950 # FIXME: verify OS list
2953 filemap = _ComputeAncillaryFiles(cluster, False)
2955 # do local checksums
2956 master_node = self.master_node = self.cfg.GetMasterNode()
2957 master_ip = self.cfg.GetMasterIP()
2959 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
    user_scripts = []
    if self.cfg.GetUseExternalMipScript():
2963 user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
2965 node_verify_param = {
2966 constants.NV_FILELIST:
2967 utils.UniqueSequence(filename
2968 for files in filemap
2969 for filename in files),
2970 constants.NV_NODELIST:
2971 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
2972 self.all_node_info.values()),
2973 constants.NV_HYPERVISOR: hypervisors,
2974 constants.NV_HVPARAMS:
2975 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2976 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2977 for node in node_data_list
2978 if not node.offline],
2979 constants.NV_INSTANCELIST: hypervisors,
2980 constants.NV_VERSION: None,
2981 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2982 constants.NV_NODESETUP: None,
2983 constants.NV_TIME: None,
2984 constants.NV_MASTERIP: (master_node, master_ip),
2985 constants.NV_OSLIST: None,
2986 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
      constants.NV_USERSCRIPTS: user_scripts,
      }
2990 if vg_name is not None:
2991 node_verify_param[constants.NV_VGLIST] = None
2992 node_verify_param[constants.NV_LVLIST] = vg_name
2993 node_verify_param[constants.NV_PVLIST] = [vg_name]
2994 node_verify_param[constants.NV_DRBDLIST] = None
    if drbd_helper:
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3000 # FIXME: this needs to be changed per node-group, not cluster-wide
    bridges = set()
    default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3003 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3004 bridges.add(default_nicpp[constants.NIC_LINK])
3005 for instance in self.my_inst_info.values():
3006 for nic in instance.nics:
3007 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3008 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3009 bridges.add(full_nic[constants.NIC_LINK])
    if bridges:
      node_verify_param[constants.NV_BRIDGES] = list(bridges)
3014 # Build our expected cluster state
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
                                                 name=node.name,
                                                 vm_capable=node.vm_capable))
3018 for node in node_data_list)
    oob_paths = []
    for node in self.all_node_info.values():
3023 path = _SupportsOob(self.cfg, node)
3024 if path and path not in oob_paths:
3025 oob_paths.append(path)
    if oob_paths:
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3030 for instance in self.my_inst_names:
3031 inst_config = self.my_inst_info[instance]
3033 for nname in inst_config.all_nodes:
3034 if nname not in node_image:
3035 gnode = self.NodeImage(name=nname)
3036 gnode.ghost = (nname not in self.all_node_info)
3037 node_image[nname] = gnode
3039 inst_config.MapLVsByNode(node_vol_should)
3041 pnode = inst_config.primary_node
3042 node_image[pnode].pinst.append(instance)
3044 for snode in inst_config.secondary_nodes:
3045 nimg = node_image[snode]
3046 nimg.sinst.append(instance)
3047 if pnode not in nimg.sbp:
3048 nimg.sbp[pnode] = []
3049 nimg.sbp[pnode].append(instance)
3051 # At this point, we have the in-memory data structures complete,
3052 # except for the runtime information, which we'll gather next
3054 # Due to the way our RPC system works, exact response times cannot be
3055 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3056 # time before and after executing the request, we can at least have a time
3058 nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
                                           node_verify_param,
                                           self.cfg.GetClusterName())
3062 nvinfo_endtime = time.time()
3064 if self.extra_lv_nodes and vg_name is not None:
      extra_lv_nvinfo = \
          self.rpc.call_node_verify(self.extra_lv_nodes,
3067 {constants.NV_LVLIST: vg_name},
3068 self.cfg.GetClusterName())
    else:
      extra_lv_nvinfo = {}
3072 all_drbd_map = self.cfg.ComputeDRBDMap()
3074 feedback_fn("* Gathering disk information (%s nodes)" %
3075 len(self.my_node_names))
    instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
                                     self.my_inst_info)
3079 feedback_fn("* Verifying configuration file consistency")
3081 # If not all nodes are being checked, we need to make sure the master node
3082 # and a non-checked vm_capable node are in the list.
    absent_nodes = set(self.all_node_info).difference(self.my_node_info)

    if absent_nodes:
      vf_nvinfo = all_nvinfo.copy()
3086 vf_node_info = list(self.my_node_info.values())
3087 additional_nodes = []
3088 if master_node not in self.my_node_info:
3089 additional_nodes.append(master_node)
3090 vf_node_info.append(self.all_node_info[master_node])
3091 # Add the first vm_capable node we find which is not included
3092 for node in absent_nodes:
3093 nodeinfo = self.all_node_info[node]
3094 if nodeinfo.vm_capable and not nodeinfo.offline:
          additional_nodes.append(node)
          vf_node_info.append(self.all_node_info[node])
          break
3098 key = constants.NV_FILELIST
3099 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3100 {key: node_verify_param[key]},
3101 self.cfg.GetClusterName()))
    else:
      vf_nvinfo = all_nvinfo
      vf_node_info = self.my_node_info.values()
3106 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3108 feedback_fn("* Verifying node status")
3112 for node_i in node_data_list:
      node = node_i.name
      nimg = node_image[node]

      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline += 1
        continue
      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained += 1
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3134 msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
               msg)
      if msg:
        nimg.rpc_fail = True
        continue
3141 nresult = all_nvinfo[node].payload
3143 nimg.call_ok = self._VerifyNode(node_i, nresult)
3144 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3145 self._VerifyNodeNetwork(node_i, nresult)
3146 self._VerifyNodeUserScripts(node_i, nresult)
3147 self._VerifyOob(node_i, nresult)
      if nimg.vm_capable:
        self._VerifyNodeLVM(node_i, nresult, vg_name)
        self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
                             all_drbd_map)

        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
        self._UpdateNodeInstances(node_i, nresult, nimg)
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
        self._UpdateNodeOS(node_i, nresult, nimg)

        if not nimg.os_fail:
          if refos_img is None:
            refos_img = nimg
          self._VerifyNodeOS(node_i, nimg, refos_img)
        self._VerifyNodeBridges(node_i, nresult, bridges)
      # Check whether all running instances are primary for the node. (This
3166 # can no longer be done from _VerifyInstance below, since some of the
3167 # wrong instances could be from other node groups.)
3168 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3170 for inst in non_primary_inst:
3171 # FIXME: investigate best way to handle offline insts
3172 if inst.admin_state == constants.ADMINST_OFFLINE:
3174 feedback_fn("* Skipping offline instance %s" % inst.name)
3177 test = inst in self.all_inst_info
3178 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3179 "instance should not run on node %s", node_i.name)
3180 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3181 "node is running unknown instance %s", inst)
3183 for node, result in extra_lv_nvinfo.items():
3184 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3185 node_image[node], vg_name)
3187 feedback_fn("* Verifying instance status")
3188 for instance in self.my_inst_names:
3190 feedback_fn("* Verifying instance %s" % instance)
3191 inst_config = self.my_inst_info[instance]
      self._VerifyInstance(instance, inst_config, node_image,
                           instdisk[instance])
3194 inst_nodes_offline = []
3196 pnode = inst_config.primary_node
3197 pnode_img = node_image[pnode]
3198 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3199 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3200 " primary node failed", instance)
      _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
               pnode_img.offline,
               constants.CV_EINSTANCEBADNODE, instance,
3205 "instance is marked as running and lives on offline node %s",
3206 inst_config.primary_node)
3208 # If the instance is non-redundant we cannot survive losing its primary
3209 # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
3212 # FIXME: does not support file-backed instances
3213 if not inst_config.secondary_nodes:
3214 i_non_redundant.append(instance)
3216 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3217 constants.CV_EINSTANCELAYOUT,
3218 instance, "instance has multiple secondary nodes: %s",
3219 utils.CommaJoin(inst_config.secondary_nodes),
3220 code=self.ETYPE_WARNING)
3222 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3223 pnode = inst_config.primary_node
3224 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3225 instance_groups = {}
        for node in instance_nodes:
          instance_groups.setdefault(self.all_node_info[node].group,
                                     []).append(node)

        pretty_list = [
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
          # Sort so that we always list the primary node first.
          for group, nodes in sorted(instance_groups.items(),
                                     key=lambda (_, nodes): pnode in nodes,
                                     reverse=True)]
3238 self._ErrorIf(len(instance_groups) > 1,
3239 constants.CV_EINSTANCESPLITGROUPS,
3240 instance, "instance has primary and secondary nodes in"
3241 " different groups: %s", utils.CommaJoin(pretty_list),
3242 code=self.ETYPE_WARNING)
3244 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3245 i_non_a_balanced.append(instance)
3247 for snode in inst_config.secondary_nodes:
3248 s_img = node_image[snode]
3249 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3250 snode, "instance %s, connection to secondary node failed",
3254 inst_nodes_offline.append(snode)
3256 # warn that the instance lives on offline nodes
3257 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3258 "instance has offline secondary node(s) %s",
3259 utils.CommaJoin(inst_nodes_offline))
3260 # ... or ghost/non-vm_capable nodes
3261 for node in inst_config.all_nodes:
3262 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3263 instance, "instance lives on ghost node %s", node)
3264 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3265 instance, "instance lives on non-vm_capable node %s", node)
3267 feedback_fn("* Verifying orphan volumes")
3268 reserved = utils.FieldSet(*cluster.reserved_lvs)
3270 # We will get spurious "unknown volume" warnings if any node of this group
3271 # is secondary for an instance whose primary is in another group. To avoid
3272 # them, we find these instances and add their volumes to node_vol_should.
3273 for inst in self.all_inst_info.values():
3274 for secondary in inst.secondary_nodes:
3275 if (secondary in self.my_node_info
3276 and inst.name not in self.my_inst_info):
3277 inst.MapLVsByNode(node_vol_should)
3280 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3282 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3283 feedback_fn("* Verifying N+1 Memory redundancy")
3284 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3286 feedback_fn("* Other Notes")
3288 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3289 % len(i_non_redundant))
3291 if i_non_a_balanced:
3292 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3293 % len(i_non_a_balanced))
3296 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3299 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3302 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3306 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3307 """Analyze the post-hooks' result
3309 This method analyses the hook result, handles it, and sends some
3310 nicely-formatted feedback back to the user.
3312 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3313 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3314 @param hooks_results: the results of the multi-node hooks rpc call
3315 @param feedback_fn: function used send feedback back to the caller
3316 @param lu_result: previous Exec result
3317 @return: the new Exec result, based on the previous result
    """
    # We only really run POST phase hooks, only for non-empty groups,
3322 # and are only interested in their results
    if not self.my_node_names:
      # empty node group
      pass
    elif phase == constants.HOOKS_PHASE_POST:
3327 # Used to change hooks' output to proper indentation
3328 feedback_fn("* Hooks Results")
3329 assert hooks_results, "invalid result from hooks"
3331 for node_name in hooks_results:
3332 res = hooks_results[node_name]
        msg = res.fail_msg
        test = msg and not res.offline
3335 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3336 "Communication failure in hooks execution: %s", msg)
3337 if res.offline or msg:
          # No need to investigate payload if node is offline or gave
          # an error.
          continue
3341 for script, hkr, output in res.payload:
3342 test = hkr == constants.HKR_FAIL
3343 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3344 "Script %s failed, output:", script)
          if test:
            output = self._HOOKS_INDENT_RE.sub("      ", output)
            feedback_fn("%s" % output)
            lu_result = False

    return lu_result
3353 class LUClusterVerifyDisks(NoHooksLU):
3354 """Verifies the cluster disks status.
3359 def ExpandNames(self):
3360 self.share_locks = _ShareAll()
3361 self.needed_locks = {
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
      }
3365 def Exec(self, feedback_fn):
3366 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3368 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3369 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3370 for group in group_names])
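

def _ExampleVerifyDisksJobs(group_names):
  """Illustrative sketch, not part of the original module: build the same
  per-group job list as L{LUClusterVerifyDisks.Exec} for the given group
  names (one single-opcode job per node group).

  """
  return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
                         for group in group_names])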
3373 class LUGroupVerifyDisks(NoHooksLU):
3374 """Verifies the status of all disks in a node group.
3379 def ExpandNames(self):
3380 # Raises errors.OpPrereqError on its own if group can't be found
3381 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3383 self.share_locks = _ShareAll()
3384 self.needed_locks = {
3385 locking.LEVEL_INSTANCE: [],
3386 locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }
3390 def DeclareLocks(self, level):
3391 if level == locking.LEVEL_INSTANCE:
3392 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3394 # Lock instances optimistically, needs verification once node and group
3395 # locks have been acquired
3396 self.needed_locks[locking.LEVEL_INSTANCE] = \
3397 self.cfg.GetNodeGroupInstances(self.group_uuid)
3399 elif level == locking.LEVEL_NODEGROUP:
3400 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3402 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3403 set([self.group_uuid] +
3404 # Lock all groups used by instances optimistically; this requires
            # going via the node before it's locked, requiring verification
            # later on
            [group_uuid
             for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3409 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3411 elif level == locking.LEVEL_NODE:
3412 # This will only lock the nodes in the group to be verified which contain
3414 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3415 self._LockInstancesNodes()
3417 # Lock all nodes in group to be verified
3418 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3419 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3420 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3422 def CheckPrereq(self):
3423 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3424 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3425 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3427 assert self.group_uuid in owned_groups
3429 # Check if locked instances are still correct
3430 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3432 # Get instance information
3433 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3435 # Check if node groups for locked instances are still correct
3436 for (instance_name, inst) in self.instances.items():
3437 assert owned_nodes.issuperset(inst.all_nodes), \
3438 "Instance %s's nodes changed while we kept the lock" % instance_name
      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
                                             owned_groups)
3443 assert self.group_uuid in inst_groups, \
3444 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3446 def Exec(self, feedback_fn):
3447 """Verify integrity of cluster disks.
3449 @rtype: tuple of three items
3450 @return: a tuple of (dict of node-to-node_error, list of instances
      which need activate-disks, dict of instance: (node, volume) for
      missing volumes)

    """
    res_nodes = {}
    res_instances = set()
    res_missing = {}
3459 nv_dict = _MapInstanceDisksToNodes([inst
3460 for inst in self.instances.values()
3461 if inst.admin_state == constants.ADMINST_UP])
3464 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3465 set(self.cfg.GetVmCapableNodeList()))
3467 node_lvs = self.rpc.call_lv_list(nodes, [])
3469 for (node, node_res) in node_lvs.items():
3470 if node_res.offline:
        continue

      msg = node_res.fail_msg
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        res_nodes[node] = msg
        continue
3479 for lv_name, (_, _, lv_online) in node_res.payload.items():
3480 inst = nv_dict.pop((node, lv_name), None)
3481 if not (lv_online or inst is None):
3482 res_instances.add(inst)
3484 # any leftover items in nv_dict are missing LVs, let's arrange the data
3486 for key, inst in nv_dict.iteritems():
3487 res_missing.setdefault(inst, []).append(list(key))
3489 return (res_nodes, list(res_instances), res_missing)
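

def _ExampleSummarizeGroupDisks(verify_result):
  """Illustrative sketch, not part of the original module: flatten the
  (node errors, instances needing activate-disks, missing LVs) tuple
  returned by L{LUGroupVerifyDisks.Exec} into human-readable lines.

  """
  (res_nodes, res_instances, res_missing) = verify_result
  lines = ["node %s: %s" % (node, msg)
           for (node, msg) in sorted(res_nodes.items())]
  lines.extend("instance %s needs activate-disks" % inst
               for inst in sorted(res_instances))
  lines.extend("instance %s is missing %d volume(s)" % (inst, len(vols))
               for (inst, vols) in sorted(res_missing.items()))
  return lines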
3492 class LUClusterRepairDiskSizes(NoHooksLU):
3493 """Verifies the cluster disks sizes.
3498 def ExpandNames(self):
3499 if self.op.instances:
3500 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3501 self.needed_locks = {
3502 locking.LEVEL_NODE_RES: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
3508 self.needed_locks = {
3509 locking.LEVEL_NODE_RES: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
3512 self.share_locks = {
3513 locking.LEVEL_NODE_RES: 1,
      locking.LEVEL_INSTANCE: 0,
      }
3517 def DeclareLocks(self, level):
3518 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3519 self._LockInstancesNodes(primary_only=True, level=level)
3521 def CheckPrereq(self):
3522 """Check prerequisites.
3524 This only checks the optional instance list against the existing names.
    """
    if self.wanted_names is None:
3528 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3530 self.wanted_instances = \
3531 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3533 def _EnsureChildSizes(self, disk):
3534 """Ensure children of the disk have the needed disk size.
3536 This is valid mainly for DRBD8 and fixes an issue where the
3537 children have smaller disk size.
3539 @param disk: an L{ganeti.objects.Disk} object
    """
    if disk.dev_type == constants.LD_DRBD8:
3543 assert disk.children, "Empty children for DRBD8?"
3544 fchild = disk.children[0]
3545 mismatch = fchild.size < disk.size
3547 self.LogInfo("Child disk has size %d, parent %d, fixing",
3548 fchild.size, disk.size)
3549 fchild.size = disk.size
      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False
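
  # Illustrative example (not part of the original module): for a DRBD8
  # disk recorded with size 1024 whose data child (children[0]) still has
  # size 1000, _EnsureChildSizes grows the child to 1024 and returns True,
  # signalling the caller to write the configuration out; disks of any
  # other type simply return False.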
3556 def Exec(self, feedback_fn):
3557 """Verify the size of cluster disks.
    """
    # TODO: check child disks too
3561 # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
3564 pnode = instance.primary_node
3565 if pnode not in per_node_disks:
3566 per_node_disks[pnode] = []
3567 for idx, disk in enumerate(instance.disks):
3568 per_node_disks[pnode].append((instance, idx, disk))
3570 assert not (frozenset(per_node_disks.keys()) -
3571 self.owned_locks(locking.LEVEL_NODE_RES)), \
3572 "Not owning correct locks"
3573 assert not self.owned_locks(locking.LEVEL_NODE)
    changed = []
    for node, dskl in per_node_disks.items():
3577 newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
3580 result = self.rpc.call_blockdev_getsize(node, newl)
3582 self.LogWarning("Failure in blockdev_getsize call to node"
3583 " %s, ignoring", node)
3585 if len(result.payload) != len(dskl):
3586 logging.warning("Invalid result from node %s: len(dksl)=%d,"
3587 " result.payload=%s", node, len(dskl), result.payload)
3588 self.LogWarning("Invalid result from node %s, ignoring node results",
3591 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3593 self.LogWarning("Disk %d of instance %s did not return size"
3594 " information, ignoring", idx, instance.name)
3596 if not isinstance(size, (int, long)):
3597 self.LogWarning("Disk %d of instance %s did not return valid"
3598 " size information, ignoring", idx, instance.name)
3601 if size != disk.size:
3602 self.LogInfo("Disk %d of instance %s has mismatched size,"
3603 " correcting: recorded %d, actual %d", idx,
3604 instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
3607 changed.append((instance.name, idx, size))
3608 if self._EnsureChildSizes(disk):
3609 self.cfg.Update(instance, feedback_fn)
            changed.append((instance.name, idx, disk.size))

    return changed
3614 class LUClusterRename(LogicalUnit):
3615 """Rename the cluster.
3618 HPATH = "cluster-rename"
3619 HTYPE = constants.HTYPE_CLUSTER
3621 def BuildHooksEnv(self):
3626 "OP_TARGET": self.cfg.GetClusterName(),
3627 "NEW_NAME": self.op.name,
3630 def BuildHooksNodes(self):
3631 """Build hooks nodes.
3634 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3636 def CheckPrereq(self):
3637 """Verify that the passed name is a valid one.
    """
    hostname = netutils.GetHostname(name=self.op.name,
3641 family=self.cfg.GetPrimaryIPFamily())
3643 new_name = hostname.name
3644 self.ip = new_ip = hostname.ip
3645 old_name = self.cfg.GetClusterName()
3646 old_ip = self.cfg.GetMasterIP()
3647 if new_name == old_name and new_ip == old_ip:
3648 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3649 " cluster has changed",
3651 if new_ip != old_ip:
3652 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3653 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3654 " reachable on the network" %
3655 new_ip, errors.ECODE_NOTUNIQUE)
3657 self.op.name = new_name
3659 def Exec(self, feedback_fn):
3660 """Rename the cluster.
    """
    clustername = self.op.name
    new_ip = self.ip
3666 # shutdown the master IP
3667 master_params = self.cfg.GetMasterNetworkParameters()
3668 ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
3671 result.Raise("Could not disable the master role")
    try:
      cluster = self.cfg.GetClusterInfo()
3675 cluster.cluster_name = clustername
3676 cluster.master_ip = new_ip
3677 self.cfg.Update(cluster, feedback_fn)
3679 # update the known hosts file
3680 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3681 node_list = self.cfg.GetOnlineNodeList()
3683 node_list.remove(master_params.name)
3686 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
    finally:
      master_params.ip = new_ip
      result = self.rpc.call_node_activate_master_ip(master_params.name,
                                                     master_params, ems)
3691 msg = result.fail_msg
3693 self.LogWarning("Could not re-enable the master role on"
3694 " the master, please restart manually: %s", msg)
3699 def _ValidateNetmask(cfg, netmask):
3700 """Checks if a netmask is valid.
3702 @type cfg: L{config.ConfigWriter}
3703 @param cfg: The cluster configuration
3705 @param netmask: the netmask to be verified
3706 @raise errors.OpPrereqError: if the validation fails
  """
  ip_family = cfg.GetPrimaryIPFamily()
  try:
3711 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3712 except errors.ProgrammerError:
3713 raise errors.OpPrereqError("Invalid primary ip family: %s." %
3715 if not ipcls.ValidateNetmask(netmask):
3716 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3720 class LUClusterSetParams(LogicalUnit):
3721 """Change the parameters of the cluster.
3724 HPATH = "cluster-modify"
3725 HTYPE = constants.HTYPE_CLUSTER
3728 def CheckArguments(self):
3732 if self.op.uid_pool:
3733 uidpool.CheckUidPool(self.op.uid_pool)
3735 if self.op.add_uids:
3736 uidpool.CheckUidPool(self.op.add_uids)
3738 if self.op.remove_uids:
3739 uidpool.CheckUidPool(self.op.remove_uids)
3741 if self.op.master_netmask is not None:
3742 _ValidateNetmask(self.cfg, self.op.master_netmask)
3744 if self.op.diskparams:
3745 for dt_params in self.op.diskparams.values():
3746 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3748 def ExpandNames(self):
3749 # FIXME: in the future maybe other cluster params won't require checking on
3750 # all nodes to be modified.
3751 self.needed_locks = {
3752 locking.LEVEL_NODE: locking.ALL_SET,
3753 }
3754 self.share_locks[locking.LEVEL_NODE] = 1
3756 def BuildHooksEnv(self):
3761 "OP_TARGET": self.cfg.GetClusterName(),
3762 "NEW_VG_NAME": self.op.vg_name,
3765 def BuildHooksNodes(self):
3766 """Build hooks nodes.
3769 mn = self.cfg.GetMasterNode()
3770 return ([mn], [mn])
3772 def CheckPrereq(self):
3773 """Check prerequisites.
3775 This checks whether the given params don't conflict and
3776 if the given volume group is valid.
3779 if self.op.vg_name is not None and not self.op.vg_name:
3780 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3781 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3782 " instances exist", errors.ECODE_INVAL)
3784 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3785 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3786 raise errors.OpPrereqError("Cannot disable drbd helper while"
3787 " drbd-based instances exist",
3790 node_list = self.owned_locks(locking.LEVEL_NODE)
3792 # if vg_name not None, checks given volume group on all nodes
3793 if self.op.vg_name:
3794 vglist = self.rpc.call_vg_list(node_list)
3795 for node in node_list:
3796 msg = vglist[node].fail_msg
3797 if msg:
3798 # ignoring down node
3799 self.LogWarning("Error while gathering data on node %s"
3800 " (ignoring node): %s", node, msg)
3801 continue
3802 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3803 self.op.vg_name,
3804 constants.MIN_VG_SIZE)
3805 if vgstatus:
3806 raise errors.OpPrereqError("Error on node '%s': %s" %
3807 (node, vgstatus), errors.ECODE_ENVIRON)
3809 if self.op.drbd_helper:
3810 # checks given drbd helper on all nodes
3811 helpers = self.rpc.call_drbd_helper(node_list)
3812 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3813 if ninfo.offline:
3814 self.LogInfo("Not checking drbd helper on offline node %s", node)
3815 continue
3816 msg = helpers[node].fail_msg
3817 if msg:
3818 raise errors.OpPrereqError("Error checking drbd helper on node"
3819 " '%s': %s" % (node, msg),
3820 errors.ECODE_ENVIRON)
3821 node_helper = helpers[node].payload
3822 if node_helper != self.op.drbd_helper:
3823 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3824 (node, node_helper), errors.ECODE_ENVIRON)
3826 self.cluster = cluster = self.cfg.GetClusterInfo()
3827 # validate params changes
3828 if self.op.beparams:
3829 objects.UpgradeBeParams(self.op.beparams)
3830 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3831 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3833 if self.op.ndparams:
3834 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3835 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3837 # TODO: we need a more general way to handle resetting
3838 # cluster-level parameters to default values
3839 if self.new_ndparams["oob_program"] == "":
3840 self.new_ndparams["oob_program"] = \
3841 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3843 if self.op.hv_state:
3844 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3845 self.cluster.hv_state_static)
3846 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3847 for hv, values in new_hv_state.items())
3849 if self.op.disk_state:
3850 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3851 self.cluster.disk_state_static)
3852 self.new_disk_state = \
3853 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3854 for name, values in svalues.items()))
3855 for storage, svalues in new_disk_state.items())
3857 if self.op.ipolicy:
3858 ipolicy = {}
3859 for key, value in self.op.ipolicy.items():
3860 utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
3861 ipolicy[key] = _GetUpdatedParams(cluster.ipolicy.get(key, {}),
3862 value)
3863 objects.InstancePolicy.CheckParameterSyntax(ipolicy)
3864 self.new_ipolicy = ipolicy
3866 if self.op.nicparams:
3867 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3868 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3869 objects.NIC.CheckParameterSyntax(self.new_nicparams)
3870 nic_errors = []
3872 # check all instances for consistency
3873 for instance in self.cfg.GetAllInstancesInfo().values():
3874 for nic_idx, nic in enumerate(instance.nics):
3875 params_copy = copy.deepcopy(nic.nicparams)
3876 params_filled = objects.FillDict(self.new_nicparams, params_copy)
3878 # check parameter syntax
3879 try:
3880 objects.NIC.CheckParameterSyntax(params_filled)
3881 except errors.ConfigurationError, err:
3882 nic_errors.append("Instance %s, nic/%d: %s" %
3883 (instance.name, nic_idx, err))
3885 # if we're moving instances to routed, check that they have an ip
3886 target_mode = params_filled[constants.NIC_MODE]
3887 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3888 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3889 " address" % (instance.name, nic_idx))
3891 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3892 "\n".join(nic_errors))
3894 # hypervisor list/parameters
3895 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3896 if self.op.hvparams:
3897 for hv_name, hv_dict in self.op.hvparams.items():
3898 if hv_name not in self.new_hvparams:
3899 self.new_hvparams[hv_name] = hv_dict
3900 else:
3901 self.new_hvparams[hv_name].update(hv_dict)
3903 # disk template parameters
3904 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
3905 if self.op.diskparams:
3906 for dt_name, dt_params in self.op.diskparams.items():
3907 if dt_name not in self.new_diskparams:
3908 self.new_diskparams[dt_name] = dt_params
3909 else:
3910 self.new_diskparams[dt_name].update(dt_params)
3912 # os hypervisor parameters
3913 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3914 if self.op.os_hvp:
3915 for os_name, hvs in self.op.os_hvp.items():
3916 if os_name not in self.new_os_hvp:
3917 self.new_os_hvp[os_name] = hvs
3918 else:
3919 for hv_name, hv_dict in hvs.items():
3920 if hv_name not in self.new_os_hvp[os_name]:
3921 self.new_os_hvp[os_name][hv_name] = hv_dict
3922 else:
3923 self.new_os_hvp[os_name][hv_name].update(hv_dict)
3925 # os parameters
3926 self.new_osp = objects.FillDict(cluster.osparams, {})
3927 if self.op.osparams:
3928 for os_name, osp in self.op.osparams.items():
3929 if os_name not in self.new_osp:
3930 self.new_osp[os_name] = {}
3932 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3933 use_none=True)
3935 if not self.new_osp[os_name]:
3936 # we removed all parameters
3937 del self.new_osp[os_name]
3938 else:
3939 # check the parameter validity (remote check)
3940 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3941 os_name, self.new_osp[os_name])
3943 # changes to the hypervisor list
3944 if self.op.enabled_hypervisors is not None:
3945 self.hv_list = self.op.enabled_hypervisors
3946 for hv in self.hv_list:
3947 # if the hypervisor doesn't already exist in the cluster
3948 # hvparams, we initialize it to empty, and then (in both
3949 # cases) we make sure to fill the defaults, as we might not
3950 # have a complete defaults list if the hypervisor wasn't
3951 # enabled before
3952 if hv not in new_hvp:
3953 new_hvp[hv] = {}
3954 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3955 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3956 else:
3957 self.hv_list = cluster.enabled_hypervisors
3959 if self.op.hvparams or self.op.enabled_hypervisors is not None:
3960 # either the enabled list has changed, or the parameters have, validate
3961 for hv_name, hv_params in self.new_hvparams.items():
3962 if ((self.op.hvparams and hv_name in self.op.hvparams) or
3963 (self.op.enabled_hypervisors and
3964 hv_name in self.op.enabled_hypervisors)):
3965 # either this is a new hypervisor, or its parameters have changed
3966 hv_class = hypervisor.GetHypervisor(hv_name)
3967 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3968 hv_class.CheckParameterSyntax(hv_params)
3969 _CheckHVParams(self, node_list, hv_name, hv_params)
3971 if self.op.os_hvp:
3972 # no need to check any newly-enabled hypervisors, since the
3973 # defaults have already been checked in the above code-block
3974 for os_name, os_hvp in self.new_os_hvp.items():
3975 for hv_name, hv_params in os_hvp.items():
3976 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3977 # we need to fill in the new os_hvp on top of the actual hv_p
3978 cluster_defaults = self.new_hvparams.get(hv_name, {})
3979 new_osp = objects.FillDict(cluster_defaults, hv_params)
3980 hv_class = hypervisor.GetHypervisor(hv_name)
3981 hv_class.CheckParameterSyntax(new_osp)
3982 _CheckHVParams(self, node_list, hv_name, new_osp)
3984 if self.op.default_iallocator:
3985 alloc_script = utils.FindFile(self.op.default_iallocator,
3986 constants.IALLOCATOR_SEARCH_PATH,
3987 os.path.isfile)
3988 if alloc_script is None:
3989 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3990 " specified" % self.op.default_iallocator,
3993 def Exec(self, feedback_fn):
3994 """Change the parameters of the cluster.
3997 if self.op.vg_name is not None:
3998 new_volume = self.op.vg_name
3999 if not new_volume:
4000 new_volume = None
4001 if new_volume != self.cfg.GetVGName():
4002 self.cfg.SetVGName(new_volume)
4003 else:
4004 feedback_fn("Cluster LVM configuration already in desired"
4005 " state, not changing")
4006 if self.op.drbd_helper is not None:
4007 new_helper = self.op.drbd_helper
4008 if not new_helper:
4009 new_helper = None
4010 if new_helper != self.cfg.GetDRBDHelper():
4011 self.cfg.SetDRBDHelper(new_helper)
4012 else:
4013 feedback_fn("Cluster DRBD helper already in desired state,"
4014 " not changing")
4015 if self.op.hvparams:
4016 self.cluster.hvparams = self.new_hvparams
4017 if self.op.os_hvp:
4018 self.cluster.os_hvp = self.new_os_hvp
4019 if self.op.enabled_hypervisors is not None:
4020 self.cluster.hvparams = self.new_hvparams
4021 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4022 if self.op.beparams:
4023 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4024 if self.op.nicparams:
4025 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4026 if self.op.ipolicy:
4027 self.cluster.ipolicy = self.new_ipolicy
4028 if self.op.osparams:
4029 self.cluster.osparams = self.new_osp
4030 if self.op.ndparams:
4031 self.cluster.ndparams = self.new_ndparams
4032 if self.op.diskparams:
4033 self.cluster.diskparams = self.new_diskparams
4034 if self.op.hv_state:
4035 self.cluster.hv_state_static = self.new_hv_state
4036 if self.op.disk_state:
4037 self.cluster.disk_state_static = self.new_disk_state
4039 if self.op.candidate_pool_size is not None:
4040 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4041 # we need to update the pool size here, otherwise the save will fail
4042 _AdjustCandidatePool(self, [])
4044 if self.op.maintain_node_health is not None:
4045 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4046 feedback_fn("Note: CONFD was disabled at build time, node health"
4047 " maintenance is not useful (still enabling it)")
4048 self.cluster.maintain_node_health = self.op.maintain_node_health
4050 if self.op.prealloc_wipe_disks is not None:
4051 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4053 if self.op.add_uids is not None:
4054 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4056 if self.op.remove_uids is not None:
4057 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4059 if self.op.uid_pool is not None:
4060 self.cluster.uid_pool = self.op.uid_pool
4062 if self.op.default_iallocator is not None:
4063 self.cluster.default_iallocator = self.op.default_iallocator
4065 if self.op.reserved_lvs is not None:
4066 self.cluster.reserved_lvs = self.op.reserved_lvs
4068 if self.op.use_external_mip_script is not None:
4069 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4071 def helper_os(aname, mods, desc):
4072 desc += " OS list"
4073 lst = getattr(self.cluster, aname)
4074 for key, val in mods:
4075 if key == constants.DDM_ADD:
4076 if val in lst:
4077 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4078 else:
4079 lst.append(val)
4080 elif key == constants.DDM_REMOVE:
4081 if val in lst:
4082 lst.remove(val)
4083 else:
4084 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4085 else:
4086 raise errors.ProgrammerError("Invalid modification '%s'" % key)
4088 if self.op.hidden_os:
4089 helper_os("hidden_os", self.op.hidden_os, "hidden")
4091 if self.op.blacklisted_os:
4092 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4094 if self.op.master_netdev:
4095 master_params = self.cfg.GetMasterNetworkParameters()
4096 ems = self.cfg.GetUseExternalMipScript()
4097 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4098 self.cluster.master_netdev)
4099 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4100 master_params, ems)
4101 result.Raise("Could not disable the master ip")
4102 feedback_fn("Changing master_netdev from %s to %s" %
4103 (master_params.netdev, self.op.master_netdev))
4104 self.cluster.master_netdev = self.op.master_netdev
4106 if self.op.master_netmask:
4107 master_params = self.cfg.GetMasterNetworkParameters()
4108 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4109 result = self.rpc.call_node_change_master_netmask(master_params.name,
4110 master_params.netmask,
4111 self.op.master_netmask,
4112 master_params.ip,
4113 master_params.netdev)
4114 if result.fail_msg:
4115 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4116 feedback_fn(msg)
4118 self.cluster.master_netmask = self.op.master_netmask
4120 self.cfg.Update(self.cluster, feedback_fn)
4122 if self.op.master_netdev:
4123 master_params = self.cfg.GetMasterNetworkParameters()
4124 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4125 self.op.master_netdev)
4126 ems = self.cfg.GetUseExternalMipScript()
4127 result = self.rpc.call_node_activate_master_ip(master_params.name,
4128 master_params, ems)
4129 if result.fail_msg:
4130 self.LogWarning("Could not re-enable the master ip on"
4131 " the master, please restart manually: %s",
4132 result.fail_msg)
4135 def _UploadHelper(lu, nodes, fname):
4136 """Helper for uploading a file and showing warnings.
4139 if os.path.exists(fname):
4140 result = lu.rpc.call_upload_file(nodes, fname)
4141 for to_node, to_result in result.items():
4142 msg = to_result.fail_msg
4144 msg = ("Copy of file %s to node %s failed: %s" %
4145 (fname, to_node, msg))
4146 lu.proc.LogWarning(msg)
4149 def _ComputeAncillaryFiles(cluster, redist):
4150 """Compute files external to Ganeti which need to be consistent.
4152 @type redist: boolean
4153 @param redist: Whether to include files which need to be redistributed
4156 # Compute files for all nodes
4157 files_all = set([
4158 constants.SSH_KNOWN_HOSTS_FILE,
4159 constants.CONFD_HMAC_KEY,
4160 constants.CLUSTER_DOMAIN_SECRET_FILE,
4161 constants.SPICE_CERT_FILE,
4162 constants.SPICE_CACERT_FILE,
4163 constants.RAPI_USERS_FILE,
4164 ])
4166 if not redist:
4167 files_all.update(constants.ALL_CERT_FILES)
4168 files_all.update(ssconf.SimpleStore().GetFileList())
4169 else:
4170 # we need to ship at least the RAPI certificate
4171 files_all.add(constants.RAPI_CERT_FILE)
4173 if cluster.modify_etc_hosts:
4174 files_all.add(constants.ETC_HOSTS)
4176 # Files which are optional, these must:
4177 # - be present in one other category as well
4178 # - either exist or not exist on all nodes of that category (mc, vm all)
4179 files_opt = set([
4180 constants.RAPI_USERS_FILE,
4181 ])
4183 # Files which should only be on master candidates
4184 files_mc = set()
4186 if not redist:
4187 files_mc.add(constants.CLUSTER_CONF_FILE)
4189 # FIXME: this should also be replicated but Ganeti doesn't support files_mc
4190 # replication
4191 files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)
4193 # Files which should only be on VM-capable nodes
4194 files_vm = set(filename
4195 for hv_name in cluster.enabled_hypervisors
4196 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4198 files_opt |= set(filename
4199 for hv_name in cluster.enabled_hypervisors
4200 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4202 # Filenames in each category must be unique
4203 all_files_set = files_all | files_mc | files_vm
4204 assert (len(all_files_set) ==
4205 sum(map(len, [files_all, files_mc, files_vm]))), \
4206 "Found file listed in more than one file list"
4208 # Optional files must be present in one other category
4209 assert all_files_set.issuperset(files_opt), \
4210 "Optional file not in a different required list"
4212 return (files_all, files_opt, files_mc, files_vm)
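# Example of the returned tuple (a sketch; the exact contents depend on
# build-time constants and the cluster object):
#
#   (files_all, files_opt, files_mc, files_vm) = \
#     _ComputeAncillaryFiles(cluster, redist=True)
#   # files_all: e.g. known_hosts, the confd HMAC key, SPICE/RAPI certs
#   # files_mc:  empty for redist=True, as ConfigWriter handles the config
#   # files_vm:  hypervisor-specific files for VM-capable nodes only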
4215 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4216 """Distribute additional files which are part of the cluster configuration.
4218 ConfigWriter takes care of distributing the config and ssconf files, but
4219 there are more files which should be distributed to all nodes. This function
4220 makes sure those are copied.
4222 @param lu: calling logical unit
4223 @param additional_nodes: list of nodes not in the config to distribute to
4224 @type additional_vm: boolean
4225 @param additional_vm: whether the additional nodes are vm-capable or not
4228 # Gather target nodes
4229 cluster = lu.cfg.GetClusterInfo()
4230 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4232 online_nodes = lu.cfg.GetOnlineNodeList()
4233 vm_nodes = lu.cfg.GetVmCapableNodeList()
4235 if additional_nodes is not None:
4236 online_nodes.extend(additional_nodes)
4237 if additional_vm:
4238 vm_nodes.extend(additional_nodes)
4240 # Never distribute to master node
4241 for nodelist in [online_nodes, vm_nodes]:
4242 if master_info.name in nodelist:
4243 nodelist.remove(master_info.name)
4246 (files_all, _, files_mc, files_vm) = \
4247 _ComputeAncillaryFiles(cluster, True)
4249 # Never re-distribute configuration file from here
4250 assert not (constants.CLUSTER_CONF_FILE in files_all or
4251 constants.CLUSTER_CONF_FILE in files_vm)
4252 assert not files_mc, "Master candidates not handled in this function"
4254 filemap = [
4255 (online_nodes, files_all),
4256 (vm_nodes, files_vm),
4257 ]
4259 # Upload the files
4260 for (node_list, files) in filemap:
4261 for fname in files:
4262 _UploadHelper(lu, node_list, fname)
4265 class LUClusterRedistConf(NoHooksLU):
4266 """Force the redistribution of cluster configuration.
4268 This is a very simple LU.
4273 def ExpandNames(self):
4274 self.needed_locks = {
4275 locking.LEVEL_NODE: locking.ALL_SET,
4276 }
4277 self.share_locks[locking.LEVEL_NODE] = 1
4279 def Exec(self, feedback_fn):
4280 """Redistribute the configuration.
4283 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4284 _RedistributeAncillaryFiles(self)
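# Sketch: this LU backs "gnt-cluster redist-conf"; an equivalent manual
# submission would be roughly:
#
#   op = opcodes.OpClusterRedistConf()
#   cli.SubmitOpCode(op)  # re-pushes config and ancillary files everywhere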
4287 class LUClusterActivateMasterIp(NoHooksLU):
4288 """Activate the master IP on the master node.
4291 def Exec(self, feedback_fn):
4292 """Activate the master IP.
4295 master_params = self.cfg.GetMasterNetworkParameters()
4296 ems = self.cfg.GetUseExternalMipScript()
4297 result = self.rpc.call_node_activate_master_ip(master_params.name,
4298 master_params, ems)
4299 result.Raise("Could not activate the master IP")
4302 class LUClusterDeactivateMasterIp(NoHooksLU):
4303 """Deactivate the master IP on the master node.
4306 def Exec(self, feedback_fn):
4307 """Deactivate the master IP.
4310 master_params = self.cfg.GetMasterNetworkParameters()
4311 ems = self.cfg.GetUseExternalMipScript()
4312 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4313 master_params, ems)
4314 result.Raise("Could not deactivate the master IP")
4317 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4318 """Sleep and poll for an instance's disk to sync.
4321 if not instance.disks or disks is not None and not disks:
4322 return True
4324 disks = _ExpandCheckDisks(instance, disks)
4326 if not oneshot:
4327 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4329 node = instance.primary_node
4331 for dev in disks:
4332 lu.cfg.SetDiskID(dev, node)
4334 # TODO: Convert to utils.Retry
4336 retries = 0
4337 degr_retries = 10 # in seconds, as we sleep 1 second each time
4338 while True:
4339 max_time = 0
4340 done = True
4341 cumul_degraded = False
4342 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
4343 msg = rstats.fail_msg
4345 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4348 raise errors.RemoteError("Can't contact node %s for mirror data,"
4349 " aborting." % node)
4352 rstats = rstats.payload
4354 for i, mstat in enumerate(rstats):
4356 lu.LogWarning("Can't compute data for node %s/%s",
4357 node, disks[i].iv_name)
4360 cumul_degraded = (cumul_degraded or
4361 (mstat.is_degraded and mstat.sync_percent is None))
4362 if mstat.sync_percent is not None:
4363 done = False
4364 if mstat.estimated_time is not None:
4365 rem_time = ("%s remaining (estimated)" %
4366 utils.FormatSeconds(mstat.estimated_time))
4367 max_time = mstat.estimated_time
4369 rem_time = "no time estimate"
4370 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4371 (disks[i].iv_name, mstat.sync_percent, rem_time))
4373 # if we're done but degraded, let's do a few small retries, to
4374 # make sure we see a stable and not transient situation; therefore
4375 # we force restart of the loop
4376 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4377 logging.info("Degraded disks found, %d retries left", degr_retries)
4385 time.sleep(min(60, max_time))
4388 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4389 return not cumul_degraded
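# Typical use (a sketch matching callers elsewhere in this module): after
# creating or activating mirrored disks, poll until they are consistent:
#
#   disk_abort = not _WaitForSync(lu, instance)
#   if disk_abort:
#     raise errors.OpExecError("There are some degraded disks for"
#                              " this instance")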
4392 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
4393 """Check that mirrors are not degraded.
4395 The ldisk parameter, if True, will change the test from the
4396 is_degraded attribute (which represents overall non-ok status for
4397 the device(s)) to the ldisk (representing the local storage status).
4400 lu.cfg.SetDiskID(dev, node)
4402 result = True
4404 if on_primary or dev.AssembleOnSecondary():
4405 rstats = lu.rpc.call_blockdev_find(node, dev)
4406 msg = rstats.fail_msg
4407 if msg:
4408 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4409 result = False
4410 elif not rstats.payload:
4411 lu.LogWarning("Can't find disk on node %s", node)
4412 result = False
4413 else:
4414 if ldisk:
4415 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4416 else:
4417 result = result and not rstats.payload.is_degraded
4419 if dev.children:
4420 for child in dev.children:
4421 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
4423 return result
4426 class LUOobCommand(NoHooksLU):
4427 """Logical unit for OOB handling.
4431 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4433 def ExpandNames(self):
4434 """Gather locks we need.
4437 if self.op.node_names:
4438 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4439 lock_names = self.op.node_names
4440 else:
4441 lock_names = locking.ALL_SET
4443 self.needed_locks = {
4444 locking.LEVEL_NODE: lock_names,
4445 }
4447 def CheckPrereq(self):
4448 """Check prerequisites.
4451 - the node exists in the configuration
4454 Any errors are signaled by raising errors.OpPrereqError.
4458 self.master_node = self.cfg.GetMasterNode()
4460 assert self.op.power_delay >= 0.0
4462 if self.op.node_names:
4463 if (self.op.command in self._SKIP_MASTER and
4464 self.master_node in self.op.node_names):
4465 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4466 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4468 if master_oob_handler:
4469 additional_text = ("run '%s %s %s' if you want to operate on the"
4470 " master regardless") % (master_oob_handler,
4474 additional_text = "it does not support out-of-band operations"
4476 raise errors.OpPrereqError(("Operating on the master node %s is not"
4477 " allowed for %s; %s") %
4478 (self.master_node, self.op.command,
4479 additional_text), errors.ECODE_INVAL)
4480 else:
4481 self.op.node_names = self.cfg.GetNodeList()
4482 if self.op.command in self._SKIP_MASTER:
4483 self.op.node_names.remove(self.master_node)
4485 if self.op.command in self._SKIP_MASTER:
4486 assert self.master_node not in self.op.node_names
4487 self.nodes = []
4488 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4489 if node is None:
4490 raise errors.OpPrereqError("Node %s not found" % node_name,
4491 errors.ECODE_NOENT)
4492 else:
4493 self.nodes.append(node)
4495 if (not self.op.ignore_status and
4496 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4497 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4498 " not marked offline") % node_name,
4501 def Exec(self, feedback_fn):
4502 """Execute OOB and return result if we expect any.
4505 master_node = self.master_node
4506 ret = []
4508 for idx, node in enumerate(utils.NiceSort(self.nodes,
4509 key=lambda node: node.name)):
4510 node_entry = [(constants.RS_NORMAL, node.name)]
4511 ret.append(node_entry)
4513 oob_program = _SupportsOob(self.cfg, node)
4515 if not oob_program:
4516 node_entry.append((constants.RS_UNAVAIL, None))
4517 continue
4519 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4520 self.op.command, oob_program, node.name)
4521 result = self.rpc.call_run_oob(master_node, oob_program,
4522 self.op.command, node.name,
4523 self.op.timeout)
4525 if result.fail_msg:
4526 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4527 node.name, result.fail_msg)
4528 node_entry.append((constants.RS_NODATA, None))
4529 else:
4530 try:
4531 self._CheckPayload(result)
4532 except errors.OpExecError, err:
4533 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4534 node.name, err)
4535 node_entry.append((constants.RS_NODATA, None))
4536 else:
4537 if self.op.command == constants.OOB_HEALTH:
4538 # For health we should log important events
4539 for item, status in result.payload:
4540 if status in [constants.OOB_STATUS_WARNING,
4541 constants.OOB_STATUS_CRITICAL]:
4542 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4543 item, node.name, status)
4545 if self.op.command == constants.OOB_POWER_ON:
4546 node.powered = True
4547 elif self.op.command == constants.OOB_POWER_OFF:
4548 node.powered = False
4549 elif self.op.command == constants.OOB_POWER_STATUS:
4550 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4551 if powered != node.powered:
4552 logging.warning(("Recorded power state (%s) of node '%s' does not"
4553 " match actual power state (%s)"), node.powered,
4556 # For configuration changing commands we should update the node
4557 if self.op.command in (constants.OOB_POWER_ON,
4558 constants.OOB_POWER_OFF):
4559 self.cfg.Update(node, feedback_fn)
4561 node_entry.append((constants.RS_NORMAL, result.payload))
4563 if (self.op.command == constants.OOB_POWER_ON and
4564 idx < len(self.nodes) - 1):
4565 time.sleep(self.op.power_delay)
4567 return ret
4569 def _CheckPayload(self, result):
4570 """Checks if the payload is valid.
4572 @param result: RPC result
4573 @raises errors.OpExecError: If payload is not valid
4576 errs = []
4577 if self.op.command == constants.OOB_HEALTH:
4578 if not isinstance(result.payload, list):
4579 errs.append("command 'health' is expected to return a list but got %s" %
4580 type(result.payload))
4581 else:
4582 for item, status in result.payload:
4583 if status not in constants.OOB_STATUSES:
4584 errs.append("health item '%s' has invalid status '%s'" %
4587 if self.op.command == constants.OOB_POWER_STATUS:
4588 if not isinstance(result.payload, dict):
4589 errs.append("power-status is expected to return a dict but got %s" %
4590 type(result.payload))
4592 if self.op.command in [
4593 constants.OOB_POWER_ON,
4594 constants.OOB_POWER_OFF,
4595 constants.OOB_POWER_CYCLE,
4596 ]:
4597 if result.payload is not None:
4598 errs.append("%s is expected to not return payload but got '%s'" %
4599 (self.op.command, result.payload))
4602 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4603 utils.CommaJoin(errs))
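# Payload shapes accepted by _CheckPayload above (illustrative examples):
#
#   OOB_HEALTH:             [["disk0", "OK"], ["psu1", "CRITICAL"], ...]
#   OOB_POWER_STATUS:       {constants.OOB_POWER_STATUS_POWERED: True}
#   OOB_POWER_ON/OFF/CYCLE: None (these commands must not return a payload)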
4606 class _OsQuery(_QueryBase):
4607 FIELDS = query.OS_FIELDS
4609 def ExpandNames(self, lu):
4610 # Lock all nodes in shared mode
4611 # Temporary removal of locks, should be reverted later
4612 # TODO: reintroduce locks when they are lighter-weight
4613 lu.needed_locks = {}
4614 #self.share_locks[locking.LEVEL_NODE] = 1
4615 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4617 # The following variables interact with _QueryBase._GetNames
4618 if self.names:
4619 self.wanted = self.names
4620 else:
4621 self.wanted = locking.ALL_SET
4623 self.do_locking = self.use_locking
4625 def DeclareLocks(self, lu, level):
4626 pass
4628 @staticmethod
4629 def _DiagnoseByOS(rlist):
4630 """Remaps a per-node return list into a per-os per-node dictionary
4632 @param rlist: a map with node names as keys and OS objects as values
4635 @return: a dictionary with osnames as keys and as value another
4636 map, with nodes as keys and tuples of (path, status, diagnose,
4637 variants, parameters, api_versions) as values, eg::
4639 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4640 (/srv/..., False, "invalid api")],
4641 "node2": [(/srv/..., True, "", [], [])]}
4645 all_os = {}
4646 # we build here the list of nodes that didn't fail the RPC (at RPC
4647 # level), so that nodes with a non-responding node daemon don't
4648 # make all OSes invalid
4649 good_nodes = [node_name for node_name in rlist
4650 if not rlist[node_name].fail_msg]
4651 for node_name, nr in rlist.items():
4652 if nr.fail_msg or not nr.payload:
4653 continue
4654 for (name, path, status, diagnose, variants,
4655 params, api_versions) in nr.payload:
4656 if name not in all_os:
4657 # build a list of nodes for this os containing empty lists
4658 # for each node in node_list
4659 all_os[name] = {}
4660 for nname in good_nodes:
4661 all_os[name][nname] = []
4662 # convert params from [name, help] to (name, help)
4663 params = [tuple(v) for v in params]
4664 all_os[name][node_name].append((path, status, diagnose,
4665 variants, params, api_versions))
4667 return all_os
4668 def _GetQueryData(self, lu):
4669 """Computes the list of nodes and their attributes.
4672 # Locking is not used
4673 assert not (compat.any(lu.glm.is_owned(level)
4674 for level in locking.LEVELS
4675 if level != locking.LEVEL_CLUSTER) or
4676 self.do_locking or self.use_locking)
4678 valid_nodes = [node.name
4679 for node in lu.cfg.GetAllNodesInfo().values()
4680 if not node.offline and node.vm_capable]
4681 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4682 cluster = lu.cfg.GetClusterInfo()
4684 data = {}
4686 for (os_name, os_data) in pol.items():
4687 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4688 hidden=(os_name in cluster.hidden_os),
4689 blacklisted=(os_name in cluster.blacklisted_os))
4691 variants = set()
4692 parameters = set()
4693 api_versions = set()
4695 for idx, osl in enumerate(os_data.values()):
4696 info.valid = bool(info.valid and osl and osl[0][1])
4697 if not info.valid:
4698 break
4700 (node_variants, node_params, node_api) = osl[0][3:6]
4701 if idx == 0:
4702 # first entry
4703 variants.update(node_variants)
4704 parameters.update(node_params)
4705 api_versions.update(node_api)
4706 else:
4707 # Filter out inconsistent values
4708 variants.intersection_update(node_variants)
4709 parameters.intersection_update(node_params)
4710 api_versions.intersection_update(node_api)
4712 info.variants = list(variants)
4713 info.parameters = list(parameters)
4714 info.api_versions = list(api_versions)
4716 data[os_name] = info
4718 # Prepare data in requested order
4719 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4720 if name in data]
4723 class LUOsDiagnose(NoHooksLU):
4724 """Logical unit for OS diagnose/query.
4730 def _BuildFilter(fields, names):
4731 """Builds a filter for querying OSes.
4734 name_filter = qlang.MakeSimpleFilter("name", names)
4736 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4737 # respective field is not requested
4738 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4739 for fname in ["hidden", "blacklisted"]
4740 if fname not in fields]
4741 if "valid" not in fields:
4742 status_filter.append([qlang.OP_TRUE, "valid"])
4744 if status_filter:
4745 status_filter.insert(0, qlang.OP_AND)
4746 else:
4747 status_filter = None
4749 if name_filter and status_filter:
4750 return [qlang.OP_AND, name_filter, status_filter]
4751 elif name_filter:
4752 return name_filter
4753 else:
4754 return status_filter
4756 def CheckArguments(self):
4757 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4758 self.op.output_fields, False)
4760 def ExpandNames(self):
4761 self.oq.ExpandNames(self)
4763 def Exec(self, feedback_fn):
4764 return self.oq.OldStyleQuery(self)
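# Example (sketch): an OS listing as done by "gnt-os list" would submit
#
#   op = opcodes.OpOsDiagnose(output_fields=["name", "variants"], names=[])
#   cli.SubmitOpCode(op)
#
# Hidden, blacklisted or invalid OSes are filtered out unless the caller
# explicitly asks for the corresponding field, per _BuildFilter above.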
4767 class LUNodeRemove(LogicalUnit):
4768 """Logical unit for removing a node.
4771 HPATH = "node-remove"
4772 HTYPE = constants.HTYPE_NODE
4774 def BuildHooksEnv(self):
4777 This doesn't run on the target node in the pre phase as a failed
4778 node would then be impossible to remove.
4782 "OP_TARGET": self.op.node_name,
4783 "NODE_NAME": self.op.node_name,
4786 def BuildHooksNodes(self):
4787 """Build hooks nodes.
4790 all_nodes = self.cfg.GetNodeList()
4791 try:
4792 all_nodes.remove(self.op.node_name)
4793 except ValueError:
4794 logging.warning("Node '%s', which is about to be removed, was not found"
4795 " in the list of all nodes", self.op.node_name)
4796 return (all_nodes, all_nodes)
4798 def CheckPrereq(self):
4799 """Check prerequisites.
4802 - the node exists in the configuration
4803 - it does not have primary or secondary instances
4804 - it's not the master
4806 Any errors are signaled by raising errors.OpPrereqError.
4809 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4810 node = self.cfg.GetNodeInfo(self.op.node_name)
4811 assert node is not None
4813 masternode = self.cfg.GetMasterNode()
4814 if node.name == masternode:
4815 raise errors.OpPrereqError("Node is the master node, failover to another"
4816 " node is required", errors.ECODE_INVAL)
4818 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4819 if node.name in instance.all_nodes:
4820 raise errors.OpPrereqError("Instance %s is still running on the node,"
4821 " please remove first" % instance_name,
4823 self.op.node_name = node.name
4826 def Exec(self, feedback_fn):
4827 """Removes the node from the cluster.
4831 logging.info("Stopping the node daemon and removing configs from node %s",
4834 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4836 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
4837 "Not owning BGL"
4839 # Promote nodes to master candidate as needed
4840 _AdjustCandidatePool(self, exceptions=[node.name])
4841 self.context.RemoveNode(node.name)
4843 # Run post hooks on the node before it's removed
4844 _RunPostHook(self, node.name)
4846 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4847 msg = result.fail_msg
4849 self.LogWarning("Errors encountered on the remote node while leaving"
4850 " the cluster: %s", msg)
4852 # Remove node from our /etc/hosts
4853 if self.cfg.GetClusterInfo().modify_etc_hosts:
4854 master_node = self.cfg.GetMasterNode()
4855 result = self.rpc.call_etc_hosts_modify(master_node,
4856 constants.ETC_HOSTS_REMOVE,
4857 node.name, None)
4858 result.Raise("Can't update hosts file with new host data")
4859 _RedistributeAncillaryFiles(self)
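# Summary of the removal flow implemented above ("gnt-node remove <name>"):
# the candidate pool is rebalanced with the node excluded, post hooks run
# while the node is still reachable, the node daemon is told to leave the
# cluster, and finally /etc/hosts and the ancillary files are refreshed.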
4862 class _NodeQuery(_QueryBase):
4863 FIELDS = query.NODE_FIELDS
4865 def ExpandNames(self, lu):
4866 lu.needed_locks = {}
4867 lu.share_locks = _ShareAll()
4869 if self.names:
4870 self.wanted = _GetWantedNodes(lu, self.names)
4871 else:
4872 self.wanted = locking.ALL_SET
4874 self.do_locking = (self.use_locking and
4875 query.NQ_LIVE in self.requested_data)
4877 if self.do_locking:
4878 # If any non-static field is requested we need to lock the nodes
4879 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4881 def DeclareLocks(self, lu, level):
4882 pass
4884 def _GetQueryData(self, lu):
4885 """Computes the list of nodes and their attributes.
4888 all_info = lu.cfg.GetAllNodesInfo()
4890 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4892 # Gather data as requested
4893 if query.NQ_LIVE in self.requested_data:
4894 # filter out non-vm_capable nodes
4895 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4897 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
4898 [lu.cfg.GetHypervisorType()])
4899 live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
4900 for (name, nresult) in node_data.items()
4901 if not nresult.fail_msg and nresult.payload)
4902 else:
4903 live_data = None
4905 if query.NQ_INST in self.requested_data:
4906 node_to_primary = dict([(name, set()) for name in nodenames])
4907 node_to_secondary = dict([(name, set()) for name in nodenames])
4909 inst_data = lu.cfg.GetAllInstancesInfo()
4911 for inst in inst_data.values():
4912 if inst.primary_node in node_to_primary:
4913 node_to_primary[inst.primary_node].add(inst.name)
4914 for secnode in inst.secondary_nodes:
4915 if secnode in node_to_secondary:
4916 node_to_secondary[secnode].add(inst.name)
4917 else:
4918 node_to_primary = None
4919 node_to_secondary = None
4921 if query.NQ_OOB in self.requested_data:
4922 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4923 for name, node in all_info.iteritems())
4924 else:
4925 oob_support = None
4927 if query.NQ_GROUP in self.requested_data:
4928 groups = lu.cfg.GetAllNodeGroupsInfo()
4929 else:
4930 groups = {}
4932 return query.NodeQueryData([all_info[name] for name in nodenames],
4933 live_data, lu.cfg.GetMasterNode(),
4934 node_to_primary, node_to_secondary, groups,
4935 oob_support, lu.cfg.GetClusterInfo())
4938 class LUNodeQuery(NoHooksLU):
4939 """Logical unit for querying nodes.
4942 # pylint: disable=W0142
4945 def CheckArguments(self):
4946 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4947 self.op.output_fields, self.op.use_locking)
4949 def ExpandNames(self):
4950 self.nq.ExpandNames(self)
4952 def DeclareLocks(self, level):
4953 self.nq.DeclareLocks(self, level)
4955 def Exec(self, feedback_fn):
4956 return self.nq.OldStyleQuery(self)
4959 class LUNodeQueryvols(NoHooksLU):
4960 """Logical unit for getting volumes on node(s).
4964 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4965 _FIELDS_STATIC = utils.FieldSet("node")
4967 def CheckArguments(self):
4968 _CheckOutputFields(static=self._FIELDS_STATIC,
4969 dynamic=self._FIELDS_DYNAMIC,
4970 selected=self.op.output_fields)
4972 def ExpandNames(self):
4973 self.share_locks = _ShareAll()
4974 self.needed_locks = {}
4976 if not self.op.nodes:
4977 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4978 else:
4979 self.needed_locks[locking.LEVEL_NODE] = \
4980 _GetWantedNodes(self, self.op.nodes)
4982 def Exec(self, feedback_fn):
4983 """Computes the list of nodes and their attributes.
4986 nodenames = self.owned_locks(locking.LEVEL_NODE)
4987 volumes = self.rpc.call_node_volumes(nodenames)
4989 ilist = self.cfg.GetAllInstancesInfo()
4990 vol2inst = _MapInstanceDisksToNodes(ilist.values())
4991 output = []
4993 for node in nodenames:
4994 nresult = volumes[node]
4995 if nresult.offline:
4996 continue
4997 msg = nresult.fail_msg
4998 if msg:
4999 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5000 continue
5002 node_vols = sorted(nresult.payload,
5003 key=operator.itemgetter("dev"))
5005 for vol in node_vols:
5006 node_output = []
5007 for field in self.op.output_fields:
5008 if field == "node":
5009 val = node
5010 elif field == "phys":
5011 val = vol["dev"]
5012 elif field == "vg":
5013 val = vol["vg"]
5014 elif field == "name":
5015 val = vol["name"]
5016 elif field == "size":
5017 val = int(float(vol["size"]))
5018 elif field == "instance":
5019 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5020 else:
5021 raise errors.ParameterError(field)
5022 node_output.append(str(val))
5024 output.append(node_output)
5026 return output
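# Example output row (a sketch; one string per requested output field, in
# the order given by self.op.output_fields):
#
#   ["node1.example.com", "/dev/sda5", "xenvg", "0df3052c.disk0", "2048",
#    "instance1.example.com"]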
5029 class LUNodeQueryStorage(NoHooksLU):
5030 """Logical unit for getting information on storage units on node(s).
5033 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5036 def CheckArguments(self):
5037 _CheckOutputFields(static=self._FIELDS_STATIC,
5038 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5039 selected=self.op.output_fields)
5041 def ExpandNames(self):
5042 self.share_locks = _ShareAll()
5043 self.needed_locks = {}
5045 if self.op.nodes:
5046 self.needed_locks[locking.LEVEL_NODE] = \
5047 _GetWantedNodes(self, self.op.nodes)
5048 else:
5049 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5051 def Exec(self, feedback_fn):
5052 """Computes the list of nodes and their attributes.
5055 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5057 # Always get name to sort by
5058 if constants.SF_NAME in self.op.output_fields:
5059 fields = self.op.output_fields[:]
5060 else:
5061 fields = [constants.SF_NAME] + self.op.output_fields
5063 # Never ask for node or type as it's only known to the LU
5064 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5065 while extra in fields:
5066 fields.remove(extra)
5068 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5069 name_idx = field_idx[constants.SF_NAME]
5071 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5072 data = self.rpc.call_storage_list(self.nodes,
5073 self.op.storage_type, st_args,
5074 self.op.name, fields)
5076 result = []
5078 for node in utils.NiceSort(self.nodes):
5079 nresult = data[node]
5080 if nresult.offline:
5081 continue
5083 msg = nresult.fail_msg
5084 if msg:
5085 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5086 continue
5088 rows = dict([(row[name_idx], row) for row in nresult.payload])
5090 for name in utils.NiceSort(rows.keys()):
5091 row = rows[name]
5092 out = []
5095 for field in self.op.output_fields:
5096 if field == constants.SF_NODE:
5097 val = node
5098 elif field == constants.SF_TYPE:
5099 val = self.op.storage_type
5100 elif field in field_idx:
5101 val = row[field_idx[field]]
5102 else:
5103 raise errors.ParameterError(field)
5104 out.append(val)
5106 result.append(out)
5108 return result
5112 class _InstanceQuery(_QueryBase):
5113 FIELDS = query.INSTANCE_FIELDS
5115 def ExpandNames(self, lu):
5116 lu.needed_locks = {}
5117 lu.share_locks = _ShareAll()
5119 if self.names:
5120 self.wanted = _GetWantedInstances(lu, self.names)
5121 else:
5122 self.wanted = locking.ALL_SET
5124 self.do_locking = (self.use_locking and
5125 query.IQ_LIVE in self.requested_data)
5126 if self.do_locking:
5127 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5128 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5129 lu.needed_locks[locking.LEVEL_NODE] = []
5130 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5132 self.do_grouplocks = (self.do_locking and
5133 query.IQ_NODES in self.requested_data)
5135 def DeclareLocks(self, lu, level):
5137 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5138 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5140 # Lock all groups used by instances optimistically; this requires going
5141 # via the node before it's locked, requiring verification later on
5142 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5143 set(group_uuid
5144 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5145 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5146 elif level == locking.LEVEL_NODE:
5147 lu._LockInstancesNodes() # pylint: disable=W0212
5150 def _CheckGroupLocks(lu):
5151 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5152 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5154 # Check if node groups for locked instances are still correct
5155 for instance_name in owned_instances:
5156 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5158 def _GetQueryData(self, lu):
5159 """Computes the list of instances and their attributes.
5162 if self.do_grouplocks:
5163 self._CheckGroupLocks(lu)
5165 cluster = lu.cfg.GetClusterInfo()
5166 all_info = lu.cfg.GetAllInstancesInfo()
5168 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5170 instance_list = [all_info[name] for name in instance_names]
5171 nodes = frozenset(itertools.chain(*(inst.all_nodes
5172 for inst in instance_list)))
5173 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5174 bad_nodes = []
5175 offline_nodes = []
5176 wrongnode_inst = set()
5178 # Gather data as requested
5179 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5180 live_data = {}
5181 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5182 for name in nodes:
5183 result = node_data[name]
5184 if result.offline:
5185 # offline nodes will be in both lists
5186 assert result.fail_msg
5187 offline_nodes.append(name)
5188 elif result.fail_msg:
5189 bad_nodes.append(name)
5190 elif result.payload:
5191 for inst in result.payload:
5192 if inst in all_info:
5193 if all_info[inst].primary_node == name:
5194 live_data.update(result.payload)
5195 else:
5196 wrongnode_inst.add(inst)
5197 else:
5198 # orphan instance; we don't list it here as we don't
5199 # handle this case yet in the output of instance listing
5200 logging.warning("Orphan instance '%s' found on node %s",
5202 # else no instance is alive
5206 if query.IQ_DISKUSAGE in self.requested_data:
5207 disk_usage = dict((inst.name,
5208 _ComputeDiskSize(inst.disk_template,
5209 [{constants.IDISK_SIZE: disk.size}
5210 for disk in inst.disks]))
5211 for inst in instance_list)
5212 else:
5213 disk_usage = None
5215 if query.IQ_CONSOLE in self.requested_data:
5216 consinfo = {}
5217 for inst in instance_list:
5218 if inst.name in live_data:
5219 # Instance is running
5220 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5221 else:
5222 consinfo[inst.name] = None
5223 assert set(consinfo.keys()) == set(instance_names)
5224 else:
5225 consinfo = None
5227 if query.IQ_NODES in self.requested_data:
5228 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5229 instance_list)))
5230 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5231 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5232 for uuid in set(map(operator.attrgetter("group"),
5238 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5239 disk_usage, offline_nodes, bad_nodes,
5240 live_data, wrongnode_inst, consinfo,
5241 nodes, groups)
5244 class LUQuery(NoHooksLU):
5245 """Query for resources/items of a certain kind.
5248 # pylint: disable=W0142
5251 def CheckArguments(self):
5252 qcls = _GetQueryImplementation(self.op.what)
5254 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5256 def ExpandNames(self):
5257 self.impl.ExpandNames(self)
5259 def DeclareLocks(self, level):
5260 self.impl.DeclareLocks(self, level)
5262 def Exec(self, feedback_fn):
5263 return self.impl.NewStyleQuery(self)
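# Sketch: LUQuery serves opcodes.OpQuery for any registered resource type;
# a filtered node query (filter syntax per the qlang module) could be:
#
#   op = opcodes.OpQuery(what=constants.QR_NODE,
#                        fields=["name", "role"],
#                        qfilter=["=", "name", "node1.example.com"])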
5266 class LUQueryFields(NoHooksLU):
5267 """Query for resources/items of a certain kind.
5270 # pylint: disable=W0142
5273 def CheckArguments(self):
5274 self.qcls = _GetQueryImplementation(self.op.what)
5276 def ExpandNames(self):
5277 self.needed_locks = {}
5279 def Exec(self, feedback_fn):
5280 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5283 class LUNodeModifyStorage(NoHooksLU):
5284 """Logical unit for modifying a storage volume on a node.
5289 def CheckArguments(self):
5290 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5292 storage_type = self.op.storage_type
5294 try:
5295 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5296 except KeyError:
5297 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5298 " modified" % storage_type,
5299 errors.ECODE_INVAL)
5301 diff = set(self.op.changes.keys()) - modifiable
5303 raise errors.OpPrereqError("The following fields can not be modified for"
5304 " storage units of type '%s': %r" %
5305 (storage_type, list(diff)),
5306 errors.ECODE_INVAL)
5308 def ExpandNames(self):
5309 self.needed_locks = {
5310 locking.LEVEL_NODE: self.op.node_name,
5311 }
5313 def Exec(self, feedback_fn):
5314 """Computes the list of nodes and their attributes.
5317 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5318 result = self.rpc.call_storage_modify(self.op.node_name,
5319 self.op.storage_type, st_args,
5320 self.op.name, self.op.changes)
5321 result.Raise("Failed to modify storage unit '%s' on %s" %
5322 (self.op.name, self.op.node_name))
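# Example (sketch): "gnt-node modify-storage" builds an opcode like the
# following; only fields listed in constants.MODIFIABLE_STORAGE_FIELDS for
# the storage type pass CheckArguments above:
#
#   op = opcodes.OpNodeModifyStorage(node_name="node1.example.com",
#                                    storage_type=constants.ST_LVM_PV,
#                                    name="/dev/sda3",
#                                    changes={constants.SF_ALLOCATABLE: True})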
5325 class LUNodeAdd(LogicalUnit):
5326 """Logical unit for adding node to the cluster.
5329 HPATH = "node-add"
5330 HTYPE = constants.HTYPE_NODE
5331 _NFLAGS = ["master_capable", "vm_capable"]
5333 def CheckArguments(self):
5334 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5335 # validate/normalize the node name
5336 self.hostname = netutils.GetHostname(name=self.op.node_name,
5337 family=self.primary_ip_family)
5338 self.op.node_name = self.hostname.name
5340 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5341 raise errors.OpPrereqError("Cannot readd the master node",
5344 if self.op.readd and self.op.group:
5345 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5346 " being readded", errors.ECODE_INVAL)
5348 def BuildHooksEnv(self):
5351 This will run on all nodes before, and on all nodes + the new node after.
5355 "OP_TARGET": self.op.node_name,
5356 "NODE_NAME": self.op.node_name,
5357 "NODE_PIP": self.op.primary_ip,
5358 "NODE_SIP": self.op.secondary_ip,
5359 "MASTER_CAPABLE": str(self.op.master_capable),
5360 "VM_CAPABLE": str(self.op.vm_capable),
5363 def BuildHooksNodes(self):
5364 """Build hooks nodes.
5367 # Exclude added node
5368 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5369 post_nodes = pre_nodes + [self.op.node_name, ]
5371 return (pre_nodes, post_nodes)
5373 def CheckPrereq(self):
5374 """Check prerequisites.
5377 - the new node is not already in the config
5379 - its parameters (single/dual homed) match the cluster
5381 Any errors are signaled by raising errors.OpPrereqError.
5384 cfg = self.cfg
5385 hostname = self.hostname
5386 node = hostname.name
5387 primary_ip = self.op.primary_ip = hostname.ip
5388 if self.op.secondary_ip is None:
5389 if self.primary_ip_family == netutils.IP6Address.family:
5390 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5391 " IPv4 address must be given as secondary",
5392 errors.ECODE_INVAL)
5393 self.op.secondary_ip = primary_ip
5395 secondary_ip = self.op.secondary_ip
5396 if not netutils.IP4Address.IsValid(secondary_ip):
5397 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5398 " address" % secondary_ip, errors.ECODE_INVAL)
5400 node_list = cfg.GetNodeList()
5401 if not self.op.readd and node in node_list:
5402 raise errors.OpPrereqError("Node %s is already in the configuration" %
5403 node, errors.ECODE_EXISTS)
5404 elif self.op.readd and node not in node_list:
5405 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5408 self.changed_primary_ip = False
5410 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5411 if self.op.readd and node == existing_node_name:
5412 if existing_node.secondary_ip != secondary_ip:
5413 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5414 " address configuration as before",
5416 if existing_node.primary_ip != primary_ip:
5417 self.changed_primary_ip = True
5419 continue
5421 if (existing_node.primary_ip == primary_ip or
5422 existing_node.secondary_ip == primary_ip or
5423 existing_node.primary_ip == secondary_ip or
5424 existing_node.secondary_ip == secondary_ip):
5425 raise errors.OpPrereqError("New node ip address(es) conflict with"
5426 " existing node %s" % existing_node.name,
5427 errors.ECODE_NOTUNIQUE)
5429 # After this 'if' block, None is no longer a valid value for the
5430 # _capable op attributes
5431 if self.op.readd:
5432 old_node = self.cfg.GetNodeInfo(node)
5433 assert old_node is not None, "Can't retrieve locked node %s" % node
5434 for attr in self._NFLAGS:
5435 if getattr(self.op, attr) is None:
5436 setattr(self.op, attr, getattr(old_node, attr))
5437 else:
5438 for attr in self._NFLAGS:
5439 if getattr(self.op, attr) is None:
5440 setattr(self.op, attr, True)
5442 if self.op.readd and not self.op.vm_capable:
5443 pri, sec = cfg.GetNodeInstances(node)
5445 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5446 " flag set to false, but it already holds"
5447 " instances" % node,
5450 # check that the type of the node (single versus dual homed) is the
5451 # same as for the master
5452 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5453 master_singlehomed = myself.secondary_ip == myself.primary_ip
5454 newbie_singlehomed = secondary_ip == primary_ip
5455 if master_singlehomed != newbie_singlehomed:
5456 if master_singlehomed:
5457 raise errors.OpPrereqError("The master has no secondary ip but the"
5458 " new node has one",
5461 raise errors.OpPrereqError("The master has a secondary ip but the"
5462 " new node doesn't have one",
5465 # checks reachability
5466 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5467 raise errors.OpPrereqError("Node not reachable by ping",
5468 errors.ECODE_ENVIRON)
5470 if not newbie_singlehomed:
5471 # check reachability from my secondary ip to newbie's secondary ip
5472 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5473 source=myself.secondary_ip):
5474 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5475 " based ping to node daemon port",
5476 errors.ECODE_ENVIRON)
5478 if self.op.readd:
5479 exceptions = [node]
5480 else:
5481 exceptions = []
5483 if self.op.master_capable:
5484 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5485 else:
5486 self.master_candidate = False
5488 if self.op.readd:
5489 self.new_node = old_node
5490 else:
5491 node_group = cfg.LookupNodeGroup(self.op.group)
5492 self.new_node = objects.Node(name=node,
5493 primary_ip=primary_ip,
5494 secondary_ip=secondary_ip,
5495 master_candidate=self.master_candidate,
5496 offline=False, drained=False,
5497 group=node_group)
5499 if self.op.ndparams:
5500 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5502 if self.op.hv_state:
5503 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5505 if self.op.disk_state:
5506 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5508 def Exec(self, feedback_fn):
5509 """Adds the new node to the cluster.
5512 new_node = self.new_node
5513 node = new_node.name
5515 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5516 "Not owning BGL"
5518 # We are adding a new node, so we assume it is powered
5519 new_node.powered = True
5521 # for re-adds, reset the offline/drained/master-candidate flags;
5522 # we need to reset here, otherwise offline would prevent RPC calls
5523 # later in the procedure; this also means that if the re-add
5524 # fails, we are left with a non-offlined, broken node
5525 if self.op.readd:
5526 new_node.drained = new_node.offline = False # pylint: disable=W0201
5527 self.LogInfo("Readding a node, the offline/drained flags were reset")
5528 # if we demote the node, we do cleanup later in the procedure
5529 new_node.master_candidate = self.master_candidate
5530 if self.changed_primary_ip:
5531 new_node.primary_ip = self.op.primary_ip
5533 # copy the master/vm_capable flags
5534 for attr in self._NFLAGS:
5535 setattr(new_node, attr, getattr(self.op, attr))
5537 # notify the user about any possible mc promotion
5538 if new_node.master_candidate:
5539 self.LogInfo("Node will be a master candidate")
5541 if self.op.ndparams:
5542 new_node.ndparams = self.op.ndparams
5544 new_node.ndparams = {}
5546 if self.op.hv_state:
5547 new_node.hv_state_static = self.new_hv_state
5549 if self.op.disk_state:
5550 new_node.disk_state_static = self.new_disk_state
5552 # check connectivity
5553 result = self.rpc.call_version([node])[node]
5554 result.Raise("Can't get version information from node %s" % node)
5555 if constants.PROTOCOL_VERSION == result.payload:
5556 logging.info("Communication to node %s fine, sw version %s match",
5557 node, result.payload)
5559 raise errors.OpExecError("Version mismatch master version %s,"
5560 " node version %s" %
5561 (constants.PROTOCOL_VERSION, result.payload))
5563 # Add node to our /etc/hosts, and add key to known_hosts
5564 if self.cfg.GetClusterInfo().modify_etc_hosts:
5565 master_node = self.cfg.GetMasterNode()
5566 result = self.rpc.call_etc_hosts_modify(master_node,
5567 constants.ETC_HOSTS_ADD,
5568 self.hostname.name,
5569 self.hostname.ip)
5570 result.Raise("Can't update hosts file with new host data")
5572 if new_node.secondary_ip != new_node.primary_ip:
5573 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5574 False)
5576 node_verify_list = [self.cfg.GetMasterNode()]
5577 node_verify_param = {
5578 constants.NV_NODELIST: ([node], {}),
5579 # TODO: do a node-net-test as well?
5581 }
5582 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5583 self.cfg.GetClusterName())
5584 for verifier in node_verify_list:
5585 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5586 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5587 if nl_payload:
5588 for failed in nl_payload:
5589 feedback_fn("ssh/hostname verification failed"
5590 " (checking from %s): %s" %
5591 (verifier, nl_payload[failed]))
5592 raise errors.OpExecError("ssh/hostname verification failed")
5594 if self.op.readd:
5595 _RedistributeAncillaryFiles(self)
5596 self.context.ReaddNode(new_node)
5597 # make sure we redistribute the config
5598 self.cfg.Update(new_node, feedback_fn)
5599 # and make sure the new node will not have old files around
5600 if not new_node.master_candidate:
5601 result = self.rpc.call_node_demote_from_mc(new_node.name)
5602 msg = result.fail_msg
5604 self.LogWarning("Node failed to demote itself from master"
5605 " candidate status: %s" % msg)
5606 else:
5607 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5608 additional_vm=self.op.vm_capable)
5609 self.context.AddNode(new_node, self.proc.GetECId())
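# Sketch of the add/readd flow implemented above: verify the node daemon
# version, update /etc/hosts and known_hosts, run an ssh/hostname check
# from the master, then either re-register the node (readd) or add the new
# Node object and redistribute the ancillary files to it.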
5612 class LUNodeSetParams(LogicalUnit):
5613 """Modifies the parameters of a node.
5615 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5616 to the node role (as _ROLE_*)
5617 @cvar _R2F: a dictionary from node role to tuples of flags
5618 @cvar _FLAGS: a list of attribute names corresponding to the flags
5621 HPATH = "node-modify"
5622 HTYPE = constants.HTYPE_NODE
5624 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5625 _F2R = {
5626 (True, False, False): _ROLE_CANDIDATE,
5627 (False, True, False): _ROLE_DRAINED,
5628 (False, False, True): _ROLE_OFFLINE,
5629 (False, False, False): _ROLE_REGULAR,
5630 }
5631 _R2F = dict((v, k) for k, v in _F2R.items())
5632 _FLAGS = ["master_candidate", "drained", "offline"]
5634 def CheckArguments(self):
5635 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5636 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5637 self.op.master_capable, self.op.vm_capable,
5638 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5639 self.op.disk_state]
5640 if all_mods.count(None) == len(all_mods):
5641 raise errors.OpPrereqError("Please pass at least one modification",
5642 errors.ECODE_INVAL)
5643 if all_mods.count(True) > 1:
5644 raise errors.OpPrereqError("Can't set the node into more than one"
5645 " state at the same time",
5648 # Boolean value that tells us whether we might be demoting from MC
5649 self.might_demote = (self.op.master_candidate == False or
5650 self.op.offline == True or
5651 self.op.drained == True or
5652 self.op.master_capable == False)
5654 if self.op.secondary_ip:
5655 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5656 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5657 " address" % self.op.secondary_ip,
5660 self.lock_all = self.op.auto_promote and self.might_demote
5661 self.lock_instances = self.op.secondary_ip is not None
5663 def _InstanceFilter(self, instance):
5664 """Filter for getting affected instances.
5667 return (instance.disk_template in constants.DTS_INT_MIRROR and
5668 self.op.node_name in instance.all_nodes)
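# Example (editorial sketch): the filter is a plain predicate over
# instance objects, so the config can evaluate it directly, as is done in
# ExpandNames/CheckPrereq below:
#
#   affected = self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
#   # -> dict of name -> objects.Instance for DRBD instances on this node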
5670 def ExpandNames(self):
5671 if self.lock_all:
5672 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5673 else:
5674 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5676 # Since modifying a node can have severe effects on currently running
5677 # operations, the resource lock is at least acquired in shared mode
5678 self.needed_locks[locking.LEVEL_NODE_RES] = \
5679 self.needed_locks[locking.LEVEL_NODE]
5681 # Get node resource and instance locks in shared mode; they are not used
5682 # for anything but read-only access
5683 self.share_locks[locking.LEVEL_NODE_RES] = 1
5684 self.share_locks[locking.LEVEL_INSTANCE] = 1
5686 if self.lock_instances:
5687 self.needed_locks[locking.LEVEL_INSTANCE] = \
5688 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5690 def BuildHooksEnv(self):
5693 This runs on the master node.
5696 return {
5697 "OP_TARGET": self.op.node_name,
5698 "MASTER_CANDIDATE": str(self.op.master_candidate),
5699 "OFFLINE": str(self.op.offline),
5700 "DRAINED": str(self.op.drained),
5701 "MASTER_CAPABLE": str(self.op.master_capable),
5702 "VM_CAPABLE": str(self.op.vm_capable),
5703 }
5705 def BuildHooksNodes(self):
5706 """Build hooks nodes.
5709 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5710 return (nl, nl)
5712 def CheckPrereq(self):
5713 """Check prerequisites.
5715 This only checks the instance list against the existing names.
5718 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5720 if self.lock_instances:
5721 affected_instances = \
5722 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5724 # Verify instance locks
5725 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5726 wanted_instances = frozenset(affected_instances.keys())
5727 if wanted_instances - owned_instances:
5728 raise errors.OpPrereqError("Instances affected by changing node %s's"
5729 " secondary IP address have changed since"
5730 " locks were acquired, wanted '%s', have"
5731 " '%s'; retry the operation" %
5732 (self.op.node_name,
5733 utils.CommaJoin(wanted_instances),
5734 utils.CommaJoin(owned_instances)),
5735 errors.ECODE_STATE)
5736 else:
5737 affected_instances = None
5739 if (self.op.master_candidate is not None or
5740 self.op.drained is not None or
5741 self.op.offline is not None):
5742 # we can't change the master's node flags
5743 if self.op.node_name == self.cfg.GetMasterNode():
5744 raise errors.OpPrereqError("The master role can be changed"
5745 " only via master-failover",
5748 if self.op.master_candidate and not node.master_capable:
5749 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5750 " it a master candidate" % node.name,
5751 errors.ECODE_STATE)
5753 if self.op.vm_capable == False:
5754 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5755 if ipri or isec:
5756 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5757 " the vm_capable flag" % node.name,
5758 errors.ECODE_STATE)
5760 if node.master_candidate and self.might_demote and not self.lock_all:
5761 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5762 # check if after removing the current node, we're missing master
5763 # candidates
5764 (mc_remaining, mc_should, _) = \
5765 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5766 if mc_remaining < mc_should:
5767 raise errors.OpPrereqError("Not enough master candidates, please"
5768 " pass auto promote option to allow"
5769 " promotion", errors.ECODE_STATE)
5771 self.old_flags = old_flags = (node.master_candidate,
5772 node.drained, node.offline)
5773 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5774 self.old_role = old_role = self._F2R[old_flags]
5776 # Check for ineffective changes
5777 for attr in self._FLAGS:
5778 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5779 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5780 setattr(self.op, attr, None)
5782 # Past this point, any flag change to False means a transition
5783 # away from the respective state, as only real changes are kept
5785 # TODO: We might query the real power state if it supports OOB
5786 if _SupportsOob(self.cfg, node):
5787 if self.op.offline is False and not (node.powered or
5788 self.op.powered == True):
5789 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5790 " offline status can be reset") %
5791 self.op.node_name)
5792 elif self.op.powered is not None:
5793 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5794 " as it does not support out-of-band"
5795 " handling") % self.op.node_name)
5797 # If we're being deofflined/drained, we'll MC ourself if needed
5798 if (self.op.drained == False or self.op.offline == False or
5799 (self.op.master_capable and not node.master_capable)):
5800 if _DecideSelfPromotion(self):
5801 self.op.master_candidate = True
5802 self.LogInfo("Auto-promoting node to master candidate")
5804 # If we're no longer master capable, we'll demote ourselves from MC
5805 if self.op.master_capable == False and node.master_candidate:
5806 self.LogInfo("Demoting from master candidate")
5807 self.op.master_candidate = False
5810 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5811 if self.op.master_candidate:
5812 new_role = self._ROLE_CANDIDATE
5813 elif self.op.drained:
5814 new_role = self._ROLE_DRAINED
5815 elif self.op.offline:
5816 new_role = self._ROLE_OFFLINE
5817 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5818 # False is still in new flags, which means we're un-setting (the
5819 # old) flags
5820 new_role = self._ROLE_REGULAR
5821 else: # no new flags, nothing, keep old role
5822 new_role = old_role
5824 self.new_role = new_role
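# Worked example (editorial): if old_flags == (True, False, False) (a
# master candidate) and the opcode only sets offline=True, the chain
# above picks new_role = _ROLE_OFFLINE; if the opcode instead carries
# only drained=False (an explicit un-set), the False branch yields
# _ROLE_REGULAR; with no flags at all, the old role is kept.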
5826 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5827 # Trying to transition out of offline status
5828 # TODO: Use standard RPC runner, but make sure it works when the node is
5829 # still marked offline
5830 result = rpc.BootstrapRunner().call_version([node.name])[node.name]
5831 if result.fail_msg:
5832 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5833 " to report its version: %s" %
5834 (node.name, result.fail_msg),
5835 errors.ECODE_ENVIRON)
5836 else:
5837 self.LogWarning("Transitioning node from offline to online state"
5838 " without using re-add. Please make sure the node"
5839 " is healthy!")
5841 if self.op.secondary_ip:
5842 # Ok even without locking, because this can't be changed by any LU
5843 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5844 master_singlehomed = master.secondary_ip == master.primary_ip
5845 if master_singlehomed and self.op.secondary_ip:
5846 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5847 " homed cluster", errors.ECODE_INVAL)
5849 assert not (frozenset(affected_instances) -
5850 self.owned_locks(locking.LEVEL_INSTANCE))
5852 if node.offline:
5853 if affected_instances:
5854 raise errors.OpPrereqError("Cannot change secondary IP address:"
5855 " offline node has instances (%s)"
5856 " configured to use it" %
5857 utils.CommaJoin(affected_instances.keys()))
5858 else:
5859 # On online nodes, check that no instances are running, and that
5860 # the node has the new ip and we can reach it.
5861 for instance in affected_instances.values():
5862 _CheckInstanceState(self, instance, INSTANCE_DOWN,
5863 msg="cannot change secondary ip")
5865 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5866 if master.name != node.name:
5867 # check reachability from master secondary ip to new secondary ip
5868 if not netutils.TcpPing(self.op.secondary_ip,
5869 constants.DEFAULT_NODED_PORT,
5870 source=master.secondary_ip):
5871 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5872 " based ping to node daemon port",
5873 errors.ECODE_ENVIRON)
5875 if self.op.ndparams:
5876 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5877 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5878 self.new_ndparams = new_ndparams
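# Example (editorial, values hypothetical): _GetUpdatedParams overlays
# the opcode's ndparams onto the node's current ones, e.g.:
#
#   node.ndparams == {"oob_program": "/usr/bin/oob"}
#   self.op.ndparams == {"spindle_count": 2}
#   _GetUpdatedParams(...) == {"oob_program": "/usr/bin/oob",
#                              "spindle_count": 2}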
5880 if self.op.hv_state:
5881 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
5882 self.node.hv_state_static)
5884 if self.op.disk_state:
5885 self.new_disk_state = \
5886 _MergeAndVerifyDiskState(self.op.disk_state,
5887 self.node.disk_state_static)
5889 def Exec(self, feedback_fn):
5893 node = self.node
5894 old_role = self.old_role
5895 new_role = self.new_role
5897 result = []
5899 if self.op.ndparams:
5900 node.ndparams = self.new_ndparams
5902 if self.op.powered is not None:
5903 node.powered = self.op.powered
5905 if self.op.hv_state:
5906 node.hv_state_static = self.new_hv_state
5908 if self.op.disk_state:
5909 node.disk_state_static = self.new_disk_state
5911 for attr in ["master_capable", "vm_capable"]:
5912 val = getattr(self.op, attr)
5913 if val is not None:
5914 setattr(node, attr, val)
5915 result.append((attr, str(val)))
5917 if new_role != old_role:
5918 # Tell the node to demote itself, if no longer MC and not offline
5919 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5920 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5921 if msg:
5922 self.LogWarning("Node failed to demote itself: %s", msg)
5924 new_flags = self._R2F[new_role]
5925 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5926 if of != nf:
5927 result.append((desc, str(nf)))
5928 (node.master_candidate, node.drained, node.offline) = new_flags
5930 # we locked all nodes, we adjust the CP before updating this node
5931 if self.lock_all:
5932 _AdjustCandidatePool(self, [node.name])
5934 if self.op.secondary_ip:
5935 node.secondary_ip = self.op.secondary_ip
5936 result.append(("secondary_ip", self.op.secondary_ip))
5938 # this will trigger configuration file update, if needed
5939 self.cfg.Update(node, feedback_fn)
5941 # this will trigger job queue propagation or cleanup if the mc
5942 # flag changed
5943 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5944 self.context.ReaddNode(node)
5946 return result
5949 class LUNodePowercycle(NoHooksLU):
5950 """Powercycles a node.
5955 def CheckArguments(self):
5956 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5957 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5958 raise errors.OpPrereqError("The node is the master and the force"
5959 " parameter was not set",
5960 errors.ECODE_INVAL)
5962 def ExpandNames(self):
5963 """Locking for PowercycleNode.
5965 This is a last-resort option and shouldn't block on other
5966 jobs. Therefore, we grab no locks.
5969 self.needed_locks = {}
5971 def Exec(self, feedback_fn):
5975 result = self.rpc.call_node_powercycle(self.op.node_name,
5976 self.cfg.GetHypervisorType())
5977 result.Raise("Failed to schedule the reboot")
5978 return result.payload
5981 class LUClusterQuery(NoHooksLU):
5982 """Query cluster configuration.
5987 def ExpandNames(self):
5988 self.needed_locks = {}
5990 def Exec(self, feedback_fn):
5991 """Return cluster config.
5994 cluster = self.cfg.GetClusterInfo()
5996 os_hvp = {}
5997 # Filter just for enabled hypervisors
5998 for os_name, hv_dict in cluster.os_hvp.items():
5999 os_hvp[os_name] = {}
6000 for hv_name, hv_params in hv_dict.items():
6001 if hv_name in cluster.enabled_hypervisors:
6002 os_hvp[os_name][hv_name] = hv_params
6004 # Convert ip_family to ip_version
6005 primary_ip_version = constants.IP4_VERSION
6006 if cluster.primary_ip_family == netutils.IP6Address.family:
6007 primary_ip_version = constants.IP6_VERSION
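# Example (editorial): on an IPv6 cluster, primary_ip_family equals
# netutils.IP6Address.family (socket.AF_INET6), so clients see
# "primary_ip_version" == constants.IP6_VERSION (6) instead of the
# IPv4 default constants.IP4_VERSION (4).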
6009 result = {
6010 "software_version": constants.RELEASE_VERSION,
6011 "protocol_version": constants.PROTOCOL_VERSION,
6012 "config_version": constants.CONFIG_VERSION,
6013 "os_api_version": max(constants.OS_API_VERSIONS),
6014 "export_version": constants.EXPORT_VERSION,
6015 "architecture": (platform.architecture()[0], platform.machine()),
6016 "name": cluster.cluster_name,
6017 "master": cluster.master_node,
6018 "default_hypervisor": cluster.primary_hypervisor,
6019 "enabled_hypervisors": cluster.enabled_hypervisors,
6020 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6021 for hypervisor_name in cluster.enabled_hypervisors]),
6022 "os_hvp": os_hvp,
6023 "beparams": cluster.beparams,
6024 "osparams": cluster.osparams,
6025 "ipolicy": cluster.ipolicy,
6026 "nicparams": cluster.nicparams,
6027 "ndparams": cluster.ndparams,
6028 "candidate_pool_size": cluster.candidate_pool_size,
6029 "master_netdev": cluster.master_netdev,
6030 "master_netmask": cluster.master_netmask,
6031 "use_external_mip_script": cluster.use_external_mip_script,
6032 "volume_group_name": cluster.volume_group_name,
6033 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6034 "file_storage_dir": cluster.file_storage_dir,
6035 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6036 "maintain_node_health": cluster.maintain_node_health,
6037 "ctime": cluster.ctime,
6038 "mtime": cluster.mtime,
6039 "uuid": cluster.uuid,
6040 "tags": list(cluster.GetTags()),
6041 "uid_pool": cluster.uid_pool,
6042 "default_iallocator": cluster.default_iallocator,
6043 "reserved_lvs": cluster.reserved_lvs,
6044 "primary_ip_version": primary_ip_version,
6045 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6046 "hidden_os": cluster.hidden_os,
6047 "blacklisted_os": cluster.blacklisted_os,
6048 }
6050 return result
6053 class LUClusterConfigQuery(NoHooksLU):
6054 """Return configuration values.
6058 _FIELDS_DYNAMIC = utils.FieldSet()
6059 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
6060 "watcher_pause", "volume_group_name")
6062 def CheckArguments(self):
6063 _CheckOutputFields(static=self._FIELDS_STATIC,
6064 dynamic=self._FIELDS_DYNAMIC,
6065 selected=self.op.output_fields)
6067 def ExpandNames(self):
6068 self.needed_locks = {}
6070 def Exec(self, feedback_fn):
6071 """Dump a representation of the cluster config to the standard output.
6073 values = []
6075 for field in self.op.output_fields:
6076 if field == "cluster_name":
6077 entry = self.cfg.GetClusterName()
6078 elif field == "master_node":
6079 entry = self.cfg.GetMasterNode()
6080 elif field == "drain_flag":
6081 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
6082 elif field == "watcher_pause":
6083 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
6084 elif field == "volume_group_name":
6085 entry = self.cfg.GetVGName()
6086 else:
6087 raise errors.ParameterError(field)
6088 values.append(entry)
6090 return values
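# Example (editorial sketch): the returned list preserves the order of
# the requested fields, e.g.:
#
#   op.output_fields = ["cluster_name", "master_node"]
#   # Exec -> ["cluster.example.com", "node1.example.com"]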
6092 class LUInstanceActivateDisks(NoHooksLU):
6093 """Bring up an instance's disks.
6098 def ExpandNames(self):
6099 self._ExpandAndLockInstance()
6100 self.needed_locks[locking.LEVEL_NODE] = []
6101 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6103 def DeclareLocks(self, level):
6104 if level == locking.LEVEL_NODE:
6105 self._LockInstancesNodes()
6107 def CheckPrereq(self):
6108 """Check prerequisites.
6110 This checks that the instance is in the cluster.
6113 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6114 assert self.instance is not None, \
6115 "Cannot retrieve locked instance %s" % self.op.instance_name
6116 _CheckNodeOnline(self, self.instance.primary_node)
6118 def Exec(self, feedback_fn):
6119 """Activate the disks.
6122 disks_ok, disks_info = \
6123 _AssembleInstanceDisks(self, self.instance,
6124 ignore_size=self.op.ignore_size)
6125 if not disks_ok:
6126 raise errors.OpExecError("Cannot activate block devices")
6128 return disks_info
6131 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6132 ignore_size=False):
6133 """Prepare the block devices for an instance.
6135 This sets up the block devices on all nodes.
6137 @type lu: L{LogicalUnit}
6138 @param lu: the logical unit on whose behalf we execute
6139 @type instance: L{objects.Instance}
6140 @param instance: the instance for whose disks we assemble
6141 @type disks: list of L{objects.Disk} or None
6142 @param disks: which disks to assemble (or all, if None)
6143 @type ignore_secondaries: boolean
6144 @param ignore_secondaries: if true, errors on secondary nodes
6145 won't result in an error return from the function
6146 @type ignore_size: boolean
6147 @param ignore_size: if true, the current known size of the disk
6148 will not be used during the disk activation, useful for cases
6149 when the size is wrong
6150 @return: False if the operation failed, otherwise a list of
6151 (host, instance_visible_name, node_visible_name)
6152 with the mapping from node devices to instance devices
6155 device_info = []
6156 disks_ok = True
6157 iname = instance.name
6158 disks = _ExpandCheckDisks(instance, disks)
6160 # With the two-pass mechanism we try to reduce the window of
6161 # opportunity for the race condition of switching DRBD to primary
6162 # before handshaking occurred, but we do not eliminate it
6164 # The proper fix would be to wait (with some limits) until the
6165 # connection has been made and drbd transitions from WFConnection
6166 # into any other network-connected state (Connected, SyncTarget,
6167 # SyncSource, etc.)
6169 # 1st pass, assemble on all nodes in secondary mode
6170 for idx, inst_disk in enumerate(disks):
6171 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6172 if ignore_size:
6173 node_disk = node_disk.Copy()
6174 node_disk.UnsetSize()
6175 lu.cfg.SetDiskID(node_disk, node)
6176 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
6177 msg = result.fail_msg
6178 if msg:
6179 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6180 " (is_primary=False, pass=1): %s",
6181 inst_disk.iv_name, node, msg)
6182 if not ignore_secondaries:
6183 disks_ok = False
6185 # FIXME: race condition on drbd migration to primary
6187 # 2nd pass, do only the primary node
6188 for idx, inst_disk in enumerate(disks):
6189 dev_path = None
6191 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6192 if node != instance.primary_node:
6193 continue
6194 if ignore_size:
6195 node_disk = node_disk.Copy()
6196 node_disk.UnsetSize()
6197 lu.cfg.SetDiskID(node_disk, node)
6198 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
6199 msg = result.fail_msg
6200 if msg:
6201 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6202 " (is_primary=True, pass=2): %s",
6203 inst_disk.iv_name, node, msg)
6204 disks_ok = False
6205 else:
6206 dev_path = result.payload
6208 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6210 # leave the disks configured for the primary node
6211 # this is a workaround that would be fixed better by
6212 # improving the logical/physical id handling
6213 for disk in disks:
6214 lu.cfg.SetDiskID(disk, instance.primary_node)
6216 return disks_ok, device_info
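# Example (editorial sketch): callers unpack the pair and treat a False
# first element as failure, mirroring LUInstanceActivateDisks.Exec above:
#
#   disks_ok, device_info = _AssembleInstanceDisks(lu, instance)
#   if not disks_ok:
#     raise errors.OpExecError("Cannot activate block devices")
#   for node, iv_name, dev_path in device_info:
#     feedback_fn("%s: disk %s assembled at %s" % (node, iv_name, dev_path))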
6219 def _StartInstanceDisks(lu, instance, force):
6220 """Start the disks of an instance.
6223 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6224 ignore_secondaries=force)
6225 if not disks_ok:
6226 _ShutdownInstanceDisks(lu, instance)
6227 if force is not None and not force:
6228 lu.proc.LogWarning("", hint="If the message above refers to a"
6229 " secondary node,"
6230 " you can retry the operation using '--force'.")
6231 raise errors.OpExecError("Disk consistency error")
6234 class LUInstanceDeactivateDisks(NoHooksLU):
6235 """Shutdown an instance's disks.
6240 def ExpandNames(self):
6241 self._ExpandAndLockInstance()
6242 self.needed_locks[locking.LEVEL_NODE] = []
6243 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6245 def DeclareLocks(self, level):
6246 if level == locking.LEVEL_NODE:
6247 self._LockInstancesNodes()
6249 def CheckPrereq(self):
6250 """Check prerequisites.
6252 This checks that the instance is in the cluster.
6255 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6256 assert self.instance is not None, \
6257 "Cannot retrieve locked instance %s" % self.op.instance_name
6259 def Exec(self, feedback_fn):
6260 """Deactivate the disks
6263 instance = self.instance
6265 _ShutdownInstanceDisks(self, instance)
6267 _SafeShutdownInstanceDisks(self, instance)
6270 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6271 """Shutdown block devices of an instance.
6273 This function checks if an instance is running, before calling
6274 _ShutdownInstanceDisks.
6277 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6278 _ShutdownInstanceDisks(lu, instance, disks=disks)
6281 def _ExpandCheckDisks(instance, disks):
6282 """Return the instance disks selected by the disks list
6284 @type disks: list of L{objects.Disk} or None
6285 @param disks: selected disks
6286 @rtype: list of L{objects.Disk}
6287 @return: selected instance disks to act on
6290 if disks is None:
6291 return instance.disks
6293 if not set(disks).issubset(instance.disks):
6294 raise errors.ProgrammerError("Can only act on disks belonging to the"
6295 " target instance")
6297 return disks
6299 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6300 """Shutdown block devices of an instance.
6302 This does the shutdown on all nodes of the instance.
6304 Unless ignore_primary is true, errors on the primary node make the
6305 shutdown count as failed.
6308 all_result = True
6309 disks = _ExpandCheckDisks(instance, disks)
6311 for disk in disks:
6312 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6313 lu.cfg.SetDiskID(top_disk, node)
6314 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
6315 msg = result.fail_msg
6316 if msg:
6317 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6318 disk.iv_name, node, msg)
6319 if ((node == instance.primary_node and not ignore_primary) or
6320 (node != instance.primary_node and not result.offline)):
6321 all_result = False
6323 return all_result
6325 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6326 """Checks if a node has enough free memory.
6328 This function checks if a given node has the needed amount of free
6329 memory. In case the node has less memory or we cannot get the
6330 information from the node, this function raises an OpPrereqError
6331 exception.
6333 @type lu: C{LogicalUnit}
6334 @param lu: a logical unit from which we get configuration data
6335 @type node: C{str}
6336 @param node: the node to check
6337 @type reason: C{str}
6338 @param reason: string to use in the error message
6339 @type requested: C{int}
6340 @param requested: the amount of memory in MiB to check for
6341 @type hypervisor_name: C{str}
6342 @param hypervisor_name: the hypervisor to ask for memory stats
6343 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6344 we cannot check the node
6347 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6348 nodeinfo[node].Raise("Can't get data from node %s" % node,
6349 prereq=True, ecode=errors.ECODE_ENVIRON)
6350 (_, _, (hv_info, )) = nodeinfo[node].payload
6352 free_mem = hv_info.get("memory_free", None)
6353 if not isinstance(free_mem, int):
6354 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6355 " was '%s'" % (node, free_mem),
6356 errors.ECODE_ENVIRON)
6357 if requested > free_mem:
6358 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6359 " needed %s MiB, available %s MiB" %
6360 (node, reason, requested, free_mem),
6361 errors.ECODE_NORES)
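# Example (editorial sketch): a typical call checks the primary node
# before starting an instance, using its maximum memory setting:
#
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MAXMEM], instance.hypervisor)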
6364 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6365 """Checks if nodes have enough free disk space in all the VGs.
6367 This function checks if all given nodes have the needed amount of
6368 free disk. In case any node has less disk or we cannot get the
6369 information from the node, this function raises an OpPrereqError
6370 exception.
6372 @type lu: C{LogicalUnit}
6373 @param lu: a logical unit from which we get configuration data
6374 @type nodenames: C{list}
6375 @param nodenames: the list of node names to check
6376 @type req_sizes: C{dict}
6377 @param req_sizes: the hash of vg and corresponding amount of disk in
6378 MiB to check for
6379 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6380 or we cannot check the node
6383 for vg, req_size in req_sizes.items():
6384 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
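# Example (editorial, values hypothetical): req_sizes maps each volume
# group to the total MiB required on it, e.g.:
#
#   _CheckNodesFreeDiskPerVG(self, ["node1", "node2"],
#                            {"xenvg": 10240, "altvg": 2048})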
6387 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6388 """Checks if nodes have enough free disk space in the specified VG.
6390 This function checks if all given nodes have the needed amount of
6391 free disk. In case any node has less disk or we cannot get the
6392 information from the node, this function raises an OpPrereqError
6393 exception.
6395 @type lu: C{LogicalUnit}
6396 @param lu: a logical unit from which we get configuration data
6397 @type nodenames: C{list}
6398 @param nodenames: the list of node names to check
6399 @type vg: C{str}
6400 @param vg: the volume group to check
6401 @type requested: C{int}
6402 @param requested: the amount of disk in MiB to check for
6403 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6404 or we cannot check the node
6407 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6408 for node in nodenames:
6409 info = nodeinfo[node]
6410 info.Raise("Cannot get current information from node %s" % node,
6411 prereq=True, ecode=errors.ECODE_ENVIRON)
6412 (_, (vg_info, ), _) = info.payload
6413 vg_free = vg_info.get("vg_free", None)
6414 if not isinstance(vg_free, int):
6415 raise errors.OpPrereqError("Can't compute free disk space on node"
6416 " %s for vg %s, result was '%s'" %
6417 (node, vg, vg_free), errors.ECODE_ENVIRON)
6418 if requested > vg_free:
6419 raise errors.OpPrereqError("Not enough disk space on target node %s"
6420 " vg %s: required %d MiB, available %d MiB" %
6421 (node, vg, requested, vg_free),
6422 errors.ECODE_NORES)
6425 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6426 """Checks if nodes have enough physical CPUs
6428 This function checks if all given nodes have the needed number of
6429 physical CPUs. In case any node has fewer CPUs or we cannot get the
6430 information from the node, this function raises an OpPrereqError
6431 exception.
6433 @type lu: C{LogicalUnit}
6434 @param lu: a logical unit from which we get configuration data
6435 @type nodenames: C{list}
6436 @param nodenames: the list of node names to check
6437 @type requested: C{int}
6438 @param requested: the minimum acceptable number of physical CPUs
6439 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6440 or we cannot check the node
6443 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6444 for node in nodenames:
6445 info = nodeinfo[node]
6446 info.Raise("Cannot get current information from node %s" % node,
6447 prereq=True, ecode=errors.ECODE_ENVIRON)
6448 (_, _, (hv_info, )) = info.payload
6449 num_cpus = hv_info.get("cpu_total", None)
6450 if not isinstance(num_cpus, int):
6451 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6452 " on node %s, result was '%s'" %
6453 (node, num_cpus), errors.ECODE_ENVIRON)
6454 if requested > num_cpus:
6455 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6456 "required" % (node, num_cpus, requested),
6457 errors.ECODE_NORES)
6460 class LUInstanceStartup(LogicalUnit):
6461 """Starts an instance.
6464 HPATH = "instance-start"
6465 HTYPE = constants.HTYPE_INSTANCE
6468 def CheckArguments(self):
6470 if self.op.beparams:
6471 # fill the beparams dict
6472 objects.UpgradeBeParams(self.op.beparams)
6473 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6475 def ExpandNames(self):
6476 self._ExpandAndLockInstance()
6478 def BuildHooksEnv(self):
6481 This runs on master, primary and secondary nodes of the instance.
6484 env = {
6485 "FORCE": self.op.force,
6486 }
6488 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6490 return env
6492 def BuildHooksNodes(self):
6493 """Build hooks nodes.
6496 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6497 return (nl, nl)
6499 def CheckPrereq(self):
6500 """Check prerequisites.
6502 This checks that the instance is in the cluster.
6505 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6506 assert self.instance is not None, \
6507 "Cannot retrieve locked instance %s" % self.op.instance_name
6510 if self.op.hvparams:
6511 # check hypervisor parameter syntax (locally)
6512 cluster = self.cfg.GetClusterInfo()
6513 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6514 filled_hvp = cluster.FillHV(instance)
6515 filled_hvp.update(self.op.hvparams)
6516 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6517 hv_type.CheckParameterSyntax(filled_hvp)
6518 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6520 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6522 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6524 if self.primary_offline and self.op.ignore_offline_nodes:
6525 self.proc.LogWarning("Ignoring offline primary node")
6527 if self.op.hvparams or self.op.beparams:
6528 self.proc.LogWarning("Overridden parameters are ignored")
6529 else:
6530 _CheckNodeOnline(self, instance.primary_node)
6532 bep = self.cfg.GetClusterInfo().FillBE(instance)
6534 # check bridges existence
6535 _CheckInstanceBridgesExist(self, instance)
6537 remote_info = self.rpc.call_instance_info(instance.primary_node,
6538 instance.name,
6539 instance.hypervisor)
6540 remote_info.Raise("Error checking node %s" % instance.primary_node,
6541 prereq=True, ecode=errors.ECODE_ENVIRON)
6542 if not remote_info.payload: # not running already
6543 _CheckNodeFreeMemory(self, instance.primary_node,
6544 "starting instance %s" % instance.name,
6545 bep[constants.BE_MAXMEM], instance.hypervisor)
6547 def Exec(self, feedback_fn):
6548 """Start the instance.
6551 instance = self.instance
6552 force = self.op.force
6554 if not self.op.no_remember:
6555 self.cfg.MarkInstanceUp(instance.name)
6557 if self.primary_offline:
6558 assert self.op.ignore_offline_nodes
6559 self.proc.LogInfo("Primary node offline, marked instance as started")
6560 else:
6561 node_current = instance.primary_node
6563 _StartInstanceDisks(self, instance, force)
6565 result = \
6566 self.rpc.call_instance_start(node_current,
6567 (instance, self.op.hvparams,
6568 self.op.beparams),
6569 self.op.startup_paused)
6570 msg = result.fail_msg
6571 if msg:
6572 _ShutdownInstanceDisks(self, instance)
6573 raise errors.OpExecError("Could not start instance: %s" % msg)
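# Example (editorial sketch): the opcode-level equivalent of this LU,
# optionally overriding parameters for this start only:
#
#   op = opcodes.OpInstanceStartup(instance_name="inst1.example.com",
#                                  force=False)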
6576 class LUInstanceReboot(LogicalUnit):
6577 """Reboot an instance.
6580 HPATH = "instance-reboot"
6581 HTYPE = constants.HTYPE_INSTANCE
6584 def ExpandNames(self):
6585 self._ExpandAndLockInstance()
6587 def BuildHooksEnv(self):
6590 This runs on master, primary and secondary nodes of the instance.
6593 env = {
6594 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6595 "REBOOT_TYPE": self.op.reboot_type,
6596 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6597 }
6599 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6601 return env
6603 def BuildHooksNodes(self):
6604 """Build hooks nodes.
6607 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6608 return (nl, nl)
6610 def CheckPrereq(self):
6611 """Check prerequisites.
6613 This checks that the instance is in the cluster.
6616 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6617 assert self.instance is not None, \
6618 "Cannot retrieve locked instance %s" % self.op.instance_name
6619 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6620 _CheckNodeOnline(self, instance.primary_node)
6622 # check bridges existence
6623 _CheckInstanceBridgesExist(self, instance)
6625 def Exec(self, feedback_fn):
6626 """Reboot the instance.
6629 instance = self.instance
6630 ignore_secondaries = self.op.ignore_secondaries
6631 reboot_type = self.op.reboot_type
6633 remote_info = self.rpc.call_instance_info(instance.primary_node,
6634 instance.name,
6635 instance.hypervisor)
6636 remote_info.Raise("Error checking node %s" % instance.primary_node)
6637 instance_running = bool(remote_info.payload)
6639 node_current = instance.primary_node
6641 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6642 constants.INSTANCE_REBOOT_HARD]:
6643 for disk in instance.disks:
6644 self.cfg.SetDiskID(disk, node_current)
6645 result = self.rpc.call_instance_reboot(node_current, instance,
6646 reboot_type,
6647 self.op.shutdown_timeout)
6648 result.Raise("Could not reboot instance")
6649 else:
6650 if instance_running:
6651 result = self.rpc.call_instance_shutdown(node_current, instance,
6652 self.op.shutdown_timeout)
6653 result.Raise("Could not shutdown instance for full reboot")
6654 _ShutdownInstanceDisks(self, instance)
6655 else:
6656 self.LogInfo("Instance %s was already stopped, starting now",
6657 instance.name)
6658 _StartInstanceDisks(self, instance, ignore_secondaries)
6659 result = self.rpc.call_instance_start(node_current,
6660 (instance, None, None), False)
6661 msg = result.fail_msg
6662 if msg:
6663 _ShutdownInstanceDisks(self, instance)
6664 raise errors.OpExecError("Could not start instance for"
6665 " full reboot: %s" % msg)
6667 self.cfg.MarkInstanceUp(instance.name)
6670 class LUInstanceShutdown(LogicalUnit):
6671 """Shutdown an instance.
6674 HPATH = "instance-stop"
6675 HTYPE = constants.HTYPE_INSTANCE
6678 def ExpandNames(self):
6679 self._ExpandAndLockInstance()
6681 def BuildHooksEnv(self):
6684 This runs on master, primary and secondary nodes of the instance.
6687 env = _BuildInstanceHookEnvByObject(self, self.instance)
6688 env["TIMEOUT"] = self.op.timeout
6689 return env
6691 def BuildHooksNodes(self):
6692 """Build hooks nodes.
6695 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6696 return (nl, nl)
6698 def CheckPrereq(self):
6699 """Check prerequisites.
6701 This checks that the instance is in the cluster.
6704 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6705 assert self.instance is not None, \
6706 "Cannot retrieve locked instance %s" % self.op.instance_name
6708 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6710 self.primary_offline = \
6711 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6713 if self.primary_offline and self.op.ignore_offline_nodes:
6714 self.proc.LogWarning("Ignoring offline primary node")
6716 _CheckNodeOnline(self, self.instance.primary_node)
6718 def Exec(self, feedback_fn):
6719 """Shutdown the instance.
6722 instance = self.instance
6723 node_current = instance.primary_node
6724 timeout = self.op.timeout
6726 if not self.op.no_remember:
6727 self.cfg.MarkInstanceDown(instance.name)
6729 if self.primary_offline:
6730 assert self.op.ignore_offline_nodes
6731 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6733 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6734 msg = result.fail_msg
6735 if msg:
6736 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6738 _ShutdownInstanceDisks(self, instance)
6741 class LUInstanceReinstall(LogicalUnit):
6742 """Reinstall an instance.
6745 HPATH = "instance-reinstall"
6746 HTYPE = constants.HTYPE_INSTANCE
6749 def ExpandNames(self):
6750 self._ExpandAndLockInstance()
6752 def BuildHooksEnv(self):
6755 This runs on master, primary and secondary nodes of the instance.
6758 return _BuildInstanceHookEnvByObject(self, self.instance)
6760 def BuildHooksNodes(self):
6761 """Build hooks nodes.
6764 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6765 return (nl, nl)
6767 def CheckPrereq(self):
6768 """Check prerequisites.
6770 This checks that the instance is in the cluster and is not running.
6773 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6774 assert instance is not None, \
6775 "Cannot retrieve locked instance %s" % self.op.instance_name
6776 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6777 " offline, cannot reinstall")
6778 for node in instance.secondary_nodes:
6779 _CheckNodeOnline(self, node, "Instance secondary node offline,"
6780 " cannot reinstall")
6782 if instance.disk_template == constants.DT_DISKLESS:
6783 raise errors.OpPrereqError("Instance '%s' has no disks" %
6784 self.op.instance_name,
6785 errors.ECODE_INVAL)
6786 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
6788 if self.op.os_type is not None:
6790 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6791 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6792 instance_os = self.op.os_type
6793 else:
6794 instance_os = instance.os
6796 nodelist = list(instance.all_nodes)
6798 if self.op.osparams:
6799 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6800 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6801 self.os_inst = i_osdict # the new dict (without defaults)
6802 else:
6803 self.os_inst = {}
6805 self.instance = instance
6807 def Exec(self, feedback_fn):
6808 """Reinstall the instance.
6811 inst = self.instance
6813 if self.op.os_type is not None:
6814 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6815 inst.os = self.op.os_type
6816 # Write to configuration
6817 self.cfg.Update(inst, feedback_fn)
6819 _StartInstanceDisks(self, inst, None)
6820 try:
6821 feedback_fn("Running the instance OS create scripts...")
6822 # FIXME: pass debug option from opcode to backend
6823 result = self.rpc.call_instance_os_add(inst.primary_node,
6824 (inst, self.os_inst), True,
6825 self.op.debug_level)
6826 result.Raise("Could not install OS for instance %s on node %s" %
6827 (inst.name, inst.primary_node))
6828 finally:
6829 _ShutdownInstanceDisks(self, inst)
6832 class LUInstanceRecreateDisks(LogicalUnit):
6833 """Recreate an instance's missing disks.
6836 HPATH = "instance-recreate-disks"
6837 HTYPE = constants.HTYPE_INSTANCE
6840 def CheckArguments(self):
6841 # normalise the disk list
6842 self.op.disks = sorted(frozenset(self.op.disks))
6844 def ExpandNames(self):
6845 self._ExpandAndLockInstance()
6846 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6847 if self.op.nodes:
6848 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6849 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6850 else:
6851 self.needed_locks[locking.LEVEL_NODE] = []
6853 def DeclareLocks(self, level):
6854 if level == locking.LEVEL_NODE:
6855 # if we replace the nodes, we only need to lock the old primary,
6856 # otherwise we need to lock all nodes for disk re-creation
6857 primary_only = bool(self.op.nodes)
6858 self._LockInstancesNodes(primary_only=primary_only)
6859 elif level == locking.LEVEL_NODE_RES:
6861 self.needed_locks[locking.LEVEL_NODE_RES] = \
6862 self.needed_locks[locking.LEVEL_NODE][:]
6864 def BuildHooksEnv(self):
6867 This runs on master, primary and secondary nodes of the instance.
6870 return _BuildInstanceHookEnvByObject(self, self.instance)
6872 def BuildHooksNodes(self):
6873 """Build hooks nodes.
6876 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6877 return (nl, nl)
6879 def CheckPrereq(self):
6880 """Check prerequisites.
6882 This checks that the instance is in the cluster and is not running.
6885 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6886 assert instance is not None, \
6887 "Cannot retrieve locked instance %s" % self.op.instance_name
6888 if self.op.nodes:
6889 if len(self.op.nodes) != len(instance.all_nodes):
6890 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6891 " %d replacement nodes were specified" %
6892 (instance.name, len(instance.all_nodes),
6893 len(self.op.nodes)),
6894 errors.ECODE_INVAL)
6895 assert instance.disk_template != constants.DT_DRBD8 or \
6896 len(self.op.nodes) == 2
6897 assert instance.disk_template != constants.DT_PLAIN or \
6898 len(self.op.nodes) == 1
6899 primary_node = self.op.nodes[0]
6900 else:
6901 primary_node = instance.primary_node
6902 _CheckNodeOnline(self, primary_node)
6904 if instance.disk_template == constants.DT_DISKLESS:
6905 raise errors.OpPrereqError("Instance '%s' has no disks" %
6906 self.op.instance_name, errors.ECODE_INVAL)
6907 # if we replace nodes *and* the old primary is offline, we don't
6908 # check the instance state
6909 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
6910 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
6911 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6912 if not (self.op.nodes and old_pnode.offline):
6913 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
6914 msg="cannot recreate disks")
6916 if not self.op.disks:
6917 self.op.disks = range(len(instance.disks))
6919 for idx in self.op.disks:
6920 if idx >= len(instance.disks):
6921 raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6922 errors.ECODE_INVAL)
6923 if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6924 raise errors.OpPrereqError("Can't recreate disks partially and"
6925 " change the nodes at the same time",
6926 errors.ECODE_INVAL)
6927 self.instance = instance
6929 def Exec(self, feedback_fn):
6930 """Recreate the disks.
6933 instance = self.instance
6935 assert (self.owned_locks(locking.LEVEL_NODE) ==
6936 self.owned_locks(locking.LEVEL_NODE_RES))
6938 to_skip = []
6939 mods = [] # keeps track of needed logical_id changes
6941 for idx, disk in enumerate(instance.disks):
6942 if idx not in self.op.disks: # disk idx has not been passed in
6943 to_skip.append(idx)
6944 continue
6945 # update secondaries for disks, if needed
6946 if self.op.nodes:
6947 if disk.dev_type == constants.LD_DRBD8:
6948 # need to update the nodes and minors
6949 assert len(self.op.nodes) == 2
6950 assert len(disk.logical_id) == 6 # otherwise disk internals have changed
6952 (_, _, old_port, _, _, old_secret) = disk.logical_id
6953 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6954 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6955 new_minors[0], new_minors[1], old_secret)
6956 assert len(disk.logical_id) == len(new_id)
6957 mods.append((idx, new_id))
6959 # now that we have passed all asserts above, we can apply the mods
6960 # in a single run (to avoid partial changes)
6961 for idx, new_id in mods:
6962 instance.disks[idx].logical_id = new_id
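# Example (editorial, values hypothetical): a DRBD8 logical_id is the
# 6-tuple (nodeA, nodeB, port, minorA, minorB, secret); the loop above
# replaces only the nodes and minors, e.g.:
#
#   old: ("node1", "node2", 11000, 0, 1, "secret")
#   new: ("node3", "node4", 11000, 2, 5, "secret")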
6964 # change primary node, if needed
6965 if self.op.nodes:
6966 instance.primary_node = self.op.nodes[0]
6967 self.LogWarning("Changing the instance's nodes, you will have to"
6968 " remove any disks left on the older nodes manually")
6971 self.cfg.Update(instance, feedback_fn)
6973 _CreateDisks(self, instance, to_skip=to_skip)
6976 class LUInstanceRename(LogicalUnit):
6977 """Rename an instance.
6980 HPATH = "instance-rename"
6981 HTYPE = constants.HTYPE_INSTANCE
6983 def CheckArguments(self):
6987 if self.op.ip_check and not self.op.name_check:
6988 # TODO: make the ip check more flexible and not depend on the name check
6989 raise errors.OpPrereqError("IP address check requires a name check",
6990 errors.ECODE_INVAL)
6992 def BuildHooksEnv(self):
6995 This runs on master, primary and secondary nodes of the instance.
6998 env = _BuildInstanceHookEnvByObject(self, self.instance)
6999 env["INSTANCE_NEW_NAME"] = self.op.new_name
7000 return env
7002 def BuildHooksNodes(self):
7003 """Build hooks nodes.
7006 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7007 return (nl, nl)
7009 def CheckPrereq(self):
7010 """Check prerequisites.
7012 This checks that the instance is in the cluster and is not running.
7015 self.op.instance_name = _ExpandInstanceName(self.cfg,
7016 self.op.instance_name)
7017 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7018 assert instance is not None
7019 _CheckNodeOnline(self, instance.primary_node)
7020 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7021 msg="cannot rename")
7022 self.instance = instance
7024 new_name = self.op.new_name
7025 if self.op.name_check:
7026 hostname = netutils.GetHostname(name=new_name)
7027 if hostname.name != new_name:
7028 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
7029 hostname.name)
7030 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
7031 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
7032 " same as given hostname '%s'") %
7033 (hostname.name, self.op.new_name),
7034 errors.ECODE_INVAL)
7035 new_name = self.op.new_name = hostname.name
7036 if (self.op.ip_check and
7037 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7038 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7039 (hostname.ip, new_name),
7040 errors.ECODE_NOTUNIQUE)
7042 instance_list = self.cfg.GetInstanceList()
7043 if new_name in instance_list and new_name != instance.name:
7044 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7045 new_name, errors.ECODE_EXISTS)
7047 def Exec(self, feedback_fn):
7048 """Rename the instance.
7051 inst = self.instance
7052 old_name = inst.name
7054 rename_file_storage = False
7055 if (inst.disk_template in constants.DTS_FILEBASED and
7056 self.op.new_name != inst.name):
7057 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7058 rename_file_storage = True
7060 self.cfg.RenameInstance(inst.name, self.op.new_name)
7061 # Change the instance lock. This is definitely safe while we hold the BGL.
7062 # Otherwise the new lock would have to be added in acquired mode.
7064 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7065 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7067 # re-read the instance from the configuration after rename
7068 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7070 if rename_file_storage:
7071 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7072 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7073 old_file_storage_dir,
7074 new_file_storage_dir)
7075 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7076 " (but the instance has been renamed in Ganeti)" %
7077 (inst.primary_node, old_file_storage_dir,
7078 new_file_storage_dir))
7080 _StartInstanceDisks(self, inst, None)
7081 try:
7082 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7083 old_name, self.op.debug_level)
7084 msg = result.fail_msg
7085 if msg:
7086 msg = ("Could not run OS rename script for instance %s on node %s"
7087 " (but the instance has been renamed in Ganeti): %s" %
7088 (inst.name, inst.primary_node, msg))
7089 self.proc.LogWarning(msg)
7090 finally:
7091 _ShutdownInstanceDisks(self, inst)
7093 return inst.name
7096 class LUInstanceRemove(LogicalUnit):
7097 """Remove an instance.
7100 HPATH = "instance-remove"
7101 HTYPE = constants.HTYPE_INSTANCE
7104 def ExpandNames(self):
7105 self._ExpandAndLockInstance()
7106 self.needed_locks[locking.LEVEL_NODE] = []
7107 self.needed_locks[locking.LEVEL_NODE_RES] = []
7108 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7110 def DeclareLocks(self, level):
7111 if level == locking.LEVEL_NODE:
7112 self._LockInstancesNodes()
7113 elif level == locking.LEVEL_NODE_RES:
7115 self.needed_locks[locking.LEVEL_NODE_RES] = \
7116 self.needed_locks[locking.LEVEL_NODE][:]
7118 def BuildHooksEnv(self):
7121 This runs on master, primary and secondary nodes of the instance.
7124 env = _BuildInstanceHookEnvByObject(self, self.instance)
7125 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7126 return env
7128 def BuildHooksNodes(self):
7129 """Build hooks nodes.
7132 nl = [self.cfg.GetMasterNode()]
7133 nl_post = list(self.instance.all_nodes) + nl
7134 return (nl, nl_post)
7136 def CheckPrereq(self):
7137 """Check prerequisites.
7139 This checks that the instance is in the cluster.
7142 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7143 assert self.instance is not None, \
7144 "Cannot retrieve locked instance %s" % self.op.instance_name
7146 def Exec(self, feedback_fn):
7147 """Remove the instance.
7150 instance = self.instance
7151 logging.info("Shutting down instance %s on node %s",
7152 instance.name, instance.primary_node)
7154 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7155 self.op.shutdown_timeout)
7156 msg = result.fail_msg
7157 if msg:
7158 if self.op.ignore_failures:
7159 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7160 else:
7161 raise errors.OpExecError("Could not shutdown instance %s on"
7162 " node %s: %s" %
7163 (instance.name, instance.primary_node, msg))
7165 assert (self.owned_locks(locking.LEVEL_NODE) ==
7166 self.owned_locks(locking.LEVEL_NODE_RES))
7167 assert not (set(instance.all_nodes) -
7168 self.owned_locks(locking.LEVEL_NODE)), \
7169 "Not owning correct locks"
7171 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7174 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7175 """Utility function to remove an instance.
7178 logging.info("Removing block devices for instance %s", instance.name)
7180 if not _RemoveDisks(lu, instance):
7181 if not ignore_failures:
7182 raise errors.OpExecError("Can't remove instance's disks")
7183 feedback_fn("Warning: can't remove instance's disks")
7185 logging.info("Removing instance %s out of cluster config", instance.name)
7187 lu.cfg.RemoveInstance(instance.name)
7189 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7190 "Instance lock removal conflict"
7192 # Remove lock for the instance
7193 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7196 class LUInstanceQuery(NoHooksLU):
7197 """Logical unit for querying instances.
7200 # pylint: disable=W0142
7203 def CheckArguments(self):
7204 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7205 self.op.output_fields, self.op.use_locking)
7207 def ExpandNames(self):
7208 self.iq.ExpandNames(self)
7210 def DeclareLocks(self, level):
7211 self.iq.DeclareLocks(self, level)
7213 def Exec(self, feedback_fn):
7214 return self.iq.OldStyleQuery(self)
7217 class LUInstanceFailover(LogicalUnit):
7218 """Failover an instance.
7221 HPATH = "instance-failover"
7222 HTYPE = constants.HTYPE_INSTANCE
7225 def CheckArguments(self):
7226 """Check the arguments.
7229 self.iallocator = getattr(self.op, "iallocator", None)
7230 self.target_node = getattr(self.op, "target_node", None)
7232 def ExpandNames(self):
7233 self._ExpandAndLockInstance()
7235 if self.op.target_node is not None:
7236 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7238 self.needed_locks[locking.LEVEL_NODE] = []
7239 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7241 ignore_consistency = self.op.ignore_consistency
7242 shutdown_timeout = self.op.shutdown_timeout
7243 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7244 cleanup=False,
7245 failover=True,
7246 ignore_consistency=ignore_consistency,
7247 shutdown_timeout=shutdown_timeout,
7248 ignore_ipolicy=self.op.ignore_ipolicy)
7249 self.tasklets = [self._migrater]
7251 def DeclareLocks(self, level):
7252 if level == locking.LEVEL_NODE:
7253 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7254 if instance.disk_template in constants.DTS_EXT_MIRROR:
7255 if self.op.target_node is None:
7256 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7257 else:
7258 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7259 self.op.target_node]
7260 del self.recalculate_locks[locking.LEVEL_NODE]
7261 else:
7262 self._LockInstancesNodes()
7264 def BuildHooksEnv(self):
7267 This runs on master, primary and secondary nodes of the instance.
7270 instance = self._migrater.instance
7271 source_node = instance.primary_node
7272 target_node = self.op.target_node
7273 env = {
7274 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7275 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7276 "OLD_PRIMARY": source_node,
7277 "NEW_PRIMARY": target_node,
7278 }
7280 if instance.disk_template in constants.DTS_INT_MIRROR:
7281 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7282 env["NEW_SECONDARY"] = source_node
7283 else:
7284 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7286 env.update(_BuildInstanceHookEnvByObject(self, instance))
7288 return env
7290 def BuildHooksNodes(self):
7291 """Build hooks nodes.
7294 instance = self._migrater.instance
7295 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7296 return (nl, nl + [instance.primary_node])
7299 class LUInstanceMigrate(LogicalUnit):
7300 """Migrate an instance.
7302 This is migration without shutting down, compared to the failover,
7303 which is done with shutdown.
7306 HPATH = "instance-migrate"
7307 HTYPE = constants.HTYPE_INSTANCE
7310 def ExpandNames(self):
7311 self._ExpandAndLockInstance()
7313 if self.op.target_node is not None:
7314 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7316 self.needed_locks[locking.LEVEL_NODE] = []
7317 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7319 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7320 cleanup=self.op.cleanup,
7322 fallback=self.op.allow_failover,
7323 ignore_ipolicy=self.op.ignore_ipolicy)
7324 self.tasklets = [self._migrater]
7326 def DeclareLocks(self, level):
7327 if level == locking.LEVEL_NODE:
7328 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7329 if instance.disk_template in constants.DTS_EXT_MIRROR:
7330 if self.op.target_node is None:
7331 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7332 else:
7333 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7334 self.op.target_node]
7335 del self.recalculate_locks[locking.LEVEL_NODE]
7336 else:
7337 self._LockInstancesNodes()
7339 def BuildHooksEnv(self):
7342 This runs on master, primary and secondary nodes of the instance.
7345 instance = self._migrater.instance
7346 source_node = instance.primary_node
7347 target_node = self.op.target_node
7348 env = _BuildInstanceHookEnvByObject(self, instance)
7349 env.update({
7350 "MIGRATE_LIVE": self._migrater.live,
7351 "MIGRATE_CLEANUP": self.op.cleanup,
7352 "OLD_PRIMARY": source_node,
7353 "NEW_PRIMARY": target_node,
7354 })
7356 if instance.disk_template in constants.DTS_INT_MIRROR:
7357 env["OLD_SECONDARY"] = target_node
7358 env["NEW_SECONDARY"] = source_node
7359 else:
7360 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7362 return env
7364 def BuildHooksNodes(self):
7365 """Build hooks nodes.
7368 instance = self._migrater.instance
7369 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7370 return (nl, nl + [instance.primary_node])
7373 class LUInstanceMove(LogicalUnit):
7374 """Move an instance by data-copying.
7377 HPATH = "instance-move"
7378 HTYPE = constants.HTYPE_INSTANCE
7381 def ExpandNames(self):
7382 self._ExpandAndLockInstance()
7383 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7384 self.op.target_node = target_node
7385 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7386 self.needed_locks[locking.LEVEL_NODE_RES] = []
7387 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7389 def DeclareLocks(self, level):
7390 if level == locking.LEVEL_NODE:
7391 self._LockInstancesNodes(primary_only=True)
7392 elif level == locking.LEVEL_NODE_RES:
7394 self.needed_locks[locking.LEVEL_NODE_RES] = \
7395 self.needed_locks[locking.LEVEL_NODE][:]
7397 def BuildHooksEnv(self):
7400 This runs on master, primary and secondary nodes of the instance.
7403 env = {
7404 "TARGET_NODE": self.op.target_node,
7405 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7406 }
7407 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7409 return env
7410 def BuildHooksNodes(self):
7411 """Build hooks nodes.
7414 nl = [
7415 self.cfg.GetMasterNode(),
7416 self.instance.primary_node,
7417 self.op.target_node,
7418 ]
7419 return (nl, nl)
7421 def CheckPrereq(self):
7422 """Check prerequisites.
7424 This checks that the instance is in the cluster.
7427 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7428 assert self.instance is not None, \
7429 "Cannot retrieve locked instance %s" % self.op.instance_name
7431 node = self.cfg.GetNodeInfo(self.op.target_node)
7432 assert node is not None, \
7433 "Cannot retrieve locked node %s" % self.op.target_node
7435 self.target_node = target_node = node.name
7437 if target_node == instance.primary_node:
7438 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7439 (instance.name, target_node),
7440 errors.ECODE_STATE)
7442 bep = self.cfg.GetClusterInfo().FillBE(instance)
7444 for idx, dsk in enumerate(instance.disks):
7445 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7446 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7447 " cannot copy" % idx, errors.ECODE_STATE)
7449 _CheckNodeOnline(self, target_node)
7450 _CheckNodeNotDrained(self, target_node)
7451 _CheckNodeVmCapable(self, target_node)
7453 if instance.admin_state == constants.ADMINST_UP:
7454 # check memory requirements on the secondary node
7455 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7456 instance.name, bep[constants.BE_MAXMEM],
7457 instance.hypervisor)
7458 else:
7459 self.LogInfo("Not checking memory on the secondary node as"
7460 " instance will not be started")
7462 # check bridge existence
7463 _CheckInstanceBridgesExist(self, instance, node=target_node)
7465 def Exec(self, feedback_fn):
7466 """Move an instance.
7468 The move is done by shutting it down on its present node, copying
7469 the data over (slow) and starting it on the new node.
7472 instance = self.instance
7474 source_node = instance.primary_node
7475 target_node = self.target_node
7477 self.LogInfo("Shutting down instance %s on source node %s",
7478 instance.name, source_node)
7480 assert (self.owned_locks(locking.LEVEL_NODE) ==
7481 self.owned_locks(locking.LEVEL_NODE_RES))
7483 result = self.rpc.call_instance_shutdown(source_node, instance,
7484 self.op.shutdown_timeout)
7485 msg = result.fail_msg
7486 if msg:
7487 if self.op.ignore_consistency:
7488 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7489 " Proceeding anyway. Please make sure node"
7490 " %s is down. Error details: %s",
7491 instance.name, source_node, source_node, msg)
7492 else:
7493 raise errors.OpExecError("Could not shutdown instance %s on"
7494 " node %s: %s" %
7495 (instance.name, source_node, msg))
7497 # create the target disks
7498 try:
7499 _CreateDisks(self, instance, target_node=target_node)
7500 except errors.OpExecError:
7501 self.LogWarning("Device creation failed, reverting...")
7502 try:
7503 _RemoveDisks(self, instance, target_node=target_node)
7504 finally:
7505 self.cfg.ReleaseDRBDMinors(instance.name)
7506 raise
7508 cluster_name = self.cfg.GetClusterInfo().cluster_name
7510 errs = []
7511 # activate, get path, copy the data over
7512 for idx, disk in enumerate(instance.disks):
7513 self.LogInfo("Copying data for disk %d", idx)
7514 result = self.rpc.call_blockdev_assemble(target_node, disk,
7515 instance.name, True, idx)
7516 if result.fail_msg:
7517 self.LogWarning("Can't assemble newly created disk %d: %s",
7518 idx, result.fail_msg)
7519 errs.append(result.fail_msg)
7520 break
7521 dev_path = result.payload
7522 result = self.rpc.call_blockdev_export(source_node, disk,
7523 target_node, dev_path,
7524 cluster_name)
7525 if result.fail_msg:
7526 self.LogWarning("Can't copy data over for disk %d: %s",
7527 idx, result.fail_msg)
7528 errs.append(result.fail_msg)
7529 break
7531 if errs:
7532 self.LogWarning("Some disks failed to copy, aborting")
7533 try:
7534 _RemoveDisks(self, instance, target_node=target_node)
7535 finally:
7536 self.cfg.ReleaseDRBDMinors(instance.name)
7537 raise errors.OpExecError("Errors during disk copy: %s" %
7538 (",".join(errs),))
7540 instance.primary_node = target_node
7541 self.cfg.Update(instance, feedback_fn)
7543 self.LogInfo("Removing the disks on the original node")
7544 _RemoveDisks(self, instance, target_node=source_node)
7546 # Only start the instance if it's marked as up
7547 if instance.admin_state == constants.ADMINST_UP:
7548 self.LogInfo("Starting instance %s on node %s",
7549 instance.name, target_node)
7551 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7552 ignore_secondaries=True)
7554 _ShutdownInstanceDisks(self, instance)
7555 raise errors.OpExecError("Can't activate the instance's disks")
7557 result = self.rpc.call_instance_start(target_node,
7558 (instance, None, None), False)
7559 msg = result.fail_msg
7561 _ShutdownInstanceDisks(self, instance)
7562 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7563 (instance.name, target_node, msg))
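# Editor's sketch (not part of Ganeti): the copy loop above collects failures
# in "errs" instead of aborting on the first one, so that the revert happens
# exactly once. The same pattern in isolation, with hypothetical copy_fn and
# cleanup_fn callables standing in for the RPC calls:
def _ExampleCopyAllOrRevert(disks, copy_fn, cleanup_fn):
  """Copy every disk, collecting errors, and revert if any copy failed."""
  errs = []
  for idx, disk in enumerate(disks):
    err = copy_fn(idx, disk)  # None on success, an error message on failure
    if err:
      errs.append(err)
  if errs:
    cleanup_fn()  # remove the partially created target disks
    raise errors.OpExecError("Errors during disk copy: %s" % ",".join(errs))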
7566 class LUNodeMigrate(LogicalUnit):
7567 """Migrate all instances from a node.
7570 HPATH = "node-migrate"
7571 HTYPE = constants.HTYPE_NODE
7574 def CheckArguments(self):
7577 def ExpandNames(self):
7578 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7580 self.share_locks = _ShareAll()
7581 self.needed_locks = {
7582 locking.LEVEL_NODE: [self.op.node_name],
7585 def BuildHooksEnv(self):
7588 This runs on the master, the primary and all the secondaries.
7592 "NODE_NAME": self.op.node_name,
7595 def BuildHooksNodes(self):
7596 """Build hooks nodes.
7599 nl = [self.cfg.GetMasterNode()]
7602 def CheckPrereq(self):
7605 def Exec(self, feedback_fn):
7606 # Prepare jobs for migrating instances
7608 [opcodes.OpInstanceMigrate(instance_name=inst.name,
7611 iallocator=self.op.iallocator,
7612 target_node=self.op.target_node)]
7613 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7616 # TODO: Run iallocator in this opcode and pass correct placement options to
7617 # OpInstanceMigrate. Since other jobs can modify the cluster between
7618 # running the iallocator and the actual migration, a good consistency model
7619 # will have to be found.
7621 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7622 frozenset([self.op.node_name]))
7624 return ResultWithJobs(jobs)
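# Editor's sketch (not part of Ganeti): the jobs built above form a list of
# lists, one single-opcode job per primary instance, which ResultWithJobs
# hands over for submission. The shape in isolation, with a hypothetical
# make_opcode factory:
def _ExampleJobsPerInstance(instance_names, make_opcode):
  """Build a list of single-opcode jobs, one per instance name."""
  return [[make_opcode(name)] for name in instance_names]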
7627 class TLMigrateInstance(Tasklet):
7628 """Tasklet class for instance migration.
7631 @ivar live: whether the migration will be done live or non-live;
7632 this variable is initialized only after CheckPrereq has run
7633 @type cleanup: boolean
7634 @ivar cleanup: Whether we clean up from a failed migration
7635 @type iallocator: string
7636 @ivar iallocator: The iallocator used to determine target_node
7637 @type target_node: string
7638 @ivar target_node: If given, the target_node to reallocate the instance to
7639 @type failover: boolean
7640 @ivar failover: Whether operation results in failover or migration
7641 @type fallback: boolean
7642 @ivar fallback: Whether fallback to failover is allowed if migration is not
7643 possible
7644 @type ignore_consistency: boolean
7645 @ivar ignore_consistency: Whether we should ignore consistency between source
7646 and target node
7647 @type shutdown_timeout: int
7648 @ivar shutdown_timeout: in case of failover, the timeout to use for the shutdown
7649 @type ignore_ipolicy: bool
7650 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
7655 _MIGRATION_POLL_INTERVAL = 1 # seconds
7656 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7658 def __init__(self, lu, instance_name, cleanup=False,
7659 failover=False, fallback=False,
7660 ignore_consistency=False,
7661 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
7662 ignore_ipolicy=False):
7663 """Initializes this class.
7666 Tasklet.__init__(self, lu)
7669 self.instance_name = instance_name
7670 self.cleanup = cleanup
7671 self.live = False # will be overridden later
7672 self.failover = failover
7673 self.fallback = fallback
7674 self.ignore_consistency = ignore_consistency
7675 self.shutdown_timeout = shutdown_timeout
7676 self.ignore_ipolicy = ignore_ipolicy
7678 def CheckPrereq(self):
7679 """Check prerequisites.
7681 This checks that the instance is in the cluster.
7684 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7685 instance = self.cfg.GetInstanceInfo(instance_name)
7686 assert instance is not None
7687 self.instance = instance
7688 cluster = self.cfg.GetClusterInfo()
7690 if (not self.cleanup and
7691 instance.admin_state != constants.ADMINST_UP and
7692 not self.failover and self.fallback):
7693 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7694 " switching to failover")
7695 self.failover = True
7697 if instance.disk_template not in constants.DTS_MIRRORED:
7702 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7703 " %s" % (instance.disk_template, text),
7706 if instance.disk_template in constants.DTS_EXT_MIRROR:
7707 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7709 if self.lu.op.iallocator:
7710 self._RunAllocator()
7712 # We set self.target_node as it is required by
7714 self.target_node = self.lu.op.target_node
7716 # Check that the target node is correct in terms of instance policy
7717 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
7718 ipolicy = _CalculateGroupIPolicy(cluster, nodeinfo.group)
7719 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
7720 ignore=self.ignore_ipolicy)
7722 # self.target_node is already populated, either directly or by the
7723 # iallocator run
7724 target_node = self.target_node
7725 if self.target_node == instance.primary_node:
7726 raise errors.OpPrereqError("Cannot migrate instance %s"
7727 " to its primary (%s)" %
7728 (instance.name, instance.primary_node))
7730 if len(self.lu.tasklets) == 1:
7731 # It is safe to release locks only when we're the only tasklet
7732 # in this LU
7733 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7734 keep=[instance.primary_node, self.target_node])
7737 secondary_nodes = instance.secondary_nodes
7738 if not secondary_nodes:
7739 raise errors.ConfigurationError("No secondary node but using"
7740 " %s disk template" %
7741 instance.disk_template)
7742 target_node = secondary_nodes[0]
7743 if self.lu.op.iallocator or (self.lu.op.target_node and
7744 self.lu.op.target_node != target_node):
7746 text = "failed over"
7749 raise errors.OpPrereqError("Instances with disk template %s cannot"
7750 " be %s to arbitrary nodes"
7751 " (neither an iallocator nor a target"
7752 " node can be passed)" %
7753 (instance.disk_template, text),
7755 nodeinfo = self.cfg.GetNodeInfo(target_node)
7756 ipolicy = _CalculateGroupIPolicy(cluster, nodeinfo.group)
7757 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
7758 ignore=self.ignore_ipolicy)
7760 i_be = cluster.FillBE(instance)
7762 # check memory requirements on the target node
7763 if not self.failover or instance.admin_state == constants.ADMINST_UP:
7764 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7765 instance.name, i_be[constants.BE_MAXMEM],
7766 instance.hypervisor)
7768 self.lu.LogInfo("Not checking memory on the target node as the"
7769 " instance will not be started")
7771 # check if failover must be forced instead of migration
7772 if (not self.cleanup and not self.failover and
7773 i_be[constants.BE_ALWAYS_FAILOVER]):
7774 if self.fallback:
7775 self.lu.LogInfo("Instance configured to always failover; fallback"
7776 " to failover")
7777 self.failover = True
7778 else:
7779 raise errors.OpPrereqError("This instance has been configured to"
7780 " always failover, please allow failover",
7781 errors.ECODE_STATE)
7783 # check bridge existence
7784 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7786 if not self.cleanup:
7787 _CheckNodeNotDrained(self.lu, target_node)
7788 if not self.failover:
7789 result = self.rpc.call_instance_migratable(instance.primary_node,
7791 if result.fail_msg and self.fallback:
7792 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7793 " failover")
7794 self.failover = True
7796 result.Raise("Can't migrate, please use failover",
7797 prereq=True, ecode=errors.ECODE_STATE)
7799 assert not (self.failover and self.cleanup)
7801 if not self.failover:
7802 if self.lu.op.live is not None and self.lu.op.mode is not None:
7803 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7804 " parameters are accepted",
7806 if self.lu.op.live is not None:
7808 self.lu.op.mode = constants.HT_MIGRATION_LIVE
7810 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7811 # reset the 'live' parameter to None so that repeated
7812 # invocations of CheckPrereq do not raise an exception
7813 self.lu.op.live = None
7814 elif self.lu.op.mode is None:
7815 # read the default value from the hypervisor
7816 i_hv = cluster.FillHV(self.instance, skip_globals=False)
7817 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7819 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7821 # Failover is never live
7824 def _RunAllocator(self):
7825 """Run the allocator based on input opcode.
7828 # FIXME: add a self.ignore_ipolicy option
7829 ial = IAllocator(self.cfg, self.rpc,
7830 mode=constants.IALLOCATOR_MODE_RELOC,
7831 name=self.instance_name,
7832 # TODO See why hail breaks with a single node below
7833 relocate_from=[self.instance.primary_node,
7834 self.instance.primary_node],
7837 ial.Run(self.lu.op.iallocator)
7840 raise errors.OpPrereqError("Can't compute nodes using"
7841 " iallocator '%s': %s" %
7842 (self.lu.op.iallocator, ial.info),
7844 if len(ial.result) != ial.required_nodes:
7845 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7846 " of nodes (%s), required %s" %
7847 (self.lu.op.iallocator, len(ial.result),
7848 ial.required_nodes), errors.ECODE_FAULT)
7849 self.target_node = ial.result[0]
7850 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7851 self.instance_name, self.lu.op.iallocator,
7852 utils.CommaJoin(ial.result))
7854 def _WaitUntilSync(self):
7855 """Poll with custom rpc for disk sync.
7857 This uses our own step-based rpc call.
7860 self.feedback_fn("* wait until resync is done")
7864 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7866 self.instance.disks)
7868 for node, nres in result.items():
7869 nres.Raise("Cannot resync disks on node %s" % node)
7870 node_done, node_percent = nres.payload
7871 all_done = all_done and node_done
7872 if node_percent is not None:
7873 min_percent = min(min_percent, node_percent)
7875 if min_percent < 100:
7876 self.feedback_fn(" - progress: %.1f%%" % min_percent)
7879 def _EnsureSecondary(self, node):
7880 """Demote a node to secondary.
7883 self.feedback_fn("* switching node %s to secondary mode" % node)
7885 for dev in self.instance.disks:
7886 self.cfg.SetDiskID(dev, node)
7888 result = self.rpc.call_blockdev_close(node, self.instance.name,
7889 self.instance.disks)
7890 result.Raise("Cannot change disk to secondary on node %s" % node)
7892 def _GoStandalone(self):
7893 """Disconnect from the network.
7896 self.feedback_fn("* changing into standalone mode")
7897 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7898 self.instance.disks)
7899 for node, nres in result.items():
7900 nres.Raise("Cannot disconnect disks on node %s" % node)
7902 def _GoReconnect(self, multimaster):
7903 """Reconnect to the network.
7909 msg = "single-master"
7910 self.feedback_fn("* changing disks into %s mode" % msg)
7911 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7912 self.instance.disks,
7913 self.instance.name, multimaster)
7914 for node, nres in result.items():
7915 nres.Raise("Cannot change disks config on node %s" % node)
7917 def _ExecCleanup(self):
7918 """Try to cleanup after a failed migration.
7920 The cleanup is done by:
7921 - check that the instance is running only on one node
7922 (and update the config if needed)
7923 - change disks on its secondary node to secondary
7924 - wait until disks are fully synchronized
7925 - disconnect from the network
7926 - change disks into single-master mode
7927 - wait again until disks are fully synchronized
7930 instance = self.instance
7931 target_node = self.target_node
7932 source_node = self.source_node
7934 # check running on only one node
7935 self.feedback_fn("* checking where the instance actually runs"
7936 " (if this hangs, the hypervisor might be in"
7938 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7939 for node, result in ins_l.items():
7940 result.Raise("Can't contact node %s" % node)
7942 runningon_source = instance.name in ins_l[source_node].payload
7943 runningon_target = instance.name in ins_l[target_node].payload
7945 if runningon_source and runningon_target:
7946 raise errors.OpExecError("Instance seems to be running on two nodes,"
7947 " or the hypervisor is confused; you will have"
7948 " to ensure manually that it runs only on one"
7949 " and restart this operation")
7951 if not (runningon_source or runningon_target):
7952 raise errors.OpExecError("Instance does not seem to be running at all;"
7953 " in this case it's safer to repair by"
7954 " running 'gnt-instance stop' to ensure disk"
7955 " shutdown, and then restarting it")
7957 if runningon_target:
7958 # the migration has actually succeeded, we need to update the config
7959 self.feedback_fn("* instance running on secondary node (%s),"
7960 " updating config" % target_node)
7961 instance.primary_node = target_node
7962 self.cfg.Update(instance, self.feedback_fn)
7963 demoted_node = source_node
7965 self.feedback_fn("* instance confirmed to be running on its"
7966 " primary node (%s)" % source_node)
7967 demoted_node = target_node
7969 if instance.disk_template in constants.DTS_INT_MIRROR:
7970 self._EnsureSecondary(demoted_node)
7972 self._WaitUntilSync()
7973 except errors.OpExecError:
7974 # we ignore errors here, since if the device is standalone, it
7975 # won't be able to sync
7977 self._GoStandalone()
7978 self._GoReconnect(False)
7979 self._WaitUntilSync()
7981 self.feedback_fn("* done")
7983 def _RevertDiskStatus(self):
7984 """Try to revert the disk status after a failed migration.
7987 target_node = self.target_node
7988 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7992 self._EnsureSecondary(target_node)
7993 self._GoStandalone()
7994 self._GoReconnect(False)
7995 self._WaitUntilSync()
7996 except errors.OpExecError, err:
7997 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7998 " please try to recover the instance manually;"
7999 " error '%s'" % str(err))
8001 def _AbortMigration(self):
8002 """Call the hypervisor code to abort a started migration.
8005 instance = self.instance
8006 target_node = self.target_node
8007 source_node = self.source_node
8008 migration_info = self.migration_info
8010 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8014 abort_msg = abort_result.fail_msg
8016 logging.error("Aborting migration failed on target node %s: %s",
8017 target_node, abort_msg)
8018 # Don't raise an exception here, as we still have to try to revert the
8019 # disk status, even if this step failed.
8021 abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
8022 instance, False, self.live)
8023 abort_msg = abort_result.fail_msg
8025 logging.error("Aborting migration failed on source node %s: %s",
8026 source_node, abort_msg)
8028 def _ExecMigration(self):
8029 """Migrate an instance.
8031 The migrate is done by:
8032 - change the disks into dual-master mode
8033 - wait until disks are fully synchronized again
8034 - migrate the instance
8035 - change disks on the new secondary node (the old primary) to secondary
8036 - wait until disks are fully synchronized
8037 - change disks into single-master mode
8040 instance = self.instance
8041 target_node = self.target_node
8042 source_node = self.source_node
8044 # Check for hypervisor version mismatch and warn the user.
8045 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8046 None, [self.instance.hypervisor])
8047 for ninfo in nodeinfo.values():
8048 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8050 (_, _, (src_info, )) = nodeinfo[source_node].payload
8051 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8053 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8054 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8055 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8056 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8057 if src_version != dst_version:
8058 self.feedback_fn("* warning: hypervisor version mismatch between"
8059 " source (%s) and target (%s) node" %
8060 (src_version, dst_version))
8062 self.feedback_fn("* checking disk consistency between source and target")
8063 for dev in instance.disks:
8064 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
8065 raise errors.OpExecError("Disk %s is degraded or not fully"
8066 " synchronized on target node,"
8067 " aborting migration" % dev.iv_name)
8069 # First get the migration information from the remote node
8070 result = self.rpc.call_migration_info(source_node, instance)
8071 msg = result.fail_msg
8073 log_err = ("Failed fetching source migration information from %s: %s" %
8075 logging.error(log_err)
8076 raise errors.OpExecError(log_err)
8078 self.migration_info = migration_info = result.payload
8080 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8081 # Then switch the disks to master/master mode
8082 self._EnsureSecondary(target_node)
8083 self._GoStandalone()
8084 self._GoReconnect(True)
8085 self._WaitUntilSync()
8087 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8088 result = self.rpc.call_accept_instance(target_node,
8091 self.nodes_ip[target_node])
8093 msg = result.fail_msg
8095 logging.error("Instance pre-migration failed, trying to revert"
8096 " disk status: %s", msg)
8097 self.feedback_fn("Pre-migration failed, aborting")
8098 self._AbortMigration()
8099 self._RevertDiskStatus()
8100 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8101 (instance.name, msg))
8103 self.feedback_fn("* migrating instance to %s" % target_node)
8104 result = self.rpc.call_instance_migrate(source_node, instance,
8105 self.nodes_ip[target_node],
8107 msg = result.fail_msg
8109 logging.error("Instance migration failed, trying to revert"
8110 " disk status: %s", msg)
8111 self.feedback_fn("Migration failed, aborting")
8112 self._AbortMigration()
8113 self._RevertDiskStatus()
8114 raise errors.OpExecError("Could not migrate instance %s: %s" %
8115 (instance.name, msg))
8117 self.feedback_fn("* starting memory transfer")
8118 last_feedback = time.time()
8120 result = self.rpc.call_instance_get_migration_status(source_node,
8122 msg = result.fail_msg
8123 ms = result.payload # MigrationStatus instance
8124 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8125 logging.error("Instance migration failed, trying to revert"
8126 " disk status: %s", msg)
8127 self.feedback_fn("Migration failed, aborting")
8128 self._AbortMigration()
8129 self._RevertDiskStatus()
8130 raise errors.OpExecError("Could not migrate instance %s: %s" %
8131 (instance.name, msg))
8133 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8134 self.feedback_fn("* memory transfer complete")
8137 if (utils.TimeoutExpired(last_feedback,
8138 self._MIGRATION_FEEDBACK_INTERVAL) and
8139 ms.transferred_ram is not None):
8140 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8141 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8142 last_feedback = time.time()
8144 time.sleep(self._MIGRATION_POLL_INTERVAL)
8146 result = self.rpc.call_instance_finalize_migration_src(source_node,
8150 msg = result.fail_msg
8152 logging.error("Instance migration succeeded, but finalization failed"
8153 " on the source node: %s", msg)
8154 raise errors.OpExecError("Could not finalize instance migration: %s" %
8157 instance.primary_node = target_node
8159 # distribute new instance config to the other nodes
8160 self.cfg.Update(instance, self.feedback_fn)
8162 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8166 msg = result.fail_msg
8168 logging.error("Instance migration succeeded, but finalization failed"
8169 " on the target node: %s", msg)
8170 raise errors.OpExecError("Could not finalize instance migration: %s" %
8173 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8174 self._EnsureSecondary(source_node)
8175 self._WaitUntilSync()
8176 self._GoStandalone()
8177 self._GoReconnect(False)
8178 self._WaitUntilSync()
8180 self.feedback_fn("* done")
8182 def _ExecFailover(self):
8183 """Failover an instance.
8185 The failover is done by shutting it down on its present node and
8186 starting it on the secondary.
8189 instance = self.instance
8190 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8192 source_node = instance.primary_node
8193 target_node = self.target_node
8195 if instance.admin_state == constants.ADMINST_UP:
8196 self.feedback_fn("* checking disk consistency between source and target")
8197 for dev in instance.disks:
8198 # for drbd, these are drbd over lvm
8199 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
8200 if primary_node.offline:
8201 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8203 (primary_node.name, dev.iv_name, target_node))
8204 elif not self.ignore_consistency:
8205 raise errors.OpExecError("Disk %s is degraded on target node,"
8206 " aborting failover" % dev.iv_name)
8208 self.feedback_fn("* not checking disk consistency as instance is not"
8211 self.feedback_fn("* shutting down instance on source node")
8212 logging.info("Shutting down instance %s on node %s",
8213 instance.name, source_node)
8215 result = self.rpc.call_instance_shutdown(source_node, instance,
8216 self.shutdown_timeout)
8217 msg = result.fail_msg
8219 if self.ignore_consistency or primary_node.offline:
8220 self.lu.LogWarning("Could not shut down instance %s on node %s,"
8221 " proceeding anyway; please make sure node"
8222 " %s is down; error details: %s",
8223 instance.name, source_node, source_node, msg)
8225 raise errors.OpExecError("Could not shut down instance %s on"
8226 " node %s: %s" %
8227 (instance.name, source_node, msg))
8229 self.feedback_fn("* deactivating the instance's disks on source node")
8230 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8231 raise errors.OpExecError("Can't shut down the instance's disks")
8233 instance.primary_node = target_node
8234 # distribute new instance config to the other nodes
8235 self.cfg.Update(instance, self.feedback_fn)
8237 # Only start the instance if it's marked as up
8238 if instance.admin_state == constants.ADMINST_UP:
8239 self.feedback_fn("* activating the instance's disks on target node %s" %
8241 logging.info("Starting instance %s on node %s",
8242 instance.name, target_node)
8244 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8245 ignore_secondaries=True)
8247 _ShutdownInstanceDisks(self.lu, instance)
8248 raise errors.OpExecError("Can't activate the instance's disks")
8250 self.feedback_fn("* starting the instance on the target node %s" %
8252 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8254 msg = result.fail_msg
8256 _ShutdownInstanceDisks(self.lu, instance)
8257 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8258 (instance.name, target_node, msg))
8260 def Exec(self, feedback_fn):
8261 """Perform the migration.
8264 self.feedback_fn = feedback_fn
8265 self.source_node = self.instance.primary_node
8267 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8268 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8269 self.target_node = self.instance.secondary_nodes[0]
8270 # Otherwise self.target_node has been populated either
8271 # directly, or through an iallocator.
8273 self.all_nodes = [self.source_node, self.target_node]
8274 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8275 in self.cfg.GetMultiNodeInfo(self.all_nodes))
8278 feedback_fn("Failover instance %s" % self.instance.name)
8279 self._ExecFailover()
8281 feedback_fn("Migrating instance %s" % self.instance.name)
8284 return self._ExecCleanup()
8286 return self._ExecMigration()
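# Editor's sketch (not part of Ganeti): the live/mode resolution performed in
# TLMigrateInstance.CheckPrereq above reduces to a small decision table; the
# same precedence in isolation: failover is never live, an explicit "live"
# flag wins over "mode", and the hypervisor default is used when neither is
# given.
def _ExampleResolveLiveMigration(live, mode, hv_default, failover):
  """Return True if the migration should be performed live."""
  if failover:
    # Failover is never live
    return False
  if live is not None and mode is not None:
    raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
                               " parameters are accepted",
                               errors.ECODE_INVAL)
  if live is not None:
    if live:
      mode = constants.HT_MIGRATION_LIVE
    else:
      mode = constants.HT_MIGRATION_NONLIVE
  elif mode is None:
    # read the default value from the hypervisor
    mode = hv_default
  return mode == constants.HT_MIGRATION_LIVE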
8289 def _CreateBlockDev(lu, node, instance, device, force_create,
8290 info, force_open):
8291 """Create a tree of block devices on a given node.
8293 If this device type has to be created on secondaries, create it and
8294 all its children.
8296 If not, just recurse to children keeping the same 'force' value.
8298 @param lu: the lu on whose behalf we execute
8299 @param node: the node on which to create the device
8300 @type instance: L{objects.Instance}
8301 @param instance: the instance which owns the device
8302 @type device: L{objects.Disk}
8303 @param device: the device to create
8304 @type force_create: boolean
8305 @param force_create: whether to force creation of this device; this
8306 will be changed to True whenever we find a device for which
8307 CreateOnSecondary() returns True
8308 @param info: the extra 'metadata' we should attach to the device
8309 (this will be represented as a LVM tag)
8310 @type force_open: boolean
8311 @param force_open: this parameter will be passed to the
8312 L{backend.BlockdevCreate} function where it specifies
8313 whether we run on primary or not, and it affects both
8314 the child assembly and the device's own Open() execution
8317 if device.CreateOnSecondary():
8318 force_create = True
8320 if device.children:
8321 for child in device.children:
8322 _CreateBlockDev(lu, node, instance, child, force_create,
8323 info, force_open)
8325 if not force_create:
8326 return
8328 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
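# Editor's sketch (not part of Ganeti): the recursion above turns the force
# flag on as soon as a device must exist on secondaries (e.g. DRBD), creates
# the children with the updated flag, and only then creates the device
# itself. The same traversal over plain dicts instead of L{objects.Disk}:
def _ExampleForcePropagation(device, force):
  """Yield the devices that would be created, depth-first."""
  if device.get("create_on_secondary"):
    force = True
  for child in device.get("children", []):
    for dev in _ExampleForcePropagation(child, force):
      yield dev
  if force:
    yield device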
8331 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8332 """Create a single block device on a given node.
8334 This will not recurse over children of the device, so they must be
8337 @param lu: the lu on whose behalf we execute
8338 @param node: the node on which to create the device
8339 @type instance: L{objects.Instance}
8340 @param instance: the instance which owns the device
8341 @type device: L{objects.Disk}
8342 @param device: the device to create
8343 @param info: the extra 'metadata' we should attach to the device
8344 (this will be represented as a LVM tag)
8345 @type force_open: boolean
8346 @param force_open: this parameter will be passed to the
8347 L{backend.BlockdevCreate} function where it specifies
8348 whether we run on primary or not, and it affects both
8349 the child assembly and the device's own Open() execution
8352 lu.cfg.SetDiskID(device, node)
8353 result = lu.rpc.call_blockdev_create(node, device, device.size,
8354 instance.name, force_open, info)
8355 result.Raise("Can't create block device %s on"
8356 " node %s for instance %s" % (device, node, instance.name))
8357 if device.physical_id is None:
8358 device.physical_id = result.payload
8361 def _GenerateUniqueNames(lu, exts):
8362 """Generate a suitable LV name.
8364 This will generate a logical volume name for the given instance.
8369 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8370 results.append("%s%s" % (new_id, val))
8374 def _ComputeLDParams(disk_template, disk_params):
8375 """Computes Logical Disk parameters from Disk Template parameters.
8377 @type disk_template: string
8378 @param disk_template: disk template, one of L{constants.DISK_TEMPLATES}
8379 @type disk_params: dict
8380 @param disk_params: disk template parameters; dict(template_name -> parameters)
8382 @return: a list of dicts, one for each node of the disk hierarchy. Each dict
8383 contains the LD parameters of the node. The tree is flattened in-order.
8386 if disk_template not in constants.DISK_TEMPLATES:
8387 raise errors.ProgrammerError("Unknown disk template %s" % disk_template)
8390 dt_params = disk_params[disk_template]
8391 if disk_template == constants.DT_DRBD8:
8393 constants.LDP_RESYNC_RATE: dt_params[constants.DRBD_RESYNC_RATE],
8394 constants.LDP_BARRIERS: dt_params[constants.DRBD_DISK_BARRIERS],
8395 constants.LDP_NO_META_FLUSH: dt_params[constants.DRBD_META_BARRIERS],
8396 constants.LDP_DEFAULT_METAVG: dt_params[constants.DRBD_DEFAULT_METAVG],
8397 constants.LDP_DISK_CUSTOM: dt_params[constants.DRBD_DISK_CUSTOM],
8398 constants.LDP_NET_CUSTOM: dt_params[constants.DRBD_NET_CUSTOM],
8399 constants.LDP_DYNAMIC_RESYNC: dt_params[constants.DRBD_DYNAMIC_RESYNC],
8400 constants.LDP_PLAN_AHEAD: dt_params[constants.DRBD_PLAN_AHEAD],
8401 constants.LDP_FILL_TARGET: dt_params[constants.DRBD_FILL_TARGET],
8402 constants.LDP_DELAY_TARGET: dt_params[constants.DRBD_DELAY_TARGET],
8403 constants.LDP_MAX_RATE: dt_params[constants.DRBD_MAX_RATE],
8404 constants.LDP_MIN_RATE: dt_params[constants.DRBD_MIN_RATE],
8408 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_DRBD8],
8411 result.append(drbd_params)
8415 constants.LDP_STRIPES: dt_params[constants.DRBD_DATA_STRIPES],
8418 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8420 result.append(data_params)
8424 constants.LDP_STRIPES: dt_params[constants.DRBD_META_STRIPES],
8427 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8429 result.append(meta_params)
8431 elif (disk_template == constants.DT_FILE or
8432 disk_template == constants.DT_SHARED_FILE):
8433 result.append(constants.DISK_LD_DEFAULTS[constants.LD_FILE])
8435 elif disk_template == constants.DT_PLAIN:
8437 constants.LDP_STRIPES: dt_params[constants.LV_STRIPES],
8440 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8442 result.append(params)
8444 elif disk_template == constants.DT_BLOCK:
8445 result.append(constants.DISK_LD_DEFAULTS[constants.LD_BLOCKDEV])
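# Editor's note, usage sketch (not part of Ganeti): the result is flattened
# in-order, so for DRBD8 it unpacks into the parameters of the DRBD device
# and of its data and metadata LVs, while DT_PLAIN yields a single element;
# disk_params would normally come from the cluster or node group:
#
#   drbd_params, data_params, meta_params = \
#     _ComputeLDParams(constants.DT_DRBD8, disk_params)
#
#   (lv_params, ) = _ComputeLDParams(constants.DT_PLAIN, disk_params)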
8450 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8451 iv_name, p_minor, s_minor, drbd_params, data_params,
8453 """Generate a drbd8 device complete with its children.
8456 assert len(vgnames) == len(names) == 2
8457 port = lu.cfg.AllocatePort()
8458 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8460 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8461 logical_id=(vgnames[0], names[0]),
8463 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8464 logical_id=(vgnames[1], names[1]),
8466 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8467 logical_id=(primary, secondary, port,
8470 children=[dev_data, dev_meta],
8471 iv_name=iv_name, params=drbd_params)
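# Editor's sketch (not part of Ganeti): the branch generated above is a DRBD8
# device with exactly two LV children, data and metadata, the latter fixed at
# DRBD_META_SIZE. The hierarchy as plain data:
def _ExampleDrbd8Tree(size):
  """Return a nested dict mirroring the generated DRBD8 disk hierarchy."""
  return {
    "dev_type": constants.LD_DRBD8,
    "size": size,
    "children": [
      {"dev_type": constants.LD_LV, "size": size},            # data LV
      {"dev_type": constants.LD_LV, "size": DRBD_META_SIZE},  # metadata LV
    ],
  }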
8475 def _GenerateDiskTemplate(lu, template_name,
8476 instance_name, primary_node,
8477 secondary_nodes, disk_info,
8478 file_storage_dir, file_driver,
8479 base_index, feedback_fn, disk_params):
8480 """Generate the entire disk layout for a given template type.
8483 #TODO: compute space requirements
8485 vgname = lu.cfg.GetVGName()
8486 disk_count = len(disk_info)
8488 ld_params = _ComputeLDParams(template_name, disk_params)
8489 if template_name == constants.DT_DISKLESS:
8491 elif template_name == constants.DT_PLAIN:
8492 if len(secondary_nodes) != 0:
8493 raise errors.ProgrammerError("Wrong template configuration")
8495 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8496 for i in range(disk_count)])
8497 for idx, disk in enumerate(disk_info):
8498 disk_index = idx + base_index
8499 vg = disk.get(constants.IDISK_VG, vgname)
8500 feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
8501 disk_dev = objects.Disk(dev_type=constants.LD_LV,
8502 size=disk[constants.IDISK_SIZE],
8503 logical_id=(vg, names[idx]),
8504 iv_name="disk/%d" % disk_index,
8505 mode=disk[constants.IDISK_MODE],
8506 params=ld_params[0])
8507 disks.append(disk_dev)
8508 elif template_name == constants.DT_DRBD8:
8509 drbd_params, data_params, meta_params = ld_params
8510 if len(secondary_nodes) != 1:
8511 raise errors.ProgrammerError("Wrong template configuration")
8512 remote_node = secondary_nodes[0]
8513 minors = lu.cfg.AllocateDRBDMinor(
8514 [primary_node, remote_node] * len(disk_info), instance_name)
8517 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8518 for i in range(disk_count)]):
8519 names.append(lv_prefix + "_data")
8520 names.append(lv_prefix + "_meta")
8521 for idx, disk in enumerate(disk_info):
8522 disk_index = idx + base_index
8523 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8524 data_vg = disk.get(constants.IDISK_VG, vgname)
8525 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8526 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8527 disk[constants.IDISK_SIZE],
8529 names[idx * 2:idx * 2 + 2],
8530 "disk/%d" % disk_index,
8531 minors[idx * 2], minors[idx * 2 + 1],
8532 drbd_params, data_params, meta_params)
8533 disk_dev.mode = disk[constants.IDISK_MODE]
8534 disks.append(disk_dev)
8535 elif template_name == constants.DT_FILE:
8536 if len(secondary_nodes) != 0:
8537 raise errors.ProgrammerError("Wrong template configuration")
8539 opcodes.RequireFileStorage()
8541 for idx, disk in enumerate(disk_info):
8542 disk_index = idx + base_index
8543 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8544 size=disk[constants.IDISK_SIZE],
8545 iv_name="disk/%d" % disk_index,
8546 logical_id=(file_driver,
8547 "%s/disk%d" % (file_storage_dir,
8549 mode=disk[constants.IDISK_MODE],
8550 params=ld_params[0])
8551 disks.append(disk_dev)
8552 elif template_name == constants.DT_SHARED_FILE:
8553 if len(secondary_nodes) != 0:
8554 raise errors.ProgrammerError("Wrong template configuration")
8556 opcodes.RequireSharedFileStorage()
8558 for idx, disk in enumerate(disk_info):
8559 disk_index = idx + base_index
8560 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8561 size=disk[constants.IDISK_SIZE],
8562 iv_name="disk/%d" % disk_index,
8563 logical_id=(file_driver,
8564 "%s/disk%d" % (file_storage_dir,
8566 mode=disk[constants.IDISK_MODE],
8567 params=ld_params[0])
8568 disks.append(disk_dev)
8569 elif template_name == constants.DT_BLOCK:
8570 if len(secondary_nodes) != 0:
8571 raise errors.ProgrammerError("Wrong template configuration")
8573 for idx, disk in enumerate(disk_info):
8574 disk_index = idx + base_index
8575 disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
8576 size=disk[constants.IDISK_SIZE],
8577 logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
8578 disk[constants.IDISK_ADOPT]),
8579 iv_name="disk/%d" % disk_index,
8580 mode=disk[constants.IDISK_MODE],
8581 params=ld_params[0])
8582 disks.append(disk_dev)
8585 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
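# Editor's note, naming sketch (not part of Ganeti): for DRBD8, each disk
# gets one unique prefix which is expanded into a data and a metadata LV
# name, so disk idx ends up with names[idx * 2] and names[idx * 2 + 1]:
#
#   prefixes = ["<uuid0>.disk0", "<uuid1>.disk1"]  # hypothetical unique IDs
#   names = []
#   for p in prefixes:
#     names.append(p + "_data")
#     names.append(p + "_meta")
#   # names == ["<uuid0>.disk0_data", "<uuid0>.disk0_meta",
#   #           "<uuid1>.disk1_data", "<uuid1>.disk1_meta"]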
8589 def _GetInstanceInfoText(instance):
8590 """Compute that text that should be added to the disk's metadata.
8593 return "originstname+%s" % instance.name
8596 def _CalcEta(time_taken, written, total_size):
8597 """Calculates the ETA based on size written and total size.
8599 @param time_taken: The time taken so far
8600 @param written: amount written so far
8601 @param total_size: The total size of data to be written
8602 @return: The remaining time in seconds
8605 avg_time = time_taken / float(written)
8606 return (total_size - written) * avg_time
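# Editor's note, worked example (not part of Ganeti): after 100s with 512 out
# of 2048 units written, the average is 100 / 512.0 seconds per unit, so the
# remaining 1536 units take (2048 - 512) * (100 / 512.0) = 300.0 seconds:
#
#   _CalcEta(100, 512, 2048) == 300.0
#
# Note that written must be non-zero, otherwise the division fails.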
8609 def _WipeDisks(lu, instance):
8610 """Wipes instance disks.
8612 @type lu: L{LogicalUnit}
8613 @param lu: the logical unit on whose behalf we execute
8614 @type instance: L{objects.Instance}
8615 @param instance: the instance whose disks we should create
8616 @return: the success of the wipe
8619 node = instance.primary_node
8621 for device in instance.disks:
8622 lu.cfg.SetDiskID(device, node)
8624 logging.info("Pause sync of instance %s disks", instance.name)
8625 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
8627 for idx, success in enumerate(result.payload):
8629 logging.warn("pause-sync of instance %s for disks %d failed",
8633 for idx, device in enumerate(instance.disks):
8634 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
8635 # MAX_WIPE_CHUNK at max
8636 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8637 constants.MIN_WIPE_CHUNK_PERCENT)
8638 # we _must_ make this an int, otherwise rounding errors will
8639 # occur
8640 wipe_chunk_size = int(wipe_chunk_size)
8642 lu.LogInfo("* Wiping disk %d", idx)
8643 logging.info("Wiping disk %d for instance %s, node %s using"
8644 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8649 start_time = time.time()
8651 while offset < size:
8652 wipe_size = min(wipe_chunk_size, size - offset)
8653 logging.debug("Wiping disk %d, offset %s, chunk %s",
8654 idx, offset, wipe_size)
8655 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
8656 result.Raise("Could not wipe disk %d at offset %d for size %d" %
8657 (idx, offset, wipe_size))
8660 if now - last_output >= 60:
8661 eta = _CalcEta(now - start_time, offset, size)
8662 lu.LogInfo(" - done: %.1f%% ETA: %s" %
8663 (offset / float(size) * 100, utils.FormatSeconds(eta)))
8666 logging.info("Resume sync of instance %s disks", instance.name)
8668 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
8670 for idx, success in enumerate(result.payload):
8672 lu.LogWarning("Resume sync of disk %d failed, please have a"
8673 " look at the status and troubleshoot the issue", idx)
8674 logging.warn("resume-sync of instance %s for disks %d failed",
8678 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8679 """Create all disks for an instance.
8681 This abstracts away some work from AddInstance.
8683 @type lu: L{LogicalUnit}
8684 @param lu: the logical unit on whose behalf we execute
8685 @type instance: L{objects.Instance}
8686 @param instance: the instance whose disks we should create
8688 @param to_skip: list of indices to skip
8689 @type target_node: string
8690 @param target_node: if passed, overrides the target node for creation
8692 @return: the success of the creation
8695 info = _GetInstanceInfoText(instance)
8696 if target_node is None:
8697 pnode = instance.primary_node
8698 all_nodes = instance.all_nodes
8703 if instance.disk_template in constants.DTS_FILEBASED:
8704 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8705 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8707 result.Raise("Failed to create directory '%s' on"
8708 " node %s" % (file_storage_dir, pnode))
8710 # Note: this needs to be kept in sync with adding of disks in
8711 # LUInstanceSetParams
8712 for idx, device in enumerate(instance.disks):
8713 if to_skip and idx in to_skip:
8715 logging.info("Creating volume %s for instance %s",
8716 device.iv_name, instance.name)
8718 for node in all_nodes:
8719 f_create = node == pnode
8720 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
8723 def _RemoveDisks(lu, instance, target_node=None):
8724 """Remove all disks for an instance.
8726 This abstracts away some work from `AddInstance()` and
8727 `RemoveInstance()`. Note that in case some of the devices couldn't
8728 be removed, the removal will continue with the other ones (compare
8729 with `_CreateDisks()`).
8731 @type lu: L{LogicalUnit}
8732 @param lu: the logical unit on whose behalf we execute
8733 @type instance: L{objects.Instance}
8734 @param instance: the instance whose disks we should remove
8735 @type target_node: string
8736 @param target_node: used to override the node on which to remove the disks
8738 @return: the success of the removal
8741 logging.info("Removing block devices for instance %s", instance.name)
8744 for device in instance.disks:
8746 edata = [(target_node, device)]
8748 edata = device.ComputeNodeTree(instance.primary_node)
8749 for node, disk in edata:
8750 lu.cfg.SetDiskID(disk, node)
8751 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
8753 lu.LogWarning("Could not remove block device %s on node %s,"
8754 " continuing anyway: %s", device.iv_name, node, msg)
8757 # if this is a DRBD disk, return its port to the pool
8758 if device.dev_type in constants.LDS_DRBD:
8759 tcp_port = device.logical_id[2]
8760 lu.cfg.AddTcpUdpPort(tcp_port)
8762 if instance.disk_template == constants.DT_FILE:
8763 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8767 tgt = instance.primary_node
8768 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
8770 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
8771 file_storage_dir, instance.primary_node, result.fail_msg)
8777 def _ComputeDiskSizePerVG(disk_template, disks):
8778 """Compute disk size requirements in the volume group
8781 def _compute(disks, payload):
8782 """Universal algorithm.
8787 vgs[disk[constants.IDISK_VG]] = \
8788 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
8792 # Required free disk space per volume group, as a function of the disk sizes
8794 constants.DT_DISKLESS: {},
8795 constants.DT_PLAIN: _compute(disks, 0),
8796 # 128 MB are added for drbd metadata for each disk
8797 constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
8798 constants.DT_FILE: {},
8799 constants.DT_SHARED_FILE: {},
8802 if disk_template not in req_size_dict:
8803 raise errors.ProgrammerError("Disk template '%s' size requirement"
8804 " is unknown" % disk_template)
8806 return req_size_dict[disk_template]
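# Editor's note, usage sketch (not part of Ganeti): two DRBD disks on
# different volume groups yield per-VG totals, each including DRBD_META_SIZE
# for the metadata device:
#
#   disks = [{constants.IDISK_VG: "xenvg", constants.IDISK_SIZE: 1024},
#            {constants.IDISK_VG: "fastvg", constants.IDISK_SIZE: 512}]
#   _ComputeDiskSizePerVG(constants.DT_DRBD8, disks)
#   # => {"xenvg": 1024 + DRBD_META_SIZE, "fastvg": 512 + DRBD_META_SIZE}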
8809 def _ComputeDiskSize(disk_template, disks):
8810 """Compute disk size requirements in the volume group
8813 # Required free disk space as a function of the requested disk sizes
8815 constants.DT_DISKLESS: None,
8816 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
8817 # 128 MB are added for drbd metadata for each disk
8819 sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
8820 constants.DT_FILE: None,
8821 constants.DT_SHARED_FILE: 0,
8822 constants.DT_BLOCK: 0,
8825 if disk_template not in req_size_dict:
8826 raise errors.ProgrammerError("Disk template '%s' size requirement"
8827 " is unknown" % disk_template)
8829 return req_size_dict[disk_template]
8832 def _FilterVmNodes(lu, nodenames):
8833 """Filters out non-vm_capable nodes from a list.
8835 @type lu: L{LogicalUnit}
8836 @param lu: the logical unit for which we check
8837 @type nodenames: list
8838 @param nodenames: the list of nodes on which we should check
8840 @return: the list of vm-capable nodes
8843 non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
8844 return [name for name in nodenames if name not in non_vm_nodes]
8847 def _CheckHVParams(lu, nodenames, hvname, hvparams):
8848 """Hypervisor parameter validation.
8850 This function abstracts the hypervisor parameter validation to be
8851 used in both instance create and instance modify.
8853 @type lu: L{LogicalUnit}
8854 @param lu: the logical unit for which we check
8855 @type nodenames: list
8856 @param nodenames: the list of nodes on which we should check
8857 @type hvname: string
8858 @param hvname: the name of the hypervisor we should use
8859 @type hvparams: dict
8860 @param hvparams: the parameters which we need to check
8861 @raise errors.OpPrereqError: if the parameters are not valid
8864 nodenames = _FilterVmNodes(lu, nodenames)
8866 cluster = lu.cfg.GetClusterInfo()
8867 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
8869 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
8870 for node in nodenames:
8871 info = hvinfo[node]
8872 if info.offline:
8873 continue
8874 info.Raise("Hypervisor parameter validation failed on node %s" % node)
8877 def _CheckOSParams(lu, required, nodenames, osname, osparams):
8878 """OS parameters validation.
8880 @type lu: L{LogicalUnit}
8881 @param lu: the logical unit for which we check
8882 @type required: boolean
8883 @param required: whether the validation should fail if the OS is not
8884 found
8885 @type nodenames: list
8886 @param nodenames: the list of nodes on which we should check
8887 @type osname: string
8888 @param osname: the name of the OS we should use
8889 @type osparams: dict
8890 @param osparams: the parameters which we need to check
8891 @raise errors.OpPrereqError: if the parameters are not valid
8894 nodenames = _FilterVmNodes(lu, nodenames)
8895 result = lu.rpc.call_os_validate(nodenames, required, osname,
8896 [constants.OS_VALIDATE_PARAMETERS],
8898 for node, nres in result.items():
8899 # we don't check for offline cases since this should be run only
8900 # against the master node and/or an instance's nodes
8901 nres.Raise("OS Parameters validation failed on node %s" % node)
8902 if not nres.payload:
8903 lu.LogInfo("OS %s not found on node %s, validation skipped",
8907 class LUInstanceCreate(LogicalUnit):
8908 """Create an instance.
8911 HPATH = "instance-add"
8912 HTYPE = constants.HTYPE_INSTANCE
8915 def CheckArguments(self):
8919 # do not require name_check to ease forward/backward compatibility
8921 if self.op.no_install and self.op.start:
8922 self.LogInfo("No-installation mode selected, disabling startup")
8923 self.op.start = False
8924 # validate/normalize the instance name
8925 self.op.instance_name = \
8926 netutils.Hostname.GetNormalizedName(self.op.instance_name)
8928 if self.op.ip_check and not self.op.name_check:
8929 # TODO: make the ip check more flexible and not depend on the name check
8930 raise errors.OpPrereqError("Cannot do IP address check without a name"
8931 " check", errors.ECODE_INVAL)
8933 # check nics' parameter names
8934 for nic in self.op.nics:
8935 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8937 # check disks. parameter names and consistent adopt/no-adopt strategy
8938 has_adopt = has_no_adopt = False
8939 for disk in self.op.disks:
8940 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8941 if constants.IDISK_ADOPT in disk:
8945 if has_adopt and has_no_adopt:
8946 raise errors.OpPrereqError("Either all disks are adopted or none is",
8949 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8950 raise errors.OpPrereqError("Disk adoption is not supported for the"
8951 " '%s' disk template" %
8952 self.op.disk_template,
8954 if self.op.iallocator is not None:
8955 raise errors.OpPrereqError("Disk adoption not allowed with an"
8956 " iallocator script", errors.ECODE_INVAL)
8957 if self.op.mode == constants.INSTANCE_IMPORT:
8958 raise errors.OpPrereqError("Disk adoption not allowed for"
8959 " instance import", errors.ECODE_INVAL)
8961 if self.op.disk_template in constants.DTS_MUST_ADOPT:
8962 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8963 " but no 'adopt' parameter given" %
8964 self.op.disk_template,
8967 self.adopt_disks = has_adopt
8969 # instance name verification
8970 if self.op.name_check:
8971 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8972 self.op.instance_name = self.hostname1.name
8973 # used in CheckPrereq for ip ping check
8974 self.check_ip = self.hostname1.ip
8976 self.check_ip = None
8978 # file storage checks
8979 if (self.op.file_driver and
8980 self.op.file_driver not in constants.FILE_DRIVER):
8981 raise errors.OpPrereqError("Invalid file driver name '%s'" %
8982 self.op.file_driver, errors.ECODE_INVAL)
8984 if self.op.disk_template == constants.DT_FILE:
8985 opcodes.RequireFileStorage()
8986 elif self.op.disk_template == constants.DT_SHARED_FILE:
8987 opcodes.RequireSharedFileStorage()
8989 ### Node/iallocator related checks
8990 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8992 if self.op.pnode is not None:
8993 if self.op.disk_template in constants.DTS_INT_MIRROR:
8994 if self.op.snode is None:
8995 raise errors.OpPrereqError("The networked disk templates need"
8996 " a mirror node", errors.ECODE_INVAL)
8998 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8999 " template")
9000 self.op.snode = None
9002 self._cds = _GetClusterDomainSecret()
9004 if self.op.mode == constants.INSTANCE_IMPORT:
9005 # On import force_variant must be True, because if we forced it at
9006 # initial install, our only chance when importing it back is that it
9008 self.op.force_variant = True
9010 if self.op.no_install:
9011 self.LogInfo("No-installation mode has no effect during import")
9013 elif self.op.mode == constants.INSTANCE_CREATE:
9014 if self.op.os_type is None:
9015 raise errors.OpPrereqError("No guest OS specified",
9017 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9018 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9019 " installation" % self.op.os_type,
9021 if self.op.disk_template is None:
9022 raise errors.OpPrereqError("No disk template specified",
9025 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9026 # Check handshake to ensure both clusters have the same domain secret
9027 src_handshake = self.op.source_handshake
9028 if not src_handshake:
9029 raise errors.OpPrereqError("Missing source handshake",
9032 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9035 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9038 # Load and check source CA
9039 self.source_x509_ca_pem = self.op.source_x509_ca
9040 if not self.source_x509_ca_pem:
9041 raise errors.OpPrereqError("Missing source X509 CA",
9045 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9047 except OpenSSL.crypto.Error, err:
9048 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9049 (err, ), errors.ECODE_INVAL)
9051 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9052 if errcode is not None:
9053 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9056 self.source_x509_ca = cert
9058 src_instance_name = self.op.source_instance_name
9059 if not src_instance_name:
9060 raise errors.OpPrereqError("Missing source instance name",
9063 self.source_instance_name = \
9064 netutils.GetHostname(name=src_instance_name).name
9067 raise errors.OpPrereqError("Invalid instance creation mode %r" %
9068 self.op.mode, errors.ECODE_INVAL)
9070 def ExpandNames(self):
9071 """ExpandNames for CreateInstance.
9073 Figure out the right locks for instance creation.
9076 self.needed_locks = {}
9078 instance_name = self.op.instance_name
9079 # this is just a preventive check, but someone might still add this
9080 # instance in the meantime, and creation will fail at lock-add time
9081 if instance_name in self.cfg.GetInstanceList():
9082 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9083 instance_name, errors.ECODE_EXISTS)
9085 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9087 if self.op.iallocator:
9088 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9089 # specifying a group on instance creation and then selecting nodes from
9090 # that group
9091 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9092 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9094 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9095 nodelist = [self.op.pnode]
9096 if self.op.snode is not None:
9097 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9098 nodelist.append(self.op.snode)
9099 self.needed_locks[locking.LEVEL_NODE] = nodelist
9100 # Lock resources of instance's primary and secondary nodes (copy to
9101 # prevent accidental modification)
9102 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9104 # in case of import lock the source node too
9105 if self.op.mode == constants.INSTANCE_IMPORT:
9106 src_node = self.op.src_node
9107 src_path = self.op.src_path
9109 if src_path is None:
9110 self.op.src_path = src_path = self.op.instance_name
9112 if src_node is None:
9113 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9114 self.op.src_node = None
9115 if os.path.isabs(src_path):
9116 raise errors.OpPrereqError("Importing an instance from a path"
9117 " requires a source node option",
9120 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9121 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9122 self.needed_locks[locking.LEVEL_NODE].append(src_node)
9123 if not os.path.isabs(src_path):
9124 self.op.src_path = src_path = \
9125 utils.PathJoin(constants.EXPORT_DIR, src_path)
9127 def _RunAllocator(self):
9128 """Run the allocator based on input opcode.
9131 nics = [n.ToDict() for n in self.nics]
9132 ial = IAllocator(self.cfg, self.rpc,
9133 mode=constants.IALLOCATOR_MODE_ALLOC,
9134 name=self.op.instance_name,
9135 disk_template=self.op.disk_template,
9138 vcpus=self.be_full[constants.BE_VCPUS],
9139 memory=self.be_full[constants.BE_MAXMEM],
9142 hypervisor=self.op.hypervisor,
9145 ial.Run(self.op.iallocator)
9148 raise errors.OpPrereqError("Can't compute nodes using"
9149 " iallocator '%s': %s" %
9150 (self.op.iallocator, ial.info),
9152 if len(ial.result) != ial.required_nodes:
9153 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9154 " of nodes (%s), required %s" %
9155 (self.op.iallocator, len(ial.result),
9156 ial.required_nodes), errors.ECODE_FAULT)
9157 self.op.pnode = ial.result[0]
9158 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9159 self.op.instance_name, self.op.iallocator,
9160 utils.CommaJoin(ial.result))
9161 if ial.required_nodes == 2:
9162 self.op.snode = ial.result[1]
9164 def BuildHooksEnv(self):
9167 This runs on master, primary and secondary nodes of the instance.
9171 "ADD_MODE": self.op.mode,
9173 if self.op.mode == constants.INSTANCE_IMPORT:
9174 env["SRC_NODE"] = self.op.src_node
9175 env["SRC_PATH"] = self.op.src_path
9176 env["SRC_IMAGES"] = self.src_images
9178 env.update(_BuildInstanceHookEnv(
9179 name=self.op.instance_name,
9180 primary_node=self.op.pnode,
9181 secondary_nodes=self.secondaries,
9182 status=self.op.start,
9183 os_type=self.op.os_type,
9184 minmem=self.be_full[constants.BE_MINMEM],
9185 maxmem=self.be_full[constants.BE_MAXMEM],
9186 vcpus=self.be_full[constants.BE_VCPUS],
9187 nics=_NICListToTuple(self, self.nics),
9188 disk_template=self.op.disk_template,
9189 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9190 for d in self.disks],
9193 hypervisor_name=self.op.hypervisor,
9199 def BuildHooksNodes(self):
9200 """Build hooks nodes.
9203 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9206 def _ReadExportInfo(self):
9207 """Reads the export information from disk.
9209 It will override the opcode source node and path with the actual
9210 information, if these two were not specified before.
9212 @return: the export information
9215 assert self.op.mode == constants.INSTANCE_IMPORT
9217 src_node = self.op.src_node
9218 src_path = self.op.src_path
9220 if src_node is None:
9221 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9222 exp_list = self.rpc.call_export_list(locked_nodes)
9224 for node in exp_list:
9225 if exp_list[node].fail_msg:
9227 if src_path in exp_list[node].payload:
9229 self.op.src_node = src_node = node
9230 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
9234 raise errors.OpPrereqError("No export found for relative path %s" %
9235 src_path, errors.ECODE_INVAL)
9237 _CheckNodeOnline(self, src_node)
9238 result = self.rpc.call_export_info(src_node, src_path)
9239 result.Raise("No export or invalid export found in dir %s" % src_path)
9241 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9242 if not export_info.has_section(constants.INISECT_EXP):
9243 raise errors.ProgrammerError("Corrupted export config",
9244 errors.ECODE_ENVIRON)
9246 ei_version = export_info.get(constants.INISECT_EXP, "version")
9247 if (int(ei_version) != constants.EXPORT_VERSION):
9248 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9249 (ei_version, constants.EXPORT_VERSION),
9250 errors.ECODE_ENVIRON)
9253 def _ReadExportParams(self, einfo):
9254 """Use export parameters as defaults.
9256 In case the opcode doesn't specify (i.e. override) some instance
9257 parameters, then try to use them from the export information, if
9258 the export declares them.
9261 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9263 if self.op.disk_template is None:
9264 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9265 self.op.disk_template = einfo.get(constants.INISECT_INS,
9267 if self.op.disk_template not in constants.DISK_TEMPLATES:
9268 raise errors.OpPrereqError("Disk template specified in configuration"
9269 " file is not one of the allowed values:"
9270 " %s" % " ".join(constants.DISK_TEMPLATES))
9272 raise errors.OpPrereqError("No disk template specified and the export"
9273 " is missing the disk_template information",
9276 if not self.op.disks:
9278 # TODO: import the disk iv_name too
9279 for idx in range(constants.MAX_DISKS):
9280 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9281 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9282 disks.append({constants.IDISK_SIZE: disk_sz})
9283 self.op.disks = disks
9284 if not disks and self.op.disk_template != constants.DT_DISKLESS:
9285 raise errors.OpPrereqError("No disk info specified and the export"
9286 " is missing the disk information",
9289 if not self.op.nics:
9291 for idx in range(constants.MAX_NICS):
9292 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9294 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9295 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9302 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9303 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9305 if (self.op.hypervisor is None and
9306 einfo.has_option(constants.INISECT_INS, "hypervisor")):
9307 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9309 if einfo.has_section(constants.INISECT_HYP):
9310 # use the export parameters but do not override the ones
9311 # specified by the user
9312 for name, value in einfo.items(constants.INISECT_HYP):
9313 if name not in self.op.hvparams:
9314 self.op.hvparams[name] = value
9316 if einfo.has_section(constants.INISECT_BEP):
9317 # use the parameters, without overriding
9318 for name, value in einfo.items(constants.INISECT_BEP):
9319 if name not in self.op.beparams:
9320 self.op.beparams[name] = value
9321 # Compatibility for the old "memory" be param
9322 if name == constants.BE_MEMORY:
9323 if constants.BE_MAXMEM not in self.op.beparams:
9324 self.op.beparams[constants.BE_MAXMEM] = value
9325 if constants.BE_MINMEM not in self.op.beparams:
9326 self.op.beparams[constants.BE_MINMEM] = value
9327 else:
9328 # try to read the parameters old style, from the main section
9329 for name in constants.BES_PARAMETERS:
9330 if (name not in self.op.beparams and
9331 einfo.has_option(constants.INISECT_INS, name)):
9332 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9334 if einfo.has_section(constants.INISECT_OSP):
9335 # use the parameters, without overriding
9336 for name, value in einfo.items(constants.INISECT_OSP):
9337 if name not in self.op.osparams:
9338 self.op.osparams[name] = value
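# Editor's sketch: the merge pattern used above for hvparams/beparams/osparams
# is "the export value fills in only what the user did not specify", i.e. a
# setdefault-style merge. Hypothetical values:
#
#   op_params = {"kernel_path": "/vmlinuz-custom"}      # user-specified
#   export_params = {"kernel_path": "/vmlinuz", "root_path": "/dev/vda1"}
#   for name, value in export_params.items():
#     if name not in op_params:
#       op_params[name] = value
#   # op_params == {"kernel_path": "/vmlinuz-custom", "root_path": "/dev/vda1"}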
9340 def _RevertToDefaults(self, cluster):
9341 """Revert the instance parameters to the default values.
9345 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9346 for name in self.op.hvparams.keys():
9347 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9348 del self.op.hvparams[name]
9350 be_defs = cluster.SimpleFillBE({})
9351 for name in self.op.beparams.keys():
9352 if name in be_defs and be_defs[name] == self.op.beparams[name]:
9353 del self.op.beparams[name]
9355 nic_defs = cluster.SimpleFillNIC({})
9356 for nic in self.op.nics:
9357 for name in constants.NICS_PARAMETERS:
9358 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9359 del nic[name]
9361 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9362 for name in self.op.osparams.keys():
9363 if name in os_defs and os_defs[name] == self.op.osparams[name]:
9364 del self.op.osparams[name]
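# Editor's sketch: _RevertToDefaults keeps only genuine overrides, so that an
# import with identify_defaults does not freeze the current cluster defaults
# into the instance. Hypothetical values:
#
#   defaults = {"acpi": True, "boot_order": "cd"}
#   specified = {"acpi": True, "boot_order": "dc"}
#   for name in specified.keys():
#     if name in defaults and defaults[name] == specified[name]:
#       del specified[name]
#   # specified == {"boot_order": "dc"}; "acpi" now tracks the cluster default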
9366 def _CalculateFileStorageDir(self):
9367 """Calculate final instance file storage dir.
9370 # file storage dir calculation/check
9371 self.instance_file_storage_dir = None
9372 if self.op.disk_template in constants.DTS_FILEBASED:
9373 # build the full file storage dir path
9374 joinargs = []
9376 if self.op.disk_template == constants.DT_SHARED_FILE:
9377 get_fsd_fn = self.cfg.GetSharedFileStorageDir
9378 else:
9379 get_fsd_fn = self.cfg.GetFileStorageDir
9381 cfg_storagedir = get_fsd_fn()
9382 if not cfg_storagedir:
9383 raise errors.OpPrereqError("Cluster file storage dir not defined")
9384 joinargs.append(cfg_storagedir)
9386 if self.op.file_storage_dir is not None:
9387 joinargs.append(self.op.file_storage_dir)
9389 joinargs.append(self.op.instance_name)
9391 # pylint: disable=W0142
9392 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
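# Editor's sketch: the final path is cluster_dir[/user_subdir]/instance_name,
# e.g. with hypothetical values:
#
#   joinargs = ["/srv/ganeti/file-storage", "web", "instance1.example.com"]
#   # utils.PathJoin would yield
#   # "/srv/ganeti/file-storage/web/instance1.example.com"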
9394 def CheckPrereq(self):
9395 """Check prerequisites.
9398 self._CalculateFileStorageDir()
9400 if self.op.mode == constants.INSTANCE_IMPORT:
9401 export_info = self._ReadExportInfo()
9402 self._ReadExportParams(export_info)
9404 if (not self.cfg.GetVGName() and
9405 self.op.disk_template not in constants.DTS_NOT_LVM):
9406 raise errors.OpPrereqError("Cluster does not support lvm-based"
9407 " instances", errors.ECODE_STATE)
9409 if (self.op.hypervisor is None or
9410 self.op.hypervisor == constants.VALUE_AUTO):
9411 self.op.hypervisor = self.cfg.GetHypervisorType()
9413 cluster = self.cfg.GetClusterInfo()
9414 enabled_hvs = cluster.enabled_hypervisors
9415 if self.op.hypervisor not in enabled_hvs:
9416 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9417 " cluster (%s)" % (self.op.hypervisor,
9418 ",".join(enabled_hvs)),
9421 # Check tag validity
9422 for tag in self.op.tags:
9423 objects.TaggableObject.ValidateTag(tag)
9425 # check hypervisor parameter syntax (locally)
9426 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9427 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9428 self.op.hvparams)
9429 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9430 hv_type.CheckParameterSyntax(filled_hvp)
9431 self.hv_full = filled_hvp
9432 # check that we don't specify global parameters on an instance
9433 _CheckGlobalHvParams(self.op.hvparams)
9435 # fill and remember the beparams dict
9436 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9437 for param, value in self.op.beparams.iteritems():
9438 if value == constants.VALUE_AUTO:
9439 self.op.beparams[param] = default_beparams[param]
9440 objects.UpgradeBeParams(self.op.beparams)
9441 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9442 self.be_full = cluster.SimpleFillBE(self.op.beparams)
9444 # build os parameters
9445 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9447 # now that hvp/bep are in final format, let's reset to defaults,
9448 # if told to do so
9449 if self.op.identify_defaults:
9450 self._RevertToDefaults(cluster)
9452 # NIC buildup
9453 self.nics = []
9454 for idx, nic in enumerate(self.op.nics):
9455 nic_mode_req = nic.get(constants.INIC_MODE, None)
9456 nic_mode = nic_mode_req
9457 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9458 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9460 # in routed mode, for the first nic, the default ip is 'auto'
9461 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9462 default_ip_mode = constants.VALUE_AUTO
9464 default_ip_mode = constants.VALUE_NONE
9466 # ip validity checks
9467 ip = nic.get(constants.INIC_IP, default_ip_mode)
9468 if ip is None or ip.lower() == constants.VALUE_NONE:
9469 nic_ip = None
9470 elif ip.lower() == constants.VALUE_AUTO:
9471 if not self.op.name_check:
9472 raise errors.OpPrereqError("IP address set to auto but name checks"
9473 " have been skipped",
9475 nic_ip = self.hostname1.ip
9477 if not netutils.IPAddress.IsValid(ip):
9478 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9482 # TODO: check the ip address for uniqueness
9483 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9484 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9487 # MAC address verification
9488 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9489 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9490 mac = utils.NormalizeAndValidateMac(mac)
9492 try:
9493 self.cfg.ReserveMAC(mac, self.proc.GetECId())
9494 except errors.ReservationError:
9495 raise errors.OpPrereqError("MAC address %s already in use"
9496 " in cluster" % mac,
9497 errors.ECODE_NOTUNIQUE)
9499 # Build nic parameters
9500 link = nic.get(constants.INIC_LINK, None)
9501 if link == constants.VALUE_AUTO:
9502 link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9503 nicparams = {}
9504 if nic_mode_req:
9505 nicparams[constants.NIC_MODE] = nic_mode
9506 if link:
9507 nicparams[constants.NIC_LINK] = link
9509 check_params = cluster.SimpleFillNIC(nicparams)
9510 objects.NIC.CheckParameterSyntax(check_params)
9511 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
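# Editor's sketch of the NIC resolution above, with hypothetical inputs: a
# request of {"mode": "auto", "mac": "auto"} ends up with the cluster-default
# mode/link, a generated-later MAC, and (for routed mode on the first NIC)
# the name-resolved IP; an explicit {"ip": "none"} yields nic_ip = None:
#
#   nic_req = {constants.INIC_MODE: "auto", constants.INIC_MAC: "auto"}
#   mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]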
9513 # disk checks/pre-build
9514 default_vg = self.cfg.GetVGName()
9515 self.disks = []
9516 for disk in self.op.disks:
9517 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9518 if mode not in constants.DISK_ACCESS_SET:
9519 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9520 mode, errors.ECODE_INVAL)
9521 size = disk.get(constants.IDISK_SIZE, None)
9523 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9526 except (TypeError, ValueError):
9527 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9530 data_vg = disk.get(constants.IDISK_VG, default_vg)
9531 new_disk = {
9532 constants.IDISK_SIZE: size,
9533 constants.IDISK_MODE: mode,
9534 constants.IDISK_VG: data_vg,
9535 }
9536 if constants.IDISK_METAVG in disk:
9537 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9538 if constants.IDISK_ADOPT in disk:
9539 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9540 self.disks.append(new_disk)
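# Editor's sketch: each user disk specification is normalised into a dict
# like the following (hypothetical values):
#
#   {constants.IDISK_SIZE: 10240,      # MiB, after int() conversion
#    constants.IDISK_MODE: "rw",       # validated against DISK_ACCESS_SET
#    constants.IDISK_VG: "xenvg"}      # default VG unless overridden
#   # plus IDISK_METAVG/IDISK_ADOPT when present in the input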
9542 if self.op.mode == constants.INSTANCE_IMPORT:
9543 disk_images = []
9544 for idx in range(len(self.disks)):
9545 option = "disk%d_dump" % idx
9546 if export_info.has_option(constants.INISECT_INS, option):
9547 # FIXME: are the old os-es, disk sizes, etc. useful?
9548 export_name = export_info.get(constants.INISECT_INS, option)
9549 image = utils.PathJoin(self.op.src_path, export_name)
9550 disk_images.append(image)
9551 else:
9552 disk_images.append(False)
9554 self.src_images = disk_images
9556 old_name = export_info.get(constants.INISECT_INS, "name")
9557 if self.op.instance_name == old_name:
9558 for idx, nic in enumerate(self.nics):
9559 if nic.mac == constants.VALUE_AUTO:
9560 nic_mac_ini = "nic%d_mac" % idx
9561 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9563 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9565 # ip ping checks (we use the same ip that was resolved in ExpandNames)
9566 if self.op.ip_check:
9567 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9568 raise errors.OpPrereqError("IP %s of instance %s already in use" %
9569 (self.check_ip, self.op.instance_name),
9570 errors.ECODE_NOTUNIQUE)
9572 #### mac address generation
9573 # By generating here the mac address both the allocator and the hooks get
9574 # the real final mac address rather than the 'auto' or 'generate' value.
9575 # There is a race condition between the generation and the instance object
9576 # creation, which means that we know the mac is valid now, but we're not
9577 # sure it will be when we actually add the instance. If things go bad
9578 # adding the instance will abort because of a duplicate mac, and the
9579 # creation job will fail.
9580 for nic in self.nics:
9581 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9582 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9586 if self.op.iallocator is not None:
9587 self._RunAllocator()
9589 # Release all unneeded node locks
9590 _ReleaseLocks(self, locking.LEVEL_NODE,
9591 keep=filter(None, [self.op.pnode, self.op.snode,
9592 self.op.src_node]))
9594 #### node related checks
9596 # check primary node
9597 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9598 assert self.pnode is not None, \
9599 "Cannot retrieve locked node %s" % self.op.pnode
9601 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9602 pnode.name, errors.ECODE_STATE)
9604 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9605 pnode.name, errors.ECODE_STATE)
9606 if not pnode.vm_capable:
9607 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9608 " '%s'" % pnode.name, errors.ECODE_STATE)
9610 self.secondaries = []
9612 # mirror node verification
9613 if self.op.disk_template in constants.DTS_INT_MIRROR:
9614 if self.op.snode == pnode.name:
9615 raise errors.OpPrereqError("The secondary node cannot be the"
9616 " primary node", errors.ECODE_INVAL)
9617 _CheckNodeOnline(self, self.op.snode)
9618 _CheckNodeNotDrained(self, self.op.snode)
9619 _CheckNodeVmCapable(self, self.op.snode)
9620 self.secondaries.append(self.op.snode)
9622 snode = self.cfg.GetNodeInfo(self.op.snode)
9623 if pnode.group != snode.group:
9624 self.LogWarning("The primary and secondary nodes are in two"
9625 " different node groups; the disk parameters"
9626 " from the first disk's node group will be"
9629 nodenames = [pnode.name] + self.secondaries
9631 # disk parameters (not customizable at instance or node level)
9632 # just use the primary node parameters, ignoring the secondary.
9633 self.diskparams = self.cfg.GetNodeGroup(pnode.group).diskparams
9635 if not self.adopt_disks:
9636 # Check lv size requirements, if not adopting
9637 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9638 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9640 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9641 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9642 disk[constants.IDISK_ADOPT])
9643 for disk in self.disks])
9644 if len(all_lvs) != len(self.disks):
9645 raise errors.OpPrereqError("Duplicate volume names given for adoption",
9647 for lv_name in all_lvs:
9649 # FIXME: lv_name here is "vg/lv" need to ensure that other calls
9650 # to ReserveLV uses the same syntax
9651 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9652 except errors.ReservationError:
9653 raise errors.OpPrereqError("LV named %s used by another instance" %
9654 lv_name, errors.ECODE_NOTUNIQUE)
9656 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9657 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9659 node_lvs = self.rpc.call_lv_list([pnode.name],
9660 vg_names.payload.keys())[pnode.name]
9661 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9662 node_lvs = node_lvs.payload
9664 delta = all_lvs.difference(node_lvs.keys())
9666 raise errors.OpPrereqError("Missing logical volume(s): %s" %
9667 utils.CommaJoin(delta),
9669 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9671 raise errors.OpPrereqError("Online logical volumes found, cannot"
9672 " adopt: %s" % utils.CommaJoin(online_lvs),
9674 # update the size of disk based on what is found
9675 for dsk in self.disks:
9676 dsk[constants.IDISK_SIZE] = \
9677 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9678 dsk[constants.IDISK_ADOPT])][0]))
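# Editor's note (sketch): the lv_list payload maps "vg/lv" names to a tuple
# whose first field is the size in MiB and whose third field is the online
# flag, which is what the [0] and [2] indexing above relies on, e.g.
# (hypothetical) node_lvs == {"xenvg/disk0": (10240.00, False, False)}.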
9680 elif self.op.disk_template == constants.DT_BLOCK:
9681 # Normalize and de-duplicate device paths
9682 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9683 for disk in self.disks])
9684 if len(all_disks) != len(self.disks):
9685 raise errors.OpPrereqError("Duplicate disk names given for adoption",
9687 baddisks = [d for d in all_disks
9688 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9690 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9691 " cannot be adopted" %
9692 (", ".join(baddisks),
9693 constants.ADOPTABLE_BLOCKDEV_ROOT),
9694 errors.ECODE_INVAL)
9696 node_disks = self.rpc.call_bdev_sizes([pnode.name],
9697 list(all_disks))[pnode.name]
9698 node_disks.Raise("Cannot get block device information from node %s" %
9699 pnode.name)
9700 node_disks = node_disks.payload
9701 delta = all_disks.difference(node_disks.keys())
9703 raise errors.OpPrereqError("Missing block device(s): %s" %
9704 utils.CommaJoin(delta),
9706 for dsk in self.disks:
9707 dsk[constants.IDISK_SIZE] = \
9708 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
9710 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
9712 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
9713 # check OS parameters (remotely)
9714 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
9716 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
9718 # memory check on primary node
9719 #TODO(dynmem): use MINMEM for checking
9720 if self.op.start:
9721 _CheckNodeFreeMemory(self, self.pnode.name,
9722 "creating instance %s" % self.op.instance_name,
9723 self.be_full[constants.BE_MAXMEM],
9724 self.op.hypervisor)
9726 self.dry_run_result = list(nodenames)
9728 def Exec(self, feedback_fn):
9729 """Create and add the instance to the cluster.
9732 instance = self.op.instance_name
9733 pnode_name = self.pnode.name
9735 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
9736 self.owned_locks(locking.LEVEL_NODE)), \
9737 "Node locks differ from node resource locks"
9739 ht_kind = self.op.hypervisor
9740 if ht_kind in constants.HTS_REQ_PORT:
9741 network_port = self.cfg.AllocatePort()
9742 else:
9743 network_port = None
9745 disks = _GenerateDiskTemplate(self,
9746 self.op.disk_template,
9747 instance, pnode_name,
9748 self.secondaries,
9749 self.disks,
9750 self.instance_file_storage_dir,
9751 self.op.file_driver,
9752 0,
9753 feedback_fn,
9754 self.diskparams)
9756 iobj = objects.Instance(name=instance, os=self.op.os_type,
9757 primary_node=pnode_name,
9758 nics=self.nics, disks=disks,
9759 disk_template=self.op.disk_template,
9760 admin_state=constants.ADMINST_DOWN,
9761 network_port=network_port,
9762 beparams=self.op.beparams,
9763 hvparams=self.op.hvparams,
9764 hypervisor=self.op.hypervisor,
9765 osparams=self.op.osparams,
9766 )
9768 if self.op.tags:
9769 for tag in self.op.tags:
9770 iobj.AddTag(tag)
9772 if self.adopt_disks:
9773 if self.op.disk_template == constants.DT_PLAIN:
9774 # rename LVs to the newly-generated names; we need to construct
9775 # 'fake' LV disks with the old data, plus the new unique_id
9776 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
9777 rename_to = []
9778 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
9779 rename_to.append(t_dsk.logical_id)
9780 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
9781 self.cfg.SetDiskID(t_dsk, pnode_name)
9782 result = self.rpc.call_blockdev_rename(pnode_name,
9783 zip(tmp_disks, rename_to))
9784 result.Raise("Failed to rename adoped LVs")
9786 feedback_fn("* creating instance disks...")
9787 try:
9788 _CreateDisks(self, iobj)
9789 except errors.OpExecError:
9790 self.LogWarning("Device creation failed, reverting...")
9791 try:
9792 _RemoveDisks(self, iobj)
9793 finally:
9794 self.cfg.ReleaseDRBDMinors(instance)
9795 raise
9797 feedback_fn("adding instance %s to cluster config" % instance)
9799 self.cfg.AddInstance(iobj, self.proc.GetECId())
9801 # Declare that we don't want to remove the instance lock anymore, as we've
9802 # added the instance to the config
9803 del self.remove_locks[locking.LEVEL_INSTANCE]
9805 if self.op.mode == constants.INSTANCE_IMPORT:
9806 # Release unused nodes
9807 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
9808 else:
9809 # Release all nodes
9810 _ReleaseLocks(self, locking.LEVEL_NODE)
9812 disk_abort = False
9813 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
9814 feedback_fn("* wiping instance disks...")
9815 try:
9816 _WipeDisks(self, iobj)
9817 except errors.OpExecError, err:
9818 logging.exception("Wiping disks failed")
9819 self.LogWarning("Wiping instance disks failed (%s)", err)
9823 # Something is already wrong with the disks, don't do anything else
9825 elif self.op.wait_for_sync:
9826 disk_abort = not _WaitForSync(self, iobj)
9827 elif iobj.disk_template in constants.DTS_INT_MIRROR:
9828 # make sure the disks are not degraded (still sync-ing is ok)
9829 feedback_fn("* checking mirrors status")
9830 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
9831 else:
9832 disk_abort = False
9834 if disk_abort:
9835 _RemoveDisks(self, iobj)
9836 self.cfg.RemoveInstance(iobj.name)
9837 # Make sure the instance lock gets removed
9838 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
9839 raise errors.OpExecError("There are some degraded disks for"
9842 # Release all node resource locks
9843 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
9845 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
9846 if self.op.mode == constants.INSTANCE_CREATE:
9847 if not self.op.no_install:
9848 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
9849 not self.op.wait_for_sync)
9851 feedback_fn("* pausing disk sync to install instance OS")
9852 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9854 for idx, success in enumerate(result.payload):
9856 logging.warn("pause-sync of instance %s for disk %d failed",
9859 feedback_fn("* running the instance OS create scripts...")
9860 # FIXME: pass debug option from opcode to backend
9861 os_add_result = \
9862 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
9863 self.op.debug_level)
9865 feedback_fn("* resuming disk sync")
9866 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9868 for idx, success in enumerate(result.payload):
9870 logging.warn("resume-sync of instance %s for disk %d failed",
9873 os_add_result.Raise("Could not add os for instance %s"
9874 " on node %s" % (instance, pnode_name))
9876 elif self.op.mode == constants.INSTANCE_IMPORT:
9877 feedback_fn("* running the instance OS import scripts...")
9879 transfers = []
9881 for idx, image in enumerate(self.src_images):
9882 if not image:
9883 continue
9885 # FIXME: pass debug option from opcode to backend
9886 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
9887 constants.IEIO_FILE, (image, ),
9888 constants.IEIO_SCRIPT,
9889 (iobj.disks[idx], idx),
9890 None)
9891 transfers.append(dt)
9893 import_result = \
9894 masterd.instance.TransferInstanceData(self, feedback_fn,
9895 self.op.src_node, pnode_name,
9896 self.pnode.secondary_ip,
9897 iobj, transfers)
9898 if not compat.all(import_result):
9899 self.LogWarning("Some disks for instance %s on node %s were not"
9900 " imported successfully" % (instance, pnode_name))
9902 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9903 feedback_fn("* preparing remote import...")
9904 # The source cluster will stop the instance before attempting to make a
9905 # connection. In some cases stopping an instance can take a long time,
9906 # hence the shutdown timeout is added to the connection timeout.
9907 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
9908 self.op.source_shutdown_timeout)
9909 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9911 assert iobj.primary_node == self.pnode.name
9912 disk_results = \
9913 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
9914 self.source_x509_ca,
9915 self._cds, timeouts)
9916 if not compat.all(disk_results):
9917 # TODO: Should the instance still be started, even if some disks
9918 # failed to import (valid for local imports, too)?
9919 self.LogWarning("Some disks for instance %s on node %s were not"
9920 " imported successfully" % (instance, pnode_name))
9922 # Run rename script on newly imported instance
9923 assert iobj.name == instance
9924 feedback_fn("Running rename script for %s" % instance)
9925 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
9926 self.source_instance_name,
9927 self.op.debug_level)
9929 self.LogWarning("Failed to run rename script for %s on node"
9930 " %s: %s" % (instance, pnode_name, result.fail_msg))
9932 else:
9933 # also checked in the prereq part
9934 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
9935 % self.op.mode)
9937 assert not self.owned_locks(locking.LEVEL_NODE_RES)
9939 if self.op.start:
9940 iobj.admin_state = constants.ADMINST_UP
9941 self.cfg.Update(iobj, feedback_fn)
9942 logging.info("Starting instance %s on node %s", instance, pnode_name)
9943 feedback_fn("* starting instance...")
9944 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
9945 False)
9946 result.Raise("Could not start instance")
9948 return list(iobj.all_nodes)
9951 class LUInstanceConsole(NoHooksLU):
9952 """Connect to an instance's console.
9954 This is somewhat special in that it returns the command line that
9955 you need to run on the master node in order to connect to the
9956 console.
9958 """
9959 REQ_BGL = False
9961 def ExpandNames(self):
9962 self.share_locks = _ShareAll()
9963 self._ExpandAndLockInstance()
9965 def CheckPrereq(self):
9966 """Check prerequisites.
9968 This checks that the instance is in the cluster.
9971 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9972 assert self.instance is not None, \
9973 "Cannot retrieve locked instance %s" % self.op.instance_name
9974 _CheckNodeOnline(self, self.instance.primary_node)
9976 def Exec(self, feedback_fn):
9977 """Connect to the console of an instance
9980 instance = self.instance
9981 node = instance.primary_node
9983 node_insts = self.rpc.call_instance_list([node],
9984 [instance.hypervisor])[node]
9985 node_insts.Raise("Can't get node information from %s" % node)
9987 if instance.name not in node_insts.payload:
9988 if instance.admin_state == constants.ADMINST_UP:
9989 state = constants.INSTST_ERRORDOWN
9990 elif instance.admin_state == constants.ADMINST_DOWN:
9991 state = constants.INSTST_ADMINDOWN
9993 state = constants.INSTST_ADMINOFFLINE
9994 raise errors.OpExecError("Instance %s is not running (state %s)" %
9995 (instance.name, state))
9997 logging.debug("Connecting to console of %s on %s", instance.name, node)
9999 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10002 def _GetInstanceConsole(cluster, instance):
10003 """Returns console information for an instance.
10005 @type cluster: L{objects.Cluster}
10006 @type instance: L{objects.Instance}
10010 hyper = hypervisor.GetHypervisor(instance.hypervisor)
10011 # beparams and hvparams are passed separately, to avoid editing the
10012 # instance and then saving the defaults in the instance itself.
10013 hvparams = cluster.FillHV(instance)
10014 beparams = cluster.FillBE(instance)
10015 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10017 assert console.instance == instance.name
10018 assert console.Validate()
10020 return console.ToDict()
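# Editor's sketch: a caller of _GetInstanceConsole gets back a plain dict;
# the exact keys are defined by objects.InstanceConsole, the values below are
# hypothetical:
#
#   console = _GetInstanceConsole(cluster, instance)
#   # e.g. {"instance": "instance1.example.com", "kind": "ssh",
#   #       "host": "node1.example.com", ...}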
10023 class LUInstanceReplaceDisks(LogicalUnit):
10024 """Replace the disks of an instance.
10027 HPATH = "mirrors-replace"
10028 HTYPE = constants.HTYPE_INSTANCE
10031 def CheckArguments(self):
10032 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
10033 self.op.iallocator)
10035 def ExpandNames(self):
10036 self._ExpandAndLockInstance()
10038 assert locking.LEVEL_NODE not in self.needed_locks
10039 assert locking.LEVEL_NODE_RES not in self.needed_locks
10040 assert locking.LEVEL_NODEGROUP not in self.needed_locks
10042 assert self.op.iallocator is None or self.op.remote_node is None, \
10043 "Conflicting options"
10045 if self.op.remote_node is not None:
10046 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10048 # Warning: do not remove the locking of the new secondary here
10049 # unless DRBD8.AddChildren is changed to work in parallel;
10050 # currently it doesn't since parallel invocations of
10051 # FindUnusedMinor will conflict
10052 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10053 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10054 else:
10055 self.needed_locks[locking.LEVEL_NODE] = []
10056 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10058 if self.op.iallocator is not None:
10059 # iallocator will select a new node in the same group
10060 self.needed_locks[locking.LEVEL_NODEGROUP] = []
10062 self.needed_locks[locking.LEVEL_NODE_RES] = []
10064 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10065 self.op.iallocator, self.op.remote_node,
10066 self.op.disks, False, self.op.early_release)
10068 self.tasklets = [self.replacer]
10070 def DeclareLocks(self, level):
10071 if level == locking.LEVEL_NODEGROUP:
10072 assert self.op.remote_node is None
10073 assert self.op.iallocator is not None
10074 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10076 self.share_locks[locking.LEVEL_NODEGROUP] = 1
10077 # Lock all groups used by instance optimistically; this requires going
10078 # via the node before it's locked, requiring verification later on
10079 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10080 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10082 elif level == locking.LEVEL_NODE:
10083 if self.op.iallocator is not None:
10084 assert self.op.remote_node is None
10085 assert not self.needed_locks[locking.LEVEL_NODE]
10087 # Lock member nodes of all locked groups
10088 self.needed_locks[locking.LEVEL_NODE] = [node_name
10089 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10090 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
10092 self._LockInstancesNodes()
10093 elif level == locking.LEVEL_NODE_RES:
10095 self.needed_locks[locking.LEVEL_NODE_RES] = \
10096 self.needed_locks[locking.LEVEL_NODE]
10098 def BuildHooksEnv(self):
10099 """Build hooks env.
10101 This runs on the master, the primary and all the secondaries.
10104 instance = self.replacer.instance
10106 "MODE": self.op.mode,
10107 "NEW_SECONDARY": self.op.remote_node,
10108 "OLD_SECONDARY": instance.secondary_nodes[0],
10110 env.update(_BuildInstanceHookEnvByObject(self, instance))
10113 def BuildHooksNodes(self):
10114 """Build hooks nodes.
10117 instance = self.replacer.instance
10118 nl = [
10119 self.cfg.GetMasterNode(),
10120 instance.primary_node,
10121 ]
10122 if self.op.remote_node is not None:
10123 nl.append(self.op.remote_node)
10125 return nl, nl
10126 def CheckPrereq(self):
10127 """Check prerequisites.
10130 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
10131 self.op.iallocator is None)
10133 # Verify if node group locks are still correct
10134 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10135 if owned_groups:
10136 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
10138 return LogicalUnit.CheckPrereq(self)
10141 class TLReplaceDisks(Tasklet):
10142 """Replaces disks for an instance.
10144 Note: Locking is not within the scope of this class.
10147 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10148 disks, delay_iallocator, early_release):
10149 """Initializes this class.
10152 Tasklet.__init__(self, lu)
10155 self.instance_name = instance_name
10156 self.mode = mode
10157 self.iallocator_name = iallocator_name
10158 self.remote_node = remote_node
10159 self.disks = disks
10160 self.delay_iallocator = delay_iallocator
10161 self.early_release = early_release
10164 self.instance = None
10165 self.new_node = None
10166 self.target_node = None
10167 self.other_node = None
10168 self.remote_node_info = None
10169 self.node_secondary_ip = None
10171 @staticmethod
10172 def CheckArguments(mode, remote_node, iallocator):
10173 """Helper function for users of this class.
10176 # check for valid parameter combination
10177 if mode == constants.REPLACE_DISK_CHG:
10178 if remote_node is None and iallocator is None:
10179 raise errors.OpPrereqError("When changing the secondary either an"
10180 " iallocator script must be used or the"
10181 " new node given", errors.ECODE_INVAL)
10183 if remote_node is not None and iallocator is not None:
10184 raise errors.OpPrereqError("Give either the iallocator or the new"
10185 " secondary, not both", errors.ECODE_INVAL)
10187 elif remote_node is not None or iallocator is not None:
10188 # Not replacing the secondary
10189 raise errors.OpPrereqError("The iallocator and new node options can"
10190 " only be used when changing the"
10191 " secondary node", errors.ECODE_INVAL)
10193 @staticmethod
10194 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10195 """Compute a new secondary node using an IAllocator.
10198 ial = IAllocator(lu.cfg, lu.rpc,
10199 mode=constants.IALLOCATOR_MODE_RELOC,
10200 name=instance_name,
10201 relocate_from=list(relocate_from))
10203 ial.Run(iallocator_name)
10205 if not ial.success:
10206 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10207 " %s" % (iallocator_name, ial.info),
10208 errors.ECODE_NORES)
10210 if len(ial.result) != ial.required_nodes:
10211 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10212 " of nodes (%s), required %s" %
10214 len(ial.result), ial.required_nodes),
10215 errors.ECODE_FAULT)
10217 remote_node_name = ial.result[0]
10219 lu.LogInfo("Selected new secondary for instance '%s': %s",
10220 instance_name, remote_node_name)
10222 return remote_node_name
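# Editor's sketch: the relocation request above asks the iallocator for
# exactly one replacement secondary; a (hypothetical) successful run yields
# ial.success == True, ial.required_nodes == 1 and ial.result == ["node4"],
# of which only result[0] is used.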
10224 def _FindFaultyDisks(self, node_name):
10225 """Wrapper for L{_FindFaultyInstanceDisks}.
10228 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10229 node_name, True)
10231 def _CheckDisksActivated(self, instance):
10232 """Checks if the instance disks are activated.
10234 @param instance: The instance to check disks
10235 @return: True if they are activated, False otherwise
10238 nodes = instance.all_nodes
10240 for idx, dev in enumerate(instance.disks):
10242 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10243 self.cfg.SetDiskID(dev, node)
10245 result = self.rpc.call_blockdev_find(node, dev)
10247 if result.offline:
10248 continue
10249 elif result.fail_msg or not result.payload:
10250 return False
10252 return True
10254 def CheckPrereq(self):
10255 """Check prerequisites.
10257 This checks that the instance is in the cluster.
10260 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10261 assert instance is not None, \
10262 "Cannot retrieve locked instance %s" % self.instance_name
10264 if instance.disk_template != constants.DT_DRBD8:
10265 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10266 " instances", errors.ECODE_INVAL)
10268 if len(instance.secondary_nodes) != 1:
10269 raise errors.OpPrereqError("The instance has a strange layout,"
10270 " expected one secondary but found %d" %
10271 len(instance.secondary_nodes),
10272 errors.ECODE_FAULT)
10274 if not self.delay_iallocator:
10275 self._CheckPrereq2()
10277 def _CheckPrereq2(self):
10278 """Check prerequisites, second part.
10280 This function should always be part of CheckPrereq. It was separated and is
10281 now called from Exec because during node evacuation iallocator was only
10282 called with an unmodified cluster model, not taking planned changes into
10283 account.
10285 """
10286 instance = self.instance
10287 secondary_node = instance.secondary_nodes[0]
10289 if self.iallocator_name is None:
10290 remote_node = self.remote_node
10291 else:
10292 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10293 instance.name, instance.secondary_nodes)
10295 if remote_node is None:
10296 self.remote_node_info = None
10297 else:
10298 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10299 "Remote node '%s' is not locked" % remote_node
10301 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10302 assert self.remote_node_info is not None, \
10303 "Cannot retrieve locked node %s" % remote_node
10305 if remote_node == self.instance.primary_node:
10306 raise errors.OpPrereqError("The specified node is the primary node of"
10307 " the instance", errors.ECODE_INVAL)
10309 if remote_node == secondary_node:
10310 raise errors.OpPrereqError("The specified node is already the"
10311 " secondary node of the instance",
10312 errors.ECODE_INVAL)
10314 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10315 constants.REPLACE_DISK_CHG):
10316 raise errors.OpPrereqError("Cannot specify disks to be replaced",
10317 errors.ECODE_INVAL)
10319 if self.mode == constants.REPLACE_DISK_AUTO:
10320 if not self._CheckDisksActivated(instance):
10321 raise errors.OpPrereqError("Please run activate-disks on instance %s"
10322 " first" % self.instance_name,
10323 errors.ECODE_STATE)
10324 faulty_primary = self._FindFaultyDisks(instance.primary_node)
10325 faulty_secondary = self._FindFaultyDisks(secondary_node)
10327 if faulty_primary and faulty_secondary:
10328 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10329 " one node and can not be repaired"
10330 " automatically" % self.instance_name,
10331 errors.ECODE_STATE)
10333 if faulty_primary:
10334 self.disks = faulty_primary
10335 self.target_node = instance.primary_node
10336 self.other_node = secondary_node
10337 check_nodes = [self.target_node, self.other_node]
10338 elif faulty_secondary:
10339 self.disks = faulty_secondary
10340 self.target_node = secondary_node
10341 self.other_node = instance.primary_node
10342 check_nodes = [self.target_node, self.other_node]
10343 else:
10344 self.disks = []
10345 check_nodes = []
10347 else:
10348 # Non-automatic modes
10349 if self.mode == constants.REPLACE_DISK_PRI:
10350 self.target_node = instance.primary_node
10351 self.other_node = secondary_node
10352 check_nodes = [self.target_node, self.other_node]
10354 elif self.mode == constants.REPLACE_DISK_SEC:
10355 self.target_node = secondary_node
10356 self.other_node = instance.primary_node
10357 check_nodes = [self.target_node, self.other_node]
10359 elif self.mode == constants.REPLACE_DISK_CHG:
10360 self.new_node = remote_node
10361 self.other_node = instance.primary_node
10362 self.target_node = secondary_node
10363 check_nodes = [self.new_node, self.other_node]
10365 _CheckNodeNotDrained(self.lu, remote_node)
10366 _CheckNodeVmCapable(self.lu, remote_node)
10368 old_node_info = self.cfg.GetNodeInfo(secondary_node)
10369 assert old_node_info is not None
10370 if old_node_info.offline and not self.early_release:
10371 # doesn't make sense to delay the release
10372 self.early_release = True
10373 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10374 " early-release mode", secondary_node)
10377 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10380 # If not specified all disks should be replaced
10381 if not self.disks:
10382 self.disks = range(len(self.instance.disks))
10384 # TODO: compute disk parameters
10385 primary_node_info = self.cfg.GetNodeInfo(instance.primary_node)
10386 secondary_node_info = self.cfg.GetNodeInfo(secondary_node)
10387 if primary_node_info.group != secondary_node_info.group:
10388 self.lu.LogInfo("The instance primary and secondary nodes are in two"
10389 " different node groups; the disk parameters of the"
10390 " primary node's group will be applied.")
10392 self.diskparams = self.cfg.GetNodeGroup(primary_node_info.group).diskparams
10394 for node in check_nodes:
10395 _CheckNodeOnline(self.lu, node)
10397 touched_nodes = frozenset(node_name for node_name in [self.new_node,
10398 self.other_node,
10399 self.target_node]
10400 if node_name is not None)
10402 # Release unneeded node and node resource locks
10403 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10404 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10406 # Release any owned node group
10407 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10408 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10410 # Check whether disks are valid
10411 for disk_idx in self.disks:
10412 instance.FindDisk(disk_idx)
10414 # Get secondary node IP addresses
10415 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10416 in self.cfg.GetMultiNodeInfo(touched_nodes))
10418 def Exec(self, feedback_fn):
10419 """Execute disk replacement.
10421 This dispatches the disk replacement to the appropriate handler.
10424 if self.delay_iallocator:
10425 self._CheckPrereq2()
10427 if __debug__:
10428 # Verify owned locks before starting operation
10429 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10430 assert set(owned_nodes) == set(self.node_secondary_ip), \
10431 ("Incorrect node locks, owning %s, expected %s" %
10432 (owned_nodes, self.node_secondary_ip.keys()))
10433 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10434 self.lu.owned_locks(locking.LEVEL_NODE_RES))
10436 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10437 assert list(owned_instances) == [self.instance_name], \
10438 "Instance '%s' not locked" % self.instance_name
10440 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10441 "Should not own any node group lock at this point"
10444 feedback_fn("No disks need replacement")
10447 feedback_fn("Replacing disk(s) %s for %s" %
10448 (utils.CommaJoin(self.disks), self.instance.name))
10450 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10452 # Activate the instance disks if we're replacing them on a down instance
10453 if activate_disks:
10454 _StartInstanceDisks(self.lu, self.instance, True)
10456 try:
10457 # Should we replace the secondary node?
10458 if self.new_node is not None:
10459 fn = self._ExecDrbd8Secondary
10460 else:
10461 fn = self._ExecDrbd8DiskOnly
10463 result = fn(feedback_fn)
10464 finally:
10465 # Deactivate the instance disks if we're replacing them on a
10466 # down instance
10467 if activate_disks:
10468 _SafeShutdownInstanceDisks(self.lu, self.instance)
10470 assert not self.lu.owned_locks(locking.LEVEL_NODE)
10472 if __debug__:
10473 # Verify owned locks
10474 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10475 nodes = frozenset(self.node_secondary_ip)
10476 assert ((self.early_release and not owned_nodes) or
10477 (not self.early_release and not (set(owned_nodes) - nodes))), \
10478 ("Not owning the correct locks, early_release=%s, owned=%r,"
10479 " nodes=%r" % (self.early_release, owned_nodes, nodes))
10483 def _CheckVolumeGroup(self, nodes):
10484 self.lu.LogInfo("Checking volume groups")
10486 vgname = self.cfg.GetVGName()
10488 # Make sure volume group exists on all involved nodes
10489 results = self.rpc.call_vg_list(nodes)
10491 raise errors.OpExecError("Can't list volume groups on the nodes")
10494 res = results[node]
10495 res.Raise("Error checking node %s" % node)
10496 if vgname not in res.payload:
10497 raise errors.OpExecError("Volume group '%s' not found on node %s" %
10500 def _CheckDisksExistence(self, nodes):
10501 # Check disk existence
10502 for idx, dev in enumerate(self.instance.disks):
10503 if idx not in self.disks:
10504 continue
10506 for node in nodes:
10507 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10508 self.cfg.SetDiskID(dev, node)
10510 result = self.rpc.call_blockdev_find(node, dev)
10512 msg = result.fail_msg
10513 if msg or not result.payload:
10515 msg = "disk not found"
10516 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10519 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10520 for idx, dev in enumerate(self.instance.disks):
10521 if idx not in self.disks:
10522 continue
10524 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10525 (idx, node_name))
10527 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
10528 ldisk=ldisk):
10529 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10530 " replace disks for instance %s" %
10531 (node_name, self.instance.name))
10533 def _CreateNewStorage(self, node_name):
10534 """Create new storage on the primary or secondary node.
10536 This is only used for same-node replaces, not for changing the
10537 secondary node, hence we don't want to modify the existing disk.
10539 """
10540 iv_names = {}
10542 for idx, dev in enumerate(self.instance.disks):
10543 if idx not in self.disks:
10544 continue
10546 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10548 self.cfg.SetDiskID(dev, node_name)
10550 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10551 names = _GenerateUniqueNames(self.lu, lv_names)
10553 _, data_p, meta_p = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
10555 vg_data = dev.children[0].logical_id[0]
10556 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10557 logical_id=(vg_data, names[0]), params=data_p)
10558 vg_meta = dev.children[1].logical_id[0]
10559 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10560 logical_id=(vg_meta, names[1]), params=meta_p)
10562 new_lvs = [lv_data, lv_meta]
10563 old_lvs = [child.Copy() for child in dev.children]
10564 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10566 # we pass force_create=True to force the LVM creation
10567 for new_lv in new_lvs:
10568 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
10569 _GetInstanceInfoText(self.instance), False)
10571 return iv_names
10573 def _CheckDevices(self, node_name, iv_names):
10574 for name, (dev, _, _) in iv_names.iteritems():
10575 self.cfg.SetDiskID(dev, node_name)
10577 result = self.rpc.call_blockdev_find(node_name, dev)
10579 msg = result.fail_msg
10580 if msg or not result.payload:
10582 msg = "disk not found"
10583 raise errors.OpExecError("Can't find DRBD device %s: %s" %
10586 if result.payload.is_degraded:
10587 raise errors.OpExecError("DRBD device %s is degraded!" % name)
10589 def _RemoveOldStorage(self, node_name, iv_names):
10590 for name, (_, old_lvs, _) in iv_names.iteritems():
10591 self.lu.LogInfo("Remove logical volumes for %s" % name)
10593 for lv in old_lvs:
10594 self.cfg.SetDiskID(lv, node_name)
10596 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10598 self.lu.LogWarning("Can't remove old LV: %s" % msg,
10599 hint="remove unused LVs manually")
10601 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10602 """Replace a disk on the primary or secondary for DRBD 8.
10604 The algorithm for replace is quite complicated:
10606 1. for each disk to be replaced:
10608 1. create new LVs on the target node with unique names
10609 1. detach old LVs from the drbd device
10610 1. rename old LVs to name_replaced.<time_t>
10611 1. rename new LVs to old LVs
10612 1. attach the new LVs (with the old names now) to the drbd device
10614 1. wait for sync across all devices
10616 1. for each modified disk:
10618 1. remove old LVs (which have the name name_replaced.<time_t>)
10620 Failures are not very well handled.
10622 """
10623 steps_total = 6
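# Editor's sketch of the per-disk LV swap performed below, using hypothetical
# names for one disk (the real names come from _GenerateUniqueNames):
#
#   old data LV : xenvg/<uuid-old>.disk0_data -> renamed to *_replaced-<time>
#   new data LV : xenvg/<uuid-new>.disk0_data -> renamed to the old LV's name
#   drbd device : removechildren(old LVs) ... addchildren(renamed new LVs)
#
# so the DRBD device keeps its logical name while its backing storage is
# swapped underneath it.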
10625 # Step: check device activation
10626 self.lu.LogStep(1, steps_total, "Check device existence")
10627 self._CheckDisksExistence([self.other_node, self.target_node])
10628 self._CheckVolumeGroup([self.target_node, self.other_node])
10630 # Step: check other node consistency
10631 self.lu.LogStep(2, steps_total, "Check peer consistency")
10632 self._CheckDisksConsistency(self.other_node,
10633 self.other_node == self.instance.primary_node,
10636 # Step: create new storage
10637 self.lu.LogStep(3, steps_total, "Allocate new storage")
10638 iv_names = self._CreateNewStorage(self.target_node)
10640 # Step: for each lv, detach+rename*2+attach
10641 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10642 for dev, old_lvs, new_lvs in iv_names.itervalues():
10643 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10645 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10646 old_lvs)
10647 result.Raise("Can't detach drbd from local storage on node"
10648 " %s for device %s" % (self.target_node, dev.iv_name))
10650 #cfg.Update(instance)
10652 # ok, we created the new LVs, so now we know we have the needed
10653 # storage; as such, we proceed on the target node to rename
10654 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
10655 # using the assumption that logical_id == physical_id (which in
10656 # turn is the unique_id on that node)
10658 # FIXME(iustin): use a better name for the replaced LVs
10659 temp_suffix = int(time.time())
10660 ren_fn = lambda d, suff: (d.physical_id[0],
10661 d.physical_id[1] + "_replaced-%s" % suff)
10663 # Build the rename list based on what LVs exist on the node
10664 rename_old_to_new = []
10665 for to_ren in old_lvs:
10666 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
10667 if not result.fail_msg and result.payload:
10669 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
10671 self.lu.LogInfo("Renaming the old LVs on the target node")
10672 result = self.rpc.call_blockdev_rename(self.target_node,
10673 rename_old_to_new)
10674 result.Raise("Can't rename old LVs on node %s" % self.target_node)
10676 # Now we rename the new LVs to the old LVs
10677 self.lu.LogInfo("Renaming the new LVs on the target node")
10678 rename_new_to_old = [(new, old.physical_id)
10679 for old, new in zip(old_lvs, new_lvs)]
10680 result = self.rpc.call_blockdev_rename(self.target_node,
10681 rename_new_to_old)
10682 result.Raise("Can't rename new LVs on node %s" % self.target_node)
10684 # Intermediate steps of in memory modifications
10685 for old, new in zip(old_lvs, new_lvs):
10686 new.logical_id = old.logical_id
10687 self.cfg.SetDiskID(new, self.target_node)
10689 # We need to modify old_lvs so that removal later removes the
10690 # right LVs, not the newly added ones; note that old_lvs is a
10691 # copy here
10692 for disk in old_lvs:
10693 disk.logical_id = ren_fn(disk, temp_suffix)
10694 self.cfg.SetDiskID(disk, self.target_node)
10696 # Now that the new lvs have the old name, we can add them to the device
10697 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
10698 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
10699 new_lvs)
10700 msg = result.fail_msg
10701 if msg:
10702 for new_lv in new_lvs:
10703 msg2 = self.rpc.call_blockdev_remove(self.target_node,
10704 new_lv).fail_msg
10705 if msg2:
10706 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
10707 hint=("cleanup manually the unused logical"
10708 " volumes"))
10709 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
10711 cstep = itertools.count(5)
10713 if self.early_release:
10714 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10715 self._RemoveOldStorage(self.target_node, iv_names)
10716 # TODO: Check if releasing locks early still makes sense
10717 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10718 else:
10719 # Release all resource locks except those used by the instance
10720 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10721 keep=self.node_secondary_ip.keys())
10723 # Release all node locks while waiting for sync
10724 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10726 # TODO: Can the instance lock be downgraded here? Take the optional disk
10727 # shutdown in the caller into consideration.
10730 # This can fail as the old devices are degraded and _WaitForSync
10731 # does a combined result over all disks, so we don't check its return value
10732 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10733 _WaitForSync(self.lu, self.instance)
10735 # Check all devices manually
10736 self._CheckDevices(self.instance.primary_node, iv_names)
10738 # Step: remove old storage
10739 if not self.early_release:
10740 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10741 self._RemoveOldStorage(self.target_node, iv_names)
10743 def _ExecDrbd8Secondary(self, feedback_fn):
10744 """Replace the secondary node for DRBD 8.
10746 The algorithm for replace is quite complicated:
10747 - for all disks of the instance:
10748 - create new LVs on the new node with same names
10749 - shutdown the drbd device on the old secondary
10750 - disconnect the drbd network on the primary
10751 - create the drbd device on the new secondary
10752 - network attach the drbd on the primary, using an artifice:
10753 the drbd code for Attach() will connect to the network if it
10754 finds a device which is connected to the good local disks but
10755 not network enabled
10756 - wait for sync across all devices
10757 - remove all disks from the old secondary
10759 Failures are not very well handled.
10761 """
10762 steps_total = 6
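# Editor's sketch of the DRBD8 logical_id rewrite performed below, with
# hypothetical nodes/minors: an existing disk carries
#   ("node1", "node2", 11000, 0, 1, "secret")
# i.e. (nodeA, nodeB, port, minorA, minorB, secret); for the new secondary
# "node3" two IDs are built: new_alone_id with port=None (activate without
# networking) and new_net_id with the original port (attached in step 4).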
10764 pnode = self.instance.primary_node
10766 # Step: check device activation
10767 self.lu.LogStep(1, steps_total, "Check device existence")
10768 self._CheckDisksExistence([self.instance.primary_node])
10769 self._CheckVolumeGroup([self.instance.primary_node])
10771 # Step: check other node consistency
10772 self.lu.LogStep(2, steps_total, "Check peer consistency")
10773 self._CheckDisksConsistency(self.instance.primary_node, True, True)
10775 # Step: create new storage
10776 self.lu.LogStep(3, steps_total, "Allocate new storage")
10777 for idx, dev in enumerate(self.instance.disks):
10778 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
10779 (self.new_node, idx))
10780 # we pass force_create=True to force LVM creation
10781 for new_lv in dev.children:
10782 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
10783 _GetInstanceInfoText(self.instance), False)
10785 # Step 4: drbd minors and drbd setup changes
10786 # after this, we must manually remove the drbd minors on both the
10787 # error and the success paths
10788 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10789 minors = self.cfg.AllocateDRBDMinor([self.new_node
10790 for dev in self.instance.disks],
10791 self.instance.name)
10792 logging.debug("Allocated minors %r", minors)
10794 iv_names = {}
10795 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
10796 self.lu.LogInfo("Activating a new drbd on %s for disk/%d" %
10797 (self.new_node, idx))
10798 # create new devices on new_node; note that we create two IDs:
10799 # one without port, so the drbd will be activated without
10800 # networking information on the new node at this stage, and one
10801 # with network, for the later activation in step 4
10802 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
10803 if self.instance.primary_node == o_node1:
10804 p_minor = o_minor1
10805 else:
10806 assert self.instance.primary_node == o_node2, "Three-node instance?"
10807 p_minor = o_minor2
10809 new_alone_id = (self.instance.primary_node, self.new_node, None,
10810 p_minor, new_minor, o_secret)
10811 new_net_id = (self.instance.primary_node, self.new_node, o_port,
10812 p_minor, new_minor, o_secret)
10814 iv_names[idx] = (dev, dev.children, new_net_id)
10815 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
10817 drbd_params, _, _ = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
10818 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
10819 logical_id=new_alone_id,
10820 children=dev.children,
10821 size=dev.size,
10822 params=drbd_params)
10823 try:
10824 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
10825 _GetInstanceInfoText(self.instance), False)
10826 except errors.GenericError:
10827 self.cfg.ReleaseDRBDMinors(self.instance.name)
10828 raise
10830 # We have new devices, shutdown the drbd on the old secondary
10831 for idx, dev in enumerate(self.instance.disks):
10832 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
10833 self.cfg.SetDiskID(dev, self.target_node)
10834 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
10836 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
10837 "node: %s" % (idx, msg),
10838 hint=("Please cleanup this device manually as"
10839 " soon as possible"))
10841 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
10842 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
10843 self.instance.disks)[pnode]
10845 msg = result.fail_msg
10846 if msg:
10847 # detaches didn't succeed (unlikely)
10848 self.cfg.ReleaseDRBDMinors(self.instance.name)
10849 raise errors.OpExecError("Can't detach the disks from the network on"
10850 " old node: %s" % (msg,))
10852 # if we managed to detach at least one, we update all the disks of
10853 # the instance to point to the new secondary
10854 self.lu.LogInfo("Updating instance configuration")
10855 for dev, _, new_logical_id in iv_names.itervalues():
10856 dev.logical_id = new_logical_id
10857 self.cfg.SetDiskID(dev, self.instance.primary_node)
10859 self.cfg.Update(self.instance, feedback_fn)
10861 # Release all node locks (the configuration has been updated)
10862 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10864 # and now perform the drbd attach
10865 self.lu.LogInfo("Attaching primary drbds to new secondary"
10866 " (standalone => connected)")
10867 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
10868 self.new_node],
10869 self.node_secondary_ip,
10870 self.instance.disks,
10871 self.instance.name,
10872 False)
10873 for to_node, to_result in result.items():
10874 msg = to_result.fail_msg
10876 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
10878 hint=("please do a gnt-instance info to see the"
10879 " status of disks"))
10881 cstep = itertools.count(5)
10883 if self.early_release:
10884 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10885 self._RemoveOldStorage(self.target_node, iv_names)
10886 # TODO: Check if releasing locks early still makes sense
10887 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10888 else:
10889 # Release all resource locks except those used by the instance
10890 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10891 keep=self.node_secondary_ip.keys())
10893 # TODO: Can the instance lock be downgraded here? Take the optional disk
10894 # shutdown in the caller into consideration.
10897 # This can fail as the old devices are degraded and _WaitForSync
10898 # does a combined result over all disks, so we don't check its return value
10899 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10900 _WaitForSync(self.lu, self.instance)
10902 # Check all devices manually
10903 self._CheckDevices(self.instance.primary_node, iv_names)
10905 # Step: remove old storage
10906 if not self.early_release:
10907 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10908 self._RemoveOldStorage(self.target_node, iv_names)
10911 class LURepairNodeStorage(NoHooksLU):
10912 """Repairs the volume group on a node.
10917 def CheckArguments(self):
10918 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10920 storage_type = self.op.storage_type
10922 if (constants.SO_FIX_CONSISTENCY not in
10923 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
10924 raise errors.OpPrereqError("Storage units of type '%s' can not be"
10925 " repaired" % storage_type,
10926 errors.ECODE_INVAL)
10928 def ExpandNames(self):
10929 self.needed_locks = {
10930 locking.LEVEL_NODE: [self.op.node_name],
10931 }
10933 def _CheckFaultyDisks(self, instance, node_name):
10934 """Ensure faulty disks abort the opcode or at least warn."""
10935 try:
10936 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
10937 node_name, True):
10938 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
10939 " node '%s'" % (instance.name, node_name),
10940 errors.ECODE_STATE)
10941 except errors.OpPrereqError, err:
10942 if self.op.ignore_consistency:
10943 self.proc.LogWarning(str(err.args[0]))
10944 else:
10945 raise
10947 def CheckPrereq(self):
10948 """Check prerequisites.
10951 # Check whether any instance on this node has faulty disks
10952 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
10953 if inst.admin_state != constants.ADMINST_UP:
10954 continue
10955 check_nodes = set(inst.all_nodes)
10956 check_nodes.discard(self.op.node_name)
10957 for inst_node_name in check_nodes:
10958 self._CheckFaultyDisks(inst, inst_node_name)
10960 def Exec(self, feedback_fn):
10961 feedback_fn("Repairing storage unit '%s' on %s ..." %
10962 (self.op.name, self.op.node_name))
10964 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
10965 result = self.rpc.call_storage_execute(self.op.node_name,
10966 self.op.storage_type, st_args,
10967 self.op.name,
10968 constants.SO_FIX_CONSISTENCY)
10969 result.Raise("Failed to repair storage unit '%s' on %s" %
10970 (self.op.name, self.op.node_name))
10973 class LUNodeEvacuate(NoHooksLU):
10974 """Evacuates instances off a list of nodes.
10979 _MODE2IALLOCATOR = {
10980 constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
10981 constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
10982 constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
10983 }
10984 assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
10985 assert (frozenset(_MODE2IALLOCATOR.values()) ==
10986 constants.IALLOCATOR_NEVAC_MODES)
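# For example, "gnt-node evacuate -p node1" arrives here with
# mode=constants.NODE_EVAC_PRI, which _MODE2IALLOCATOR translates into the
# iallocator's IALLOCATOR_NEVAC_PRI request mode before the request is built.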
10988 def CheckArguments(self):
10989 _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10991 def ExpandNames(self):
10992 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10994 if self.op.remote_node is not None:
10995 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10996 assert self.op.remote_node
10998 if self.op.remote_node == self.op.node_name:
10999 raise errors.OpPrereqError("Can not use evacuated node as a new"
11000 " secondary node", errors.ECODE_INVAL)
11002 if self.op.mode != constants.NODE_EVAC_SEC:
11003 raise errors.OpPrereqError("Without the use of an iallocator only"
11004 " secondary instances can be evacuated",
11005 errors.ECODE_INVAL)
11008 self.share_locks = _ShareAll()
11009 self.needed_locks = {
11010 locking.LEVEL_INSTANCE: [],
11011 locking.LEVEL_NODEGROUP: [],
11012 locking.LEVEL_NODE: [],
11013 }
11015 # Determine nodes (via group) optimistically, needs verification once locks
11016 # have been acquired
11017 self.lock_nodes = self._DetermineNodes()
11019 def _DetermineNodes(self):
11020 """Gets the list of nodes to operate on.
11022 """
11023 if self.op.remote_node is None:
11024 # Iallocator will choose any node(s) in the same group
11025 group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
11026 else:
11027 group_nodes = frozenset([self.op.remote_node])
11029 # Determine nodes to be locked
11030 return set([self.op.node_name]) | group_nodes
11032 def _DetermineInstances(self):
11033 """Builds list of instances to operate on.
11035 """
11036 assert self.op.mode in constants.NODE_EVAC_MODES
11038 if self.op.mode == constants.NODE_EVAC_PRI:
11039 # Primary instances only
11040 inst_fn = _GetNodePrimaryInstances
11041 assert self.op.remote_node is None, \
11042 "Evacuating primary instances requires iallocator"
11043 elif self.op.mode == constants.NODE_EVAC_SEC:
11044 # Secondary instances only
11045 inst_fn = _GetNodeSecondaryInstances
11046 else:
11047 # All instances
11048 assert self.op.mode == constants.NODE_EVAC_ALL
11049 inst_fn = _GetNodeInstances
11050 # TODO: In 2.6, change the iallocator interface to take an evacuation mode
11051 # instead of a list of instances
11052 raise errors.OpPrereqError("Due to an issue with the iallocator"
11053 " interface it is not possible to evacuate"
11054 " all instances at once; specify explicitly"
11055 " whether to evacuate primary or secondary"
11056 " instances",
11057 errors.ECODE_INVAL)
11059 return inst_fn(self.cfg, self.op.node_name)
11061 def DeclareLocks(self, level):
11062 if level == locking.LEVEL_INSTANCE:
11063 # Lock instances optimistically, needs verification once node and group
11064 # locks have been acquired
11065 self.needed_locks[locking.LEVEL_INSTANCE] = \
11066 set(i.name for i in self._DetermineInstances())
11068 elif level == locking.LEVEL_NODEGROUP:
11069 # Lock node groups for all potential target nodes optimistically, needs
11070 # verification once nodes have been acquired
11071 self.needed_locks[locking.LEVEL_NODEGROUP] = \
11072 self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
11074 elif level == locking.LEVEL_NODE:
11075 self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
11077 def CheckPrereq(self):
11079 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
11080 owned_nodes = self.owned_locks(locking.LEVEL_NODE)
11081 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
11083 need_nodes = self._DetermineNodes()
11085 if not owned_nodes.issuperset(need_nodes):
11086 raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
11087 " locks were acquired, current nodes are"
11088 " '%s', used to be '%s'; retry the"
11089 " operation" %
11090 (self.op.node_name,
11091 utils.CommaJoin(need_nodes),
11092 utils.CommaJoin(owned_nodes)),
11093 errors.ECODE_STATE)
11095 wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
11096 if owned_groups != wanted_groups:
11097 raise errors.OpExecError("Node groups changed since locks were acquired,"
11098 " current groups are '%s', used to be '%s';"
11099 " retry the operation" %
11100 (utils.CommaJoin(wanted_groups),
11101 utils.CommaJoin(owned_groups)))
11103 # Determine affected instances
11104 self.instances = self._DetermineInstances()
11105 self.instance_names = [i.name for i in self.instances]
11107 if set(self.instance_names) != owned_instances:
11108 raise errors.OpExecError("Instances on node '%s' changed since locks"
11109 " were acquired, current instances are '%s',"
11110 " used to be '%s'; retry the operation" %
11111 (self.op.node_name,
11112 utils.CommaJoin(self.instance_names),
11113 utils.CommaJoin(owned_instances)))
11115 if self.instance_names:
11116 self.LogInfo("Evacuating instances from node '%s': %s",
11117 self.op.node_name,
11118 utils.CommaJoin(utils.NiceSort(self.instance_names)))
11119 else:
11120 self.LogInfo("No instances to evacuate from node '%s'",
11121 self.op.node_name)
11123 if self.op.remote_node is not None:
11124 for i in self.instances:
11125 if i.primary_node == self.op.remote_node:
11126 raise errors.OpPrereqError("Node %s is the primary node of"
11127 " instance %s, cannot use it as"
11128 " secondary node" %
11129 (self.op.remote_node, i.name),
11130 errors.ECODE_INVAL)
11132 def Exec(self, feedback_fn):
11133 assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
11135 if not self.instance_names:
11136 # No instances to evacuate
11137 jobs = []
11139 elif self.op.iallocator is not None:
11140 # TODO: Implement relocation to other group
11141 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
11142 evac_mode=self._MODE2IALLOCATOR[self.op.mode],
11143 instances=list(self.instance_names))
11145 ial.Run(self.op.iallocator)
11147 if not ial.success:
11148 raise errors.OpPrereqError("Can't compute node evacuation using"
11149 " iallocator '%s': %s" %
11150 (self.op.iallocator, ial.info),
11151 errors.ECODE_NORES)
11153 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
11155 elif self.op.remote_node is not None:
11156 assert self.op.mode == constants.NODE_EVAC_SEC
11157 jobs = [
11158 [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
11159 remote_node=self.op.remote_node,
11160 disks=[],
11161 mode=constants.REPLACE_DISK_CHG,
11162 early_release=self.op.early_release)]
11163 for instance_name in self.instance_names
11164 ]
11166 else:
11167 raise errors.ProgrammerError("No iallocator or remote node")
11169 return ResultWithJobs(jobs)
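# A sketch of the "jobs" value built in the remote_node branch above, for two
# assumed instances inst1 and inst2: one single-opcode job per instance, i.e.
#   jobs = [
#     [opcodes.OpInstanceReplaceDisks(instance_name="inst1", disks=[],
#                                     mode=constants.REPLACE_DISK_CHG, ...)],
#     [opcodes.OpInstanceReplaceDisks(instance_name="inst2", disks=[],
#                                     mode=constants.REPLACE_DISK_CHG, ...)],
#     ]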
11172 def _SetOpEarlyRelease(early_release, op):
11173 """Sets C{early_release} flag on opcodes if available.
11175 """
11176 try:
11177 op.early_release = early_release
11178 except AttributeError:
11179 assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
11181 return op
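# Illustrative use, mirroring _LoadNodeEvacResult below: bind the flag first,
# then apply it over a list of freshly loaded opcodes:
#   set_early = compat.partial(_SetOpEarlyRelease, True)
#   ops = map(set_early, ops)
# Opcodes without an early_release slot pass through unchanged.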
11184 def _NodeEvacDest(use_nodes, group, nodes):
11185 """Returns group or nodes depending on caller's choice.
11187 """
11188 if use_nodes:
11189 return utils.CommaJoin(nodes)
11190 else:
11191 return group
11194 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
11195 """Unpacks the result of change-group and node-evacuate iallocator requests.
11197 Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
11198 L{constants.IALLOCATOR_MODE_CHG_GROUP}.
11200 @type lu: L{LogicalUnit}
11201 @param lu: Logical unit instance
11202 @type alloc_result: tuple/list
11203 @param alloc_result: Result from iallocator
11204 @type early_release: bool
11205 @param early_release: Whether to release locks early if possible
11206 @type use_nodes: bool
11207 @param use_nodes: Whether to display node names instead of groups
11209 """
11210 (moved, failed, jobs) = alloc_result
11212 if failed:
11213 failreason = utils.CommaJoin("%s (%s)" % (name, reason)
11214 for (name, reason) in failed)
11215 lu.LogWarning("Unable to evacuate instances %s", failreason)
11216 raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
11218 if moved:
11219 lu.LogInfo("Instances to be moved: %s",
11220 utils.CommaJoin("%s (to %s)" %
11221 (name, _NodeEvacDest(use_nodes, group, nodes))
11222 for (name, group, nodes) in moved))
11224 return [map(compat.partial(_SetOpEarlyRelease, early_release),
11225 map(opcodes.OpCode.LoadOpCode, ops))
11226 for ops in jobs]
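# A minimal sketch of the iallocator payload this helper consumes (values are
# assumed for illustration):
#   alloc_result = (
#     [("inst1", "target-group-uuid", ["node2", "node3"])],  # moved
#     [],                                                    # failed
#     [[serialized_op, serialized_op], [serialized_op]],     # jobs
#     )
# Each inner list of serialized opcodes is revived with OpCode.LoadOpCode and
# becomes one job in the returned list.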
11229 class LUInstanceGrowDisk(LogicalUnit):
11230 """Grow a disk of an instance.
11232 """
11233 HPATH = "disk-grow"
11234 HTYPE = constants.HTYPE_INSTANCE
11235 REQ_BGL = False
11237 def ExpandNames(self):
11238 self._ExpandAndLockInstance()
11239 self.needed_locks[locking.LEVEL_NODE] = []
11240 self.needed_locks[locking.LEVEL_NODE_RES] = []
11241 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11243 def DeclareLocks(self, level):
11244 if level == locking.LEVEL_NODE:
11245 self._LockInstancesNodes()
11246 elif level == locking.LEVEL_NODE_RES:
11247 # Copy node locks
11248 self.needed_locks[locking.LEVEL_NODE_RES] = \
11249 self.needed_locks[locking.LEVEL_NODE][:]
11251 def BuildHooksEnv(self):
11252 """Build hooks env.
11254 This runs on the master, the primary and all the secondaries.
11256 """
11257 env = {
11258 "DISK": self.op.disk,
11259 "AMOUNT": self.op.amount,
11260 }
11261 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11262 return env
11264 def BuildHooksNodes(self):
11265 """Build hooks nodes.
11267 """
11268 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11269 return (nl, nl)
11271 def CheckPrereq(self):
11272 """Check prerequisites.
11274 This checks that the instance is in the cluster.
11276 """
11277 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11278 assert instance is not None, \
11279 "Cannot retrieve locked instance %s" % self.op.instance_name
11280 nodenames = list(instance.all_nodes)
11281 for node in nodenames:
11282 _CheckNodeOnline(self, node)
11284 self.instance = instance
11286 if instance.disk_template not in constants.DTS_GROWABLE:
11287 raise errors.OpPrereqError("Instance's disk layout does not support"
11288 " growing", errors.ECODE_INVAL)
11290 self.disk = instance.FindDisk(self.op.disk)
11292 if instance.disk_template not in (constants.DT_FILE,
11293 constants.DT_SHARED_FILE):
11294 # TODO: check the free disk space for file, when that feature will be
11295 # supported
11296 _CheckNodesFreeDiskPerVG(self, nodenames,
11297 self.disk.ComputeGrowth(self.op.amount))
11299 def Exec(self, feedback_fn):
11300 """Execute disk grow.
11302 """
11303 instance = self.instance
11304 disk = self.disk
11306 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
11307 assert (self.owned_locks(locking.LEVEL_NODE) ==
11308 self.owned_locks(locking.LEVEL_NODE_RES))
11310 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
11311 if not disks_ok:
11312 raise errors.OpExecError("Cannot activate block device to grow")
11314 feedback_fn("Growing disk %s of instance '%s' by %s" %
11315 (self.op.disk, instance.name,
11316 utils.FormatUnit(self.op.amount, "h")))
11318 # First run all grow ops in dry-run mode
11319 for node in instance.all_nodes:
11320 self.cfg.SetDiskID(disk, node)
11321 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
11322 result.Raise("Grow request failed to node %s" % node)
11324 # We know that (as far as we can test) operations across different
11325 # nodes will succeed, time to run it for real
11326 for node in instance.all_nodes:
11327 self.cfg.SetDiskID(disk, node)
11328 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
11329 result.Raise("Grow request failed to node %s" % node)
11331 # TODO: Rewrite code to work properly
11332 # DRBD goes into sync mode for a short amount of time after executing the
11333 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
11334 # calling "resize" in sync mode fails. Sleeping for a short amount of
11335 # time is a work-around.
11336 time.sleep(5)
11338 disk.RecordGrow(self.op.amount)
11339 self.cfg.Update(instance, feedback_fn)
11341 # Changes have been recorded, release node lock
11342 _ReleaseLocks(self, locking.LEVEL_NODE)
11344 # Downgrade lock while waiting for sync
11345 self.glm.downgrade(locking.LEVEL_INSTANCE)
11347 if self.op.wait_for_sync:
11348 disk_abort = not _WaitForSync(self, instance, disks=[disk])
11349 if disk_abort:
11350 self.proc.LogWarning("Disk sync-ing has not returned a good"
11351 " status; please check the instance")
11352 if instance.admin_state != constants.ADMINST_UP:
11353 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
11354 elif instance.admin_state != constants.ADMINST_UP:
11355 self.proc.LogWarning("Not shutting down the disk even if the instance is"
11356 " not supposed to be running because no wait for"
11357 " sync mode was requested")
11359 assert self.owned_locks(locking.LEVEL_NODE_RES)
11360 assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
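# Illustrative invocation (instance name and amount are assumed example
# values; the CLI parses the unit suffix):
#   gnt-instance grow-disk instance1.example.com 0 1G
# i.e. roughly:
#   opcodes.OpInstanceGrowDisk(instance_name="instance1.example.com",
#                              disk=0, amount=1024, wait_for_sync=True)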
11363 class LUInstanceQueryData(NoHooksLU):
11364 """Query runtime instance data.
11366 """
11367 REQ_BGL = False
11369 def ExpandNames(self):
11370 self.needed_locks = {}
11372 # Use locking if requested or when non-static information is wanted
11373 if not (self.op.static or self.op.use_locking):
11374 self.LogWarning("Non-static data requested, locks need to be acquired")
11375 self.op.use_locking = True
11377 if self.op.instances or not self.op.use_locking:
11378 # Expand instance names right here
11379 self.wanted_names = _GetWantedInstances(self, self.op.instances)
11380 else:
11381 # Will use acquired locks
11382 self.wanted_names = None
11384 if self.op.use_locking:
11385 self.share_locks = _ShareAll()
11387 if self.wanted_names is None:
11388 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
11389 else:
11390 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
11392 self.needed_locks[locking.LEVEL_NODE] = []
11393 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11395 def DeclareLocks(self, level):
11396 if self.op.use_locking and level == locking.LEVEL_NODE:
11397 self._LockInstancesNodes()
11399 def CheckPrereq(self):
11400 """Check prerequisites.
11402 This only checks the optional instance list against the existing names.
11404 """
11405 if self.wanted_names is None:
11406 assert self.op.use_locking, "Locking was not used"
11407 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
11409 self.wanted_instances = \
11410 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
11412 def _ComputeBlockdevStatus(self, node, instance_name, dev):
11413 """Returns the status of a block device
11415 """
11416 if self.op.static or not node:
11417 return None
11419 self.cfg.SetDiskID(dev, node)
11421 result = self.rpc.call_blockdev_find(node, dev)
11422 if result.offline:
11423 return None
11425 result.Raise("Can't compute disk status for %s" % instance_name)
11427 status = result.payload
11428 if status is None:
11429 return None
11431 return (status.dev_path, status.major, status.minor,
11432 status.sync_percent, status.estimated_time,
11433 status.is_degraded, status.ldisk_status)
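# Example of the tuple returned for a healthy, fully synced device (values
# are illustrative only):
#   ("/dev/drbd0", 147, 0, None, None, False, constants.LDS_OKAY)
# matching (dev_path, major, minor, sync_percent, estimated_time,
# is_degraded, ldisk_status).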
11435 def _ComputeDiskStatus(self, instance, snode, dev):
11436 """Compute block device status.
11438 """
11439 if dev.dev_type in constants.LDS_DRBD:
11440 # we change the snode then (otherwise we use the one passed in)
11441 if dev.logical_id[0] == instance.primary_node:
11442 snode = dev.logical_id[1]
11443 else:
11444 snode = dev.logical_id[0]
11446 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
11447 instance.name, dev)
11448 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
11450 if dev.children:
11451 dev_children = map(compat.partial(self._ComputeDiskStatus,
11452 instance, snode),
11453 dev.children)
11454 else:
11455 dev_children = []
11457 return {
11458 "iv_name": dev.iv_name,
11459 "dev_type": dev.dev_type,
11460 "logical_id": dev.logical_id,
11461 "physical_id": dev.physical_id,
11462 "pstatus": dev_pstatus,
11463 "sstatus": dev_sstatus,
11464 "children": dev_children,
11465 "mode": dev.mode,
11466 "size": dev.size,
11467 }
11469 def Exec(self, feedback_fn):
11470 """Gather and return data"""
11471 result = {}
11473 cluster = self.cfg.GetClusterInfo()
11475 pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
11476 for i in self.wanted_instances)
11477 for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
11478 if self.op.static or pnode.offline:
11479 remote_state = None
11480 if pnode.offline:
11481 self.LogWarning("Primary node %s is marked offline, returning static"
11482 " information only for instance %s" %
11483 (pnode.name, instance.name))
11484 else:
11485 remote_info = self.rpc.call_instance_info(instance.primary_node,
11486 instance.name,
11487 instance.hypervisor)
11488 remote_info.Raise("Error checking node %s" % instance.primary_node)
11489 remote_info = remote_info.payload
11490 if remote_info and "state" in remote_info:
11491 remote_state = "up"
11492 else:
11493 if instance.admin_state == constants.ADMINST_UP:
11494 remote_state = "down"
11495 else:
11496 remote_state = instance.admin_state
11498 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
11499 instance.disks)
11501 result[instance.name] = {
11502 "name": instance.name,
11503 "config_state": instance.admin_state,
11504 "run_state": remote_state,
11505 "pnode": instance.primary_node,
11506 "snodes": instance.secondary_nodes,
11507 "os": instance.os,
11508 # this happens to be the same format used for hooks
11509 "nics": _NICListToTuple(self, instance.nics),
11510 "disk_template": instance.disk_template,
11511 "disks": disks,
11512 "hypervisor": instance.hypervisor,
11513 "network_port": instance.network_port,
11514 "hv_instance": instance.hvparams,
11515 "hv_actual": cluster.FillHV(instance, skip_globals=True),
11516 "be_instance": instance.beparams,
11517 "be_actual": cluster.FillBE(instance),
11518 "os_instance": instance.osparams,
11519 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
11520 "serial_no": instance.serial_no,
11521 "mtime": instance.mtime,
11522 "ctime": instance.ctime,
11523 "uuid": instance.uuid,
11524 }
11526 return result
11529 class LUInstanceSetParams(LogicalUnit):
11530 """Modifies an instance's parameters.
11532 """
11533 HPATH = "instance-modify"
11534 HTYPE = constants.HTYPE_INSTANCE
11535 REQ_BGL = False
11537 def CheckArguments(self):
11538 if not (self.op.nics or self.op.disks or self.op.disk_template or
11539 self.op.hvparams or self.op.beparams or self.op.os_name or
11540 self.op.online_inst or self.op.offline_inst):
11541 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
11543 if self.op.hvparams:
11544 _CheckGlobalHvParams(self.op.hvparams)
11546 # Disk validation
11547 disk_addremove = 0
11548 for disk_op, disk_dict in self.op.disks:
11549 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
11550 if disk_op == constants.DDM_REMOVE:
11551 disk_addremove += 1
11552 continue
11553 elif disk_op == constants.DDM_ADD:
11554 disk_addremove += 1
11555 else:
11556 if not isinstance(disk_op, int):
11557 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
11558 if not isinstance(disk_dict, dict):
11559 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
11560 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11562 if disk_op == constants.DDM_ADD:
11563 mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
11564 if mode not in constants.DISK_ACCESS_SET:
11565 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
11566 errors.ECODE_INVAL)
11567 size = disk_dict.get(constants.IDISK_SIZE, None)
11568 if size is None:
11569 raise errors.OpPrereqError("Required disk parameter size missing",
11570 errors.ECODE_INVAL)
11571 try:
11572 size = int(size)
11573 except (TypeError, ValueError), err:
11574 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
11575 str(err), errors.ECODE_INVAL)
11576 disk_dict[constants.IDISK_SIZE] = size
11577 else:
11578 # modification of disk
11579 if constants.IDISK_SIZE in disk_dict:
11580 raise errors.OpPrereqError("Disk size change not possible, use"
11581 " grow-disk", errors.ECODE_INVAL)
11583 if disk_addremove > 1:
11584 raise errors.OpPrereqError("Only one disk add or remove operation"
11585 " supported at a time", errors.ECODE_INVAL)
11587 if self.op.disks and self.op.disk_template is not None:
11588 raise errors.OpPrereqError("Disk template conversion and other disk"
11589 " changes not supported at the same time",
11590 errors.ECODE_INVAL)
11592 if (self.op.disk_template and
11593 self.op.disk_template in constants.DTS_INT_MIRROR and
11594 self.op.remote_node is None):
11595 raise errors.OpPrereqError("Changing the disk template to a mirrored"
11596 " one requires specifying a secondary node",
11597 errors.ECODE_INVAL)
11599 # NIC validation
11600 nic_addremove = 0
11601 for nic_op, nic_dict in self.op.nics:
11602 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
11603 if nic_op == constants.DDM_REMOVE:
11604 nic_addremove += 1
11605 continue
11606 elif nic_op == constants.DDM_ADD:
11607 nic_addremove += 1
11608 else:
11609 if not isinstance(nic_op, int):
11610 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
11611 if not isinstance(nic_dict, dict):
11612 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
11613 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11615 # nic_dict should be a dict
11616 nic_ip = nic_dict.get(constants.INIC_IP, None)
11617 if nic_ip is not None:
11618 if nic_ip.lower() == constants.VALUE_NONE:
11619 nic_dict[constants.INIC_IP] = None
11620 else:
11621 if not netutils.IPAddress.IsValid(nic_ip):
11622 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
11623 errors.ECODE_INVAL)
11625 nic_bridge = nic_dict.get("bridge", None)
11626 nic_link = nic_dict.get(constants.INIC_LINK, None)
11627 if nic_bridge and nic_link:
11628 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
11629 " at the same time", errors.ECODE_INVAL)
11630 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
11631 nic_dict["bridge"] = None
11632 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
11633 nic_dict[constants.INIC_LINK] = None
11635 if nic_op == constants.DDM_ADD:
11636 nic_mac = nic_dict.get(constants.INIC_MAC, None)
11637 if nic_mac is None:
11638 nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
11640 if constants.INIC_MAC in nic_dict:
11641 nic_mac = nic_dict[constants.INIC_MAC]
11642 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11643 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
11645 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
11646 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
11647 " modifying an existing nic",
11648 errors.ECODE_INVAL)
11650 if nic_addremove > 1:
11651 raise errors.OpPrereqError("Only one NIC add or remove operation"
11652 " supported at a time", errors.ECODE_INVAL)
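# For reference, self.op.nics (and similarly self.op.disks) is a list of
# (operation, parameters) pairs where the operation is DDM_ADD, DDM_REMOVE or
# an integer index; an assumed example modifying NIC 0 and adding a NIC:
#   [(0, {constants.INIC_LINK: "br0"}),
#    (constants.DDM_ADD, {constants.INIC_MAC: constants.VALUE_AUTO})]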
11654 def ExpandNames(self):
11655 self._ExpandAndLockInstance()
11656 # Can't even acquire node locks in shared mode as upcoming changes in
11657 # Ganeti 2.6 will start to modify the node object on disk conversion
11658 self.needed_locks[locking.LEVEL_NODE] = []
11659 self.needed_locks[locking.LEVEL_NODE_RES] = []
11660 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11662 def DeclareLocks(self, level):
11663 if level == locking.LEVEL_NODE:
11664 self._LockInstancesNodes()
11665 if self.op.disk_template and self.op.remote_node:
11666 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11667 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
11668 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
11669 # Copy node locks
11670 self.needed_locks[locking.LEVEL_NODE_RES] = \
11671 self.needed_locks[locking.LEVEL_NODE][:]
11673 def BuildHooksEnv(self):
11674 """Build hooks env.
11676 This runs on the master, primary and secondaries.
11678 """
11679 args = dict()
11680 if constants.BE_MINMEM in self.be_new:
11681 args["minmem"] = self.be_new[constants.BE_MINMEM]
11682 if constants.BE_MAXMEM in self.be_new:
11683 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
11684 if constants.BE_VCPUS in self.be_new:
11685 args["vcpus"] = self.be_new[constants.BE_VCPUS]
11686 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
11687 # information at all.
11689 args["nics"] = []
11690 nic_override = dict(self.op.nics)
11691 for idx, nic in enumerate(self.instance.nics):
11692 if idx in nic_override:
11693 this_nic_override = nic_override[idx]
11695 this_nic_override = {}
11696 if constants.INIC_IP in this_nic_override:
11697 ip = this_nic_override[constants.INIC_IP]
11698 else:
11699 ip = nic.ip
11700 if constants.INIC_MAC in this_nic_override:
11701 mac = this_nic_override[constants.INIC_MAC]
11702 else:
11703 mac = nic.mac
11704 if idx in self.nic_pnew:
11705 nicparams = self.nic_pnew[idx]
11706 else:
11707 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
11708 mode = nicparams[constants.NIC_MODE]
11709 link = nicparams[constants.NIC_LINK]
11710 args["nics"].append((ip, mac, mode, link))
11711 if constants.DDM_ADD in nic_override:
11712 ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
11713 mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
11714 nicparams = self.nic_pnew[constants.DDM_ADD]
11715 mode = nicparams[constants.NIC_MODE]
11716 link = nicparams[constants.NIC_LINK]
11717 args["nics"].append((ip, mac, mode, link))
11718 elif constants.DDM_REMOVE in nic_override:
11719 del args["nics"][-1]
11721 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
11722 if self.op.disk_template:
11723 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
11725 return env
11727 def BuildHooksNodes(self):
11728 """Build hooks nodes.
11730 """
11731 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11732 return (nl, nl)
11734 def CheckPrereq(self):
11735 """Check prerequisites.
11737 This only checks the instance list against the existing names.
11739 """
11740 # checking the new params on the primary/secondary nodes
11742 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11743 cluster = self.cluster = self.cfg.GetClusterInfo()
11744 assert self.instance is not None, \
11745 "Cannot retrieve locked instance %s" % self.op.instance_name
11746 pnode = instance.primary_node
11747 nodelist = list(instance.all_nodes)
11748 pnode_info = self.cfg.GetNodeInfo(pnode)
11749 self.diskparams = self.cfg.GetNodeGroup(pnode_info.group).diskparams
11752 if self.op.os_name and not self.op.force:
11753 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
11754 self.op.force_variant)
11755 instance_os = self.op.os_name
11756 else:
11757 instance_os = instance.os
11759 if self.op.disk_template:
11760 if instance.disk_template == self.op.disk_template:
11761 raise errors.OpPrereqError("Instance already has disk template %s" %
11762 instance.disk_template, errors.ECODE_INVAL)
11764 if (instance.disk_template,
11765 self.op.disk_template) not in self._DISK_CONVERSIONS:
11766 raise errors.OpPrereqError("Unsupported disk template conversion from"
11767 " %s to %s" % (instance.disk_template,
11768 self.op.disk_template),
11769 errors.ECODE_INVAL)
11770 _CheckInstanceState(self, instance, INSTANCE_DOWN,
11771 msg="cannot change disk template")
11772 if self.op.disk_template in constants.DTS_INT_MIRROR:
11773 if self.op.remote_node == pnode:
11774 raise errors.OpPrereqError("Given new secondary node %s is the same"
11775 " as the primary node of the instance" %
11776 self.op.remote_node, errors.ECODE_STATE)
11777 _CheckNodeOnline(self, self.op.remote_node)
11778 _CheckNodeNotDrained(self, self.op.remote_node)
11779 # FIXME: here we assume that the old instance type is DT_PLAIN
11780 assert instance.disk_template == constants.DT_PLAIN
11781 disks = [{constants.IDISK_SIZE: d.size,
11782 constants.IDISK_VG: d.logical_id[0]}
11783 for d in instance.disks]
11784 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
11785 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
11787 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
11788 if pnode_info.group != snode_info.group:
11789 self.LogWarning("The primary and secondary nodes are in two"
11790 " different node groups; the disk parameters"
11791 " from the first disk's node group will be"
11792 " used")
11794 # hvparams processing
11795 if self.op.hvparams:
11796 hv_type = instance.hypervisor
11797 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
11798 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
11799 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
11801 # local check
11802 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
11803 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
11804 self.hv_proposed = self.hv_new = hv_new # the new actual values
11805 self.hv_inst = i_hvdict # the new dict (without defaults)
11806 else:
11807 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
11808 instance.hvparams)
11809 self.hv_new = self.hv_inst = {}
11811 # beparams processing
11812 if self.op.beparams:
11813 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
11814 use_none=True)
11815 objects.UpgradeBeParams(i_bedict)
11816 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
11817 be_new = cluster.SimpleFillBE(i_bedict)
11818 self.be_proposed = self.be_new = be_new # the new actual values
11819 self.be_inst = i_bedict # the new dict (without defaults)
11820 else:
11821 self.be_new = self.be_inst = {}
11822 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
11823 be_old = cluster.FillBE(instance)
11825 # CPU param validation -- checking every time a parameter is
11826 # changed to cover all cases where either CPU mask or vcpus have
11827 # changed
11828 if (constants.BE_VCPUS in self.be_proposed and
11829 constants.HV_CPU_MASK in self.hv_proposed):
11830 cpu_list = \
11831 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
11832 # Verify mask is consistent with number of vCPUs. Can skip this
11833 # test if only 1 entry in the CPU mask, which means same mask
11834 # is applied to all vCPUs.
11835 if (len(cpu_list) > 1 and
11836 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
11837 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
11838 " CPU mask [%s]" %
11839 (self.be_proposed[constants.BE_VCPUS],
11840 self.hv_proposed[constants.HV_CPU_MASK]),
11841 errors.ECODE_INVAL)
11843 # Only perform this test if a new CPU mask is given
11844 if constants.HV_CPU_MASK in self.hv_new:
11845 # Calculate the largest CPU number requested
11846 max_requested_cpu = max(map(max, cpu_list))
11847 # Check that all of the instance's nodes have enough physical CPUs to
11848 # satisfy the requested CPU mask
11849 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
11850 max_requested_cpu + 1, instance.hypervisor)
11852 # osparams processing
11853 if self.op.osparams:
11854 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
11855 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
11856 self.os_inst = i_osdict # the new dict (without defaults)
11857 else:
11858 self.os_inst = {}
11860 self.warn = []
11862 #TODO(dynmem): do the appropriate check involving MINMEM
11863 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
11864 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
11865 mem_check_list = [pnode]
11866 if be_new[constants.BE_AUTO_BALANCE]:
11867 # either we changed auto_balance to yes or it was from before
11868 mem_check_list.extend(instance.secondary_nodes)
11869 instance_info = self.rpc.call_instance_info(pnode, instance.name,
11870 instance.hypervisor)
11871 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
11872 [instance.hypervisor])
11873 pninfo = nodeinfo[pnode]
11874 msg = pninfo.fail_msg
11875 if msg:
11876 # Assume the primary node is unreachable and go ahead
11877 self.warn.append("Can't get info from primary node %s: %s" %
11878 (pnode, msg))
11879 else:
11880 (_, _, (pnhvinfo, )) = pninfo.payload
11881 if not isinstance(pnhvinfo.get("memory_free", None), int):
11882 self.warn.append("Node data from primary node %s doesn't contain"
11883 " free memory information" % pnode)
11884 elif instance_info.fail_msg:
11885 self.warn.append("Can't get instance runtime information: %s" %
11886 instance_info.fail_msg)
11887 else:
11888 if instance_info.payload:
11889 current_mem = int(instance_info.payload["memory"])
11890 else:
11891 # Assume instance not running
11892 # (there is a slight race condition here, but it's not very
11893 # probable, and we have no other way to check)
11894 # TODO: Describe race condition
11895 current_mem = 0
11896 #TODO(dynmem): do the appropriate check involving MINMEM
11897 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
11898 pnhvinfo["memory_free"])
11899 if miss_mem > 0:
11900 raise errors.OpPrereqError("This change will prevent the instance"
11901 " from starting, due to %d MB of memory"
11902 " missing on its primary node" %
11903 miss_mem,
11904 errors.ECODE_NORES)
11906 if be_new[constants.BE_AUTO_BALANCE]:
11907 for node, nres in nodeinfo.items():
11908 if node not in instance.secondary_nodes:
11909 continue
11910 nres.Raise("Can't get info from secondary node %s" % node,
11911 prereq=True, ecode=errors.ECODE_STATE)
11912 (_, _, (nhvinfo, )) = nres.payload
11913 if not isinstance(nhvinfo.get("memory_free", None), int):
11914 raise errors.OpPrereqError("Secondary node %s didn't return free"
11915 " memory information" % node,
11916 errors.ECODE_STATE)
11917 #TODO(dynmem): do the appropriate check involving MINMEM
11918 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
11919 raise errors.OpPrereqError("This change will prevent the instance"
11920 " from failover to its secondary node"
11921 " %s, due to not enough memory" % node,
11922 errors.ECODE_STATE)
11924 # NIC processing
11925 self.nic_pnew = {}
11926 self.nic_pinst = {}
11927 for nic_op, nic_dict in self.op.nics:
11928 if nic_op == constants.DDM_REMOVE:
11929 if not instance.nics:
11930 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
11931 errors.ECODE_INVAL)
11932 continue
11933 if nic_op != constants.DDM_ADD:
11934 # an existing nic
11935 if not instance.nics:
11936 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
11937 " no NICs" % nic_op,
11938 errors.ECODE_INVAL)
11939 if nic_op < 0 or nic_op >= len(instance.nics):
11940 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
11941 " are 0 to %d" %
11942 (nic_op, len(instance.nics) - 1),
11943 errors.ECODE_INVAL)
11944 old_nic_params = instance.nics[nic_op].nicparams
11945 old_nic_ip = instance.nics[nic_op].ip
11946 else:
11947 old_nic_params = {}
11948 old_nic_ip = None
11950 update_params_dict = dict([(key, nic_dict[key])
11951 for key in constants.NICS_PARAMETERS
11952 if key in nic_dict])
11954 if "bridge" in nic_dict:
11955 update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
11957 new_nic_params = _GetUpdatedParams(old_nic_params,
11958 update_params_dict)
11959 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
11960 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
11961 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
11962 self.nic_pinst[nic_op] = new_nic_params
11963 self.nic_pnew[nic_op] = new_filled_nic_params
11964 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
11966 if new_nic_mode == constants.NIC_MODE_BRIDGED:
11967 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
11968 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
11969 if msg:
11970 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
11971 if self.op.force:
11972 self.warn.append(msg)
11973 else:
11974 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
11975 if new_nic_mode == constants.NIC_MODE_ROUTED:
11976 if constants.INIC_IP in nic_dict:
11977 nic_ip = nic_dict[constants.INIC_IP]
11978 else:
11979 nic_ip = old_nic_ip
11980 if nic_ip is None:
11981 raise errors.OpPrereqError("Cannot set the nic ip to None"
11982 " on a routed nic", errors.ECODE_INVAL)
11983 if constants.INIC_MAC in nic_dict:
11984 nic_mac = nic_dict[constants.INIC_MAC]
11985 if nic_mac is None:
11986 raise errors.OpPrereqError("Cannot set the nic mac to None",
11987 errors.ECODE_INVAL)
11988 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11989 # otherwise generate the mac
11990 nic_dict[constants.INIC_MAC] = \
11991 self.cfg.GenerateMAC(self.proc.GetECId())
11992 else:
11993 # or validate/reserve the current one
11994 try:
11995 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
11996 except errors.ReservationError:
11997 raise errors.OpPrereqError("MAC address %s already in use"
11998 " in cluster" % nic_mac,
11999 errors.ECODE_NOTUNIQUE)
12002 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
12003 raise errors.OpPrereqError("Disk operations not supported for"
12004 " diskless instances",
12005 errors.ECODE_INVAL)
12006 for disk_op, _ in self.op.disks:
12007 if disk_op == constants.DDM_REMOVE:
12008 if len(instance.disks) == 1:
12009 raise errors.OpPrereqError("Cannot remove the last disk of"
12010 " an instance", errors.ECODE_INVAL)
12011 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12012 msg="cannot remove disks")
12014 if (disk_op == constants.DDM_ADD and
12015 len(instance.disks) >= constants.MAX_DISKS):
12016 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
12017 " add more" % constants.MAX_DISKS,
12018 errors.ECODE_STATE)
12019 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
12020 # an existing disk
12021 if disk_op < 0 or disk_op >= len(instance.disks):
12022 raise errors.OpPrereqError("Invalid disk index %s, valid values"
12023 " are 0 to %d" %
12024 (disk_op, len(instance.disks)),
12025 errors.ECODE_INVAL)
12027 # disabling the instance
12028 if self.op.offline_inst:
12029 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12030 msg="cannot change instance state to offline")
12032 # enabling the instance
12033 if self.op.online_inst:
12034 _CheckInstanceState(self, instance, INSTANCE_OFFLINE,
12035 msg="cannot make instance go online")
12037 def _ConvertPlainToDrbd(self, feedback_fn):
12038 """Converts an instance from plain to drbd.
12040 """
12041 feedback_fn("Converting template to drbd")
12042 instance = self.instance
12043 pnode = instance.primary_node
12044 snode = self.op.remote_node
12046 assert instance.disk_template == constants.DT_PLAIN
12048 # create a fake disk info for _GenerateDiskTemplate
12049 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
12050 constants.IDISK_VG: d.logical_id[0]}
12051 for d in instance.disks]
12052 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
12053 instance.name, pnode, [snode],
12054 disk_info, None, None, 0, feedback_fn,
12055 self.diskparams)
12056 info = _GetInstanceInfoText(instance)
12057 feedback_fn("Creating additional volumes...")
12058 # first, create the missing data and meta devices
12059 for disk in new_disks:
12060 # unfortunately this is... not too nice
12061 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
12062 info, True)
12063 for child in disk.children:
12064 _CreateSingleBlockDev(self, snode, instance, child, info, True)
12065 # at this stage, all new LVs have been created, we can rename the
12066 # old ones
12067 feedback_fn("Renaming original volumes...")
12068 rename_list = [(o, n.children[0].logical_id)
12069 for (o, n) in zip(instance.disks, new_disks)]
12070 result = self.rpc.call_blockdev_rename(pnode, rename_list)
12071 result.Raise("Failed to rename original LVs")
12073 feedback_fn("Initializing DRBD devices...")
12074 # all child devices are in place, we can now create the DRBD devices
12075 for disk in new_disks:
12076 for node in [pnode, snode]:
12077 f_create = node == pnode
12078 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
12080 # at this point, the instance has been modified
12081 instance.disk_template = constants.DT_DRBD8
12082 instance.disks = new_disks
12083 self.cfg.Update(instance, feedback_fn)
12085 # Release node locks while waiting for sync
12086 _ReleaseLocks(self, locking.LEVEL_NODE)
12088 # disks are created, waiting for sync
12089 disk_abort = not _WaitForSync(self, instance,
12090 oneshot=not self.op.wait_for_sync)
12091 if disk_abort:
12092 raise errors.OpExecError("There are some degraded disks for"
12093 " this instance, please cleanup manually")
12095 # Node resource locks will be released by caller
12097 def _ConvertDrbdToPlain(self, feedback_fn):
12098 """Converts an instance from drbd to plain.
12100 """
12101 instance = self.instance
12103 assert len(instance.secondary_nodes) == 1
12104 assert instance.disk_template == constants.DT_DRBD8
12106 pnode = instance.primary_node
12107 snode = instance.secondary_nodes[0]
12108 feedback_fn("Converting template to plain")
12110 old_disks = instance.disks
12111 new_disks = [d.children[0] for d in old_disks]
12113 # copy over size and mode
12114 for parent, child in zip(old_disks, new_disks):
12115 child.size = parent.size
12116 child.mode = parent.mode
12118 # update instance structure
12119 instance.disks = new_disks
12120 instance.disk_template = constants.DT_PLAIN
12121 self.cfg.Update(instance, feedback_fn)
12123 # Release locks in case removing disks takes a while
12124 _ReleaseLocks(self, locking.LEVEL_NODE)
12126 feedback_fn("Removing volumes on the secondary node...")
12127 for disk in old_disks:
12128 self.cfg.SetDiskID(disk, snode)
12129 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
12130 if msg:
12131 self.LogWarning("Could not remove block device %s on node %s,"
12132 " continuing anyway: %s", disk.iv_name, snode, msg)
12134 feedback_fn("Removing unneeded volumes on the primary node...")
12135 for idx, disk in enumerate(old_disks):
12136 meta = disk.children[1]
12137 self.cfg.SetDiskID(meta, pnode)
12138 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
12139 if msg:
12140 self.LogWarning("Could not remove metadata for disk %d on node %s,"
12141 " continuing anyway: %s", idx, pnode, msg)
12143 # this is a DRBD disk, return its port to the pool
12144 for disk in old_disks:
12145 tcp_port = disk.logical_id[2]
12146 self.cfg.AddTcpUdpPort(tcp_port)
12148 # Node resource locks will be released by caller
12150 def Exec(self, feedback_fn):
12151 """Modifies an instance.
12153 All parameters take effect only at the next restart of the instance.
12155 """
12156 # Process here the warnings from CheckPrereq, as we don't have a
12157 # feedback_fn there.
12158 for warn in self.warn:
12159 feedback_fn("WARNING: %s" % warn)
12161 assert ((self.op.disk_template is None) ^
12162 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
12163 "Not owning any node resource locks"
12165 result = []
12166 instance = self.instance
12167 # disk changes
12168 for disk_op, disk_dict in self.op.disks:
12169 if disk_op == constants.DDM_REMOVE:
12170 # remove the last disk
12171 device = instance.disks.pop()
12172 device_idx = len(instance.disks)
12173 for node, disk in device.ComputeNodeTree(instance.primary_node):
12174 self.cfg.SetDiskID(disk, node)
12175 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
12176 if msg:
12177 self.LogWarning("Could not remove disk/%d on node %s: %s,"
12178 " continuing anyway", device_idx, node, msg)
12179 result.append(("disk/%d" % device_idx, "remove"))
12181 # if this is a DRBD disk, return its port to the pool
12182 if device.dev_type in constants.LDS_DRBD:
12183 tcp_port = device.logical_id[2]
12184 self.cfg.AddTcpUdpPort(tcp_port)
12185 elif disk_op == constants.DDM_ADD:
12186 # add a new disk
12187 if instance.disk_template in (constants.DT_FILE,
12188 constants.DT_SHARED_FILE):
12189 file_driver, file_path = instance.disks[0].logical_id
12190 file_path = os.path.dirname(file_path)
12191 else:
12192 file_driver = file_path = None
12193 disk_idx_base = len(instance.disks)
12194 new_disk = _GenerateDiskTemplate(self,
12195 instance.disk_template,
12196 instance.name, instance.primary_node,
12197 instance.secondary_nodes,
12198 [disk_dict],
12199 file_path,
12200 file_driver,
12201 disk_idx_base,
12202 feedback_fn,
12203 self.diskparams)[0]
12204 instance.disks.append(new_disk)
12205 info = _GetInstanceInfoText(instance)
12207 logging.info("Creating volume %s for instance %s",
12208 new_disk.iv_name, instance.name)
12209 # Note: this needs to be kept in sync with _CreateDisks
12210 #HARDCODE
12211 for node in instance.all_nodes:
12212 f_create = node == instance.primary_node
12213 try:
12214 _CreateBlockDev(self, node, instance, new_disk,
12215 f_create, info, f_create)
12216 except errors.OpExecError, err:
12217 self.LogWarning("Failed to create volume %s (%s) on"
12218 " node %s: %s",
12219 new_disk.iv_name, new_disk, node, err)
12220 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
12221 (new_disk.size, new_disk.mode)))
12222 else:
12223 # change a given disk
12224 instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
12225 result.append(("disk.mode/%d" % disk_op,
12226 disk_dict[constants.IDISK_MODE]))
12228 if self.op.disk_template:
12229 if __debug__:
12230 check_nodes = set(instance.all_nodes)
12231 if self.op.remote_node:
12232 check_nodes.add(self.op.remote_node)
12233 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
12234 owned = self.owned_locks(level)
12235 assert not (check_nodes - owned), \
12236 ("Not owning the correct locks, owning %r, expected at least %r" %
12237 (owned, check_nodes))
12239 r_shut = _ShutdownInstanceDisks(self, instance)
12240 if not r_shut:
12241 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
12242 " proceed with disk template conversion")
12243 mode = (instance.disk_template, self.op.disk_template)
12244 try:
12245 self._DISK_CONVERSIONS[mode](self, feedback_fn)
12246 except:
12247 self.cfg.ReleaseDRBDMinors(instance.name)
12248 raise
12249 result.append(("disk_template", self.op.disk_template))
12251 assert instance.disk_template == self.op.disk_template, \
12252 ("Expected disk template '%s', found '%s'" %
12253 (self.op.disk_template, instance.disk_template))
12255 # Release node and resource locks if there are any (they might already have
12256 # been released during disk conversion)
12257 _ReleaseLocks(self, locking.LEVEL_NODE)
12258 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
12260 # NIC changes
12261 for nic_op, nic_dict in self.op.nics:
12262 if nic_op == constants.DDM_REMOVE:
12263 # remove the last nic
12264 del instance.nics[-1]
12265 result.append(("nic.%d" % len(instance.nics), "remove"))
12266 elif nic_op == constants.DDM_ADD:
12267 # mac and bridge should be set, by now
12268 mac = nic_dict[constants.INIC_MAC]
12269 ip = nic_dict.get(constants.INIC_IP, None)
12270 nicparams = self.nic_pinst[constants.DDM_ADD]
12271 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
12272 instance.nics.append(new_nic)
12273 result.append(("nic.%d" % (len(instance.nics) - 1),
12274 "add:mac=%s,ip=%s,mode=%s,link=%s" %
12275 (new_nic.mac, new_nic.ip,
12276 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
12277 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
12278 )))
12279 else:
12280 for key in (constants.INIC_MAC, constants.INIC_IP):
12281 if key in nic_dict:
12282 setattr(instance.nics[nic_op], key, nic_dict[key])
12283 if nic_op in self.nic_pinst:
12284 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
12285 for key, val in nic_dict.iteritems():
12286 result.append(("nic.%s/%d" % (key, nic_op), val))
12288 # hvparams changes
12289 if self.op.hvparams:
12290 instance.hvparams = self.hv_inst
12291 for key, val in self.op.hvparams.iteritems():
12292 result.append(("hv/%s" % key, val))
12294 # beparams changes
12295 if self.op.beparams:
12296 instance.beparams = self.be_inst
12297 for key, val in self.op.beparams.iteritems():
12298 result.append(("be/%s" % key, val))
12301 if self.op.os_name:
12302 instance.os = self.op.os_name
12305 if self.op.osparams:
12306 instance.osparams = self.os_inst
12307 for key, val in self.op.osparams.iteritems():
12308 result.append(("os/%s" % key, val))
12310 # online/offline instance
12311 if self.op.online_inst:
12312 self.cfg.MarkInstanceDown(instance.name)
12313 result.append(("admin_state", constants.ADMINST_DOWN))
12314 if self.op.offline_inst:
12315 self.cfg.MarkInstanceOffline(instance.name)
12316 result.append(("admin_state", constants.ADMINST_OFFLINE))
12318 self.cfg.Update(instance, feedback_fn)
12320 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
12321 self.owned_locks(locking.LEVEL_NODE)), \
12322 "All node locks should have been released by now"
12324 return result
12326 _DISK_CONVERSIONS = {
12327 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
12328 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
12329 }
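# The two supported conversions correspond to "gnt-instance modify -t"; an
# illustrative plain-to-drbd conversion (node name assumed):
#   gnt-instance modify -t drbd -n node2.example.com instance1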
12332 class LUInstanceChangeGroup(LogicalUnit):
12333 HPATH = "instance-change-group"
12334 HTYPE = constants.HTYPE_INSTANCE
12335 REQ_BGL = False
12337 def ExpandNames(self):
12338 self.share_locks = _ShareAll()
12339 self.needed_locks = {
12340 locking.LEVEL_NODEGROUP: [],
12341 locking.LEVEL_NODE: [],
12342 }
12344 self._ExpandAndLockInstance()
12346 if self.op.target_groups:
12347 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12348 self.op.target_groups)
12349 else:
12350 self.req_target_uuids = None
12352 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12354 def DeclareLocks(self, level):
12355 if level == locking.LEVEL_NODEGROUP:
12356 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12358 if self.req_target_uuids:
12359 lock_groups = set(self.req_target_uuids)
12361 # Lock all groups used by instance optimistically; this requires going
12362 # via the node before it's locked, requiring verification later on
12363 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
12364 lock_groups.update(instance_groups)
12365 else:
12366 # No target groups, need to lock all of them
12367 lock_groups = locking.ALL_SET
12369 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12371 elif level == locking.LEVEL_NODE:
12372 if self.req_target_uuids:
12373 # Lock all nodes used by instances
12374 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12375 self._LockInstancesNodes()
12377 # Lock all nodes in all potential target groups
12378 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
12379 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
12380 member_nodes = [node_name
12381 for group in lock_groups
12382 for node_name in self.cfg.GetNodeGroup(group).members]
12383 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12384 else:
12385 # Lock all nodes as all groups are potential targets
12386 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12388 def CheckPrereq(self):
12389 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12390 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12391 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12393 assert (self.req_target_uuids is None or
12394 owned_groups.issuperset(self.req_target_uuids))
12395 assert owned_instances == set([self.op.instance_name])
12397 # Get instance information
12398 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12400 # Check if node groups for locked instance are still correct
12401 assert owned_nodes.issuperset(self.instance.all_nodes), \
12402 ("Instance %s's nodes changed while we kept the lock" %
12403 self.op.instance_name)
12405 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
12406 owned_groups)
12408 if self.req_target_uuids:
12409 # User requested specific target groups
12410 self.target_uuids = self.req_target_uuids
12412 # All groups except those used by the instance are potential targets
12413 self.target_uuids = owned_groups - inst_groups
12415 conflicting_groups = self.target_uuids & inst_groups
12416 if conflicting_groups:
12417 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
12418 " used by the instance '%s'" %
12419 (utils.CommaJoin(conflicting_groups),
12420 self.op.instance_name),
12421 errors.ECODE_INVAL)
12423 if not self.target_uuids:
12424 raise errors.OpPrereqError("There are no possible target groups",
12425 errors.ECODE_INVAL)
12427 def BuildHooksEnv(self):
12428 """Build hooks env.
12430 """
12431 assert self.target_uuids
12433 env = {
12434 "TARGET_GROUPS": " ".join(self.target_uuids),
12435 }
12437 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12439 return env
12441 def BuildHooksNodes(self):
12442 """Build hooks nodes.
12444 """
12445 mn = self.cfg.GetMasterNode()
12446 return ([mn], [mn])
12448 def Exec(self, feedback_fn):
12449 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12451 assert instances == [self.op.instance_name], "Instance not locked"
12453 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12454 instances=instances, target_groups=list(self.target_uuids))
12456 ial.Run(self.op.iallocator)
12458 if not ial.success:
12459 raise errors.OpPrereqError("Can't compute solution for changing group of"
12460 " instance '%s' using iallocator '%s': %s" %
12461 (self.op.instance_name, self.op.iallocator,
12462 ial.info),
12463 errors.ECODE_NORES)
12465 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12467 self.LogInfo("Iallocator returned %s job(s) for changing group of"
12468 " instance '%s'", len(jobs), self.op.instance_name)
12470 return ResultWithJobs(jobs)
12473 class LUBackupQuery(NoHooksLU):
12474 """Query the exports list
12476 """
12477 REQ_BGL = False
12479 def ExpandNames(self):
12480 self.needed_locks = {}
12481 self.share_locks[locking.LEVEL_NODE] = 1
12482 if not self.op.nodes:
12483 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12484 else:
12485 self.needed_locks[locking.LEVEL_NODE] = \
12486 _GetWantedNodes(self, self.op.nodes)
12488 def Exec(self, feedback_fn):
12489 """Compute the list of all the exported system images.
12491 @rtype: dict
12492 @return: a dictionary with the structure node->(export-list)
12493 where export-list is a list of the instances exported on
12494 that node.
12496 """
12497 self.nodes = self.owned_locks(locking.LEVEL_NODE)
12498 rpcresult = self.rpc.call_export_list(self.nodes)
12499 result = {}
12500 for node in rpcresult:
12501 if rpcresult[node].fail_msg:
12502 result[node] = False
12503 else:
12504 result[node] = rpcresult[node].payload
12506 return result
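# Illustrative return value (assumed names); nodes whose RPC failed map to
# False instead of an export list:
#   {"node1.example.com": ["inst1.example.com"],
#    "node2.example.com": False}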
12509 class LUBackupPrepare(NoHooksLU):
12510 """Prepares an instance for an export and returns useful information.
12512 """
12513 REQ_BGL = False
12515 def ExpandNames(self):
12516 self._ExpandAndLockInstance()
12518 def CheckPrereq(self):
12519 """Check prerequisites.
12521 """
12522 instance_name = self.op.instance_name
12524 self.instance = self.cfg.GetInstanceInfo(instance_name)
12525 assert self.instance is not None, \
12526 "Cannot retrieve locked instance %s" % self.op.instance_name
12527 _CheckNodeOnline(self, self.instance.primary_node)
12529 self._cds = _GetClusterDomainSecret()
12531 def Exec(self, feedback_fn):
12532 """Prepares an instance for an export.
12534 """
12535 instance = self.instance
12537 if self.op.mode == constants.EXPORT_MODE_REMOTE:
12538 salt = utils.GenerateSecret(8)
12540 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
12541 result = self.rpc.call_x509_cert_create(instance.primary_node,
12542 constants.RIE_CERT_VALIDITY)
12543 result.Raise("Can't create X509 key and certificate on %s" % result.node)
12545 (name, cert_pem) = result.payload
12547 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
12548 cert_pem)
12550 return {
12551 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
12552 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
12553 salt),
12554 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
12555 }
12557 return None
12560 class LUBackupExport(LogicalUnit):
12561 """Export an instance to an image in the cluster.
12563 """
12564 HPATH = "instance-export"
12565 HTYPE = constants.HTYPE_INSTANCE
12566 REQ_BGL = False
12568 def CheckArguments(self):
12569 """Check the arguments.
12571 """
12572 self.x509_key_name = self.op.x509_key_name
12573 self.dest_x509_ca_pem = self.op.destination_x509_ca
12575 if self.op.mode == constants.EXPORT_MODE_REMOTE:
12576 if not self.x509_key_name:
12577 raise errors.OpPrereqError("Missing X509 key name for encryption",
12578 errors.ECODE_INVAL)
12580 if not self.dest_x509_ca_pem:
12581 raise errors.OpPrereqError("Missing destination X509 CA",
12582 errors.ECODE_INVAL)
12584 def ExpandNames(self):
12585 self._ExpandAndLockInstance()
12587 # Lock all nodes for local exports
12588 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12589 # FIXME: lock only instance primary and destination node
12591 # Sad but true, for now we have to lock all nodes, as we don't know where
12592 # the previous export might be, and in this LU we search for it and
12593 # remove it from its current node. In the future we could fix this by:
12594 # - making a tasklet to search (share-lock all), then create the
12595 # new one, then one to remove, after
12596 # - removing the removal operation altogether
12597 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12599 def DeclareLocks(self, level):
12600 """Last minute lock declaration."""
12601 # All nodes are locked anyway, so nothing to do here.
12603 def BuildHooksEnv(self):
12604 """Build hooks env.
12606 This will run on the master, primary node and target node.
12608 """
12609 env = {
12610 "EXPORT_MODE": self.op.mode,
12611 "EXPORT_NODE": self.op.target_node,
12612 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
12613 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
12614 # TODO: Generic function for boolean env variables
12615 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
12616 }
12618 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12620 return env
12622 def BuildHooksNodes(self):
12623 """Build hooks nodes.
12625 """
12626 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
12628 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12629 nl.append(self.op.target_node)
12631 return (nl, nl)
12633 def CheckPrereq(self):
12634 """Check prerequisites.
12636 This checks that the instance and node names are valid.
12638 """
12639 instance_name = self.op.instance_name
12641 self.instance = self.cfg.GetInstanceInfo(instance_name)
12642 assert self.instance is not None, \
12643 "Cannot retrieve locked instance %s" % self.op.instance_name
12644 _CheckNodeOnline(self, self.instance.primary_node)
12646 if (self.op.remove_instance and
12647 self.instance.admin_state == constants.ADMINST_UP and
12648 not self.op.shutdown):
12649 raise errors.OpPrereqError("Can not remove instance without shutting it"
12650 " down before")
12652 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12653 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
12654 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
12655 assert self.dst_node is not None
12657 _CheckNodeOnline(self, self.dst_node.name)
12658 _CheckNodeNotDrained(self, self.dst_node.name)
12660 self._cds = None
12661 self.dest_disk_info = None
12662 self.dest_x509_ca = None
12664 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
12665 self.dst_node = None
12667 if len(self.op.target_node) != len(self.instance.disks):
12668 raise errors.OpPrereqError(("Received destination information for %s"
12669 " disks, but instance %s has %s disks") %
12670 (len(self.op.target_node), instance_name,
12671 len(self.instance.disks)),
12672 errors.ECODE_INVAL)
12674 cds = _GetClusterDomainSecret()
12676 # Check X509 key name
12677 try:
12678 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
12679 except (TypeError, ValueError), err:
12680 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
12682 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
12683 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
12684 errors.ECODE_INVAL)
      # Load and verify CA
      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
12689 except OpenSSL.crypto.Error, err:
12690 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
12691 (err, ), errors.ECODE_INVAL)
12693 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
12694 if errcode is not None:
12695 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
12696 (msg, ), errors.ECODE_INVAL)
12698 self.dest_x509_ca = cert
      # Verify target information
      disk_info = []
      for idx, disk_data in enumerate(self.op.target_node):
        try:
          (host, port, magic) = \
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
12706 except errors.GenericError, err:
12707 raise errors.OpPrereqError("Target info for disk %s: %s" %
12708 (idx, err), errors.ECODE_INVAL)
12710 disk_info.append((host, port, magic))
12712 assert len(disk_info) == len(self.op.target_node)
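      # disk_info now holds one (host, port, magic) endpoint tuple per
      # instance disk, in disk index order.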
12713 self.dest_disk_info = disk_info
    else:
      raise errors.ProgrammerError("Unhandled export mode %r" % self.op.mode)
12719 # instance disk type verification
12720 # TODO: Implement export support for file-based disks
12721 for disk in self.instance.disks:
12722 if disk.dev_type == constants.LD_FILE:
12723 raise errors.OpPrereqError("Export not supported for instances with"
12724 " file-based disks", errors.ECODE_INVAL)
12726 def _CleanupExports(self, feedback_fn):
12727 """Removes exports of current instance from all other nodes.
12729 If an instance in a cluster with nodes A..D was exported to node C, its
12730 exports will be removed from the nodes A, B and D.
12733 assert self.op.mode != constants.EXPORT_MODE_REMOTE
    nodelist = self.cfg.GetNodeList()
    nodelist.remove(self.dst_node.name)

    # On one-node clusters nodelist will be empty after the removal; if we
    # proceeded, the backup would be removed because OpBackupQuery
    # substitutes an empty list with the full cluster node list.
    iname = self.instance.name
    if nodelist:
      feedback_fn("Removing old exports for instance %s" % iname)
      exportlist = self.rpc.call_export_list(nodelist)
      for node in exportlist:
        if exportlist[node].fail_msg:
          continue
        if iname in exportlist[node].payload:
          msg = self.rpc.call_export_remove(node, iname).fail_msg
          if msg:
            self.LogWarning("Could not remove older export for instance %s"
                            " on node %s: %s", iname, node, msg)
12754 def Exec(self, feedback_fn):
12755 """Export an instance to an image in the cluster.
12758 assert self.op.mode in constants.EXPORT_MODES
12760 instance = self.instance
12761 src_node = instance.primary_node
12763 if self.op.shutdown:
12764 # shutdown the instance, but not the disks
12765 feedback_fn("Shutting down instance %s" % instance.name)
12766 result = self.rpc.call_instance_shutdown(src_node, instance,
12767 self.op.shutdown_timeout)
12768 # TODO: Maybe ignore failures if ignore_remove_failures is set
12769 result.Raise("Could not shutdown instance %s on"
12770 " node %s" % (instance.name, src_node))
12772 # set the disks ID correctly since call_instance_start needs the
12773 # correct drbd minor to create the symlinks
12774 for disk in instance.disks:
12775 self.cfg.SetDiskID(disk, src_node)
    activate_disks = (instance.admin_state != constants.ADMINST_UP)

    if activate_disks:
      # Activate the instance disks if we're exporting a stopped instance
      feedback_fn("Activating disks for %s" % instance.name)
      _StartInstanceDisks(self, instance, None)

    try:
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
                                                     instance)
      helper.CreateSnapshots()
      try:
        if (self.op.shutdown and
            instance.admin_state == constants.ADMINST_UP and
            not self.op.remove_instance):
          assert not activate_disks
          feedback_fn("Starting instance %s" % instance.name)
          result = self.rpc.call_instance_start(src_node,
                                                (instance, None, None), False)
          msg = result.fail_msg
          if msg:
            feedback_fn("Failed to start instance: %s" % msg)
            _ShutdownInstanceDisks(self, instance)
            raise errors.OpExecError("Could not start instance: %s" % msg)
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

          (key_name, _, _) = self.x509_key_name

          dest_ca_pem = \
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                            self.dest_x509_ca)

          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
                                                     key_name, dest_ca_pem,
                                                     timeouts)
      finally:
        helper.Cleanup()

      # Check for backwards compatibility
      assert len(dresults) == len(instance.disks)
      assert compat.all(isinstance(i, bool) for i in dresults), \
             "Not all results are boolean: %r" % dresults

    finally:
      if activate_disks:
        feedback_fn("Deactivating disks for %s" % instance.name)
        _ShutdownInstanceDisks(self, instance)
    if not (compat.all(dresults) and fin_resu):
      failures = []
      if not fin_resu:
        failures.append("export finalization")
      if not compat.all(dresults):
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
                               if not dsk)
        failures.append("disk export: disk(s) %s" % fdsk)

      raise errors.OpExecError("Export failed, errors in %s" %
                               utils.CommaJoin(failures))
12843 # At this point, the export was successful, we can cleanup/finish
12845 # Remove instance if requested
12846 if self.op.remove_instance:
12847 feedback_fn("Removing instance %s" % instance.name)
12848 _RemoveInstance(self, feedback_fn, instance,
12849 self.op.ignore_remove_failures)
12851 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12852 self._CleanupExports(feedback_fn)
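    # fin_resu is the overall finalization status and dresults holds one
    # boolean per instance disk, e.g. (True, [True, True]) for a successful
    # export of a two-disk instance.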
12854 return fin_resu, dresults
12857 class LUBackupRemove(NoHooksLU):
12858 """Remove exports related to the named instance.
12863 def ExpandNames(self):
12864 self.needed_locks = {}
12865 # We need all nodes to be locked in order for RemoveExport to work, but we
12866 # don't need to lock the instance itself, as nothing will happen to it (and
12867 # we can remove exports also for a removed instance)
12868 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12870 def Exec(self, feedback_fn):
12871 """Remove any export.
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = False
    if not instance_name:
      fqdn_warn = True
      instance_name = self.op.instance_name

    locked_nodes = self.owned_locks(locking.LEVEL_NODE)
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node in exportlist:
      msg = exportlist[node].fail_msg
      if msg:
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
        continue
      if instance_name in exportlist[node].payload:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")
12904 class LUGroupAdd(LogicalUnit):
12905 """Logical unit for creating node groups.
12908 HPATH = "group-add"
12909 HTYPE = constants.HTYPE_GROUP
12912 def ExpandNames(self):
12913 # We need the new group's UUID here so that we can create and acquire the
12914 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
12915 # that it should not check whether the UUID exists in the configuration.
12916 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
12917 self.needed_locks = {}
12918 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name is not an existing node group
    already.

    """
    try:
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    except errors.OpPrereqError:
      pass
    else:
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
                                 " node group (UUID: %s)" %
                                 (self.op.group_name, existing_uuid),
                                 errors.ECODE_EXISTS)
12937 if self.op.ndparams:
12938 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
    if self.op.hv_state:
      self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
    else:
      self.new_hv_state = None

    if self.op.disk_state:
      self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
    else:
      self.new_disk_state = None
    if self.op.diskparams:
      for templ in constants.DISK_TEMPLATES:
        if templ not in self.op.diskparams:
          self.op.diskparams[templ] = {}
        utils.ForceDictType(self.op.diskparams[templ], constants.DISK_DT_TYPES)
    else:
      self.op.diskparams = self.cfg.GetClusterInfo().diskparams
12958 if self.op.ipolicy:
12959 cluster = self.cfg.GetClusterInfo()
12960 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
12961 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy)
  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      }
12971 def BuildHooksNodes(self):
12972 """Build hooks nodes.
12975 mn = self.cfg.GetMasterNode()
12976 return ([mn], [mn])
12978 def Exec(self, feedback_fn):
12979 """Add the node group to the cluster.
12982 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
12983 uuid=self.group_uuid,
12984 alloc_policy=self.op.alloc_policy,
12985 ndparams=self.op.ndparams,
12986 diskparams=self.op.diskparams,
12987 ipolicy=self.op.ipolicy,
12988 hv_state_static=self.new_hv_state,
12989 disk_state_static=self.new_disk_state)
12991 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
12992 del self.remove_locks[locking.LEVEL_NODEGROUP]
12995 class LUGroupAssignNodes(NoHooksLU):
12996 """Logical unit for assigning nodes to groups.
13001 def ExpandNames(self):
13002 # These raise errors.OpPrereqError on their own:
13003 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13004 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
13006 # We want to lock all the affected nodes and groups. We have readily
13007 # available the list of nodes, and the *destination* group. To gather the
13008 # list of "source" groups, we need to fetch node information later on.
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: set([self.group_uuid]),
      locking.LEVEL_NODE: self.op.nodes,
      }
13014 def DeclareLocks(self, level):
13015 if level == locking.LEVEL_NODEGROUP:
13016 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
13018 # Try to get all affected nodes' groups without having the group or node
13019 # lock yet. Needs verification later in the code flow.
13020 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
13022 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
13024 def CheckPrereq(self):
13025 """Check prerequisites.
13028 assert self.needed_locks[locking.LEVEL_NODEGROUP]
13029 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
13030 frozenset(self.op.nodes))
13032 expected_locks = (set([self.group_uuid]) |
13033 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
13034 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
13035 if actual_locks != expected_locks:
13036 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
13037 " current groups are '%s', used to be '%s'" %
13038 (utils.CommaJoin(expected_locks),
13039 utils.CommaJoin(actual_locks)))
13041 self.node_data = self.cfg.GetAllNodesInfo()
13042 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13043 instance_data = self.cfg.GetAllInstancesInfo()
13045 if self.group is None:
13046 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13047 (self.op.group_name, self.group_uuid))
    (new_splits, previous_splits) = \
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
                                             for node in self.op.nodes],
                                            self.node_data, instance_data)

    if new_splits:
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))

      if not self.op.force:
        raise errors.OpExecError("The following instances get split by this"
                                 " change and --force was not given: %s" %
                                 fmt_new_splits)
      else:
        self.LogWarning("This operation will split the following instances: %s",
                        fmt_new_splits)

        if previous_splits:
          self.LogWarning("In addition, these already-split instances continue"
                          " to be split across groups: %s",
                          utils.CommaJoin(utils.NiceSort(previous_splits)))
13070 def Exec(self, feedback_fn):
13071 """Assign nodes to a new group.
13074 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
13076 self.cfg.AssignGroupNodes(mods)
  @staticmethod
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
13080 """Check for split instances after a node assignment.
13082 This method considers a series of node assignments as an atomic operation,
13083 and returns information about split instances after applying the set of
13086 In particular, it returns information about newly split instances, and
13087 instances that were already split, and remain so after the change.
13089 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
13092 @type changes: list of (node_name, new_group_uuid) pairs.
13093 @param changes: list of node assignments to consider.
13094 @param node_data: a dict with data for all nodes
13095 @param instance_data: a dict with all instances to consider
13096 @rtype: a two-tuple
13097 @return: a list of instances that were previously okay and result split as a
13098 consequence of this change, and a list of instances that were previously
13099 split and this change does not fix.
13102 changed_nodes = dict((node, group) for node, group in changes
13103 if node_data[node].group != group)
13105 all_split_instances = set()
13106 previously_split_instances = set()
13108 def InstanceNodes(instance):
13109 return [instance.primary_node] + list(instance.secondary_nodes)
    for inst in instance_data.values():
      if inst.disk_template not in constants.DTS_INT_MIRROR:
        continue

      instance_nodes = InstanceNodes(inst)
13117 if len(set(node_data[node].group for node in instance_nodes)) > 1:
13118 previously_split_instances.add(inst.name)
13120 if len(set(changed_nodes.get(node, node_data[node].group)
13121 for node in instance_nodes)) > 1:
13122 all_split_instances.add(inst.name)
13124 return (list(all_split_instances - previously_split_instances),
13125 list(previously_split_instances & all_split_instances))
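# Illustrative example (values are hypothetical): with node n1 in group G1,
# node n2 in group G2 and a DRBD instance on (n1, n2), the instance counts as
# previously split; changes = [("n2", "G1")] would heal it, so it appears in
# neither returned list, while a no-op change leaves it in the second list.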
13128 class _GroupQuery(_QueryBase):
13129 FIELDS = query.GROUP_FIELDS
  def ExpandNames(self, lu):
    lu.needed_locks = {}

    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
    self._cluster = lu.cfg.GetClusterInfo()
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())

    if not self.names:
      self.wanted = [name_to_uuid[name]
                     for name in utils.NiceSort(name_to_uuid.keys())]
    else:
      # Accept names to be either names or UUIDs.
      missing = []
      self.wanted = []
      all_uuid = frozenset(self._all_groups.keys())

      for name in self.names:
        if name in all_uuid:
          self.wanted.append(name)
        elif name in name_to_uuid:
          self.wanted.append(name_to_uuid[name])
        else:
          missing.append(name)

      if missing:
        raise errors.OpPrereqError("Some groups do not exist: %s" %
                                   utils.CommaJoin(missing),
                                   errors.ECODE_NOENT)
  def DeclareLocks(self, lu, level):
    pass
13163 def _GetQueryData(self, lu):
13164 """Computes the list of node groups and their attributes.
13167 do_nodes = query.GQ_NODE in self.requested_data
13168 do_instances = query.GQ_INST in self.requested_data
13170 group_to_nodes = None
13171 group_to_instances = None
13173 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
13174 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
13175 # latter GetAllInstancesInfo() is not enough, for we have to go through
13176 # instance->node. Hence, we will need to process nodes even if we only need
13177 # instance information.
    if do_nodes or do_instances:
      all_nodes = lu.cfg.GetAllNodesInfo()
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
      node_to_group = {}

      for node in all_nodes.values():
        if node.group in group_to_nodes:
          group_to_nodes[node.group].append(node.name)
          node_to_group[node.name] = node.group
      if do_instances:
        all_instances = lu.cfg.GetAllInstancesInfo()
        group_to_instances = dict((uuid, []) for uuid in self.wanted)

        for instance in all_instances.values():
          node = instance.primary_node
          if node in node_to_group:
            group_to_instances[node_to_group[node]].append(instance.name)

        if not do_nodes:
          # Do not pass on node information if it was not requested.
          group_to_nodes = None
13201 return query.GroupQueryData(self._cluster,
13202 [self._all_groups[uuid]
13203 for uuid in self.wanted],
13204 group_to_nodes, group_to_instances)
13207 class LUGroupQuery(NoHooksLU):
13208 """Logical unit for querying node groups.
13213 def CheckArguments(self):
13214 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
13215 self.op.output_fields, False)
13217 def ExpandNames(self):
13218 self.gq.ExpandNames(self)
13220 def DeclareLocks(self, level):
13221 self.gq.DeclareLocks(self, level)
13223 def Exec(self, feedback_fn):
13224 return self.gq.OldStyleQuery(self)
13227 class LUGroupSetParams(LogicalUnit):
13228 """Modifies the parameters of a node group.
13231 HPATH = "group-modify"
13232 HTYPE = constants.HTYPE_GROUP
  def CheckArguments(self):
    all_changes = [
      self.op.ndparams,
      self.op.diskparams,
      self.op.alloc_policy,
      self.op.hv_state,
      self.op.disk_state,
      self.op.ipolicy,
      ]

    if all_changes.count(None) == len(all_changes):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
13249 def ExpandNames(self):
13250 # This raises errors.OpPrereqError on its own:
13251 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }
13257 def CheckPrereq(self):
13258 """Check prerequisites.
13261 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13263 if self.group is None:
13264 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13265 (self.op.group_name, self.group_uuid))
13267 if self.op.ndparams:
13268 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
13269 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13270 self.new_ndparams = new_ndparams
13272 if self.op.diskparams:
13273 self.new_diskparams = dict()
13274 for templ in constants.DISK_TEMPLATES:
13275 if templ not in self.op.diskparams:
13276 self.op.diskparams[templ] = {}
13277 new_templ_params = _GetUpdatedParams(self.group.diskparams[templ],
13278 self.op.diskparams[templ])
13279 utils.ForceDictType(new_templ_params, constants.DISK_DT_TYPES)
13280 self.new_diskparams[templ] = new_templ_params
13282 if self.op.hv_state:
13283 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
13284 self.group.hv_state_static)
13286 if self.op.disk_state:
13287 self.new_disk_state = \
13288 _MergeAndVerifyDiskState(self.op.disk_state,
13289 self.group.disk_state_static)
    if self.op.ipolicy:
      g_ipolicy = {}
      for key, value in self.op.ipolicy.iteritems():
        g_ipolicy[key] = _GetUpdatedParams(self.group.ipolicy.get(key, {}),
                                           value,
                                           use_none=True)
        utils.ForceDictType(g_ipolicy[key], constants.ISPECS_PARAMETER_TYPES)
      self.new_ipolicy = g_ipolicy
      objects.InstancePolicy.CheckParameterSyntax(self.new_ipolicy)
  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
      }
13310 def BuildHooksNodes(self):
13311 """Build hooks nodes.
13314 mn = self.cfg.GetMasterNode()
13315 return ([mn], [mn])
  def Exec(self, feedback_fn):
    """Modifies the node group.

    """
    result = []
    if self.op.ndparams:
13324 self.group.ndparams = self.new_ndparams
13325 result.append(("ndparams", str(self.group.ndparams)))
13327 if self.op.diskparams:
13328 self.group.diskparams = self.new_diskparams
13329 result.append(("diskparams", str(self.group.diskparams)))
13331 if self.op.alloc_policy:
13332 self.group.alloc_policy = self.op.alloc_policy
13334 if self.op.hv_state:
13335 self.group.hv_state_static = self.new_hv_state
13337 if self.op.disk_state:
13338 self.group.disk_state_static = self.new_disk_state
13340 if self.op.ipolicy:
13341 self.group.ipolicy = self.new_ipolicy
    self.cfg.Update(self.group, feedback_fn)
    return result
13347 class LUGroupRemove(LogicalUnit):
13348 HPATH = "group-remove"
13349 HTYPE = constants.HTYPE_GROUP
13352 def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
13354 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }
13359 def CheckPrereq(self):
13360 """Check prerequisites.
    This checks that the given group name exists as a node group, that it is
    empty (i.e., contains no nodes), and that it is not the last group of the
    cluster.
13367 # Verify that the group is empty.
13368 group_nodes = [node.name
13369 for node in self.cfg.GetAllNodesInfo().values()
                   if node.group == self.group_uuid]
    if group_nodes:
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
                                 " nodes: %s" %
                                 (self.op.group_name,
13376 utils.CommaJoin(utils.NiceSort(group_nodes))),
13377 errors.ECODE_STATE)
13379 # Verify the cluster would not be left group-less.
13380 if len(self.cfg.GetNodeGroupList()) == 1:
13381 raise errors.OpPrereqError("Group '%s' is the only group,"
13382 " cannot be removed" %
13383 self.op.group_name,
13384 errors.ECODE_STATE)
  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      }
13394 def BuildHooksNodes(self):
13395 """Build hooks nodes.
13398 mn = self.cfg.GetMasterNode()
13399 return ([mn], [mn])
  def Exec(self, feedback_fn):
    """Remove the node group.

    """
    try:
      self.cfg.RemoveNodeGroup(self.group_uuid)
13407 except errors.ConfigurationError:
13408 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
13409 (self.op.group_name, self.group_uuid))
13411 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13414 class LUGroupRename(LogicalUnit):
13415 HPATH = "group-rename"
13416 HTYPE = constants.HTYPE_GROUP
13419 def ExpandNames(self):
13420 # This raises errors.OpPrereqError on its own:
13421 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }
13427 def CheckPrereq(self):
13428 """Check prerequisites.
    Ensures requested new name is not yet used.

    """
    try:
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
    except errors.OpPrereqError:
      pass
    else:
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
                                 " node group (UUID: %s)" %
                                 (self.op.new_name, new_name_uuid),
                                 errors.ECODE_EXISTS)
  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OLD_NAME": self.op.group_name,
      "NEW_NAME": self.op.new_name,
      }
13452 def BuildHooksNodes(self):
13453 """Build hooks nodes.
13456 mn = self.cfg.GetMasterNode()
    all_nodes = self.cfg.GetAllNodesInfo()
    all_nodes.pop(mn, None)

    run_nodes = [mn]
    run_nodes.extend(node.name for node in all_nodes.values()
13463 if node.group == self.group_uuid)
13465 return (run_nodes, run_nodes)
13467 def Exec(self, feedback_fn):
13468 """Rename the node group.
    group = self.cfg.GetNodeGroup(self.group_uuid)
    if group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13475 (self.op.group_name, self.group_uuid))
13477 group.name = self.op.new_name
13478 self.cfg.Update(group, feedback_fn)
13480 return self.op.new_name
13483 class LUGroupEvacuate(LogicalUnit):
13484 HPATH = "group-evacuate"
13485 HTYPE = constants.HTYPE_GROUP
13488 def ExpandNames(self):
13489 # This raises errors.OpPrereqError on its own:
13490 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    if self.op.target_groups:
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                                  self.op.target_groups)
    else:
      self.req_target_uuids = []
13498 if self.group_uuid in self.req_target_uuids:
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
                                 " as a target group (targets are %s)" %
                                 (self.group_uuid,
                                  utils.CommaJoin(self.req_target_uuids)),
                                 errors.ECODE_INVAL)
13505 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
13507 self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }
13514 def DeclareLocks(self, level):
13515 if level == locking.LEVEL_INSTANCE:
13516 assert not self.needed_locks[locking.LEVEL_INSTANCE]
13518 # Lock instances optimistically, needs verification once node and group
13519 # locks have been acquired
13520 self.needed_locks[locking.LEVEL_INSTANCE] = \
13521 self.cfg.GetNodeGroupInstances(self.group_uuid)
13523 elif level == locking.LEVEL_NODEGROUP:
13524 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13526 if self.req_target_uuids:
13527 lock_groups = set([self.group_uuid] + self.req_target_uuids)
        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lock_groups.update(group_uuid
                           for instance_name in
                             self.owned_locks(locking.LEVEL_INSTANCE)
                           for group_uuid in
                             self.cfg.GetInstanceNodeGroups(instance_name))
      else:
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET
13540 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13542 elif level == locking.LEVEL_NODE:
13543 # This will only lock the nodes in the group to be evacuated which
13544 # contain actual instances
13545 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13546 self._LockInstancesNodes()
13548 # Lock all nodes in group to be evacuated and target groups
13549 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13550 assert self.group_uuid in owned_groups
13551 member_nodes = [node_name
13552 for group in owned_groups
13553 for node_name in self.cfg.GetNodeGroup(group).members]
13554 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13556 def CheckPrereq(self):
13557 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13558 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13559 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13561 assert owned_groups.issuperset(self.req_target_uuids)
13562 assert self.group_uuid in owned_groups
13564 # Check if locked instances are still correct
13565 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
13567 # Get instance information
13568 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
13570 # Check if node groups for locked instances are still correct
13571 for instance_name in owned_instances:
13572 inst = self.instances[instance_name]
13573 assert owned_nodes.issuperset(inst.all_nodes), \
13574 "Instance %s's nodes changed while we kept the lock" % instance_name
      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
                                             owned_groups)
      assert self.group_uuid in inst_groups, \
13580 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
13582 if self.req_target_uuids:
13583 # User requested specific target groups
13584 self.target_uuids = self.req_target_uuids
13586 # All groups except the one to be evacuated are potential targets
13587 self.target_uuids = [group_uuid for group_uuid in owned_groups
13588 if group_uuid != self.group_uuid]
13590 if not self.target_uuids:
13591 raise errors.OpPrereqError("There are no possible target groups",
13592 errors.ECODE_INVAL)
  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }
13603 def BuildHooksNodes(self):
13604 """Build hooks nodes.
13607 mn = self.cfg.GetMasterNode()
13609 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
13611 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
13613 return (run_nodes, run_nodes)
13615 def Exec(self, feedback_fn):
13616 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13618 assert self.group_uuid not in self.target_uuids
13620 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
13621 instances=instances, target_groups=self.target_uuids)
13623 ial.Run(self.op.iallocator)
13625 if not ial.success:
13626 raise errors.OpPrereqError("Can't compute group evacuation using"
13627 " iallocator '%s': %s" %
13628 (self.op.iallocator, ial.info),
13629 errors.ECODE_NORES)
13631 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
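    # _LoadNodeEvacResult (defined earlier in this module) converts the
    # iallocator's (moved, failed, jobs) triple into per-job opcode lists and
    # is expected to raise OpExecError if any instance failed to be placed.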
13633 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
13634 len(jobs), self.op.group_name)
13636 return ResultWithJobs(jobs)
13639 class TagsLU(NoHooksLU): # pylint: disable=W0223
13640 """Generic tags LU.
13642 This is an abstract class which is the parent of all the other tags LUs.
13645 def ExpandNames(self):
13646 self.group_uuid = None
13647 self.needed_locks = {}
13648 if self.op.kind == constants.TAG_NODE:
13649 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
13650 self.needed_locks[locking.LEVEL_NODE] = self.op.name
13651 elif self.op.kind == constants.TAG_INSTANCE:
13652 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
13653 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
      self.needed_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13657 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
13658 # not possible to acquire the BGL based on opcode parameters)
13660 def CheckPrereq(self):
13661 """Check prerequisites.
13664 if self.op.kind == constants.TAG_CLUSTER:
13665 self.target = self.cfg.GetClusterInfo()
13666 elif self.op.kind == constants.TAG_NODE:
13667 self.target = self.cfg.GetNodeInfo(self.op.name)
13668 elif self.op.kind == constants.TAG_INSTANCE:
13669 self.target = self.cfg.GetInstanceInfo(self.op.name)
13670 elif self.op.kind == constants.TAG_NODEGROUP:
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
13674 str(self.op.kind), errors.ECODE_INVAL)
13677 class LUTagsGet(TagsLU):
13678 """Returns the tags of a given object.
13683 def ExpandNames(self):
13684 TagsLU.ExpandNames(self)
13686 # Share locks as this is only a read operation
13687 self.share_locks = _ShareAll()
13689 def Exec(self, feedback_fn):
13690 """Returns the tag list.
13693 return list(self.target.GetTags())
13696 class LUTagsSearch(NoHooksLU):
13697 """Searches the tags for a given pattern.
13702 def ExpandNames(self):
13703 self.needed_locks = {}
13705 def CheckPrereq(self):
13706 """Check prerequisites.
    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
13713 except re.error, err:
13714 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
13715 (self.op.pattern, err), errors.ECODE_INVAL)
13717 def Exec(self, feedback_fn):
13718 """Returns the tag list.
13722 tgts = [("/cluster", cfg.GetClusterInfo())]
13723 ilist = cfg.GetAllInstancesInfo().values()
13724 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
13725 nlist = cfg.GetAllNodesInfo().values()
13726 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
13727 tgts.extend(("/nodegroup/%s" % n.name, n)
                for n in cfg.GetAllNodeGroupsInfo().values())
    results = []
    for path, target in tgts:
13731 for tag in target.GetTags():
13732 if self.re.search(tag):
          results.append((path, tag))
    return results
13737 class LUTagsSet(TagsLU):
13738 """Sets a tag on a given object.
13743 def CheckPrereq(self):
13744 """Check prerequisites.
13746 This checks the type and length of the tag name and value.
13749 TagsLU.CheckPrereq(self)
13750 for tag in self.op.tags:
13751 objects.TaggableObject.ValidateTag(tag)
  def Exec(self, feedback_fn):
    """Sets the tag."""
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
13760 except errors.TagError, err:
13761 raise errors.OpExecError("Error while setting tag: %s" % str(err))
13762 self.cfg.Update(self.target, feedback_fn)
13765 class LUTagsDel(TagsLU):
13766 """Delete a list of tags from a given object.
13771 def CheckPrereq(self):
13772 """Check prerequisites.
13774 This checks that we have the given tag.
13777 TagsLU.CheckPrereq(self)
13778 for tag in self.op.tags:
13779 objects.TaggableObject.ValidateTag(tag)
13780 del_tags = frozenset(self.op.tags)
13781 cur_tags = self.target.GetTags()
    diff_tags = del_tags - cur_tags
    if diff_tags:
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
13786 raise errors.OpPrereqError("Tag(s) %s not found" %
13787 (utils.CommaJoin(diff_names), ),
13788 errors.ECODE_NOENT)
13790 def Exec(self, feedback_fn):
13791 """Remove the tag from the object.
13794 for tag in self.op.tags:
13795 self.target.RemoveTag(tag)
13796 self.cfg.Update(self.target, feedback_fn)
13799 class LUTestDelay(NoHooksLU):
13800 """Sleep for a specified amount of time.
13802 This LU sleeps on the master and/or nodes for a specified amount of
13808 def ExpandNames(self):
13809 """Expand names and set required locks.
13811 This expands the node list, if any.
13814 self.needed_locks = {}
13815 if self.op.on_nodes:
13816 # _GetWantedNodes can be used here, but is not always appropriate to use
13817 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
13818 # more information.
13819 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
13820 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
13822 def _TestDelay(self):
13823 """Do the actual sleep.
13826 if self.op.on_master:
13827 if not utils.TestDelay(self.op.duration):
13828 raise errors.OpExecError("Error during master delay test")
13829 if self.op.on_nodes:
13830 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
13831 for node, node_result in result.items():
13832 node_result.Raise("Failure during rpc call to node %s" % node)
13834 def Exec(self, feedback_fn):
13835 """Execute the test delay opcode, with the wanted repetitions.
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()
13847 class LUTestJqueue(NoHooksLU):
13848 """Utility LU to test some aspects of the job queue.
13853 # Must be lower than default timeout for WaitForJobChange to see whether it
13854 # notices changed jobs
13855 _CLIENT_CONNECT_TIMEOUT = 20.0
13856 _CLIENT_CONFIRM_TIMEOUT = 60.0
  @classmethod
  def _NotifyUsingSocket(cls, cb, errcls):
13860 """Opens a Unix socket and waits for another program to connect.
13863 @param cb: Callback to send socket name to client
13864 @type errcls: class
13865 @param errcls: Exception class to use for errors
    # Using a temporary directory as there's no easy way to create temporary
    # sockets without writing a custom loop around tempfile.mktemp and
    # socket.bind
    tmpdir = tempfile.mkdtemp()
    try:
      tmpsock = utils.PathJoin(tmpdir, "sock")

      logging.debug("Creating temporary socket at %s", tmpsock)
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
      try:
        sock.bind(tmpsock)
        sock.listen(1)

        # Send details to client
        cb(tmpsock)

        # Wait for client to connect before continuing
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
        try:
          (conn, _) = sock.accept()
        except socket.error, err:
          raise errcls("Client didn't connect in time (%s)" % err)
      finally:
        sock.close()
    finally:
      # Remove as soon as client is connected
      shutil.rmtree(tmpdir)

    # Wait for client to close
    try:
      try:
        # pylint: disable=E1101
        # Instance of '_socketobject' has no ... member
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
        conn.recv(1)
      except socket.error, err:
        raise errcls("Client failed to confirm notification (%s)" % err)
    finally:
      conn.close()
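  # The test client is expected to connect to the socket path it receives via
  # the callback and later confirm by sending a byte or closing its end; both
  # steps are bounded by the timeouts defined above.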
13908 def _SendNotification(self, test, arg, sockname):
13909 """Sends a notification to the client.
13912 @param test: Test name
13913 @param arg: Test argument (depends on test)
13914 @type sockname: string
13915 @param sockname: Socket path
13918 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
  def _Notify(self, prereq, test, arg):
    """Notifies the client of a test.

    @type prereq: bool
    @param prereq: Whether this is a prereq-phase test
    @param test: Test name
    @param arg: Test argument (depends on test)

    """
    if prereq:
      errcls = errors.OpPrereqError
    else:
      errcls = errors.OpExecError
    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
                                                  test, arg),
                                   errcls)
13939 def CheckArguments(self):
13940 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
13941 self.expandnames_calls = 0
13943 def ExpandNames(self):
13944 checkargs_calls = getattr(self, "checkargs_calls", 0)
13945 if checkargs_calls < 1:
13946 raise errors.ProgrammerError("CheckArguments was not called")
13948 self.expandnames_calls += 1
13950 if self.op.notify_waitlock:
13951 self._Notify(True, constants.JQT_EXPANDNAMES, None)
13953 self.LogInfo("Expanding names")
13955 # Get lock on master node (just to get a lock, not for a particular reason)
13956 self.needed_locks = {
13957 locking.LEVEL_NODE: self.cfg.GetMasterNode(),
13960 def Exec(self, feedback_fn):
13961 if self.expandnames_calls < 1:
13962 raise errors.ProgrammerError("ExpandNames was not called")
13964 if self.op.notify_exec:
13965 self._Notify(False, constants.JQT_EXEC, None)
13967 self.LogInfo("Executing")
13969 if self.op.log_messages:
13970 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
        # Report how many test messages have been sent
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)

    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")

    return True
13983 class IAllocator(object):
13984 """IAllocator framework.
13986 An IAllocator instance has three sets of attributes:
13987 - cfg that is needed to query the cluster
13988 - input data (all members of the _KEYS class attribute are required)
13989 - four buffer attributes (in|out_data|text), that represent the
13990 input (to the external script) in text and data structure format,
13991 and the output from it, again in two formats
    - the result variables from the script (success, info, nodes) for
      easy usage
13996 # pylint: disable=R0902
13997 # lots of instance attributes
  def __init__(self, cfg, rpc_runner, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc_runner
    self.mode = mode
14002 # init buffer variables
14003 self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.name = None
    self.memory = self.disks = self.disk_template = None
14007 self.os = self.tags = self.nics = self.vcpus = None
14008 self.hypervisor = None
14009 self.relocate_from = None
14011 self.instances = None
14012 self.evac_mode = None
14013 self.target_groups = []
14015 self.required_nodes = None
14016 # init result fields
14017 self.success = self.info = self.result = None
    try:
      (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
    except KeyError:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)
    keyset = [n for (n, _) in keydata]

    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])
    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)

    self._BuildInputData(compat.partial(fn, self), keydata)
14039 def _ComputeClusterData(self):
14040 """Compute the generic allocator input data.
    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    ninfo = cfg.GetAllNodesInfo()
14056 iinfo = cfg.GetAllInstancesInfo().values()
14057 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
14060 node_list = [n.name for n in ninfo.values() if n.vm_capable]
14062 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
14063 hypervisor_name = self.hypervisor
14064 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
14065 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
14067 hypervisor_name = cluster_info.primary_hypervisor
    node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
                                        [hypervisor_name])
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)
14075 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
14077 config_ndata = self._ComputeBasicNodeData(ninfo)
14078 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
14079 i_list, config_ndata)
14080 assert len(data["nodes"]) == len(ninfo), \
14081 "Incomplete node data computed"
14083 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
14085 self.in_data = data
  @staticmethod
  def _ComputeNodeGroupData(cfg):
14089 """Compute node groups data.
14092 ng = dict((guuid, {
14093 "name": gdata.name,
14094 "alloc_policy": gdata.alloc_policy,
14096 for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
  @staticmethod
  def _ComputeBasicNodeData(node_cfg):
14102 """Compute global node data.
14105 @returns: a dict of name: (node dict, node config)
14108 # fill in static (config-based) values
14109 node_results = dict((ninfo.name, {
14110 "tags": list(ninfo.GetTags()),
14111 "primary_ip": ninfo.primary_ip,
14112 "secondary_ip": ninfo.secondary_ip,
14113 "offline": ninfo.offline,
14114 "drained": ninfo.drained,
14115 "master_candidate": ninfo.master_candidate,
14116 "group": ninfo.group,
14117 "master_capable": ninfo.master_capable,
14118 "vm_capable": ninfo.vm_capable,
14120 for ninfo in node_cfg.values())
14122 return node_results
  @staticmethod
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
                              node_results):
    """Compute global node data.
14129 @param node_results: the basic node structures as filled from the config
14132 #TODO(dynmem): compute the right data on MAX and MIN memory
14133 # make a copy of the current dict
14134 node_results = dict(node_results)
14135 for nname, nresult in node_data.items():
14136 assert nname in node_results, "Missing basic data for node %s" % nname
14137 ninfo = node_cfg[nname]
14139 if not (ninfo.offline or ninfo.drained):
14140 nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = _MakeLegacyNodeInfo(nresult.payload)
14145 for attr in ["memory_total", "memory_free", "memory_dom0",
14146 "vg_size", "vg_free", "cpu_total"]:
14147 if attr not in remote_info:
14148 raise errors.OpExecError("Node '%s' didn't return attribute"
14149 " '%s'" % (nname, attr))
14150 if not isinstance(remote_info[attr], int):
14151 raise errors.OpExecError("Node '%s' returned invalid value"
14153 (nname, attr, remote_info[attr]))
14154 # compute memory used by primary instances
14155 i_p_mem = i_p_up_mem = 0
14156 for iinfo, beinfo in i_list:
14157 if iinfo.primary_node == nname:
14158 i_p_mem += beinfo[constants.BE_MAXMEM]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
14163 i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
14164 remote_info["memory_free"] -= max(0, i_mem_diff)
14166 if iinfo.admin_state == constants.ADMINST_UP:
14167 i_p_up_mem += beinfo[constants.BE_MAXMEM]
      # compute memory used by instances
      pnr_dyn = {
        "total_memory": remote_info["memory_total"],
        "reserved_memory": remote_info["memory_dom0"],
        "free_memory": remote_info["memory_free"],
        "total_disk": remote_info["vg_size"],
        "free_disk": remote_info["vg_free"],
        "total_cpus": remote_info["cpu_total"],
        "i_pri_memory": i_p_mem,
        "i_pri_up_memory": i_p_up_mem,
        }
14180 pnr_dyn.update(node_results[nname])
14181 node_results[nname] = pnr_dyn
14183 return node_results
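  # Each node entry now combines the static configuration fields with the
  # dynamic values above, e.g. (illustrative): {"group": <uuid>, "offline":
  # False, "total_memory": 4096, "free_memory": 2048, "total_cpus": 4, ...}.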
  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data."""
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {
          "mac": nic.mac,
          "ip": nic.ip,
          "mode": filled_params[constants.NIC_MODE],
          "link": filled_params[constants.NIC_LINK],
          }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
14205 "tags": list(iinfo.GetTags()),
14206 "admin_state": iinfo.admin_state,
14207 "vcpus": beinfo[constants.BE_VCPUS],
14208 "memory": beinfo[constants.BE_MAXMEM],
14210 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
14212 "disks": [{constants.IDISK_SIZE: dsk.size,
14213 constants.IDISK_MODE: dsk.mode}
14214 for dsk in iinfo.disks],
14215 "disk_template": iinfo.disk_template,
14216 "hypervisor": iinfo.hypervisor,
14218 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
14220 instance_data[iinfo.name] = pir
14222 return instance_data
14224 def _AddNewInstance(self):
14225 """Add new instance data to allocator structure.
14227 This in combination with _AllocatorGetClusterData will create the
14228 correct structure needed as input for the allocator.
14230 The checks for the completeness of the opcode must have already been
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_INT_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1

    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.memory,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      "hypervisor": self.hypervisor,
      }

    return request
14257 def _AddRelocateInstance(self):
14258 """Add relocate instance data to allocator structure.
14260 This in combination with _IAllocatorGetClusterData will create the
14261 correct structure needed as input for the allocator.
14263 The checks for the completeness of the opcode must have already been
14267 instance = self.cfg.GetInstanceInfo(self.name)
14268 if instance is None:
14269 raise errors.ProgrammerError("Unknown instance '%s' passed to"
14270 " IAllocator" % self.name)
14272 if instance.disk_template not in constants.DTS_MIRRORED:
14273 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
14274 errors.ECODE_INVAL)
14276 if instance.disk_template in constants.DTS_INT_MIRROR and \
14277 len(instance.secondary_nodes) != 1:
14278 raise errors.OpPrereqError("Instance has not exactly one secondary node",
14279 errors.ECODE_STATE)
14281 self.required_nodes = 1
14282 disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request
14293 def _AddNodeEvacuate(self):
14294 """Get data for node-evacuate requests.
14298 "instances": self.instances,
14299 "evac_mode": self.evac_mode,
14302 def _AddChangeGroup(self):
14303 """Get data for node-evacuate requests.
14307 "instances": self.instances,
14308 "target_groups": self.target_groups,
14311 def _BuildInputData(self, fn, keydata):
14312 """Build input data structures.
14315 self._ComputeClusterData()
14318 request["type"] = self.mode
14319 for keyname, keytype in keydata:
14320 if keyname not in request:
14321 raise errors.ProgrammerError("Request parameter %s is missing" %
14323 val = request[keyname]
14324 if not keytype(val):
14325 raise errors.ProgrammerError("Request parameter %s doesn't pass"
14326 " validation, value %s, expected"
14327 " type %s" % (keyname, val, keytype))
14328 self.in_data["request"] = request
14330 self.in_text = serializer.Dump(self.in_data)
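  # Sketch of the serialized input handed to the iallocator script (exact
  # fields depend on mode and cluster state):
  # {"version": ..., "cluster_name": ..., "nodes": {...}, "instances": {...},
  #  "nodegroups": {...}, "request": {"type": <mode>, ...}}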
14332 _STRING_LIST = ht.TListOf(ht.TString)
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
     # pylint: disable=E1101
     # Class '...' has no 'OP_ID' member
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
                          opcodes.OpInstanceMigrate.OP_ID,
                          opcodes.OpInstanceReplaceDisks.OP_ID])
     })))

  _NEVAC_MOVED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TNonEmptyString,
                                  ht.TListOf(ht.TNonEmptyString),
                                 ])))
  _NEVAC_FAILED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TMaybeString,
                                 ])))
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
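  # A node-evacuate/change-group result is a (moved, failed, jobs) triple:
  # "moved" lists (instance, target group, new nodes) entries, "failed" lists
  # (instance, reason) pairs and "jobs" holds the opcode lists to be submitted.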
  _MODE_DATA = {
    constants.IALLOCATOR_MODE_ALLOC:
      (_AddNewInstance,
       [
        ("name", ht.TString),
        ("memory", ht.TInt),
        ("disks", ht.TListOf(ht.TDict)),
        ("disk_template", ht.TString),
        ("os", ht.TString),
        ("tags", _STRING_LIST),
        ("nics", ht.TListOf(ht.TDict)),
        ("vcpus", ht.TInt),
        ("hypervisor", ht.TString),
        ], ht.TList),
    constants.IALLOCATOR_MODE_RELOC:
      (_AddRelocateInstance,
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
       ht.TList),
    constants.IALLOCATOR_MODE_NODE_EVAC:
      (_AddNodeEvacuate, [
        ("instances", _STRING_LIST),
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
        ], _NEVAC_RESULT),
    constants.IALLOCATOR_MODE_CHG_GROUP:
      (_AddChangeGroup, [
        ("instances", _STRING_LIST),
        ("target_groups", _STRING_LIST),
        ], _NEVAC_RESULT),
    }
14385 def Run(self, name, validate=True, call_fn=None):
14386 """Run an instance allocator and return the results.
14389 if call_fn is None:
14390 call_fn = self.rpc.call_iallocator_runner
14392 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
14393 result.Raise("Failure while running the iallocator script")
    self.out_text = result.payload
    if validate:
      self._ValidateResult()
14399 def _ValidateResult(self):
14400 """Process the allocator results.
14402 This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
14409 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
14411 if not isinstance(rdict, dict):
14412 raise errors.OpExecError("Can't parse iallocator results: not a dict")
    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]
14419 for key in "success", "info", "result":
14420 if key not in rdict:
14421 raise errors.OpExecError("Can't parse iallocator results:"
14422 " missing key '%s'" % key)
14423 setattr(self, key, rdict[key])
14425 if not self._result_check(self.result):
14426 raise errors.OpExecError("Iallocator returned invalid result,"
14427 " expected %s, got %s" %
14428 (self._result_check, self.result),
14429 errors.ECODE_INVAL)
14431 if self.mode == constants.IALLOCATOR_MODE_RELOC:
14432 assert self.relocate_from is not None
14433 assert self.required_nodes == 1
14435 node2group = dict((name, ndata["group"])
14436 for (name, ndata) in self.in_data["nodes"].items())
14438 fn = compat.partial(self._NodesToGroups, node2group,
14439 self.in_data["nodegroups"])
14441 instance = self.cfg.GetInstanceInfo(self.name)
14442 request_groups = fn(self.relocate_from + [instance.primary_node])
14443 result_groups = fn(rdict["result"] + [instance.primary_node])
14445 if self.success and not set(result_groups).issubset(request_groups):
14446 raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
14447 " differ from original groups (%s)" %
14448 (utils.CommaJoin(result_groups),
14449 utils.CommaJoin(request_groups)))
14451 elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
14452 assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
14454 self.out_data = rdict
  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
    """Returns a list of unique group names for a list of nodes.

    @type node2group: dict
    @param node2group: Map from node name to group UUID
    @param groups: Group information
    @param nodes: Node names

    """
    result = set()
    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        pass
      else:
        try:
          group = groups[group_uuid]
        except KeyError:
          # Can't find group, let's use UUID
          group_name = group_uuid
        else:
          group_name = group["name"]
        result.add(group_name)
14487 return sorted(result)
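# Example with hypothetical values: _NodesToGroups({"n1": "uuid1"},
# {"uuid1": {"name": "default"}}, ["n1", "unknown-node"]) returns ["default"];
# unknown nodes are skipped and group UUIDs without data are kept as-is.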
14490 class LUTestAllocator(NoHooksLU):
14491 """Run allocator tests.
14493 This LU runs the allocator tests
14496 def CheckPrereq(self):
14497 """Check prerequisites.
14499 This checks the opcode parameters depending on the director and mode test.
14502 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
14503 for attr in ["memory", "disks", "disk_template",
14504 "os", "tags", "nics", "vcpus"]:
14505 if not hasattr(self.op, attr):
14506 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
14507 attr, errors.ECODE_INVAL)
14508 iname = self.cfg.ExpandInstanceName(self.op.name)
14509 if iname is not None:
14510 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
14511 iname, errors.ECODE_EXISTS)
14512 if not isinstance(self.op.nics, list):
14513 raise errors.OpPrereqError("Invalid parameter 'nics'",
14514 errors.ECODE_INVAL)
14515 if not isinstance(self.op.disks, list):
14516 raise errors.OpPrereqError("Invalid parameter 'disks'",
14517 errors.ECODE_INVAL)
14518 for row in self.op.disks:
14519 if (not isinstance(row, dict) or
14520 constants.IDISK_SIZE not in row or
14521 not isinstance(row[constants.IDISK_SIZE], int) or
14522 constants.IDISK_MODE not in row or
14523 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
14524 raise errors.OpPrereqError("Invalid contents of the 'disks'"
14525 " parameter", errors.ECODE_INVAL)
14526 if self.op.hypervisor is None:
14527 self.op.hypervisor = self.cfg.GetHypervisorType()
14528 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
14529 fname = _ExpandInstanceName(self.cfg, self.op.name)
14530 self.op.name = fname
14531 self.relocate_from = \
14532 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
14533 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
14534 constants.IALLOCATOR_MODE_NODE_EVAC):
14535 if not self.op.instances:
14536 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
14540 self.op.mode, errors.ECODE_INVAL)
14542 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
14543 if self.op.allocator is None:
14544 raise errors.OpPrereqError("Missing allocator name",
14545 errors.ECODE_INVAL)
14546 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
14547 raise errors.OpPrereqError("Wrong allocator test '%s'" %
14548 self.op.direction, errors.ECODE_INVAL)
14550 def Exec(self, feedback_fn):
14551 """Run the allocator test.
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)
    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result
#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
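# The assertion above guards against a query resource being exposed via
# opcodes (constants.QR_VIA_OP) without a matching implementation here.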
def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)