# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable=W0201,C0302

# W0201 since most LU attributes are defined in CheckPrereq or similar
# functions

# C0302: since we have waaaay too many lines in this module

import copy
import itertools
import logging
import re

import OpenSSL

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import opcodes

from ganeti import rpc

import ganeti.masterd.instance # pylint: disable=W0611


#: Size of DRBD meta block device
_DRBD_META_SIZE = 128

# States of instance
INSTANCE_UP = [constants.ADMINST_UP]
INSTANCE_DOWN = [constants.ADMINST_DOWN]
INSTANCE_OFFLINE = [constants.ADMINST_OFFLINE]
INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]


class ResultWithJobs:
  """Data container for LU results with jobs.

  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
  contained in the C{jobs} attribute and include the job IDs in the opcode
  result.

  """
  def __init__(self, jobs, **kwargs):
    """Initializes this class.

    Additional return values can be specified as keyword arguments.

    @type jobs: list of lists of L{opcodes.OpCode}
    @param jobs: A list of lists of opcode objects
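
    A minimal usage sketch (the opcodes shown are purely illustrative)::

      # Submit two single-opcode jobs and return extra data to the caller
      return ResultWithJobs([[opcodes.OpClusterVerifyConfig()],
                             [opcodes.OpClusterVerifyGroup(group_name="g1")]],
                            result="done")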

    """
    assert "jobs" not in kwargs, "Can't contain job IDs"

    self.jobs = jobs
    self.other = kwargs


class LogicalUnit(object):
101 """Logical Unit base class.
103 Subclasses must follow these rules:
104 - implement ExpandNames
105 - implement CheckPrereq (except when tasklets are used)
106 - implement Exec (except when tasklets are used)
107 - implement BuildHooksEnv
108 - implement BuildHooksNodes
109 - redefine HPATH and HTYPE
110 - optionally redefine their run requirements:
111 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
113 Note that all commands require root permissions.
115 @ivar dry_run_result: the value (if any) that will be returned to the caller
116 in dry-run mode (signalled by opcode dry_run parameter)
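
  A skeletal subclass might look like this (an illustrative sketch, not a
  real LU)::

    class LUExampleNoop(LogicalUnit):
      HPATH = "example-noop"
      HTYPE = constants.HTYPE_CLUSTER

      def ExpandNames(self):
        self.needed_locks = {}

      def BuildHooksEnv(self):
        return {"OP_TARGET": self.cfg.GetClusterName()}

      def BuildHooksNodes(self):
        return ([], [self.cfg.GetMasterNode()])

      def CheckPrereq(self):
        pass

      def Exec(self, feedback_fn):
        feedback_fn("Doing nothing")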

  """
  HPATH = None
  HTYPE = None
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc_runner):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.glm = context.glm
    # readability alias
    self.owned_locks = context.glm.list_owned
    self.context = context
    self.rpc = rpc_runner
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    # logging
    self.Log = processor.Log # pylint: disable=C0103
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
    self.LogStep = processor.LogStep # pylint: disable=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    self.CheckArguments()

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as a purely lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods no longer need to worry about missing parameters.

    """

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock
    names as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1)
    for that level. By default locks are not shared.
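
    For example, to acquire the node locks in shared mode (a one-line
    sketch)::

      self.share_locks[locking.LEVEL_NODE] = 1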

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level.

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and
    permits such calculations. It can be used to modify self.needed_locks,
    and by default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS
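
    A typical implementation delegates to a helper once the instance locks
    are held (a sketch mirroring the pattern documented for
    L{_LockInstancesNodes})::

      def DeclareLocks(self, level):
        if level == locking.LEVEL_NODE:
          self._LockInstancesNodes()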

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      pass

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    @rtype: dict
    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
      will extend the environment with additional variables. If no environment
      should be defined, an empty dictionary should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.

    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. No nodes should be returned as an
      empty list (and not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
      L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
    @return: the new Exec result, based on the previous result

    """
    # API must be kept, thus we ignore the unused argument and the
    # "method could be a function" warnings
    # pylint: disable=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False,
                          level=locking.LEVEL_NODE):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to
    lock their nodes. Its effect is populating
    self.needed_locks[locking.LEVEL_NODE] with all primary or secondary nodes
    for instances already locked and present in
    self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes,
    or to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances
    @param level: Which lock level to use for locking nodes

    """
    assert level in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we're really being called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
      self.needed_locks[level] = wanted_nodes
    elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
      self.needed_locks[level].extend(wanted_nodes)
    else:
      raise errors.ProgrammerError("Unknown recalculation mode")

    del self.recalculate_locks[level]


class NoHooksLU(LogicalUnit): # pylint: disable=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.

    This just raises an error.

    """
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")

  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.

    """
    raise AssertionError("BuildHooksNodes called for NoHooksLU")


class Tasklet:
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

  """
  def __init__(self, lu):
    self.lu = lu

    # Shortcuts
    self.cfg = lu.cfg
    self.rpc = lu.rpc

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if
    it hasn't been done before.

    """
    pass

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError
481 """Base for query utility classes.
484 #: Attribute holding field definitions
487 def __init__(self, qfilter, fields, use_locking):
488 """Initializes this class.
491 self.use_locking = use_locking
493 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
495 self.requested_data = self.query.RequestedData()
496 self.names = self.query.RequestedNames()
498 # Sort only if no names were requested
499 self.sort_by_name = not self.names
501 self.do_locking = None
504 def _GetNames(self, lu, all_names, lock_level):
505 """Helper function to determine names asked for in the query.
509 names = lu.owned_locks(lock_level)
513 if self.wanted == locking.ALL_SET:
514 assert not self.names
515 # caller didn't specify names, so ordering is not important
516 return utils.NiceSort(names)
518 # caller specified names and we must keep the same order
520 assert not self.do_locking or lu.glm.is_owned(lock_level)
522 missing = set(self.wanted).difference(names)
524 raise errors.OpExecError("Some items were removed before retrieving"
525 " their data: %s" % missing)
527 # Return expanded names

  def ExpandNames(self, lu):
    """Expand names for this query.

    See L{LogicalUnit.ExpandNames}.

    """
    raise NotImplementedError()

  def DeclareLocks(self, lu, level):
    """Declare locks for this query.

    See L{LogicalUnit.DeclareLocks}.

    """
    raise NotImplementedError()

  def _GetQueryData(self, lu):
    """Collects all data for this query.

    @return: Query data object

    """
    raise NotImplementedError()

  def NewStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
                                  sort_by_name=self.sort_by_name)

  def OldStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return self.query.OldStyleQuery(self._GetQueryData(lu),
                                    sort_by_name=self.sort_by_name)
570 """Returns a dict declaring all lock levels shared.
573 return dict.fromkeys(locking.LEVELS, 1)
576 def _MakeLegacyNodeInfo(data):
577 """Formats the data returned by L{rpc.RpcRunner.call_node_info}.
579 Converts the data into a single dictionary. This is fine for most use cases,
580 but some require information from more than one volume group or hypervisor.
583 (bootid, (vg_info, ), (hv_info, )) = data
585 return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {


def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
  """Checks if the owned node groups are still correct for an instance.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type instance_name: string
  @param instance_name: Instance name
  @type owned_groups: set or frozenset
  @param owned_groups: List of currently owned node groups

  """
  inst_groups = cfg.GetInstanceNodeGroups(instance_name)

  if not owned_groups.issuperset(inst_groups):
    raise errors.OpPrereqError("Instance %s's node groups changed since"
                               " locks were acquired, current groups"
                               " are '%s', owning groups '%s'; retry the"
                               " operation" %
                               (instance_name,
                                utils.CommaJoin(inst_groups),
                                utils.CommaJoin(owned_groups)),
                               errors.ECODE_STATE)

  return inst_groups


def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
  """Checks if the instances in a node group are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type group_uuid: string
  @param group_uuid: Node group UUID
  @type owned_instances: set or frozenset
  @param owned_instances: List of currently owned instances

  """
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
  if owned_instances != wanted_instances:
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
                               " locks were acquired, wanted '%s', have '%s';"
                               " retry the operation" %
                               (group_uuid,
                                utils.CommaJoin(wanted_instances),
                                utils.CommaJoin(owned_instances)),
                               errors.ECODE_STATE)

  return wanted_instances


def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise

  """
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary
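
  Example (illustrative values only)::

    _GetUpdatedParams({"a": 1, "b": 2},
                      {"a": constants.VALUE_DEFAULT, "c": 3})
    # => {"b": 2, "c": 3}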

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy


def _UpdateAndVerifySubDict(base, updates, type_check):
  """Updates and verifies a dict with sub dicts of the same type.

  @param base: The dict with the old data
  @param updates: The dict with the new data
  @param type_check: Dict suitable to ForceDictType to verify correct types
  @return: A new dict with updated and verified values
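
  Example (illustrative values; C{type_check} must allow the keys used)::

    _UpdateAndVerifySubDict({"grp": {"x": 1}}, {"grp": {"y": 2}}, type_check)
    # => {"grp": {"x": 1, "y": 2}}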

  """
  def fn(old, value):
    new = _GetUpdatedParams(old, value)
    utils.ForceDictType(new, type_check)
    return new

  ret = copy.deepcopy(base)
  ret.update(dict((key, fn(base.get(key, {}), value))
                  for key, value in updates.items()))
  return ret


def _MergeAndVerifyHvState(op_input, obj_input):
  """Combines the hv state from an opcode with the one of the object.

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict

  """
  if op_input:
    invalid_hvs = set(op_input) - constants.HYPER_TYPES
    if invalid_hvs:
      raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
                                 " %s" % utils.CommaJoin(invalid_hvs),
                                 errors.ECODE_INVAL)
    if obj_input is None:
      obj_input = {}
    type_check = constants.HVSTS_PARAMETER_TYPES
    return _UpdateAndVerifySubDict(obj_input, op_input, type_check)

  return None


def _MergeAndVerifyDiskState(op_input, obj_input):
  """Combines the disk state from an opcode with the one of the object.

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict

  """
  if op_input:
    invalid_dst = set(op_input) - constants.DS_VALID_TYPES
    if invalid_dst:
      raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
                                 utils.CommaJoin(invalid_dst),
                                 errors.ECODE_INVAL)
    type_check = constants.DSS_PARAMETER_TYPES
    if obj_input is None:
      obj_input = {}
    return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
                                              type_check))
                for key, value in op_input.items())

  return None


def _ReleaseLocks(lu, level, names=None, keep=None):
  """Releases locks owned by an LU.

  @type lu: L{LogicalUnit}
  @param level: Lock level
  @type names: list or None
  @param names: Names of locks to release
  @type keep: list or None
  @param keep: Names of locks to retain
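
  For example, to keep only the locks on two nodes and release all other node
  locks held by the LU (a usage sketch; the node names are illustrative)::

    _ReleaseLocks(lu, locking.LEVEL_NODE,
                  keep=["node1.example.com", "node2.example.com"])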

  """
  assert not (keep is not None and names is not None), \
    "Only one of the 'names' and the 'keep' parameters can be given"

  if names is not None:
    should_release = names.__contains__
  elif keep:
    should_release = lambda name: name not in keep
  else:
    should_release = None

  owned = lu.owned_locks(level)
  if not owned:
    # Not owning any lock at this level, do nothing
    pass

  elif should_release:
    retain = []
    release = []

    # Determine which locks to release
    for name in owned:
      if should_release(name):
        release.append(name)
      else:
        retain.append(name)

    assert len(lu.owned_locks(level)) == (len(retain) + len(release))

    # Release just some locks
    lu.glm.release(level, names=release)

    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
  else:
    # Release everything
    lu.glm.release(level)

    assert not lu.glm.is_owned(level), "No locks should be owned"


def _MapInstanceDisksToNodes(instances):
  """Creates a map from (node, volume) to instance name.

  @type instances: list of L{objects.Instance}
  @rtype: dict; tuple of (node name, volume name) as key, instance name as
      value
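
  Example result (illustrative names only)::

    {("node1.example.com", "xenvg/disk0"): "inst1.example.com",
     ("node2.example.com", "xenvg/disk0"): "inst1.example.com"}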

  """
  return dict(((node, vol), inst.name)
              for inst in instances
              for (node, vols) in inst.MapLVsByNode().items()
              for vol in vols)


def _RunPostHook(lu, node_name):
  """Runs the post-hook for an opcode on a single node.

  """
  hm = lu.proc.BuildHooksManager(lu)
  try:
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
  except:
    # pylint: disable=W0702
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global parameters.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceState(lu, instance, req_states, msg=None):
  """Ensure that an instance is in one of the required states.

  @param lu: the LU on behalf of which we make the check
  @param instance: the instance to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the instance is not in the required state

  """
  if msg is None:
    msg = "can't use instance from outside %s states" % ", ".join(req_states)
  if instance.admin_state not in req_states:
    raise errors.OpPrereqError("Instance %s is marked to be %s, %s" %
                               (instance, instance.admin_state, msg),
                               errors.ECODE_STATE)

  if constants.ADMINST_UP not in req_states:
    pnode = instance.primary_node
    ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
    ins_l.Raise("Can't contact node %s for instance information" % pnode,
                prereq=True, ecode=errors.ECODE_ENVIRON)

    if instance.name in ins_l.payload:
      raise errors.OpPrereqError("Instance %s is running, %s" %
                                 (instance.name, msg), errors.ECODE_STATE)


def _CheckMinMaxSpecs(name, ipolicy, value):
  """Checks if value is in the desired range.

  @param name: name of the parameter for which we perform the check
  @param ipolicy: dictionary containing min, max and std values
  @param value: actual value that we want to use
  @return: None or element not meeting the criteria
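
  Example (illustrative policy keys and values)::

    ipolicy = {constants.ISPECS_MIN: {"mem-size": 128},
               constants.ISPECS_MAX: {"mem-size": 4096}}
    _CheckMinMaxSpecs("mem-size", ipolicy, 8192)
    # => "mem-size value 8192 is not in range [128, 4096]"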

  """
  if value in [None, constants.VALUE_AUTO]:
    return None
  max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
  min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
  if value > max_v or min_v > value:
    return ("%s value %s is not in range [%s, %s]" %
            (name, value, min_v, max_v))
  return None


def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
                                 nic_count, disk_sizes,
                                 _check_spec_fn=_CheckMinMaxSpecs):
  """Verifies ipolicy against provided specs.

  @type ipolicy: dict
  @param ipolicy: The ipolicy
  @type mem_size: int
  @param mem_size: The memory size
  @type cpu_count: int
  @param cpu_count: Used cpu cores
  @type disk_count: int
  @param disk_count: Number of disks used
  @type nic_count: int
  @param nic_count: Number of nics used
  @type disk_sizes: list of ints
  @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
  @param _check_spec_fn: The checking function (unittest only)
  @return: A list of violations, or an empty list if no violations are found
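
  Usage sketch (illustrative policy and values)::

    violations = _ComputeIPolicySpecViolation(ipolicy, 1024, 2, 1, 1, [2048])
    if violations:
      raise errors.OpPrereqError("Instance violates policy: %s" %
                                 utils.CommaJoin(violations),
                                 errors.ECODE_INVAL)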

  """
  assert disk_count == len(disk_sizes)

  test_settings = [
    (constants.ISPEC_MEM_SIZE, mem_size),
    (constants.ISPEC_CPU_COUNT, cpu_count),
    (constants.ISPEC_DISK_COUNT, disk_count),
    (constants.ISPEC_NIC_COUNT, nic_count),
    ] + map((lambda d: (constants.ISPEC_DISK_SIZE, d)), disk_sizes)

  return filter(None,
                (_check_spec_fn(name, ipolicy, value)
                 for (name, value) in test_settings))


def _ComputeIPolicyInstanceViolation(ipolicy, instance,
                                     _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance meets the specs of ipolicy.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance: L{objects.Instance}
  @param instance: The instance to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
  cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
  disk_count = len(instance.disks)
  disk_sizes = [disk.size for disk in instance.disks]
  nic_count = len(instance.nics)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes)


def _ComputeIPolicyInstanceSpecViolation(ipolicy, instance_spec,
                                         _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance specs meet the specs of ipolicy.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance_spec: dict
  @param instance_spec: The instance spec to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
  cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
  disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
  disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
  nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes)


def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
                                 target_group,
                                 _compute_fn=_ComputeIPolicyInstanceViolation):
  """Compute if instance meets the specs of the new target group.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param current_group: The current group of the instance
  @param target_group: The new group of the instance
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  if current_group == target_group:
    return []
  else:
    return _compute_fn(ipolicy, instance)


def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
                            _compute_fn=_ComputeIPolicyNodeViolation):
  """Checks that the target node is correct in terms of instance policy.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param node: The new node to relocate
  @param ignore: Ignore violations of the ipolicy
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  res = _compute_fn(ipolicy, instance, instance.primary_node.group, node.group)

  if res:
    msg = ("Instance does not meet target node group's (%s) instance"
           " policy: %s") % (node.group, utils.CommaJoin(res))
    if ignore:
      lu.LogWarning(msg)
    else:
      raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instances."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          minmem, maxmem, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name, tags):
  """Builds instance related env variables for hooks.

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: string
  @param status: the desired status of the instance
  @type minmem: string
  @param minmem: the minimum memory size of the instance
  @type maxmem: string
  @param maxmem: the maximum memory size of the instance
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @type tags: list
  @param tags: list of instance tags as strings
  @rtype: dict
  @return: the hook environment for this instance
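
  A few of the resulting keys, for a one-NIC instance (illustrative values)::

    {"INSTANCE_NAME": "inst1.example.com",
     "INSTANCE_PRIMARY": "node1.example.com",
     "INSTANCE_NIC_COUNT": 1,
     "INSTANCE_NIC0_MAC": "aa:00:00:35:d2:7c",
     ...}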
1231 "INSTANCE_NAME": name,
1232 "INSTANCE_PRIMARY": primary_node,
1233 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1234 "INSTANCE_OS_TYPE": os_type,
1235 "INSTANCE_STATUS": status,
1236 "INSTANCE_MINMEM": minmem,
1237 "INSTANCE_MAXMEM": maxmem,
1238 # TODO(2.7) remove deprecated "memory" value
1239 "INSTANCE_MEMORY": maxmem,
1240 "INSTANCE_VCPUS": vcpus,
1241 "INSTANCE_DISK_TEMPLATE": disk_template,
1242 "INSTANCE_HYPERVISOR": hypervisor_name,
1245 nic_count = len(nics)
1246 for idx, (ip, mac, mode, link) in enumerate(nics):
1249 env["INSTANCE_NIC%d_IP" % idx] = ip
1250 env["INSTANCE_NIC%d_MAC" % idx] = mac
1251 env["INSTANCE_NIC%d_MODE" % idx] = mode
1252 env["INSTANCE_NIC%d_LINK" % idx] = link
1253 if mode == constants.NIC_MODE_BRIDGED:
1254 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1258 env["INSTANCE_NIC_COUNT"] = nic_count
1261 disk_count = len(disks)
1262 for idx, (size, mode) in enumerate(disks):
1263 env["INSTANCE_DISK%d_SIZE" % idx] = size
1264 env["INSTANCE_DISK%d_MODE" % idx] = mode
1268 env["INSTANCE_DISK_COUNT"] = disk_count
1273 env["INSTANCE_TAGS"] = " ".join(tags)
1275 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1276 for key, value in source.items():
1277 env["INSTANCE_%s_%s" % (kind, key)] = value


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary
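
  Usage sketch (overriding the reported status; the override keys mirror the
  argument names of L{_BuildInstanceHookEnv})::

    env = _BuildInstanceHookEnvByObject(self, self.instance,
                                        override={"status": "down"})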

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    "name": instance.name,
    "primary_node": instance.primary_node,
    "secondary_nodes": instance.secondary_nodes,
    "os_type": instance.os,
    "status": instance.admin_state,
    "maxmem": bep[constants.BE_MAXMEM],
    "minmem": bep[constants.BE_MINMEM],
    "vcpus": bep[constants.BE_VCPUS],
    "nics": _NICListToTuple(lu, instance.nics),
    "disk_template": instance.disk_template,
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
    "bep": bep,
    "hvp": hvp,
    "hypervisor_name": instance.hypervisor,
    "tags": instance.tags,
    }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max by one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should


def _CalculateGroupIPolicy(cluster, group):
  """Calculate instance policy for group.

  """
  return cluster.SimpleFillIPolicy(group.ipolicy)


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  variant = objects.OS.GetVariant(name)
  if not os_obj.supported_variants:
    if variant:
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                                 " passed)" % (os_obj.name, variant),
                                 errors.ECODE_INVAL)
    return
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
                               errors.ECODE_INVAL)
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found;"
                                 " please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator", errors.ECODE_INVAL)


def _GetDefaultIAllocator(cfg, iallocator):
  """Decides on which iallocator to use.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration object
  @type iallocator: string or None
  @param iallocator: Iallocator specified in opcode
  @rtype: string
  @return: Iallocator name

  """
  if not iallocator:
    # Use default iallocator
    iallocator = cfg.GetDefaultIAllocator()

  if not iallocator:
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
                               " opcode nor as a cluster-wide default",
                               errors.ECODE_INVAL)

  return iallocator


class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master_params = self.cfg.GetMasterNetworkParameters()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master_params.name)

    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Error disabling the master IP address: %s", msg)

    return master_params.name


def _VerifyCertificate(filename):
  """Verifies a certificate for L{LUClusterVerifyConfig}.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable=W0703
    return (LUClusterVerifyConfig.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)


def _GetAllHypervisorParameters(cluster, instances):
  """Compute the set of all hypervisor parameters.

  @type cluster: L{objects.Cluster}
  @param cluster: the cluster object
  @type instances: list of L{objects.Instance}
  @param instances: additional instances from which to obtain parameters
  @rtype: list of (origin, hypervisor, parameters)
  @return: a list with all parameters found, indicating the hypervisor they
       apply to, and the origin (can be "cluster", "os X", or "instance Y")
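
  Example entries (illustrative names and abbreviated parameter dicts)::

    [("cluster", "xen-pvm", {...}),
     ("os lenny-image", "xen-pvm", {...}),
     ("instance inst1.example.com", "xen-pvm", {...})]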

  """
  hvp_data = []

  for hv_name in cluster.enabled_hypervisors:
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))

  for os_name, os_hvp in cluster.os_hvp.items():
    for hv_name, hv_params in os_hvp.items():
      if hv_params:
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
        hvp_data.append(("os %s" % os_name, hv_name, full_params))

  # TODO: collapse identical parameter values in a single one
  for instance in instances:
    if instance.hvparams:
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
                       cluster.FillHV(instance)))

  return hvp_data


class _VerifyErrors(object):
  """Mix-in for cluster/group verify LUs.

  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
  self.op and self._feedback_fn to be available.)

  """

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt, _ = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item is not None:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101

  def _ErrorIf(self, cond, ecode, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = (bool(cond)
            or self.op.debug_simulate_errors) # pylint: disable=E1101

    # If the error code is in the list of ignored errors, demote the error to
    # a warning
    (_, etxt, _) = ecode
    if etxt in self.op.ignore_errors: # pylint: disable=E1101
      kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING

    if cond:
      self._Error(ecode, *args, **kwargs)

    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond


class LUClusterVerify(NoHooksLU):
  """Submits all jobs necessary to verify the cluster.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    jobs = []

    if self.op.group_name:
      groups = [self.op.group_name]
      depends_fn = lambda: None
    else:
      groups = self.cfg.GetNodeGroupList()

      # Verify global configuration
      jobs.append([
        opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
        ])

      # Always depend on global verification
      depends_fn = lambda: [(-len(jobs), [])]

    jobs.extend(
      [opcodes.OpClusterVerifyGroup(group_name=group,
                                    ignore_errors=self.op.ignore_errors,
                                    depends=depends_fn())]
      for group in groups)

    # Fix up all parameters
    for op in itertools.chain(*jobs): # pylint: disable=W0142
      op.debug_simulate_errors = self.op.debug_simulate_errors
      op.verbose = self.op.verbose
      op.error_codes = self.op.error_codes
      try:
        op.skip_checks = self.op.skip_checks
      except AttributeError:
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)

    return ResultWithJobs(jobs)


class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
  """Verifies the cluster config.

  """
  REQ_BGL = False

  def _VerifyHVP(self, hvp_data):
    """Verifies locally the syntax of the hypervisor parameters.

    """
    for item, hv_name, hv_params in hvp_data:
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
             (hv_name, item))
      try:
        hv_class = hypervisor.GetHypervisor(hv_name)
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
        hv_class.CheckParameterSyntax(hv_params)
      except errors.GenericError, err:
        self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))

  def ExpandNames(self):
    # Information can be safely retrieved as the BGL is acquired in exclusive
    # mode
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    """
    self.bad = False
    self._feedback_fn = feedback_fn

    feedback_fn("* Verifying cluster config")

    for msg in self.cfg.VerifyConfig():
      self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)

    feedback_fn("* Verifying cluster certificate files")

    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)

    feedback_fn("* Verifying hypervisor parameters")

    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
                                                self.all_inst_info.values()))

    feedback_fn("* Verifying all nodes belong to an existing group")

    # We do this verification here because, should this bogus circumstance
    # occur, it would never be caught by VerifyGroup, which only acts on
    # nodes/instances reachable from existing node groups.

    dangling_nodes = set(node.name for node in self.all_node_info.values()
                         if node.group not in self.all_group_info)

    dangling_instances = {}
    no_node_instances = []

    for inst in self.all_inst_info.values():
      if inst.primary_node in dangling_nodes:
        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
      elif inst.primary_node not in self.all_node_info:
        no_node_instances.append(inst.name)

    pretty_dangling = [
        "%s (%s)" %
        (node.name,
         utils.CommaJoin(dangling_instances.get(node.name,
                                                ["no instances"])))
        for node in dangling_nodes]

    self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
                  None,
                  "the following nodes (and their instances) belong to a non"
                  " existing group: %s", utils.CommaJoin(pretty_dangling))

    self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
                  None,
                  "the following instances have a non-existing primary-node:"
                  " %s", utils.CommaJoin(no_node_instances))

    return not self.bad


class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
  """Verifies the status of a node group.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  _HOOKS_INDENT_RE = re.compile("^", re.M)

  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @type name: string
    @ivar name: the node name to which this object refers
    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dictionary of {primary-node: list of instances} for all
        instances for which this node is secondary (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)
    @type os_fail: boolean
    @ivar os_fail: whether the RPC call didn't return valid OS data
    @type oslist: list
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
    @type vm_capable: boolean
    @ivar vm_capable: whether the node can host instances

    """

    def __init__(self, offline=False, name=None, vm_capable=True):
      self.name = name
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.vm_capable = vm_capable
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
      self.os_fail = False
      self.oslist = {}
1946 def ExpandNames(self):
1947 # This raises errors.OpPrereqError on its own:
1948 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1950 # Get instances in node group; this is unsafe and needs verification later
1951 inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
1953 self.needed_locks = {
1954 locking.LEVEL_INSTANCE: inst_names,
1955 locking.LEVEL_NODEGROUP: [self.group_uuid],
1956 locking.LEVEL_NODE: [],
1959 self.share_locks = _ShareAll()
1961 def DeclareLocks(self, level):
1962 if level == locking.LEVEL_NODE:
1963 # Get members of node group; this is unsafe and needs verification later
1964 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1966 all_inst_info = self.cfg.GetAllInstancesInfo()
1968 # In Exec(), we warn about mirrored instances that have primary and
1969 # secondary living in separate node groups. To fully verify that
1970 # volumes for these instances are healthy, we will need to do an
1971 # extra call to their secondaries. We ensure here those nodes will
1972 # be locked.
1973 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
1974 # Important: access only the instances whose lock is owned
1975 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1976 nodes.update(all_inst_info[inst].secondary_nodes)
1978 self.needed_locks[locking.LEVEL_NODE] = nodes
1980 def CheckPrereq(self):
1981 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
1982 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
1984 group_nodes = set(self.group_info.members)
1985 group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
1987 unlocked_nodes = \
1988 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1990 unlocked_instances = \
1991 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
1993 if unlocked_nodes:
1994 raise errors.OpPrereqError("Missing lock for nodes: %s" %
1995 utils.CommaJoin(unlocked_nodes),
1996 errors.ECODE_STATE)
1997 if unlocked_instances:
1998 raise errors.OpPrereqError("Missing lock for instances: %s" %
1999 utils.CommaJoin(unlocked_instances),
2000 errors.ECODE_STATE)
2001 self.all_node_info = self.cfg.GetAllNodesInfo()
2002 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2004 self.my_node_names = utils.NiceSort(group_nodes)
2005 self.my_inst_names = utils.NiceSort(group_instances)
2007 self.my_node_info = dict((name, self.all_node_info[name])
2008 for name in self.my_node_names)
2010 self.my_inst_info = dict((name, self.all_inst_info[name])
2011 for name in self.my_inst_names)
2013 # We detect here the nodes that will need the extra RPC calls for verifying
2014 # split LV volumes; they should be locked.
2015 extra_lv_nodes = set()
2017 for inst in self.my_inst_info.values():
2018 if inst.disk_template in constants.DTS_INT_MIRROR:
2019 group = self.my_node_info[inst.primary_node].group
2020 for nname in inst.secondary_nodes:
2021 if self.all_node_info[nname].group != group:
2022 extra_lv_nodes.add(nname)
2024 unlocked_lv_nodes = \
2025 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2027 if unlocked_lv_nodes:
2028 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2029 utils.CommaJoin(unlocked_lv_nodes))
2030 self.extra_lv_nodes = list(extra_lv_nodes)
2032 def _VerifyNode(self, ninfo, nresult):
2033 """Perform some basic validation on data returned from a node.
2035 - check the result data structure is well formed and has all the
2036 mandatory fields
2037 - check ganeti version
2039 @type ninfo: L{objects.Node}
2040 @param ninfo: the node to check
2041 @param nresult: the results from the node
2043 @return: whether overall this call was successful (and we can expect
2044 reasonable values in the response)
2048 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2050 # main result, nresult should be a non-empty dict
2051 test = not nresult or not isinstance(nresult, dict)
2052 _ErrorIf(test, constants.CV_ENODERPC, node,
2053 "unable to verify node: no data returned")
2057 # compares ganeti version
2058 local_version = constants.PROTOCOL_VERSION
2059 remote_version = nresult.get("version", None)
2060 test = not (remote_version and
2061 isinstance(remote_version, (list, tuple)) and
2062 len(remote_version) == 2)
2063 _ErrorIf(test, constants.CV_ENODERPC, node,
2064 "connection to node returned invalid data")
2068 test = local_version != remote_version[0]
2069 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2070 "incompatible protocol versions: master %s,"
2071 " node %s", local_version, remote_version[0])
2075 # node seems compatible, we can actually try to look into its results
2077 # full package version
2078 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2079 constants.CV_ENODEVERSION, node,
2080 "software version mismatch: master %s, node %s",
2081 constants.RELEASE_VERSION, remote_version[1],
2082 code=self.ETYPE_WARNING)
2084 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2085 if ninfo.vm_capable and isinstance(hyp_result, dict):
2086 for hv_name, hv_result in hyp_result.iteritems():
2087 test = hv_result is not None
2088 _ErrorIf(test, constants.CV_ENODEHV, node,
2089 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
2091 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2092 if ninfo.vm_capable and isinstance(hvp_result, list):
2093 for item, hv_name, hv_result in hvp_result:
2094 _ErrorIf(True, constants.CV_ENODEHV, node,
2095 "hypervisor %s parameter verify failure (source %s): %s",
2096 hv_name, item, hv_result)
2098 test = nresult.get(constants.NV_NODESETUP,
2099 ["Missing NODESETUP results"])
2100 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2101 "; ".join(test))
2103 return True
2105 def _VerifyNodeTime(self, ninfo, nresult,
2106 nvinfo_starttime, nvinfo_endtime):
2107 """Check the node time.
2109 @type ninfo: L{objects.Node}
2110 @param ninfo: the node to check
2111 @param nresult: the remote results for the node
2112 @param nvinfo_starttime: the start time of the RPC call
2113 @param nvinfo_endtime: the end time of the RPC call
2117 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2119 ntime = nresult.get(constants.NV_TIME, None)
2120 try:
2121 ntime_merged = utils.MergeTime(ntime)
2122 except (ValueError, TypeError):
2123 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
2124 return
2126 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2127 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2128 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2129 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2133 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2134 "Node time diverges by at least %s from master node time",
2137 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2138 """Check the node LVM results.
2140 @type ninfo: L{objects.Node}
2141 @param ninfo: the node to check
2142 @param nresult: the remote results for the node
2143 @param vg_name: the configured VG name
2150 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2152 # checks vg existence and size > 20G
2153 vglist = nresult.get(constants.NV_VGLIST, None)
2154 test = not vglist
2155 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2156 if not test:
2157 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2158 constants.MIN_VG_SIZE)
2159 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2162 pvlist = nresult.get(constants.NV_PVLIST, None)
2163 test = pvlist is None
2164 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2165 if not test:
2166 # check that ':' is not present in PV names, since it's a
2167 # special character for lvcreate (denotes the range of PEs to
2168 # use on the PV)
2169 for _, pvname, owner_vg in pvlist:
2170 test = ":" in pvname
2171 _ErrorIf(test, constants.CV_ENODELVM, node,
2172 "Invalid character ':' in PV '%s' of VG '%s'",
2175 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2176 """Check the node bridges.
2178 @type ninfo: L{objects.Node}
2179 @param ninfo: the node to check
2180 @param nresult: the remote results for the node
2181 @param bridges: the expected list of bridges
2188 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2190 missing = nresult.get(constants.NV_BRIDGES, None)
2191 test = not isinstance(missing, list)
2192 _ErrorIf(test, constants.CV_ENODENET, node,
2193 "did not return valid bridge information")
2195 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2196 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2198 def _VerifyNodeUserScripts(self, ninfo, nresult):
2199 """Check the results of user scripts presence and executability on the node
2201 @type ninfo: L{objects.Node}
2202 @param ninfo: the node to check
2203 @param nresult: the remote results for the node
2208 test = not constants.NV_USERSCRIPTS in nresult
2209 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2210 "did not return user scripts information")
2212 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2213 if not test:
2214 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2215 "user scripts not present or not executable: %s" %
2216 utils.CommaJoin(sorted(broken_scripts)))
2218 def _VerifyNodeNetwork(self, ninfo, nresult):
2219 """Check the node network connectivity results.
2221 @type ninfo: L{objects.Node}
2222 @param ninfo: the node to check
2223 @param nresult: the remote results for the node
2227 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2229 test = constants.NV_NODELIST not in nresult
2230 _ErrorIf(test, constants.CV_ENODESSH, node,
2231 "node hasn't returned node ssh connectivity data")
2233 if nresult[constants.NV_NODELIST]:
2234 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2235 _ErrorIf(True, constants.CV_ENODESSH, node,
2236 "ssh communication with node '%s': %s", a_node, a_msg)
2238 test = constants.NV_NODENETTEST not in nresult
2239 _ErrorIf(test, constants.CV_ENODENET, node,
2240 "node hasn't returned node tcp connectivity data")
2242 if nresult[constants.NV_NODENETTEST]:
2243 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2244 for anode in nlist:
2245 _ErrorIf(True, constants.CV_ENODENET, node,
2246 "tcp communication with node '%s': %s",
2247 anode, nresult[constants.NV_NODENETTEST][anode])
2249 test = constants.NV_MASTERIP not in nresult
2250 _ErrorIf(test, constants.CV_ENODENET, node,
2251 "node hasn't returned node master IP reachability data")
2253 if not nresult[constants.NV_MASTERIP]:
2254 if node == self.master_node:
2255 msg = "the master node cannot reach the master IP (not configured?)"
2257 msg = "cannot reach the master IP"
2258 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2260 def _VerifyInstancePolicy(self, instance):
2261 """Verify instance specs against instance policy set on node group level.
2265 cluster = self.cfg.GetClusterInfo()
2266 full_beparams = cluster.FillBE(instance)
2267 ipolicy = cluster.SimpleFillIPolicy(self.group_info.ipolicy)
2269 mem_size = full_beparams.get(constants.BE_MAXMEM, None)
2270 cpu_count = full_beparams.get(constants.BE_VCPUS, None)
2271 disk_count = len(instance.disks)
2272 disk_sizes = [disk.size for disk in instance.disks]
2273 nic_count = len(instance.nics)
2275 test_settings = [
2276 (constants.ISPEC_MEM_SIZE, mem_size),
2277 (constants.ISPEC_CPU_COUNT, cpu_count),
2278 (constants.ISPEC_DISK_COUNT, disk_count),
2279 (constants.ISPEC_NIC_COUNT, nic_count),
2280 ] + map((lambda d: (constants.ISPEC_DISK_SIZE, d)), disk_sizes)
2282 for (name, value) in test_settings:
2283 test_result = _CheckMinMaxSpecs(name, ipolicy, value)
2284 self._ErrorIf(test_result is not None,
2285 constants.CV_EINSTANCEPOLICY, instance.name,
2286 test_result)
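# Illustrative sketch (hypothetical policy shape, not the real ipolicy
# structure): a _CheckMinMaxSpecs-style test reduces to a range check per
# spec name, returning an error string or None on success.
def _ExampleCheckSpec(name, policy, value):
  """Range-check one spec against {"min": ..., "max": ...} bounds."""
  if value is None:
    return "missing value for %s" % name
  min_v = policy["min"].get(name, 0)
  max_v = policy["max"].get(name, value)
  if not min_v <= value <= max_v:
    return ("%s value %s not in range [%s, %s]" %
            (name, value, min_v, max_v))
  return None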
2288 def _VerifyInstance(self, instance, instanceconfig, node_image,
2290 """Verify an instance.
2292 This function checks to see if the required block devices are
2293 available on the instance's node.
2296 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2297 node_current = instanceconfig.primary_node
2299 node_vol_should = {}
2300 instanceconfig.MapLVsByNode(node_vol_should)
2302 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), self.group_info)
2303 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2304 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, err)
2306 for node in node_vol_should:
2307 n_img = node_image[node]
2308 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2309 # ignore missing volumes on offline or broken nodes
2310 continue
2311 for volume in node_vol_should[node]:
2312 test = volume not in n_img.volumes
2313 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2314 "volume %s missing on node %s", volume, node)
2316 if instanceconfig.admin_state == constants.ADMINST_UP:
2317 pri_img = node_image[node_current]
2318 test = instance not in pri_img.instances and not pri_img.offline
2319 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2320 "instance not running on its primary node %s",
2323 diskdata = [(nname, success, status, idx)
2324 for (nname, disks) in diskstatus.items()
2325 for idx, (success, status) in enumerate(disks)]
2327 for nname, success, bdev_status, idx in diskdata:
2328 # the 'ghost node' construction in Exec() ensures that we have a
2329 # node here
2330 snode = node_image[nname]
2331 bad_snode = snode.ghost or snode.offline
2332 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2333 not success and not bad_snode,
2334 constants.CV_EINSTANCEFAULTYDISK, instance,
2335 "couldn't retrieve status for disk/%s on %s: %s",
2336 idx, nname, bdev_status)
2337 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2338 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2339 constants.CV_EINSTANCEFAULTYDISK, instance,
2340 "disk/%s on %s is faulty", idx, nname)
2342 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2343 """Verify if there are any unknown volumes in the cluster.
2345 The .os, .swap and backup volumes are ignored. All other volumes are
2346 reported as unknown.
2348 @type reserved: L{ganeti.utils.FieldSet}
2349 @param reserved: a FieldSet of reserved volume names
2352 for node, n_img in node_image.items():
2353 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2354 # skip non-healthy nodes
2355 continue
2356 for volume in n_img.volumes:
2357 test = ((node not in node_vol_should or
2358 volume not in node_vol_should[node]) and
2359 not reserved.Matches(volume))
2360 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2361 "volume %s is unknown", volume)
2363 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2364 """Verify N+1 Memory Resilience.
2366 Check that if one single node dies we can still start all the
2367 instances it was primary for.
2370 cluster_info = self.cfg.GetClusterInfo()
2371 for node, n_img in node_image.items():
2372 # This code checks that every node which is now listed as
2373 # secondary has enough memory to host all instances it is
2374 # supposed to, should a single other node in the cluster fail.
2375 # FIXME: not ready for failover to an arbitrary node
2376 # FIXME: does not support file-backed instances
2377 # WARNING: we currently take into account down instances as well
2378 # as up ones, considering that even if they're down someone
2379 # might want to start them even in the event of a node failure.
2381 # we're skipping offline nodes from the N+1 warning, since
2382 # most likely we don't have good memory information from them;
2383 # we already list instances living on such nodes, and that's
2384 # enough warning
2386 #TODO(dynmem): use MINMEM for checking
2387 #TODO(dynmem): also consider ballooning out other instances
2388 for prinode, instances in n_img.sbp.items():
2389 needed_mem = 0
2390 for instance in instances:
2391 bep = cluster_info.FillBE(instance_cfg[instance])
2392 if bep[constants.BE_AUTO_BALANCE]:
2393 needed_mem += bep[constants.BE_MAXMEM]
2394 test = n_img.mfree < needed_mem
2395 self._ErrorIf(test, constants.CV_ENODEN1, node,
2396 "not enough memory to accomodate instance failovers"
2397 " should node %s fail (%dMiB needed, %dMiB available)",
2398 prinode, needed_mem, n_img.mfree)
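# Worked example (hypothetical numbers): with n_img.sbp = {"nodeA": ["i1",
# "i2"]}, where i1 and i2 are auto-balanced with maxmem 2048 and 1024, the
# check requires n_img.mfree >= 3072 MiB on this secondary, otherwise
# CV_ENODEN1 is reported for a failover of "nodeA".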
2400 @classmethod
2401 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2402 (files_all, files_opt, files_mc, files_vm)):
2403 """Verifies file checksums collected from all nodes.
2405 @param errorif: Callback for reporting errors
2406 @param nodeinfo: List of L{objects.Node} objects
2407 @param master_node: Name of master node
2408 @param all_nvinfo: RPC results
2411 # Define functions determining which nodes to consider for a file
2412 files2nodefn = [
2413 (files_all, None),
2414 (files_mc, lambda node: (node.master_candidate or
2415 node.name == master_node)),
2416 (files_vm, lambda node: node.vm_capable),
2417 ]
2419 # Build mapping from filename to list of nodes which should have the file
2420 nodefiles = {}
2421 for (files, fn) in files2nodefn:
2422 if fn is None:
2423 filenodes = nodeinfo
2424 else:
2425 filenodes = filter(fn, nodeinfo)
2426 nodefiles.update((filename,
2427 frozenset(map(operator.attrgetter("name"), filenodes)))
2428 for filename in files)
2430 assert set(nodefiles) == (files_all | files_mc | files_vm)
2432 fileinfo = dict((filename, {}) for filename in nodefiles)
2433 ignore_nodes = set()
2435 for node in nodeinfo:
2436 if node.offline:
2437 ignore_nodes.add(node.name)
2438 continue
2440 nresult = all_nvinfo[node.name]
2442 if nresult.fail_msg or not nresult.payload:
2443 node_files = None
2444 else:
2445 node_files = nresult.payload.get(constants.NV_FILELIST, None)
2447 test = not (node_files and isinstance(node_files, dict))
2448 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2449 "Node did not return file checksum data")
2450 if test:
2451 ignore_nodes.add(node.name)
2452 continue
2454 # Build per-checksum mapping from filename to nodes having it
2455 for (filename, checksum) in node_files.items():
2456 assert filename in nodefiles
2457 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2459 for (filename, checksums) in fileinfo.items():
2460 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2462 # Nodes having the file
2463 with_file = frozenset(node_name
2464 for nodes in fileinfo[filename].values()
2465 for node_name in nodes) - ignore_nodes
2467 expected_nodes = nodefiles[filename] - ignore_nodes
2469 # Nodes missing file
2470 missing_file = expected_nodes - with_file
2472 if filename in files_opt:
2474 errorif(missing_file and missing_file != expected_nodes,
2475 constants.CV_ECLUSTERFILECHECK, None,
2476 "File %s is optional, but it must exist on all or no"
2477 " nodes (not found on %s)",
2478 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2479 else:
2480 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2481 "File %s is missing from node(s) %s", filename,
2482 utils.CommaJoin(utils.NiceSort(missing_file)))
2484 # Warn if a node has a file it shouldn't
2485 unexpected = with_file - expected_nodes
2486 errorif(unexpected,
2487 constants.CV_ECLUSTERFILECHECK, None,
2488 "File %s should not exist on node(s) %s",
2489 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2491 # See if there are multiple versions of the file
2492 test = len(checksums) > 1
2493 if test:
2494 variants = ["variant %s on %s" %
2495 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2496 for (idx, (checksum, nodes)) in
2497 enumerate(sorted(checksums.items()))]
2498 else:
2499 variants = []
2501 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2502 "File %s found with %s different checksums (%s)",
2503 filename, len(checksums), "; ".join(variants))
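# Illustrative sketch (hypothetical data): fileinfo maps every file to a
# {checksum: set(node_names)} dict, so a consistent cluster has exactly one
# checksum key per file and a diverging node shows up as a second entry:
#
#   fileinfo["/var/lib/ganeti/config.data"] = {
#     "abc123...": set(["node1", "node2"]),
#     "def456...": set(["node3"]),
#     }
#
# which the code above reports as two variants of the same file.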
2505 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2506 drbd_map):
2507 """Verifies the node DRBD status.
2509 @type ninfo: L{objects.Node}
2510 @param ninfo: the node to check
2511 @param nresult: the remote results for the node
2512 @param instanceinfo: the dict of instances
2513 @param drbd_helper: the configured DRBD usermode helper
2514 @param drbd_map: the DRBD map as returned by
2515 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2519 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2522 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2523 test = (helper_result is None)
2524 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2525 "no drbd usermode helper returned")
2526 if helper_result:
2527 status, payload = helper_result
2528 test = not status
2529 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2530 "drbd usermode helper check unsuccessful: %s", payload)
2531 test = status and (payload != drbd_helper)
2532 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2533 "wrong drbd usermode helper: %s", payload)
2535 # compute the DRBD minors
2536 node_drbd = {}
2537 for minor, instance in drbd_map[node].items():
2538 test = instance not in instanceinfo
2539 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2540 "ghost instance '%s' in temporary DRBD map", instance)
2541 # ghost instance should not be running, but otherwise we
2542 # don't give double warnings (both ghost instance and
2543 # unallocated minor in use)
2544 if test:
2545 node_drbd[minor] = (instance, False)
2546 else:
2547 instance = instanceinfo[instance]
2548 node_drbd[minor] = (instance.name,
2549 instance.admin_state == constants.ADMINST_UP)
2551 # and now check them
2552 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2553 test = not isinstance(used_minors, (tuple, list))
2554 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2555 "cannot parse drbd status file: %s", str(used_minors))
2556 if test:
2557 # we cannot check drbd status
2558 return
2560 for minor, (iname, must_exist) in node_drbd.items():
2561 test = minor not in used_minors and must_exist
2562 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2563 "drbd minor %d of instance %s is not active", minor, iname)
2564 for minor in used_minors:
2565 test = minor not in node_drbd
2566 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2567 "unallocated drbd minor %d is in use", minor)
2569 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2570 """Builds the node OS structures.
2572 @type ninfo: L{objects.Node}
2573 @param ninfo: the node to check
2574 @param nresult: the remote results for the node
2575 @param nimg: the node image object
2579 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2581 remote_os = nresult.get(constants.NV_OSLIST, None)
2582 test = (not isinstance(remote_os, list) or
2583 not compat.all(isinstance(v, list) and len(v) == 7
2584 for v in remote_os))
2586 _ErrorIf(test, constants.CV_ENODEOS, node,
2587 "node hasn't returned valid OS data")
2589 nimg.os_fail = test
2590 if test:
2591 return
2593 os_dict = {}
2596 for (name, os_path, status, diagnose,
2597 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2599 if name not in os_dict:
2600 os_dict[name] = []
2602 # parameters is a list of lists instead of list of tuples due to
2603 # JSON lacking a real tuple type, fix it:
2604 parameters = [tuple(v) for v in parameters]
2605 os_dict[name].append((os_path, status, diagnose,
2606 set(variants), set(parameters), set(api_ver)))
2608 nimg.oslist = os_dict
2610 def _VerifyNodeOS(self, ninfo, nimg, base):
2611 """Verifies the node OS list.
2613 @type ninfo: L{objects.Node}
2614 @param ninfo: the node to check
2615 @param nimg: the node image object
2616 @param base: the 'template' node we match against (e.g. from the master)
2620 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2622 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2624 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2625 for os_name, os_data in nimg.oslist.items():
2626 assert os_data, "Empty OS status for OS %s?!" % os_name
2627 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2628 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2629 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2630 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2631 "OS '%s' has multiple entries (first one shadows the rest): %s",
2632 os_name, utils.CommaJoin([v[0] for v in os_data]))
2633 # comparisons with the 'base' image
2634 test = os_name not in base.oslist
2635 _ErrorIf(test, constants.CV_ENODEOS, node,
2636 "Extra OS %s not present on reference node (%s)",
2640 assert base.oslist[os_name], "Base node has empty OS status?"
2641 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2643 # base OS is invalid, skipping
2645 for kind, a, b in [("API version", f_api, b_api),
2646 ("variants list", f_var, b_var),
2647 ("parameters", beautify_params(f_param),
2648 beautify_params(b_param))]:
2649 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2650 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2651 kind, os_name, base.name,
2652 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2654 # check any missing OSes
2655 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2656 _ErrorIf(missing, constants.CV_ENODEOS, node,
2657 "OSes present on reference node %s but missing on this node: %s",
2658 base.name, utils.CommaJoin(missing))
2660 def _VerifyOob(self, ninfo, nresult):
2661 """Verifies out of band functionality of a node.
2663 @type ninfo: L{objects.Node}
2664 @param ninfo: the node to check
2665 @param nresult: the remote results for the node
2669 # We just have to verify the paths on master and/or master candidates
2670 # as the oob helper is invoked on the master
2671 if ((ninfo.master_candidate or ninfo.master_capable) and
2672 constants.NV_OOB_PATHS in nresult):
2673 for path_result in nresult[constants.NV_OOB_PATHS]:
2674 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2676 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2677 """Verifies and updates the node volume data.
2679 This function will update a L{NodeImage}'s internal structures
2680 with data from the remote call.
2682 @type ninfo: L{objects.Node}
2683 @param ninfo: the node to check
2684 @param nresult: the remote results for the node
2685 @param nimg: the node image object
2686 @param vg_name: the configured VG name
2690 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2692 nimg.lvm_fail = True
2693 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2696 elif isinstance(lvdata, basestring):
2697 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2698 utils.SafeEncode(lvdata))
2699 elif not isinstance(lvdata, dict):
2700 _ErrorIf(True, constants.CV_ENODELVM, node,
2701 "rpc call to node failed (lvlist)")
2703 nimg.volumes = lvdata
2704 nimg.lvm_fail = False
2706 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2707 """Verifies and updates the node instance list.
2709 If the listing was successful, then updates this node's instance
2710 list. Otherwise, it marks the RPC call as failed for the instance
2713 @type ninfo: L{objects.Node}
2714 @param ninfo: the node to check
2715 @param nresult: the remote results for the node
2716 @param nimg: the node image object
2719 idata = nresult.get(constants.NV_INSTANCELIST, None)
2720 test = not isinstance(idata, list)
2721 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2722 "rpc call to node failed (instancelist): %s",
2723 utils.SafeEncode(str(idata)))
2725 nimg.hyp_fail = True
2727 nimg.instances = idata
2729 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2730 """Verifies and computes a node information map
2732 @type ninfo: L{objects.Node}
2733 @param ninfo: the node to check
2734 @param nresult: the remote results for the node
2735 @param nimg: the node image object
2736 @param vg_name: the configured VG name
2740 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2742 # try to read free memory (from the hypervisor)
2743 hv_info = nresult.get(constants.NV_HVINFO, None)
2744 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2745 _ErrorIf(test, constants.CV_ENODEHV, node,
2746 "rpc call to node failed (hvinfo)")
2749 nimg.mfree = int(hv_info["memory_free"])
2750 except (ValueError, TypeError):
2751 _ErrorIf(True, constants.CV_ENODERPC, node,
2752 "node returned invalid nodeinfo, check hypervisor")
2754 # FIXME: devise a free space model for file based instances as well
2755 if vg_name is not None:
2756 test = (constants.NV_VGLIST not in nresult or
2757 vg_name not in nresult[constants.NV_VGLIST])
2758 _ErrorIf(test, constants.CV_ENODELVM, node,
2759 "node didn't return data for the volume group '%s'"
2760 " - it is either missing or broken", vg_name)
2763 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2764 except (ValueError, TypeError):
2765 _ErrorIf(True, constants.CV_ENODERPC, node,
2766 "node returned invalid LVM info, check LVM status")
2768 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2769 """Gets per-disk status information for all instances.
2771 @type nodelist: list of strings
2772 @param nodelist: Node names
2773 @type node_image: dict of (name, L{objects.Node})
2774 @param node_image: Node objects
2775 @type instanceinfo: dict of (name, L{objects.Instance})
2776 @param instanceinfo: Instance objects
2777 @rtype: {instance: {node: [(success, payload)]}}
2778 @return: a dictionary of per-instance dictionaries with nodes as
2779 keys and disk information as values; the disk information is a
2780 list of tuples (success, payload)
2783 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2785 node_disks = {}
2786 node_disks_devonly = {}
2787 diskless_instances = set()
2788 diskless = constants.DT_DISKLESS
2790 for nname in nodelist:
2791 node_instances = list(itertools.chain(node_image[nname].pinst,
2792 node_image[nname].sinst))
2793 diskless_instances.update(inst for inst in node_instances
2794 if instanceinfo[inst].disk_template == diskless)
2795 disks = [(inst, disk)
2796 for inst in node_instances
2797 for disk in instanceinfo[inst].disks]
2799 if not disks:
2800 # No need to collect data
2801 continue
2803 node_disks[nname] = disks
2805 # Creating copies as SetDiskID below will modify the objects and that can
2806 # lead to incorrect data returned from nodes
2807 devonly = [dev.Copy() for (_, dev) in disks]
2809 for dev in devonly:
2810 self.cfg.SetDiskID(dev, nname)
2812 node_disks_devonly[nname] = devonly
2814 assert len(node_disks) == len(node_disks_devonly)
2816 # Collect data from all nodes with disks
2817 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2818 node_disks_devonly)
2820 assert len(result) == len(node_disks)
2822 instdisk = {}
2824 for (nname, nres) in result.items():
2825 disks = node_disks[nname]
2827 if nres.offline:
2828 # No data from this node
2829 data = len(disks) * [(False, "node offline")]
2830 else:
2831 msg = nres.fail_msg
2832 _ErrorIf(msg, constants.CV_ENODERPC, nname,
2833 "while getting disk information: %s", msg)
2834 if msg:
2835 # No data from this node
2836 data = len(disks) * [(False, msg)]
2837 else:
2838 data = []
2839 for idx, i in enumerate(nres.payload):
2840 if isinstance(i, (tuple, list)) and len(i) == 2:
2841 data.append(i)
2842 else:
2843 logging.warning("Invalid result from node %s, entry %d: %s",
2844 nname, idx, i)
2845 data.append((False, "Invalid result from the remote node"))
2847 for ((inst, _), status) in zip(disks, data):
2848 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2850 # Add empty entries for diskless instances.
2851 for inst in diskless_instances:
2852 assert inst not in instdisk
2853 instdisk[inst] = {}
2855 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2856 len(nnames) <= len(instanceinfo[inst].all_nodes) and
2857 compat.all(isinstance(s, (tuple, list)) and
2858 len(s) == 2 for s in statuses)
2859 for inst, nnames in instdisk.items()
2860 for nname, statuses in nnames.items())
2861 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2863 return instdisk
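# Illustrative sketch (hypothetical data): the returned structure nests
# instance -> node -> per-disk (success, payload) tuples, e.g.:
#
#   instdisk = {
#     "inst1": {"node1": [(True, status0), (False, "node offline")]},
#     "inst2": {"node2": [(True, status0)]},
#     }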
2865 @staticmethod
2866 def _SshNodeSelector(group_uuid, all_nodes):
2867 """Create endless iterators for all potential SSH check hosts.
2870 nodes = [node for node in all_nodes
2871 if (node.group != group_uuid and
2872 not node.offline)]
2873 keyfunc = operator.attrgetter("group")
2875 return map(itertools.cycle,
2876 [sorted(map(operator.attrgetter("name"), names))
2877 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2878 keyfunc)])
2880 @classmethod
2881 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2882 """Choose which nodes should talk to which other nodes.
2884 We will make nodes contact all nodes in their group, and one node from
2887 @warning: This algorithm has a known issue if one node group is much
2888 smaller than others (e.g. just one node). In such a case all other
2889 nodes will talk to the single node.
2892 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2893 sel = cls._SshNodeSelector(group_uuid, all_nodes)
2895 return (online_nodes,
2896 dict((name, sorted([i.next() for i in sel]))
2897 for name in online_nodes))
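# Illustrative sketch (standalone, hypothetical names; assumes the module's
# itertools import): the selector above hands every online node one peer
# per foreign group, and itertools.cycle makes the per-group iterators
# endless, so large groups are spread round-robin.
def _ExampleRoundRobin(foreign_groups, online_nodes):
  """Assign one member of every foreign group to each node, cycling."""
  cycles = [itertools.cycle(sorted(group)) for group in foreign_groups]
  return dict((name, [it.next() for it in cycles])
              for name in online_nodes)
# _ExampleRoundRobin([["a", "b"]], ["n1", "n2", "n3"]) returns
# {"n1": ["a"], "n2": ["b"], "n3": ["a"]}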
2899 def BuildHooksEnv(self):
2902 Cluster-Verify hooks run only in the post phase; when they fail, their
2903 output is logged in the verify output and the verification fails.
2907 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2910 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2911 for node in self.my_node_info.values())
2913 return env
2915 def BuildHooksNodes(self):
2916 """Build hooks nodes.
2919 return ([], self.my_node_names)
2921 def Exec(self, feedback_fn):
2922 """Verify integrity of the node group, performing various test on nodes.
2925 # This method has too many local variables. pylint: disable=R0914
2926 feedback_fn("* Verifying group '%s'" % self.group_info.name)
2928 if not self.my_node_names:
2930 feedback_fn("* Empty node group, skipping verification")
2934 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2935 verbose = self.op.verbose
2936 self._feedback_fn = feedback_fn
2938 vg_name = self.cfg.GetVGName()
2939 drbd_helper = self.cfg.GetDRBDHelper()
2940 cluster = self.cfg.GetClusterInfo()
2941 groupinfo = self.cfg.GetAllNodeGroupsInfo()
2942 hypervisors = cluster.enabled_hypervisors
2943 node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2945 i_non_redundant = [] # Non redundant instances
2946 i_non_a_balanced = [] # Non auto-balanced instances
2947 i_offline = 0 # Count of offline instances
2948 n_offline = 0 # Count of offline nodes
2949 n_drained = 0 # Count of nodes being drained
2950 node_vol_should = {}
2952 # FIXME: verify OS list
2955 filemap = _ComputeAncillaryFiles(cluster, False)
2957 # do local checksums
2958 master_node = self.master_node = self.cfg.GetMasterNode()
2959 master_ip = self.cfg.GetMasterIP()
2961 feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2964 if self.cfg.GetUseExternalMipScript():
2965 user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
2967 node_verify_param = {
2968 constants.NV_FILELIST:
2969 utils.UniqueSequence(filename
2970 for files in filemap
2971 for filename in files),
2972 constants.NV_NODELIST:
2973 self._SelectSshCheckNodes(node_data_list, self.group_uuid,
2974 self.all_node_info.values()),
2975 constants.NV_HYPERVISOR: hypervisors,
2976 constants.NV_HVPARAMS:
2977 _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2978 constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2979 for node in node_data_list
2980 if not node.offline],
2981 constants.NV_INSTANCELIST: hypervisors,
2982 constants.NV_VERSION: None,
2983 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2984 constants.NV_NODESETUP: None,
2985 constants.NV_TIME: None,
2986 constants.NV_MASTERIP: (master_node, master_ip),
2987 constants.NV_OSLIST: None,
2988 constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2989 constants.NV_USERSCRIPTS: user_scripts,
2992 if vg_name is not None:
2993 node_verify_param[constants.NV_VGLIST] = None
2994 node_verify_param[constants.NV_LVLIST] = vg_name
2995 node_verify_param[constants.NV_PVLIST] = [vg_name]
2996 node_verify_param[constants.NV_DRBDLIST] = None
2999 node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
3002 # FIXME: this needs to be changed per node-group, not cluster-wide
3004 default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
3005 if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3006 bridges.add(default_nicpp[constants.NIC_LINK])
3007 for instance in self.my_inst_info.values():
3008 for nic in instance.nics:
3009 full_nic = cluster.SimpleFillNIC(nic.nicparams)
3010 if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
3011 bridges.add(full_nic[constants.NIC_LINK])
3014 node_verify_param[constants.NV_BRIDGES] = list(bridges)
3016 # Build our expected cluster state
3017 node_image = dict((node.name, self.NodeImage(offline=node.offline,
3019 vm_capable=node.vm_capable))
3020 for node in node_data_list)
3024 for node in self.all_node_info.values():
3025 path = _SupportsOob(self.cfg, node)
3026 if path and path not in oob_paths:
3027 oob_paths.append(path)
3030 node_verify_param[constants.NV_OOB_PATHS] = oob_paths
3032 for instance in self.my_inst_names:
3033 inst_config = self.my_inst_info[instance]
3035 for nname in inst_config.all_nodes:
3036 if nname not in node_image:
3037 gnode = self.NodeImage(name=nname)
3038 gnode.ghost = (nname not in self.all_node_info)
3039 node_image[nname] = gnode
3041 inst_config.MapLVsByNode(node_vol_should)
3043 pnode = inst_config.primary_node
3044 node_image[pnode].pinst.append(instance)
3046 for snode in inst_config.secondary_nodes:
3047 nimg = node_image[snode]
3048 nimg.sinst.append(instance)
3049 if pnode not in nimg.sbp:
3050 nimg.sbp[pnode] = []
3051 nimg.sbp[pnode].append(instance)
3053 # At this point, we have the in-memory data structures complete,
3054 # except for the runtime information, which we'll gather next
3056 # Due to the way our RPC system works, exact response times cannot be
3057 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
3058 # time before and after executing the request, we can at least have a time
3059 # window.
3060 nvinfo_starttime = time.time()
3061 all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
3063 self.cfg.GetClusterName())
3064 nvinfo_endtime = time.time()
3066 if self.extra_lv_nodes and vg_name is not None:
3067 extra_lv_nvinfo = \
3068 self.rpc.call_node_verify(self.extra_lv_nodes,
3069 {constants.NV_LVLIST: vg_name},
3070 self.cfg.GetClusterName())
3071 else:
3072 extra_lv_nvinfo = {}
3074 all_drbd_map = self.cfg.ComputeDRBDMap()
3076 feedback_fn("* Gathering disk information (%s nodes)" %
3077 len(self.my_node_names))
3078 instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
3079 self.all_inst_info)
3081 feedback_fn("* Verifying configuration file consistency")
3083 # If not all nodes are being checked, we need to make sure the master node
3084 # and a non-checked vm_capable node are in the list.
3085 absent_nodes = set(self.all_node_info).difference(self.my_node_info)
3086 if absent_nodes:
3087 vf_nvinfo = all_nvinfo.copy()
3088 vf_node_info = list(self.my_node_info.values())
3089 additional_nodes = []
3090 if master_node not in self.my_node_info:
3091 additional_nodes.append(master_node)
3092 vf_node_info.append(self.all_node_info[master_node])
3093 # Add the first vm_capable node we find which is not included
3094 for node in absent_nodes:
3095 nodeinfo = self.all_node_info[node]
3096 if nodeinfo.vm_capable and not nodeinfo.offline:
3097 additional_nodes.append(node)
3098 vf_node_info.append(self.all_node_info[node])
3099 break
3100 key = constants.NV_FILELIST
3101 vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
3102 {key: node_verify_param[key]},
3103 self.cfg.GetClusterName()))
3104 else:
3105 vf_nvinfo = all_nvinfo
3106 vf_node_info = self.my_node_info.values()
3108 self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
3110 feedback_fn("* Verifying node status")
3112 refos_img = None
3114 for node_i in node_data_list:
3115 node = node_i.name
3116 nimg = node_image[node]
3118 if node_i.offline:
3119 if verbose:
3120 feedback_fn("* Skipping offline node %s" % (node,))
3121 n_offline += 1
3122 continue
3124 if node == master_node:
3125 ntype = "master"
3126 elif node_i.master_candidate:
3127 ntype = "master candidate"
3128 elif node_i.drained:
3129 ntype = "drained"
3130 n_drained += 1
3131 else:
3132 ntype = "regular"
3133 if verbose:
3134 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
3136 msg = all_nvinfo[node].fail_msg
3137 _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
3138 msg)
3139 if msg:
3140 nimg.rpc_fail = True
3141 continue
3143 nresult = all_nvinfo[node].payload
3145 nimg.call_ok = self._VerifyNode(node_i, nresult)
3146 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
3147 self._VerifyNodeNetwork(node_i, nresult)
3148 self._VerifyNodeUserScripts(node_i, nresult)
3149 self._VerifyOob(node_i, nresult)
3151 if nimg.vm_capable:
3152 self._VerifyNodeLVM(node_i, nresult, vg_name)
3153 self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
3154 all_drbd_map)
3156 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
3157 self._UpdateNodeInstances(node_i, nresult, nimg)
3158 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
3159 self._UpdateNodeOS(node_i, nresult, nimg)
3161 if not nimg.os_fail:
3162 if refos_img is None:
3163 refos_img = nimg
3164 self._VerifyNodeOS(node_i, nimg, refos_img)
3165 self._VerifyNodeBridges(node_i, nresult, bridges)
3167 # Check whether all running instances are primary for the node. (This
3168 # can no longer be done from _VerifyInstance below, since some of the
3169 # wrong instances could be from other node groups.)
3170 non_primary_inst = set(nimg.instances).difference(nimg.pinst)
3172 for inst in non_primary_inst:
3173 # FIXME: investigate best way to handle offline insts
3174 if inst.admin_state == constants.ADMINST_OFFLINE:
3176 feedback_fn("* Skipping offline instance %s" % inst.name)
3179 test = inst in self.all_inst_info
3180 _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
3181 "instance should not run on node %s", node_i.name)
3182 _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
3183 "node is running unknown instance %s", inst)
3185 for node, result in extra_lv_nvinfo.items():
3186 self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
3187 node_image[node], vg_name)
3189 feedback_fn("* Verifying instance status")
3190 for instance in self.my_inst_names:
3192 feedback_fn("* Verifying instance %s" % instance)
3193 inst_config = self.my_inst_info[instance]
3194 self._VerifyInstance(instance, inst_config, node_image,
3195 instdisk[instance])
3196 inst_nodes_offline = []
3198 pnode = inst_config.primary_node
3199 pnode_img = node_image[pnode]
3200 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
3201 constants.CV_ENODERPC, pnode, "instance %s, connection to"
3202 " primary node failed", instance)
3204 _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
3205 pnode_img.offline,
3206 constants.CV_EINSTANCEBADNODE, instance,
3207 "instance is marked as running and lives on offline node %s",
3208 inst_config.primary_node)
3210 # If the instance is non-redundant we cannot survive losing its primary
3211 # node, so we are not N+1 compliant. On the other hand we have no disk
3212 # templates with more than one secondary so that situation is not well
3213 # supported either.
3214 # FIXME: does not support file-backed instances
3215 if not inst_config.secondary_nodes:
3216 i_non_redundant.append(instance)
3218 _ErrorIf(len(inst_config.secondary_nodes) > 1,
3219 constants.CV_EINSTANCELAYOUT,
3220 instance, "instance has multiple secondary nodes: %s",
3221 utils.CommaJoin(inst_config.secondary_nodes),
3222 code=self.ETYPE_WARNING)
3224 if inst_config.disk_template in constants.DTS_INT_MIRROR:
3225 pnode = inst_config.primary_node
3226 instance_nodes = utils.NiceSort(inst_config.all_nodes)
3227 instance_groups = {}
3229 for node in instance_nodes:
3230 instance_groups.setdefault(self.all_node_info[node].group,
3231 []).append(node)
3233 pretty_list = [
3234 "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3235 # Sort so that we always list the primary node first.
3236 for group, nodes in sorted(instance_groups.items(),
3237 key=lambda (_, nodes): pnode in nodes,
3238 reverse=True)]
3240 self._ErrorIf(len(instance_groups) > 1,
3241 constants.CV_EINSTANCESPLITGROUPS,
3242 instance, "instance has primary and secondary nodes in"
3243 " different groups: %s", utils.CommaJoin(pretty_list),
3244 code=self.ETYPE_WARNING)
3246 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3247 i_non_a_balanced.append(instance)
3249 for snode in inst_config.secondary_nodes:
3250 s_img = node_image[snode]
3251 _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3252 snode, "instance %s, connection to secondary node failed",
3256 inst_nodes_offline.append(snode)
3258 # warn that the instance lives on offline nodes
3259 _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3260 "instance has offline secondary node(s) %s",
3261 utils.CommaJoin(inst_nodes_offline))
3262 # ... or ghost/non-vm_capable nodes
3263 for node in inst_config.all_nodes:
3264 _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3265 instance, "instance lives on ghost node %s", node)
3266 _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3267 instance, "instance lives on non-vm_capable node %s", node)
3269 feedback_fn("* Verifying orphan volumes")
3270 reserved = utils.FieldSet(*cluster.reserved_lvs)
3272 # We will get spurious "unknown volume" warnings if any node of this group
3273 # is secondary for an instance whose primary is in another group. To avoid
3274 # them, we find these instances and add their volumes to node_vol_should.
3275 for inst in self.all_inst_info.values():
3276 for secondary in inst.secondary_nodes:
3277 if (secondary in self.my_node_info
3278 and inst.name not in self.my_inst_info):
3279 inst.MapLVsByNode(node_vol_should)
3282 self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3284 if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3285 feedback_fn("* Verifying N+1 Memory redundancy")
3286 self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3288 feedback_fn("* Other Notes")
3290 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
3291 % len(i_non_redundant))
3293 if i_non_a_balanced:
3294 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
3295 % len(i_non_a_balanced))
3298 feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
3301 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
3304 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
3308 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3309 """Analyze the post-hooks' result
3311 This method analyses the hook result, handles it, and sends some
3312 nicely-formatted feedback back to the user.
3314 @param phase: one of L{constants.HOOKS_PHASE_POST} or
3315 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3316 @param hooks_results: the results of the multi-node hooks rpc call
3317 @param feedback_fn: function used to send feedback back to the caller
3318 @param lu_result: previous Exec result
3319 @return: the new Exec result, based on the previous result
3323 # We only really run POST phase hooks, only for non-empty groups,
3324 # and are only interested in their results
3325 if not self.my_node_names:
3326 # empty node group
3327 pass
3328 elif phase == constants.HOOKS_PHASE_POST:
3329 # Used to change hooks' output to proper indentation
3330 feedback_fn("* Hooks Results")
3331 assert hooks_results, "invalid result from hooks"
3333 for node_name in hooks_results:
3334 res = hooks_results[node_name]
3335 msg = res.fail_msg
3336 test = msg and not res.offline
3337 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3338 "Communication failure in hooks execution: %s", msg)
3339 if res.offline or msg:
3340 # No need to investigate payload if node is offline or gave
3341 # an error
3342 continue
3343 for script, hkr, output in res.payload:
3344 test = hkr == constants.HKR_FAIL
3345 self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3346 "Script %s failed, output:", script)
3347 if test:
3348 output = self._HOOKS_INDENT_RE.sub(" ", output)
3349 feedback_fn("%s" % output)
3355 class LUClusterVerifyDisks(NoHooksLU):
3356 """Verifies the cluster disks status.
3361 def ExpandNames(self):
3362 self.share_locks = _ShareAll()
3363 self.needed_locks = {
3364 locking.LEVEL_NODEGROUP: locking.ALL_SET,
3367 def Exec(self, feedback_fn):
3368 group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3370 # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3371 return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3372 for group in group_names])
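# Illustrative sketch (hypothetical group names): with groups "default" and
# "storage", Exec() above returns one single-opcode job per group:
#
#   ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name="default")],
#                   [opcodes.OpGroupVerifyDisks(group_name="storage")]])
#
# mcpu then submits each inner list as a separate job and reports the job
# IDs back to the caller.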
3375 class LUGroupVerifyDisks(NoHooksLU):
3376 """Verifies the status of all disks in a node group.
3381 def ExpandNames(self):
3382 # Raises errors.OpPrereqError on its own if group can't be found
3383 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3385 self.share_locks = _ShareAll()
3386 self.needed_locks = {
3387 locking.LEVEL_INSTANCE: [],
3388 locking.LEVEL_NODEGROUP: [],
3389 locking.LEVEL_NODE: [],
3392 def DeclareLocks(self, level):
3393 if level == locking.LEVEL_INSTANCE:
3394 assert not self.needed_locks[locking.LEVEL_INSTANCE]
3396 # Lock instances optimistically, needs verification once node and group
3397 # locks have been acquired
3398 self.needed_locks[locking.LEVEL_INSTANCE] = \
3399 self.cfg.GetNodeGroupInstances(self.group_uuid)
3401 elif level == locking.LEVEL_NODEGROUP:
3402 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3404 self.needed_locks[locking.LEVEL_NODEGROUP] = \
3405 set([self.group_uuid] +
3406 # Lock all groups used by instances optimistically; this requires
3407 # going via the node before it's locked, requiring verification
3410 for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3411 for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3413 elif level == locking.LEVEL_NODE:
3414 # This will only lock the nodes in the group to be verified which contain
3416 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3417 self._LockInstancesNodes()
3419 # Lock all nodes in group to be verified
3420 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3421 member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3422 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3424 def CheckPrereq(self):
3425 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3426 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3427 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3429 assert self.group_uuid in owned_groups
3431 # Check if locked instances are still correct
3432 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3434 # Get instance information
3435 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3437 # Check if node groups for locked instances are still correct
3438 for (instance_name, inst) in self.instances.items():
3439 assert owned_nodes.issuperset(inst.all_nodes), \
3440 "Instance %s's nodes changed while we kept the lock" % instance_name
3442 inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3445 assert self.group_uuid in inst_groups, \
3446 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3448 def Exec(self, feedback_fn):
3449 """Verify integrity of cluster disks.
3451 @rtype: tuple of three items
3452 @return: a tuple of (dict of node-to-node_error, list of instances
3453 which need activate-disks, dict of instance: (node, volume) for
3457 res_nodes = {}
3458 res_instances = set()
3459 res_missing = {}
3461 nv_dict = _MapInstanceDisksToNodes([inst
3462 for inst in self.instances.values()
3463 if inst.admin_state == constants.ADMINST_UP])
3466 nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3467 set(self.cfg.GetVmCapableNodeList()))
3469 node_lvs = self.rpc.call_lv_list(nodes, [])
3471 for (node, node_res) in node_lvs.items():
3472 if node_res.offline:
3473 continue
3475 msg = node_res.fail_msg
3476 if msg:
3477 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3478 res_nodes[node] = msg
3479 continue
3481 for lv_name, (_, _, lv_online) in node_res.payload.items():
3482 inst = nv_dict.pop((node, lv_name), None)
3483 if not (lv_online or inst is None):
3484 res_instances.add(inst)
3486 # any leftover items in nv_dict are missing LVs, let's arrange the data
3488 for key, inst in nv_dict.iteritems():
3489 res_missing.setdefault(inst, []).append(list(key))
3491 return (res_nodes, list(res_instances), res_missing)
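# Illustrative sketch (hypothetical data): nv_dict maps (node, lv_name) to
# the owning instance, so an LV reported as inactive resolves directly to
# the instance needing activate-disks, and any leftover key is a missing
# LV, e.g.:
#
#   nv_dict = {("node1", "xenvg/disk0"): inst1}
#   res_missing = {inst1: [["node1", "xenvg/disk0"]]}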
3494 class LUClusterRepairDiskSizes(NoHooksLU):
3495 """Verifies the cluster disks sizes.
3500 def ExpandNames(self):
3501 if self.op.instances:
3502 self.wanted_names = _GetWantedInstances(self, self.op.instances)
3503 self.needed_locks = {
3504 locking.LEVEL_NODE_RES: [],
3505 locking.LEVEL_INSTANCE: self.wanted_names,
3507 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3509 self.wanted_names = None
3510 self.needed_locks = {
3511 locking.LEVEL_NODE_RES: locking.ALL_SET,
3512 locking.LEVEL_INSTANCE: locking.ALL_SET,
3514 self.share_locks = {
3515 locking.LEVEL_NODE_RES: 1,
3516 locking.LEVEL_INSTANCE: 0,
3519 def DeclareLocks(self, level):
3520 if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3521 self._LockInstancesNodes(primary_only=True, level=level)
3523 def CheckPrereq(self):
3524 """Check prerequisites.
3526 This only checks the optional instance list against the existing names.
3529 if self.wanted_names is None:
3530 self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3532 self.wanted_instances = \
3533 map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3535 def _EnsureChildSizes(self, disk):
3536 """Ensure children of the disk have the needed disk size.
3538 This is valid mainly for DRBD8 and fixes an issue where the
3539 children have smaller disk size.
3541 @param disk: an L{ganeti.objects.Disk} object
3544 if disk.dev_type == constants.LD_DRBD8:
3545 assert disk.children, "Empty children for DRBD8?"
3546 fchild = disk.children[0]
3547 mismatch = fchild.size < disk.size
3548 if mismatch:
3549 self.LogInfo("Child disk has size %d, parent %d, fixing",
3550 fchild.size, disk.size)
3551 fchild.size = disk.size
3553 # and we recurse on this child only, not on the metadev
3554 return self._EnsureChildSizes(fchild) or mismatch
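# Worked example (hypothetical sizes): for a DRBD8 disk of 1024 MiB whose
# data child records only 1000 MiB, the child is grown to 1024 and the
# call returns True; the recursion follows only the data child, never the
# metadata device.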
3558 def Exec(self, feedback_fn):
3559 """Verify the size of cluster disks.
3562 # TODO: check child disks too
3563 # TODO: check differences in size between primary/secondary nodes
3564 per_node_disks = {}
3565 for instance in self.wanted_instances:
3566 pnode = instance.primary_node
3567 if pnode not in per_node_disks:
3568 per_node_disks[pnode] = []
3569 for idx, disk in enumerate(instance.disks):
3570 per_node_disks[pnode].append((instance, idx, disk))
3572 assert not (frozenset(per_node_disks.keys()) -
3573 self.owned_locks(locking.LEVEL_NODE_RES)), \
3574 "Not owning correct locks"
3575 assert not self.owned_locks(locking.LEVEL_NODE)
3577 changed = []
3578 for node, dskl in per_node_disks.items():
3579 newl = [v[2].Copy() for v in dskl]
3580 for dsk in newl:
3581 self.cfg.SetDiskID(dsk, node)
3582 result = self.rpc.call_blockdev_getsize(node, newl)
3584 self.LogWarning("Failure in blockdev_getsize call to node"
3585 " %s, ignoring", node)
3587 if len(result.payload) != len(dskl):
3588 logging.warning("Invalid result from node %s: len(dksl)=%d,"
3589 " result.payload=%s", node, len(dskl), result.payload)
3590 self.LogWarning("Invalid result from node %s, ignoring node results",
3593 for ((instance, idx, disk), size) in zip(dskl, result.payload):
3595 self.LogWarning("Disk %d of instance %s did not return size"
3596 " information, ignoring", idx, instance.name)
3598 if not isinstance(size, (int, long)):
3599 self.LogWarning("Disk %d of instance %s did not return valid"
3600 " size information, ignoring", idx, instance.name)
3603 if size != disk.size:
3604 self.LogInfo("Disk %d of instance %s has mismatched size,"
3605 " correcting: recorded %d, actual %d", idx,
3606 instance.name, disk.size, size)
3607 disk.size = size
3608 self.cfg.Update(instance, feedback_fn)
3609 changed.append((instance.name, idx, size))
3610 if self._EnsureChildSizes(disk):
3611 self.cfg.Update(instance, feedback_fn)
3612 changed.append((instance.name, idx, disk.size))
3614 return changed
3616 class LUClusterRename(LogicalUnit):
3617 """Rename the cluster.
3620 HPATH = "cluster-rename"
3621 HTYPE = constants.HTYPE_CLUSTER
3623 def BuildHooksEnv(self):
3628 "OP_TARGET": self.cfg.GetClusterName(),
3629 "NEW_NAME": self.op.name,
3632 def BuildHooksNodes(self):
3633 """Build hooks nodes.
3636 return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3638 def CheckPrereq(self):
3639 """Verify that the passed name is a valid one.
3642 hostname = netutils.GetHostname(name=self.op.name,
3643 family=self.cfg.GetPrimaryIPFamily())
3645 new_name = hostname.name
3646 self.ip = new_ip = hostname.ip
3647 old_name = self.cfg.GetClusterName()
3648 old_ip = self.cfg.GetMasterIP()
3649 if new_name == old_name and new_ip == old_ip:
3650 raise errors.OpPrereqError("Neither the name nor the IP address of the"
3651 " cluster has changed",
3653 if new_ip != old_ip:
3654 if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3655 raise errors.OpPrereqError("The given cluster IP address (%s) is"
3656 " reachable on the network" %
3657 new_ip, errors.ECODE_NOTUNIQUE)
3659 self.op.name = new_name
3661 def Exec(self, feedback_fn):
3662 """Rename the cluster.
3665 clustername = self.op.name
3666 new_ip = self.ip
3668 # shutdown the master IP
3669 master_params = self.cfg.GetMasterNetworkParameters()
3670 ems = self.cfg.GetUseExternalMipScript()
3671 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3672 master_params, ems)
3673 result.Raise("Could not disable the master role")
3676 cluster = self.cfg.GetClusterInfo()
3677 cluster.cluster_name = clustername
3678 cluster.master_ip = new_ip
3679 self.cfg.Update(cluster, feedback_fn)
3681 # update the known hosts file
3682 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3683 node_list = self.cfg.GetOnlineNodeList()
3684 try:
3685 node_list.remove(master_params.name)
3686 except ValueError:
3687 pass
3688 _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3690 master_params.ip = new_ip
3691 result = self.rpc.call_node_activate_master_ip(master_params.name,
3692 master_params, ems)
3693 msg = result.fail_msg
3695 self.LogWarning("Could not re-enable the master role on"
3696 " the master, please restart manually: %s", msg)
3701 def _ValidateNetmask(cfg, netmask):
3702 """Checks if a netmask is valid.
3704 @type cfg: L{config.ConfigWriter}
3705 @param cfg: The cluster configuration
3707 @param netmask: the netmask to be verified
3708 @raise errors.OpPrereqError: if the validation fails
3711 ip_family = cfg.GetPrimaryIPFamily()
3713 ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3714 except errors.ProgrammerError:
3715 raise errors.OpPrereqError("Invalid primary ip family: %s." %
3717 if not ipcls.ValidateNetmask(netmask):
3718 raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3722 class LUClusterSetParams(LogicalUnit):
3723 """Change the parameters of the cluster.
3726 HPATH = "cluster-modify"
3727 HTYPE = constants.HTYPE_CLUSTER
3730 def CheckArguments(self):
3734 if self.op.uid_pool:
3735 uidpool.CheckUidPool(self.op.uid_pool)
3737 if self.op.add_uids:
3738 uidpool.CheckUidPool(self.op.add_uids)
3740 if self.op.remove_uids:
3741 uidpool.CheckUidPool(self.op.remove_uids)
3743 if self.op.master_netmask is not None:
3744 _ValidateNetmask(self.cfg, self.op.master_netmask)
3746 if self.op.diskparams:
3747 for dt_params in self.op.diskparams.values():
3748 utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3750 def ExpandNames(self):
3751 # FIXME: in the future maybe other cluster params won't require checking on
3752 # all nodes to be modified.
3753 self.needed_locks = {
3754 locking.LEVEL_NODE: locking.ALL_SET,
3755 }
3756 self.share_locks[locking.LEVEL_NODE] = 1
3758 def BuildHooksEnv(self):
3759 """Build hooks env.
3761 """
3762 return {
3763 "OP_TARGET": self.cfg.GetClusterName(),
3764 "NEW_VG_NAME": self.op.vg_name,
3767 def BuildHooksNodes(self):
3768 """Build hooks nodes.
3771 mn = self.cfg.GetMasterNode()
3774 def CheckPrereq(self):
3775 """Check prerequisites.
3777 This checks whether the given params don't conflict and
3778 if the given volume group is valid.
3781 if self.op.vg_name is not None and not self.op.vg_name:
3782 if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3783 raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3784 " instances exist", errors.ECODE_INVAL)
3786 if self.op.drbd_helper is not None and not self.op.drbd_helper:
3787 if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3788 raise errors.OpPrereqError("Cannot disable drbd helper while"
3789 " drbd-based instances exist",
3792 node_list = self.owned_locks(locking.LEVEL_NODE)
3794 # if vg_name not None, checks given volume group on all nodes
3795 if self.op.vg_name:
3796 vglist = self.rpc.call_vg_list(node_list)
3797 for node in node_list:
3798 msg = vglist[node].fail_msg
3799 if msg:
3800 # ignoring down node
3801 self.LogWarning("Error while gathering data on node %s"
3802 " (ignoring node): %s", node, msg)
3803 continue
3804 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3805 self.op.vg_name,
3806 constants.MIN_VG_SIZE)
3807 if vgstatus:
3808 raise errors.OpPrereqError("Error on node '%s': %s" %
3809 (node, vgstatus), errors.ECODE_ENVIRON)
3811 if self.op.drbd_helper:
3812 # checks given drbd helper on all nodes
3813 helpers = self.rpc.call_drbd_helper(node_list)
3814 for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3815 if ninfo.offline:
3816 self.LogInfo("Not checking drbd helper on offline node %s", node)
3817 continue
3818 msg = helpers[node].fail_msg
3819 if msg:
3820 raise errors.OpPrereqError("Error checking drbd helper on node"
3821 " '%s': %s" % (node, msg),
3822 errors.ECODE_ENVIRON)
3823 node_helper = helpers[node].payload
3824 if node_helper != self.op.drbd_helper:
3825 raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3826 (node, node_helper), errors.ECODE_ENVIRON)
3828 self.cluster = cluster = self.cfg.GetClusterInfo()
3829 # validate params changes
3830 if self.op.beparams:
3831 objects.UpgradeBeParams(self.op.beparams)
3832 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3833 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3835 if self.op.ndparams:
3836 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3837 self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3839 # TODO: we need a more general way to handle resetting
3840 # cluster-level parameters to default values
3841 if self.new_ndparams["oob_program"] == "":
3842 self.new_ndparams["oob_program"] = \
3843 constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3845 if self.op.hv_state:
3846 new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
3847 self.cluster.hv_state_static)
3848 self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
3849 for hv, values in new_hv_state.items())
3851 if self.op.disk_state:
3852 new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
3853 self.cluster.disk_state_static)
3854 self.new_disk_state = \
3855 dict((storage, dict((name, cluster.SimpleFillDiskState(values))
3856 for name, values in svalues.items()))
3857 for storage, svalues in new_disk_state.items())
3859 if self.op.ipolicy:
3860 ipolicy = {}
3861 for key, value in self.op.ipolicy.items():
3862 utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
3863 ipolicy[key] = _GetUpdatedParams(cluster.ipolicy.get(key, {}),
3864 value)
3865 objects.InstancePolicy.CheckParameterSyntax(ipolicy)
3866 self.new_ipolicy = ipolicy
3868 if self.op.nicparams:
3869 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3870 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3871 objects.NIC.CheckParameterSyntax(self.new_nicparams)
3872 nic_errors = []
3874 # check all instances for consistency
3875 for instance in self.cfg.GetAllInstancesInfo().values():
3876 for nic_idx, nic in enumerate(instance.nics):
3877 params_copy = copy.deepcopy(nic.nicparams)
3878 params_filled = objects.FillDict(self.new_nicparams, params_copy)
3880 # check parameter syntax
3881 try:
3882 objects.NIC.CheckParameterSyntax(params_filled)
3883 except errors.ConfigurationError, err:
3884 nic_errors.append("Instance %s, nic/%d: %s" %
3885 (instance.name, nic_idx, err))
3887 # if we're moving instances to routed, check that they have an ip
3888 target_mode = params_filled[constants.NIC_MODE]
3889 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3890 nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3891 " address" % (instance.name, nic_idx))
3893 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3894 "\n".join(nic_errors))
3896 # hypervisor list/parameters
3897 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3898 if self.op.hvparams:
3899 for hv_name, hv_dict in self.op.hvparams.items():
3900 if hv_name not in self.new_hvparams:
3901 self.new_hvparams[hv_name] = hv_dict
3902 else:
3903 self.new_hvparams[hv_name].update(hv_dict)
3905 # disk template parameters
3906 self.new_diskparams = objects.FillDict(cluster.diskparams, {})
3907 if self.op.diskparams:
3908 for dt_name, dt_params in self.op.diskparams.items():
3909 if dt_name not in self.new_diskparams:
3910 self.new_diskparams[dt_name] = dt_params
3911 else:
3912 self.new_diskparams[dt_name].update(dt_params)
3914 # os hypervisor parameters
3915 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3916 if self.op.os_hvp:
3917 for os_name, hvs in self.op.os_hvp.items():
3918 if os_name not in self.new_os_hvp:
3919 self.new_os_hvp[os_name] = hvs
3920 else:
3921 for hv_name, hv_dict in hvs.items():
3922 if hv_name not in self.new_os_hvp[os_name]:
3923 self.new_os_hvp[os_name][hv_name] = hv_dict
3924 else:
3925 self.new_os_hvp[os_name][hv_name].update(hv_dict)
3928 self.new_osp = objects.FillDict(cluster.osparams, {})
3929 if self.op.osparams:
3930 for os_name, osp in self.op.osparams.items():
3931 if os_name not in self.new_osp:
3932 self.new_osp[os_name] = {}
3934 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3935 use_none=True)
3937 if not self.new_osp[os_name]:
3938 # we removed all parameters
3939 del self.new_osp[os_name]
3940 else:
3941 # check the parameter validity (remote check)
3942 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3943 os_name, self.new_osp[os_name])
3945 # changes to the hypervisor list
3946 if self.op.enabled_hypervisors is not None:
3947 self.hv_list = self.op.enabled_hypervisors
3948 for hv in self.hv_list:
3949 # if the hypervisor doesn't already exist in the cluster
3950 # hvparams, we initialize it to empty, and then (in both
3951 # cases) we make sure to fill the defaults, as we might not
3952 # have a complete defaults list if the hypervisor wasn't
3953 # enabled before
3954 if hv not in new_hvp:
3955 new_hvp[hv] = {}
3956 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3957 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3958 else:
3959 self.hv_list = cluster.enabled_hypervisors
3961 if self.op.hvparams or self.op.enabled_hypervisors is not None:
3962 # either the enabled list has changed, or the parameters have, validate
3963 for hv_name, hv_params in self.new_hvparams.items():
3964 if ((self.op.hvparams and hv_name in self.op.hvparams) or
3965 (self.op.enabled_hypervisors and
3966 hv_name in self.op.enabled_hypervisors)):
3967 # either this is a new hypervisor, or its parameters have changed
3968 hv_class = hypervisor.GetHypervisor(hv_name)
3969 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3970 hv_class.CheckParameterSyntax(hv_params)
3971 _CheckHVParams(self, node_list, hv_name, hv_params)
3973 if self.op.os_hvp:
3974 # no need to check any newly-enabled hypervisors, since the
3975 # defaults have already been checked in the above code-block
3976 for os_name, os_hvp in self.new_os_hvp.items():
3977 for hv_name, hv_params in os_hvp.items():
3978 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3979 # we need to fill in the new os_hvp on top of the actual hv_p
3980 cluster_defaults = self.new_hvparams.get(hv_name, {})
3981 new_osp = objects.FillDict(cluster_defaults, hv_params)
3982 hv_class = hypervisor.GetHypervisor(hv_name)
3983 hv_class.CheckParameterSyntax(new_osp)
3984 _CheckHVParams(self, node_list, hv_name, new_osp)
3986 if self.op.default_iallocator:
3987 alloc_script = utils.FindFile(self.op.default_iallocator,
3988 constants.IALLOCATOR_SEARCH_PATH,
3989 os.path.isfile)
3990 if alloc_script is None:
3991 raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3992 " specified" % self.op.default_iallocator,
3995 def Exec(self, feedback_fn):
3996 """Change the parameters of the cluster.
3999 if self.op.vg_name is not None:
4000 new_volume = self.op.vg_name
4001 if not new_volume:
4002 new_volume = None
4003 if new_volume != self.cfg.GetVGName():
4004 self.cfg.SetVGName(new_volume)
4005 else:
4006 feedback_fn("Cluster LVM configuration already in desired"
4007 " state, not changing")
4008 if self.op.drbd_helper is not None:
4009 new_helper = self.op.drbd_helper
4010 if not new_helper:
4011 new_helper = None
4012 if new_helper != self.cfg.GetDRBDHelper():
4013 self.cfg.SetDRBDHelper(new_helper)
4014 else:
4015 feedback_fn("Cluster DRBD helper already in desired state,"
4017 if self.op.hvparams:
4018 self.cluster.hvparams = self.new_hvparams
4019 if self.op.os_hvp:
4020 self.cluster.os_hvp = self.new_os_hvp
4021 if self.op.enabled_hypervisors is not None:
4022 self.cluster.hvparams = self.new_hvparams
4023 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4024 if self.op.beparams:
4025 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4026 if self.op.nicparams:
4027 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
4028 if self.op.ipolicy:
4029 self.cluster.ipolicy = self.new_ipolicy
4030 if self.op.osparams:
4031 self.cluster.osparams = self.new_osp
4032 if self.op.ndparams:
4033 self.cluster.ndparams = self.new_ndparams
4034 if self.op.diskparams:
4035 self.cluster.diskparams = self.new_diskparams
4036 if self.op.hv_state:
4037 self.cluster.hv_state_static = self.new_hv_state
4038 if self.op.disk_state:
4039 self.cluster.disk_state_static = self.new_disk_state
4041 if self.op.candidate_pool_size is not None:
4042 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4043 # we need to update the pool size here, otherwise the save will fail
4044 _AdjustCandidatePool(self, [])
4046 if self.op.maintain_node_health is not None:
4047 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4048 feedback_fn("Note: CONFD was disabled at build time, node health"
4049 " maintenance is not useful (still enabling it)")
4050 self.cluster.maintain_node_health = self.op.maintain_node_health
4052 if self.op.prealloc_wipe_disks is not None:
4053 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
4055 if self.op.add_uids is not None:
4056 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4058 if self.op.remove_uids is not None:
4059 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4061 if self.op.uid_pool is not None:
4062 self.cluster.uid_pool = self.op.uid_pool
4064 if self.op.default_iallocator is not None:
4065 self.cluster.default_iallocator = self.op.default_iallocator
4067 if self.op.reserved_lvs is not None:
4068 self.cluster.reserved_lvs = self.op.reserved_lvs
4070 if self.op.use_external_mip_script is not None:
4071 self.cluster.use_external_mip_script = self.op.use_external_mip_script
4073 def helper_os(aname, mods, desc):
4074 desc += " OS list"
4075 lst = getattr(self.cluster, aname)
4076 for key, val in mods:
4077 if key == constants.DDM_ADD:
4078 if val in lst:
4079 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4080 else:
4081 lst.append(val)
4082 elif key == constants.DDM_REMOVE:
4083 if val in lst:
4084 lst.remove(val)
4085 else:
4086 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4087 else:
4088 raise errors.ProgrammerError("Invalid modification '%s'" % key)
4090 if self.op.hidden_os:
4091 helper_os("hidden_os", self.op.hidden_os, "hidden")
4093 if self.op.blacklisted_os:
4094 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
4096 if self.op.master_netdev:
4097 master_params = self.cfg.GetMasterNetworkParameters()
4098 ems = self.cfg.GetUseExternalMipScript()
4099 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4100 self.cluster.master_netdev)
4101 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4102 master_params, ems)
4103 result.Raise("Could not disable the master ip")
4104 feedback_fn("Changing master_netdev from %s to %s" %
4105 (master_params.netdev, self.op.master_netdev))
4106 self.cluster.master_netdev = self.op.master_netdev
4108 if self.op.master_netmask:
4109 master_params = self.cfg.GetMasterNetworkParameters()
4110 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4111 result = self.rpc.call_node_change_master_netmask(master_params.name,
4112 master_params.netmask,
4113 self.op.master_netmask,
4114 master_params.ip,
4115 master_params.netdev)
4116 if result.fail_msg:
4117 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4118 feedback_fn(msg)
4120 self.cluster.master_netmask = self.op.master_netmask
4122 self.cfg.Update(self.cluster, feedback_fn)
4124 if self.op.master_netdev:
4125 master_params = self.cfg.GetMasterNetworkParameters()
4126 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4127 self.op.master_netdev)
4128 ems = self.cfg.GetUseExternalMipScript()
4129 result = self.rpc.call_node_activate_master_ip(master_params.name,
4130 master_params, ems)
4131 if result.fail_msg:
4132 self.LogWarning("Could not re-enable the master ip on"
4133 " the master, please restart manually: %s",
4134 result.fail_msg)
4137 def _UploadHelper(lu, nodes, fname):
4138 """Helper for uploading a file and showing warnings.
4141 if os.path.exists(fname):
4142 result = lu.rpc.call_upload_file(nodes, fname)
4143 for to_node, to_result in result.items():
4144 msg = to_result.fail_msg
4145 if msg:
4146 msg = ("Copy of file %s to node %s failed: %s" %
4147 (fname, to_node, msg))
4148 lu.proc.LogWarning(msg)
4151 def _ComputeAncillaryFiles(cluster, redist):
4152 """Compute files external to Ganeti which need to be consistent.
4154 @type redist: boolean
4155 @param redist: Whether to include files which need to be redistributed
4157 """
4158 # Compute files for all nodes
4159 files_all = set([
4160 constants.SSH_KNOWN_HOSTS_FILE,
4161 constants.CONFD_HMAC_KEY,
4162 constants.CLUSTER_DOMAIN_SECRET_FILE,
4163 constants.SPICE_CERT_FILE,
4164 constants.SPICE_CACERT_FILE,
4165 constants.RAPI_USERS_FILE,
4166 ])
4168 if not redist:
4169 files_all.update(constants.ALL_CERT_FILES)
4170 files_all.update(ssconf.SimpleStore().GetFileList())
4171 else:
4172 # we need to ship at least the RAPI certificate
4173 files_all.add(constants.RAPI_CERT_FILE)
4175 if cluster.modify_etc_hosts:
4176 files_all.add(constants.ETC_HOSTS)
4178 # Files which are optional, these must:
4179 # - be present in one other category as well
4180 # - either exist or not exist on all nodes of that category (mc, vm all)
4181 files_opt = set([
4182 constants.RAPI_USERS_FILE,
4183 ])
4185 # Files which should only be on master candidates
4186 files_mc = set()
4188 if not redist:
4189 files_mc.add(constants.CLUSTER_CONF_FILE)
4191 # FIXME: this should also be replicated but Ganeti doesn't support files_mc
4192 # replication
4193 files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)
4195 # Files which should only be on VM-capable nodes
4196 files_vm = set(filename
4197 for hv_name in cluster.enabled_hypervisors
4198 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
4200 files_opt |= set(filename
4201 for hv_name in cluster.enabled_hypervisors
4202 for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
4204 # Filenames in each category must be unique
4205 all_files_set = files_all | files_mc | files_vm
4206 assert (len(all_files_set) ==
4207 sum(map(len, [files_all, files_mc, files_vm]))), \
4208 "Found file listed in more than one file list"
4210 # Optional files must be present in one other category
4211 assert all_files_set.issuperset(files_opt), \
4212 "Optional file not in a different required list"
4214 return (files_all, files_opt, files_mc, files_vm)
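4215 # The returned (files_all, files_opt, files_mc, files_vm) tuple is consumed
4216 # by _RedistributeAncillaryFiles below to decide which nodes get which files.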
4217 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
4218 """Distribute additional files which are part of the cluster configuration.
4220 ConfigWriter takes care of distributing the config and ssconf files, but
4221 there are more files which should be distributed to all nodes. This function
4222 makes sure those are copied.
4224 @param lu: calling logical unit
4225 @param additional_nodes: list of nodes not in the config to distribute to
4226 @type additional_vm: boolean
4227 @param additional_vm: whether the additional nodes are vm-capable or not
4230 # Gather target nodes
4231 cluster = lu.cfg.GetClusterInfo()
4232 master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
4234 online_nodes = lu.cfg.GetOnlineNodeList()
4235 vm_nodes = lu.cfg.GetVmCapableNodeList()
4237 if additional_nodes is not None:
4238 online_nodes.extend(additional_nodes)
4239 if additional_vm:
4240 vm_nodes.extend(additional_nodes)
4242 # Never distribute to master node
4243 for nodelist in [online_nodes, vm_nodes]:
4244 if master_info.name in nodelist:
4245 nodelist.remove(master_info.name)
4248 (files_all, _, files_mc, files_vm) = \
4249 _ComputeAncillaryFiles(cluster, True)
4251 # Never re-distribute configuration file from here
4252 assert not (constants.CLUSTER_CONF_FILE in files_all or
4253 constants.CLUSTER_CONF_FILE in files_vm)
4254 assert not files_mc, "Master candidates not handled in this function"
4256 filemap = [
4257 (online_nodes, files_all),
4258 (vm_nodes, files_vm),
4259 ]
4262 for (node_list, files) in filemap:
4263 for fname in files:
4264 _UploadHelper(lu, node_list, fname)
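4265 # Note: cluster.conf and the ssconf files are replicated by ConfigWriter
4266 # itself; this helper only pushes the ancillary files computed above.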
4267 class LUClusterRedistConf(NoHooksLU):
4268 """Force the redistribution of cluster configuration.
4270 This is a very simple LU.
4272 """
4273 REQ_BGL = False
4275 def ExpandNames(self):
4276 self.needed_locks = {
4277 locking.LEVEL_NODE: locking.ALL_SET,
4279 self.share_locks[locking.LEVEL_NODE] = 1
4281 def Exec(self, feedback_fn):
4282 """Redistribute the configuration.
4285 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4286 _RedistributeAncillaryFiles(self)
4289 class LUClusterActivateMasterIp(NoHooksLU):
4290 """Activate the master IP on the master node.
4293 def Exec(self, feedback_fn):
4294 """Activate the master IP.
4297 master_params = self.cfg.GetMasterNetworkParameters()
4298 ems = self.cfg.GetUseExternalMipScript()
4299 result = self.rpc.call_node_activate_master_ip(master_params.name,
4300 master_params, ems)
4301 result.Raise("Could not activate the master IP")
4304 class LUClusterDeactivateMasterIp(NoHooksLU):
4305 """Deactivate the master IP on the master node.
4308 def Exec(self, feedback_fn):
4309 """Deactivate the master IP.
4312 master_params = self.cfg.GetMasterNetworkParameters()
4313 ems = self.cfg.GetUseExternalMipScript()
4314 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4315 master_params, ems)
4316 result.Raise("Could not deactivate the master IP")
4319 def _WaitForSync(lu, instance, disks=None, oneshot=False):
4320 """Sleep and poll for an instance's disk to sync.
4323 if not instance.disks or disks is not None and not disks:
4324 return True
4326 disks = _ExpandCheckDisks(instance, disks)
4329 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4331 node = instance.primary_node
4333 for dev in disks:
4334 lu.cfg.SetDiskID(dev, node)
4336 # TODO: Convert to utils.Retry
4338 retries = 0
4339 degr_retries = 10 # in seconds, as we sleep 1 second each time
4340 while True:
4341 max_time = 0
4342 done = True
4343 cumul_degraded = False
4344 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
4345 msg = rstats.fail_msg
4347 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4350 raise errors.RemoteError("Can't contact node %s for mirror data,"
4351 " aborting." % node)
4354 rstats = rstats.payload
4356 for i, mstat in enumerate(rstats):
4358 lu.LogWarning("Can't compute data for node %s/%s",
4359 node, disks[i].iv_name)
4362 cumul_degraded = (cumul_degraded or
4363 (mstat.is_degraded and mstat.sync_percent is None))
4364 if mstat.sync_percent is not None:
4365 done = False
4366 if mstat.estimated_time is not None:
4367 rem_time = ("%s remaining (estimated)" %
4368 utils.FormatSeconds(mstat.estimated_time))
4369 max_time = mstat.estimated_time
4370 else:
4371 rem_time = "no time estimate"
4372 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4373 (disks[i].iv_name, mstat.sync_percent, rem_time))
4375 # if we're done but degraded, let's do a few small retries, to
4376 # make sure we see a stable and not transient situation; therefore
4377 # we force restart of the loop
4378 if (done or oneshot) and cumul_degraded and degr_retries > 0:
4379 logging.info("Degraded disks found, %d retries left", degr_retries)
4387 time.sleep(min(60, max_time))
4390 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4391 return not cumul_degraded
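4392 # Returns True only when no disk remained degraded; oneshot=True performs a
4393 # single poll (plus the short degraded retries) instead of a full sync wait.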
4394 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
4395 """Check that mirrors are not degraded.
4397 The ldisk parameter, if True, will change the test from the
4398 is_degraded attribute (which represents overall non-ok status for
4399 the device(s)) to the ldisk (representing the local storage status).
4402 lu.cfg.SetDiskID(dev, node)
4404 result = True
4406 if on_primary or dev.AssembleOnSecondary():
4407 rstats = lu.rpc.call_blockdev_find(node, dev)
4408 msg = rstats.fail_msg
4410 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4412 elif not rstats.payload:
4413 lu.LogWarning("Can't find disk on node %s", node)
4417 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4419 result = result and not rstats.payload.is_degraded
4421 if dev.children:
4422 for child in dev.children:
4423 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
4425 return result
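4426 # The recursive call above intentionally omits the ldisk flag, so child
4427 # devices are always checked with the default is_degraded test.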
4428 class LUOobCommand(NoHooksLU):
4429 """Logical unit for OOB handling.
4433 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4435 def ExpandNames(self):
4436 """Gather locks we need.
4439 if self.op.node_names:
4440 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4441 lock_names = self.op.node_names
4442 else:
4443 lock_names = locking.ALL_SET
4445 self.needed_locks = {
4446 locking.LEVEL_NODE: lock_names,
4449 def CheckPrereq(self):
4450 """Check prerequisites.
4453 - the node exists in the configuration
4456 Any errors are signaled by raising errors.OpPrereqError.
4458 """
4459 self.nodes = []
4460 self.master_node = self.cfg.GetMasterNode()
4462 assert self.op.power_delay >= 0.0
4464 if self.op.node_names:
4465 if (self.op.command in self._SKIP_MASTER and
4466 self.master_node in self.op.node_names):
4467 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4468 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4470 if master_oob_handler:
4471 additional_text = ("run '%s %s %s' if you want to operate on the"
4472 " master regardless") % (master_oob_handler,
4476 additional_text = "it does not support out-of-band operations"
4478 raise errors.OpPrereqError(("Operating on the master node %s is not"
4479 " allowed for %s; %s") %
4480 (self.master_node, self.op.command,
4481 additional_text), errors.ECODE_INVAL)
4482 else:
4483 self.op.node_names = self.cfg.GetNodeList()
4484 if self.op.command in self._SKIP_MASTER:
4485 self.op.node_names.remove(self.master_node)
4487 if self.op.command in self._SKIP_MASTER:
4488 assert self.master_node not in self.op.node_names
4490 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4492 raise errors.OpPrereqError("Node %s not found" % node_name,
4495 self.nodes.append(node)
4497 if (not self.op.ignore_status and
4498 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4499 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4500 " not marked offline") % node_name,
4503 def Exec(self, feedback_fn):
4504 """Execute OOB and return result if we expect any.
4507 master_node = self.master_node
4510 for idx, node in enumerate(utils.NiceSort(self.nodes,
4511 key=lambda node: node.name)):
4512 node_entry = [(constants.RS_NORMAL, node.name)]
4513 ret.append(node_entry)
4515 oob_program = _SupportsOob(self.cfg, node)
4517 if not oob_program:
4518 node_entry.append((constants.RS_UNAVAIL, None))
4519 continue
4521 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4522 self.op.command, oob_program, node.name)
4523 result = self.rpc.call_run_oob(master_node, oob_program,
4524 self.op.command, node.name,
4525 self.op.timeout)
4527 if result.fail_msg:
4528 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4529 node.name, result.fail_msg)
4530 node_entry.append((constants.RS_NODATA, None))
4531 else:
4532 try:
4533 self._CheckPayload(result)
4534 except errors.OpExecError, err:
4535 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4537 node_entry.append((constants.RS_NODATA, None))
4539 if self.op.command == constants.OOB_HEALTH:
4540 # For health we should log important events
4541 for item, status in result.payload:
4542 if status in [constants.OOB_STATUS_WARNING,
4543 constants.OOB_STATUS_CRITICAL]:
4544 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4545 item, node.name, status)
4547 if self.op.command == constants.OOB_POWER_ON:
4548 node.powered = True
4549 elif self.op.command == constants.OOB_POWER_OFF:
4550 node.powered = False
4551 elif self.op.command == constants.OOB_POWER_STATUS:
4552 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4553 if powered != node.powered:
4554 logging.warning(("Recorded power state (%s) of node '%s' does not"
4555 " match actual power state (%s)"), node.powered,
4558 # For configuration changing commands we should update the node
4559 if self.op.command in (constants.OOB_POWER_ON,
4560 constants.OOB_POWER_OFF):
4561 self.cfg.Update(node, feedback_fn)
4563 node_entry.append((constants.RS_NORMAL, result.payload))
4565 if (self.op.command == constants.OOB_POWER_ON and
4566 idx < len(self.nodes) - 1):
4567 time.sleep(self.op.power_delay)
4569 return ret
4571 def _CheckPayload(self, result):
4572 """Checks if the payload is valid.
4574 @param result: RPC result
4575 @raises errors.OpExecError: If payload is not valid
4577 """
4578 errs = []
4579 if self.op.command == constants.OOB_HEALTH:
4580 if not isinstance(result.payload, list):
4581 errs.append("command 'health' is expected to return a list but got %s" %
4582 type(result.payload))
4583 else:
4584 for item, status in result.payload:
4585 if status not in constants.OOB_STATUSES:
4586 errs.append("health item '%s' has invalid status '%s'" %
4589 if self.op.command == constants.OOB_POWER_STATUS:
4590 if not isinstance(result.payload, dict):
4591 errs.append("power-status is expected to return a dict but got %s" %
4592 type(result.payload))
4594 if self.op.command in [
4595 constants.OOB_POWER_ON,
4596 constants.OOB_POWER_OFF,
4597 constants.OOB_POWER_CYCLE,
4598 ]:
4599 if result.payload is not None:
4600 errs.append("%s is expected to not return payload but got '%s'" %
4601 (self.op.command, result.payload))
4603 if errs:
4604 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4605 utils.CommaJoin(errs))
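4606 # Each node_entry above holds the node name plus one (RS_*, payload) tuple
4607 # describing the outcome of the single OOB command that was executed.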
4608 class _OsQuery(_QueryBase):
4609 FIELDS = query.OS_FIELDS
4611 def ExpandNames(self, lu):
4612 # Lock all nodes in shared mode
4613 # Temporary removal of locks, should be reverted later
4614 # TODO: reintroduce locks when they are lighter-weight
4615 lu.needed_locks = {}
4616 #self.share_locks[locking.LEVEL_NODE] = 1
4617 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4619 # The following variables interact with _QueryBase._GetNames
4620 if self.names:
4621 self.wanted = self.names
4622 else:
4623 self.wanted = locking.ALL_SET
4625 self.do_locking = self.use_locking
4627 def DeclareLocks(self, lu, level):
4628 pass
4630 @staticmethod
4631 def _DiagnoseByOS(rlist):
4632 """Remaps a per-node return list into an a per-os per-node dictionary
4634 @param rlist: a map with node names as keys and OS objects as values
4637 @return: a dictionary with osnames as keys and as value another
4638 map, with nodes as keys and tuples of (path, status, diagnose,
4639 variants, parameters, api_versions) as values, eg::
4641 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4642 (/srv/..., False, "invalid api")],
4643 "node2": [(/srv/..., True, "", [], [])]}
4648 # we build here the list of nodes that didn't fail the RPC (at RPC
4649 # level), so that nodes with a non-responding node daemon don't
4650 # make all OSes invalid
4651 good_nodes = [node_name for node_name in rlist
4652 if not rlist[node_name].fail_msg]
4653 for node_name, nr in rlist.items():
4654 if nr.fail_msg or not nr.payload:
4655 continue
4656 for (name, path, status, diagnose, variants,
4657 params, api_versions) in nr.payload:
4658 if name not in all_os:
4659 # build a list of nodes for this os containing empty lists
4660 # for each node in node_list
4661 all_os[name] = {}
4662 for nname in good_nodes:
4663 all_os[name][nname] = []
4664 # convert params from [name, help] to (name, help)
4665 params = [tuple(v) for v in params]
4666 all_os[name][node_name].append((path, status, diagnose,
4667 variants, params, api_versions))
4668 return all_os
4670 def _GetQueryData(self, lu):
4671 """Computes the list of nodes and their attributes.
4674 # Locking is not used
4675 assert not (compat.any(lu.glm.is_owned(level)
4676 for level in locking.LEVELS
4677 if level != locking.LEVEL_CLUSTER) or
4678 self.do_locking or self.use_locking)
4680 valid_nodes = [node.name
4681 for node in lu.cfg.GetAllNodesInfo().values()
4682 if not node.offline and node.vm_capable]
4683 pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4684 cluster = lu.cfg.GetClusterInfo()
4686 data = {}
4688 for (os_name, os_data) in pol.items():
4689 info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4690 hidden=(os_name in cluster.hidden_os),
4691 blacklisted=(os_name in cluster.blacklisted_os))
4693 variants = set()
4694 parameters = set()
4695 api_versions = set()
4697 for idx, osl in enumerate(os_data.values()):
4698 info.valid = bool(info.valid and osl and osl[0][1])
4699 if not info.valid:
4700 break
4702 (node_variants, node_params, node_api) = osl[0][3:6]
4703 if idx == 0:
4704 # First entry
4705 variants.update(node_variants)
4706 parameters.update(node_params)
4707 api_versions.update(node_api)
4708 else:
4709 # Filter out inconsistent values
4710 variants.intersection_update(node_variants)
4711 parameters.intersection_update(node_params)
4712 api_versions.intersection_update(node_api)
4714 info.variants = list(variants)
4715 info.parameters = list(parameters)
4716 info.api_versions = list(api_versions)
4718 data[os_name] = info
4720 # Prepare data in requested order
4721 return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4722 if name in data]
4725 class LUOsDiagnose(NoHooksLU):
4726 """Logical unit for OS diagnose/query.
4732 def _BuildFilter(fields, names):
4733 """Builds a filter for querying OSes.
4736 name_filter = qlang.MakeSimpleFilter("name", names)
4738 # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4739 # respective field is not requested
4740 status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4741 for fname in ["hidden", "blacklisted"]
4742 if fname not in fields]
4743 if "valid" not in fields:
4744 status_filter.append([qlang.OP_TRUE, "valid"])
4746 if status_filter:
4747 status_filter.insert(0, qlang.OP_AND)
4748 else:
4749 status_filter = None
4751 if name_filter and status_filter:
4752 return [qlang.OP_AND, name_filter, status_filter]
4753 elif name_filter:
4754 return name_filter
4755 else:
4756 return status_filter
4758 def CheckArguments(self):
4759 self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4760 self.op.output_fields, False)
4762 def ExpandNames(self):
4763 self.oq.ExpandNames(self)
4765 def Exec(self, feedback_fn):
4766 return self.oq.OldStyleQuery(self)
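4767 # Note: per the legacy behaviour encoded in _BuildFilter, hidden,
4768 # blacklisted and invalid OSes only show up if their field is requested.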
4769 class LUNodeRemove(LogicalUnit):
4770 """Logical unit for removing a node.
4773 HPATH = "node-remove"
4774 HTYPE = constants.HTYPE_NODE
4776 def BuildHooksEnv(self):
4777 """Build hooks env.
4779 This doesn't run on the target node in the pre phase as a failed
4780 node would then be impossible to remove.
4782 """
4783 return {
4784 "OP_TARGET": self.op.node_name,
4785 "NODE_NAME": self.op.node_name,
4788 def BuildHooksNodes(self):
4789 """Build hooks nodes.
4792 all_nodes = self.cfg.GetNodeList()
4793 try:
4794 all_nodes.remove(self.op.node_name)
4795 except ValueError:
4796 logging.warning("Node '%s', which is about to be removed, was not found"
4797 " in the list of all nodes", self.op.node_name)
4798 return (all_nodes, all_nodes)
4800 def CheckPrereq(self):
4801 """Check prerequisites.
4804 - the node exists in the configuration
4805 - it does not have primary or secondary instances
4806 - it's not the master
4808 Any errors are signaled by raising errors.OpPrereqError.
4811 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4812 node = self.cfg.GetNodeInfo(self.op.node_name)
4813 assert node is not None
4815 masternode = self.cfg.GetMasterNode()
4816 if node.name == masternode:
4817 raise errors.OpPrereqError("Node is the master node, failover to another"
4818 " node is required", errors.ECODE_INVAL)
4820 for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4821 if node.name in instance.all_nodes:
4822 raise errors.OpPrereqError("Instance %s is still running on the node,"
4823 " please remove first" % instance_name,
4825 self.op.node_name = node.name
4828 def Exec(self, feedback_fn):
4829 """Removes the node from the cluster.
4833 logging.info("Stopping the node daemon and removing configs from node %s",
4836 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4838 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
4839 "Not owning BGL"
4841 # Promote nodes to master candidate as needed
4842 _AdjustCandidatePool(self, exceptions=[node.name])
4843 self.context.RemoveNode(node.name)
4845 # Run post hooks on the node before it's removed
4846 _RunPostHook(self, node.name)
4848 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4849 msg = result.fail_msg
4850 if msg:
4851 self.LogWarning("Errors encountered on the remote node while leaving"
4852 " the cluster: %s", msg)
4854 # Remove node from our /etc/hosts
4855 if self.cfg.GetClusterInfo().modify_etc_hosts:
4856 master_node = self.cfg.GetMasterNode()
4857 result = self.rpc.call_etc_hosts_modify(master_node,
4858 constants.ETC_HOSTS_REMOVE,
4859 node.name, None)
4860 result.Raise("Can't update hosts file with new host data")
4861 _RedistributeAncillaryFiles(self)
4864 class _NodeQuery(_QueryBase):
4865 FIELDS = query.NODE_FIELDS
4867 def ExpandNames(self, lu):
4868 lu.needed_locks = {}
4869 lu.share_locks = _ShareAll()
4871 if self.names:
4872 self.wanted = _GetWantedNodes(lu, self.names)
4873 else:
4874 self.wanted = locking.ALL_SET
4876 self.do_locking = (self.use_locking and
4877 query.NQ_LIVE in self.requested_data)
4879 if self.do_locking:
4880 # If any non-static field is requested we need to lock the nodes
4881 lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4883 def DeclareLocks(self, lu, level):
4884 pass
4886 def _GetQueryData(self, lu):
4887 """Computes the list of nodes and their attributes.
4890 all_info = lu.cfg.GetAllNodesInfo()
4892 nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4894 # Gather data as requested
4895 if query.NQ_LIVE in self.requested_data:
4896 # filter out non-vm_capable nodes
4897 toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4899 node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
4900 [lu.cfg.GetHypervisorType()])
4901 live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
4902 for (name, nresult) in node_data.items()
4903 if not nresult.fail_msg and nresult.payload)
4904 else:
4905 live_data = None
4907 if query.NQ_INST in self.requested_data:
4908 node_to_primary = dict([(name, set()) for name in nodenames])
4909 node_to_secondary = dict([(name, set()) for name in nodenames])
4911 inst_data = lu.cfg.GetAllInstancesInfo()
4913 for inst in inst_data.values():
4914 if inst.primary_node in node_to_primary:
4915 node_to_primary[inst.primary_node].add(inst.name)
4916 for secnode in inst.secondary_nodes:
4917 if secnode in node_to_secondary:
4918 node_to_secondary[secnode].add(inst.name)
4919 else:
4920 node_to_primary = None
4921 node_to_secondary = None
4923 if query.NQ_OOB in self.requested_data:
4924 oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4925 for name, node in all_info.iteritems())
4926 else:
4927 oob_support = None
4929 if query.NQ_GROUP in self.requested_data:
4930 groups = lu.cfg.GetAllNodeGroupsInfo()
4931 else:
4932 groups = {}
4934 return query.NodeQueryData([all_info[name] for name in nodenames],
4935 live_data, lu.cfg.GetMasterNode(),
4936 node_to_primary, node_to_secondary, groups,
4937 oob_support, lu.cfg.GetClusterInfo())
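4938 # Fields that were not requested are passed as None/empty above, so
4939 # query.NodeQueryData carries only the data that was actually asked for.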
4940 class LUNodeQuery(NoHooksLU):
4941 """Logical unit for querying nodes.
4944 # pylint: disable=W0142
4947 def CheckArguments(self):
4948 self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4949 self.op.output_fields, self.op.use_locking)
4951 def ExpandNames(self):
4952 self.nq.ExpandNames(self)
4954 def DeclareLocks(self, level):
4955 self.nq.DeclareLocks(self, level)
4957 def Exec(self, feedback_fn):
4958 return self.nq.OldStyleQuery(self)
4961 class LUNodeQueryvols(NoHooksLU):
4962 """Logical unit for getting volumes on node(s).
4966 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4967 _FIELDS_STATIC = utils.FieldSet("node")
4969 def CheckArguments(self):
4970 _CheckOutputFields(static=self._FIELDS_STATIC,
4971 dynamic=self._FIELDS_DYNAMIC,
4972 selected=self.op.output_fields)
4974 def ExpandNames(self):
4975 self.share_locks = _ShareAll()
4976 self.needed_locks = {}
4978 if not self.op.nodes:
4979 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4981 self.needed_locks[locking.LEVEL_NODE] = \
4982 _GetWantedNodes(self, self.op.nodes)
4984 def Exec(self, feedback_fn):
4985 """Computes the list of nodes and their attributes.
4988 nodenames = self.owned_locks(locking.LEVEL_NODE)
4989 volumes = self.rpc.call_node_volumes(nodenames)
4991 ilist = self.cfg.GetAllInstancesInfo()
4992 vol2inst = _MapInstanceDisksToNodes(ilist.values())
4994 output = []
4995 for node in nodenames:
4996 nresult = volumes[node]
4997 if nresult.offline:
4998 continue
4999 msg = nresult.fail_msg
5000 if msg:
5001 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
5002 continue
5004 node_vols = sorted(nresult.payload,
5005 key=operator.itemgetter("dev"))
5007 for vol in node_vols:
5008 node_output = []
5009 for field in self.op.output_fields:
5010 if field == "node":
5011 val = node
5012 elif field == "phys":
5013 val = vol["dev"]
5014 elif field == "vg":
5015 val = vol["vg"]
5016 elif field == "name":
5017 val = vol["name"]
5018 elif field == "size":
5019 val = int(float(vol["size"]))
5020 elif field == "instance":
5021 val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
5022 else:
5023 raise errors.ParameterError(field)
5024 node_output.append(str(val))
5026 output.append(node_output)
5028 return output
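5029 # Each volume yields one stringified row in the requested field order;
5030 # offline nodes are skipped and RPC failures only produce a warning.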
5031 class LUNodeQueryStorage(NoHooksLU):
5032 """Logical unit for getting information on storage units on node(s).
5035 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
5038 def CheckArguments(self):
5039 _CheckOutputFields(static=self._FIELDS_STATIC,
5040 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
5041 selected=self.op.output_fields)
5043 def ExpandNames(self):
5044 self.share_locks = _ShareAll()
5045 self.needed_locks = {}
5047 if self.op.nodes:
5048 self.needed_locks[locking.LEVEL_NODE] = \
5049 _GetWantedNodes(self, self.op.nodes)
5050 else:
5051 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5053 def Exec(self, feedback_fn):
5054 """Computes the list of nodes and their attributes.
5057 self.nodes = self.owned_locks(locking.LEVEL_NODE)
5059 # Always get name to sort by
5060 if constants.SF_NAME in self.op.output_fields:
5061 fields = self.op.output_fields[:]
5062 else:
5063 fields = [constants.SF_NAME] + self.op.output_fields
5065 # Never ask for node or type as it's only known to the LU
5066 for extra in [constants.SF_NODE, constants.SF_TYPE]:
5067 while extra in fields:
5068 fields.remove(extra)
5070 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
5071 name_idx = field_idx[constants.SF_NAME]
5073 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5074 data = self.rpc.call_storage_list(self.nodes,
5075 self.op.storage_type, st_args,
5076 self.op.name, fields)
5078 result = []
5080 for node in utils.NiceSort(self.nodes):
5081 nresult = data[node]
5083 if nresult.offline:
5084 continue
5085 msg = nresult.fail_msg
5086 if msg:
5087 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
5088 continue
5090 rows = dict([(row[name_idx], row) for row in nresult.payload])
5092 for name in utils.NiceSort(rows.keys()):
5093 row = rows[name]
5095 out = []
5097 for field in self.op.output_fields:
5098 if field == constants.SF_NODE:
5099 val = node
5100 elif field == constants.SF_TYPE:
5101 val = self.op.storage_type
5102 elif field in field_idx:
5103 val = row[field_idx[field]]
5104 else:
5105 raise errors.ParameterError(field)
5107 out.append(str(val))
5109 result.append(out)
5111 return result
5114 class _InstanceQuery(_QueryBase):
5115 FIELDS = query.INSTANCE_FIELDS
5117 def ExpandNames(self, lu):
5118 lu.needed_locks = {}
5119 lu.share_locks = _ShareAll()
5121 if self.names:
5122 self.wanted = _GetWantedInstances(lu, self.names)
5123 else:
5124 self.wanted = locking.ALL_SET
5126 self.do_locking = (self.use_locking and
5127 query.IQ_LIVE in self.requested_data)
5129 lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5130 lu.needed_locks[locking.LEVEL_NODEGROUP] = []
5131 lu.needed_locks[locking.LEVEL_NODE] = []
5132 lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5134 self.do_grouplocks = (self.do_locking and
5135 query.IQ_NODES in self.requested_data)
5137 def DeclareLocks(self, lu, level):
5139 if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
5140 assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
5142 # Lock all groups used by instances optimistically; this requires going
5143 # via the node before it's locked, requiring verification later on
5144 lu.needed_locks[locking.LEVEL_NODEGROUP] = \
5145 frozenset(group_uuid
5146 for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
5147 for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
5148 elif level == locking.LEVEL_NODE:
5149 lu._LockInstancesNodes() # pylint: disable=W0212
5151 @staticmethod
5152 def _CheckGroupLocks(lu):
5153 owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
5154 owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
5156 # Check if node groups for locked instances are still correct
5157 for instance_name in owned_instances:
5158 _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
5160 def _GetQueryData(self, lu):
5161 """Computes the list of instances and their attributes.
5164 if self.do_grouplocks:
5165 self._CheckGroupLocks(lu)
5167 cluster = lu.cfg.GetClusterInfo()
5168 all_info = lu.cfg.GetAllInstancesInfo()
5170 instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
5172 instance_list = [all_info[name] for name in instance_names]
5173 nodes = frozenset(itertools.chain(*(inst.all_nodes
5174 for inst in instance_list)))
5175 hv_list = list(set([inst.hypervisor for inst in instance_list]))
5176 bad_nodes = []
5177 offline_nodes = []
5178 wrongnode_inst = set()
5180 # Gather data as requested
5181 if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
5182 live_data = {}
5183 node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
5184 for name in nodes:
5185 result = node_data[name]
5186 if result.offline:
5187 # offline nodes will be in both lists
5188 assert result.fail_msg
5189 offline_nodes.append(name)
5190 if result.fail_msg:
5191 bad_nodes.append(name)
5192 elif result.payload:
5193 for inst in result.payload:
5194 if inst in all_info:
5195 if all_info[inst].primary_node == name:
5196 live_data.update(result.payload)
5197 else:
5198 wrongnode_inst.add(inst)
5199 else:
5200 # orphan instance; we don't list it here as we don't
5201 # handle this case yet in the output of instance listing
5202 logging.warning("Orphan instance '%s' found on node %s",
5203 inst, name)
5204 # else no instance is alive
5205 else:
5206 live_data = {}
5208 if query.IQ_DISKUSAGE in self.requested_data:
5209 disk_usage = dict((inst.name,
5210 _ComputeDiskSize(inst.disk_template,
5211 [{constants.IDISK_SIZE: disk.size}
5212 for disk in inst.disks]))
5213 for inst in instance_list)
5214 else:
5215 disk_usage = None
5217 if query.IQ_CONSOLE in self.requested_data:
5218 consinfo = {}
5219 for inst in instance_list:
5220 if inst.name in live_data:
5221 # Instance is running
5222 consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
5223 else:
5224 consinfo[inst.name] = None
5225 assert set(consinfo.keys()) == set(instance_names)
5226 else:
5227 consinfo = None
5229 if query.IQ_NODES in self.requested_data:
5230 node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
5231 instance_list)))
5232 nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
5233 groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
5234 for uuid in set(map(operator.attrgetter("group"),
5240 return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
5241 disk_usage, offline_nodes, bad_nodes,
5242 live_data, wrongnode_inst, consinfo,
5243 nodes, groups)
5246 class LUQuery(NoHooksLU):
5247 """Query for resources/items of a certain kind.
5250 # pylint: disable=W0142
5253 def CheckArguments(self):
5254 qcls = _GetQueryImplementation(self.op.what)
5256 self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
5258 def ExpandNames(self):
5259 self.impl.ExpandNames(self)
5261 def DeclareLocks(self, level):
5262 self.impl.DeclareLocks(self, level)
5264 def Exec(self, feedback_fn):
5265 return self.impl.NewStyleQuery(self)
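5266 # Both LUs are thin wrappers: _GetQueryImplementation maps self.op.what to
5267 # a _QueryBase subclass (e.g. _OsQuery, _NodeQuery or _InstanceQuery).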
5268 class LUQueryFields(NoHooksLU):
5269 """Query for resources/items of a certain kind.
5272 # pylint: disable=W0142
5275 def CheckArguments(self):
5276 self.qcls = _GetQueryImplementation(self.op.what)
5278 def ExpandNames(self):
5279 self.needed_locks = {}
5281 def Exec(self, feedback_fn):
5282 return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5285 class LUNodeModifyStorage(NoHooksLU):
5286 """Logical unit for modifying a storage volume on a node.
5291 def CheckArguments(self):
5292 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5294 storage_type = self.op.storage_type
5296 try:
5297 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
5298 except KeyError:
5299 raise errors.OpPrereqError("Storage units of type '%s' can not be"
5300 " modified" % storage_type,
5301 errors.ECODE_INVAL)
5303 diff = set(self.op.changes.keys()) - modifiable
5304 if diff:
5305 raise errors.OpPrereqError("The following fields can not be modified for"
5306 " storage units of type '%s': %r" %
5307 (storage_type, list(diff)),
5308 errors.ECODE_INVAL)
5310 def ExpandNames(self):
5311 self.needed_locks = {
5312 locking.LEVEL_NODE: self.op.node_name,
5313 }
5315 def Exec(self, feedback_fn):
5316 """Computes the list of nodes and their attributes.
5319 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5320 result = self.rpc.call_storage_modify(self.op.node_name,
5321 self.op.storage_type, st_args,
5322 self.op.name, self.op.changes)
5323 result.Raise("Failed to modify storage unit '%s' on %s" %
5324 (self.op.name, self.op.node_name))
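5325 # Example (hypothetical): for LVM physical volumes only the "allocatable"
5326 # field is listed in MODIFIABLE_STORAGE_FIELDS, so only it can be changed.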
5327 class LUNodeAdd(LogicalUnit):
5328 """Logical unit for adding node to the cluster.
5332 HTYPE = constants.HTYPE_NODE
5333 _NFLAGS = ["master_capable", "vm_capable"]
5335 def CheckArguments(self):
5336 self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5337 # validate/normalize the node name
5338 self.hostname = netutils.GetHostname(name=self.op.node_name,
5339 family=self.primary_ip_family)
5340 self.op.node_name = self.hostname.name
5342 if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5343 raise errors.OpPrereqError("Cannot readd the master node",
5346 if self.op.readd and self.op.group:
5347 raise errors.OpPrereqError("Cannot pass a node group when a node is"
5348 " being readded", errors.ECODE_INVAL)
5350 def BuildHooksEnv(self):
5351 """Build hooks env.
5353 This will run on all nodes before, and on all nodes + the new node after.
5355 """
5356 return {
5357 "OP_TARGET": self.op.node_name,
5358 "NODE_NAME": self.op.node_name,
5359 "NODE_PIP": self.op.primary_ip,
5360 "NODE_SIP": self.op.secondary_ip,
5361 "MASTER_CAPABLE": str(self.op.master_capable),
5362 "VM_CAPABLE": str(self.op.vm_capable),
5365 def BuildHooksNodes(self):
5366 """Build hooks nodes.
5369 # Exclude added node
5370 pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5371 post_nodes = pre_nodes + [self.op.node_name, ]
5373 return (pre_nodes, post_nodes)
5375 def CheckPrereq(self):
5376 """Check prerequisites.
5379 - the new node is not already in the config
5381 - its parameters (single/dual homed) matches the cluster
5383 Any errors are signaled by raising errors.OpPrereqError.
5385 """
5386 cfg = self.cfg
5387 hostname = self.hostname
5388 node = hostname.name
5389 primary_ip = self.op.primary_ip = hostname.ip
5390 if self.op.secondary_ip is None:
5391 if self.primary_ip_family == netutils.IP6Address.family:
5392 raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5393 " IPv4 address must be given as secondary",
5395 self.op.secondary_ip = primary_ip
5397 secondary_ip = self.op.secondary_ip
5398 if not netutils.IP4Address.IsValid(secondary_ip):
5399 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5400 " address" % secondary_ip, errors.ECODE_INVAL)
5402 node_list = cfg.GetNodeList()
5403 if not self.op.readd and node in node_list:
5404 raise errors.OpPrereqError("Node %s is already in the configuration" %
5405 node, errors.ECODE_EXISTS)
5406 elif self.op.readd and node not in node_list:
5407 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5410 self.changed_primary_ip = False
5412 for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5413 if self.op.readd and node == existing_node_name:
5414 if existing_node.secondary_ip != secondary_ip:
5415 raise errors.OpPrereqError("Readded node doesn't have the same IP"
5416 " address configuration as before",
5418 if existing_node.primary_ip != primary_ip:
5419 self.changed_primary_ip = True
5421 continue
5423 if (existing_node.primary_ip == primary_ip or
5424 existing_node.secondary_ip == primary_ip or
5425 existing_node.primary_ip == secondary_ip or
5426 existing_node.secondary_ip == secondary_ip):
5427 raise errors.OpPrereqError("New node ip address(es) conflict with"
5428 " existing node %s" % existing_node.name,
5429 errors.ECODE_NOTUNIQUE)
5431 # After this 'if' block, None is no longer a valid value for the
5432 # _capable op attributes
5433 if self.op.readd:
5434 old_node = self.cfg.GetNodeInfo(node)
5435 assert old_node is not None, "Can't retrieve locked node %s" % node
5436 for attr in self._NFLAGS:
5437 if getattr(self.op, attr) is None:
5438 setattr(self.op, attr, getattr(old_node, attr))
5439 else:
5440 for attr in self._NFLAGS:
5441 if getattr(self.op, attr) is None:
5442 setattr(self.op, attr, True)
5444 if self.op.readd and not self.op.vm_capable:
5445 pri, sec = cfg.GetNodeInstances(node)
5446 if pri or sec:
5447 raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5448 " flag set to false, but it already holds"
5449 " instances" % node,
5452 # check that the type of the node (single versus dual homed) is the
5453 # same as for the master
5454 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5455 master_singlehomed = myself.secondary_ip == myself.primary_ip
5456 newbie_singlehomed = secondary_ip == primary_ip
5457 if master_singlehomed != newbie_singlehomed:
5458 if master_singlehomed:
5459 raise errors.OpPrereqError("The master has no secondary ip but the"
5460 " new node has one",
5463 raise errors.OpPrereqError("The master has a secondary ip but the"
5464 " new node doesn't have one",
5467 # checks reachability
5468 if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5469 raise errors.OpPrereqError("Node not reachable by ping",
5470 errors.ECODE_ENVIRON)
5472 if not newbie_singlehomed:
5473 # check reachability from my secondary ip to newbie's secondary ip
5474 if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5475 source=myself.secondary_ip):
5476 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5477 " based ping to node daemon port",
5478 errors.ECODE_ENVIRON)
5480 if self.op.readd:
5481 exceptions = [node]
5482 else:
5483 exceptions = []
5485 if self.op.master_capable:
5486 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5487 else:
5488 self.master_candidate = False
5490 if self.op.readd:
5491 self.new_node = old_node
5492 else:
5493 node_group = cfg.LookupNodeGroup(self.op.group)
5494 self.new_node = objects.Node(name=node,
5495 primary_ip=primary_ip,
5496 secondary_ip=secondary_ip,
5497 master_candidate=self.master_candidate,
5498 offline=False, drained=False,
5499 group=node_group)
5501 if self.op.ndparams:
5502 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5504 if self.op.hv_state:
5505 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
5507 if self.op.disk_state:
5508 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
5510 def Exec(self, feedback_fn):
5511 """Adds the new node to the cluster.
5514 new_node = self.new_node
5515 node = new_node.name
5517 assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5518 "Not owning BGL"
5520 # We are adding a new node, so we assume it is powered
5521 new_node.powered = True
5523 # for re-adds, reset the offline/drained/master-candidate flags;
5524 # we need to reset here, otherwise offline would prevent RPC calls
5525 # later in the procedure; this also means that if the re-add
5526 # fails, we are left with a non-offlined, broken node
5527 if self.op.readd:
5528 new_node.drained = new_node.offline = False # pylint: disable=W0201
5529 self.LogInfo("Readding a node, the offline/drained flags were reset")
5530 # if we demote the node, we do cleanup later in the procedure
5531 new_node.master_candidate = self.master_candidate
5532 if self.changed_primary_ip:
5533 new_node.primary_ip = self.op.primary_ip
5535 # copy the master/vm_capable flags
5536 for attr in self._NFLAGS:
5537 setattr(new_node, attr, getattr(self.op, attr))
5539 # notify the user about any possible mc promotion
5540 if new_node.master_candidate:
5541 self.LogInfo("Node will be a master candidate")
5543 if self.op.ndparams:
5544 new_node.ndparams = self.op.ndparams
5546 new_node.ndparams = {}
5548 if self.op.hv_state:
5549 new_node.hv_state_static = self.new_hv_state
5551 if self.op.disk_state:
5552 new_node.disk_state_static = self.new_disk_state
5554 # check connectivity
5555 result = self.rpc.call_version([node])[node]
5556 result.Raise("Can't get version information from node %s" % node)
5557 if constants.PROTOCOL_VERSION == result.payload:
5558 logging.info("Communication to node %s fine, sw version %s match",
5559 node, result.payload)
5560 else:
5561 raise errors.OpExecError("Version mismatch master version %s,"
5562 " node version %s" %
5563 (constants.PROTOCOL_VERSION, result.payload))
5565 # Add node to our /etc/hosts, and add key to known_hosts
5566 if self.cfg.GetClusterInfo().modify_etc_hosts:
5567 master_node = self.cfg.GetMasterNode()
5568 result = self.rpc.call_etc_hosts_modify(master_node,
5569 constants.ETC_HOSTS_ADD,
5570 self.hostname.name,
5571 self.hostname.ip)
5572 result.Raise("Can't update hosts file with new host data")
5574 if new_node.secondary_ip != new_node.primary_ip:
5575 _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5576 False)
5578 node_verify_list = [self.cfg.GetMasterNode()]
5579 node_verify_param = {
5580 constants.NV_NODELIST: ([node], {}),
5581 # TODO: do a node-net-test as well?
5582 }
5584 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5585 self.cfg.GetClusterName())
5586 for verifier in node_verify_list:
5587 result[verifier].Raise("Cannot communicate with node %s" % verifier)
5588 nl_payload = result[verifier].payload[constants.NV_NODELIST]
5589 if nl_payload:
5590 for failed in nl_payload:
5591 feedback_fn("ssh/hostname verification failed"
5592 " (checking from %s): %s" %
5593 (verifier, nl_payload[failed]))
5594 raise errors.OpExecError("ssh/hostname verification failed")
5596 if self.op.readd:
5597 _RedistributeAncillaryFiles(self)
5598 self.context.ReaddNode(new_node)
5599 # make sure we redistribute the config
5600 self.cfg.Update(new_node, feedback_fn)
5601 # and make sure the new node will not have old files around
5602 if not new_node.master_candidate:
5603 result = self.rpc.call_node_demote_from_mc(new_node.name)
5604 msg = result.fail_msg
5606 self.LogWarning("Node failed to demote itself from master"
5607 " candidate status: %s" % msg)
5609 _RedistributeAncillaryFiles(self, additional_nodes=[node],
5610 additional_vm=self.op.vm_capable)
5611 self.context.AddNode(new_node, self.proc.GetECId())
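5612 # Re-added nodes go through context.ReaddNode above; fresh nodes are only
5613 # registered via context.AddNode after verification and file distribution.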
5614 class LUNodeSetParams(LogicalUnit):
5615 """Modifies the parameters of a node.
5617 @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5618 to the node role (as _ROLE_*)
5619 @cvar _R2F: a dictionary from node role to tuples of flags
5620 @cvar _FLAGS: a list of attribute names corresponding to the flags
5623 HPATH = "node-modify"
5624 HTYPE = constants.HTYPE_NODE
5626 (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5627 _F2R = {
5628 (True, False, False): _ROLE_CANDIDATE,
5629 (False, True, False): _ROLE_DRAINED,
5630 (False, False, True): _ROLE_OFFLINE,
5631 (False, False, False): _ROLE_REGULAR,
5632 }
5633 _R2F = dict((v, k) for k, v in _F2R.items())
5634 _FLAGS = ["master_candidate", "drained", "offline"]
5636 def CheckArguments(self):
5637 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5638 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5639 self.op.master_capable, self.op.vm_capable,
5640 self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
5641 self.op.disk_state]
5642 if all_mods.count(None) == len(all_mods):
5643 raise errors.OpPrereqError("Please pass at least one modification",
5645 if all_mods.count(True) > 1:
5646 raise errors.OpPrereqError("Can't set the node into more than one"
5647 " state at the same time",
5650 # Boolean value that tells us whether we might be demoting from MC
5651 self.might_demote = (self.op.master_candidate == False or
5652 self.op.offline == True or
5653 self.op.drained == True or
5654 self.op.master_capable == False)
5656 if self.op.secondary_ip:
5657 if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5658 raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5659 " address" % self.op.secondary_ip,
5662 self.lock_all = self.op.auto_promote and self.might_demote
5663 self.lock_instances = self.op.secondary_ip is not None
5665 def _InstanceFilter(self, instance):
5666 """Filter for getting affected instances.
5669 return (instance.disk_template in constants.DTS_INT_MIRROR and
5670 self.op.node_name in instance.all_nodes)
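# Used together with cfg.GetInstancesInfoByFilter() in ExpandNames and
# CheckPrereq below: only internally mirrored instances (e.g. DRBD) that
# have this node among their nodes are affected by a secondary IP change.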
5672 def ExpandNames(self):
5674 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5676 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5678 # Since modifying a node can have severe effects on currently running
5679 # operations, the node resource lock is acquired in at least shared mode
5680 self.needed_locks[locking.LEVEL_NODE_RES] = \
5681 self.needed_locks[locking.LEVEL_NODE]
5683 # Get node resource and instance locks in shared mode; they are not used
5684 # for anything but read-only access
5685 self.share_locks[locking.LEVEL_NODE_RES] = 1
5686 self.share_locks[locking.LEVEL_INSTANCE] = 1
5688 if self.lock_instances:
5689 self.needed_locks[locking.LEVEL_INSTANCE] = \
5690 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5692 def BuildHooksEnv(self):
5695 This runs on the master node.
5699 "OP_TARGET": self.op.node_name,
5700 "MASTER_CANDIDATE": str(self.op.master_candidate),
5701 "OFFLINE": str(self.op.offline),
5702 "DRAINED": str(self.op.drained),
5703 "MASTER_CAPABLE": str(self.op.master_capable),
5704 "VM_CAPABLE": str(self.op.vm_capable),
5707 def BuildHooksNodes(self):
5708 """Build hooks nodes.
5711 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5714 def CheckPrereq(self):
5715 """Check prerequisites.
5717 This only checks the instance list against the existing names.
5720 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5722 if self.lock_instances:
5723 affected_instances = \
5724 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5726 # Verify instance locks
5727 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5728 wanted_instances = frozenset(affected_instances.keys())
5729 if wanted_instances - owned_instances:
5730 raise errors.OpPrereqError("Instances affected by changing node %s's"
5731 " secondary IP address have changed since"
5732 " locks were acquired, wanted '%s', have"
5733 " '%s'; retry the operation" %
5735 utils.CommaJoin(wanted_instances),
5736 utils.CommaJoin(owned_instances)),
5739 affected_instances = None
5741 if (self.op.master_candidate is not None or
5742 self.op.drained is not None or
5743 self.op.offline is not None):
5744 # we can't change the master's node flags
5745 if self.op.node_name == self.cfg.GetMasterNode():
5746 raise errors.OpPrereqError("The master role can be changed"
5747 " only via master-failover",
5750 if self.op.master_candidate and not node.master_capable:
5751 raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5752 " it a master candidate" % node.name,
5755 if self.op.vm_capable == False:
5756 (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5758 raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5759 " the vm_capable flag" % node.name,
5762 if node.master_candidate and self.might_demote and not self.lock_all:
5763 assert not self.op.auto_promote, "auto_promote set but lock_all not"
5764 # check if after removing the current node, we're missing master
5766 (mc_remaining, mc_should, _) = \
5767 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5768 if mc_remaining < mc_should:
5769 raise errors.OpPrereqError("Not enough master candidates, please"
5770 " pass auto promote option to allow"
5771 " promotion", errors.ECODE_STATE)
5773 self.old_flags = old_flags = (node.master_candidate,
5774 node.drained, node.offline)
5775 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5776 self.old_role = old_role = self._F2R[old_flags]
5778 # Check for ineffective changes
5779 for attr in self._FLAGS:
5780 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5781 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5782 setattr(self.op, attr, None)
5784 # Past this point, any flag change to False means a transition
5785 # away from the respective state, as only real changes are kept
5787 # TODO: We might query the real power state if it supports OOB
5788 if _SupportsOob(self.cfg, node):
5789 if self.op.offline is False and not (node.powered or
5790 self.op.powered == True):
5791 raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5792 " offline status can be reset") %
5794 elif self.op.powered is not None:
5795 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5796 " as it does not support out-of-band"
5797 " handling") % self.op.node_name)
5799 # If we're being de-offlined or un-drained, we'll promote ourselves to MC if needed
5800 if (self.op.drained == False or self.op.offline == False or
5801 (self.op.master_capable and not node.master_capable)):
5802 if _DecideSelfPromotion(self):
5803 self.op.master_candidate = True
5804 self.LogInfo("Auto-promoting node to master candidate")
5806 # If we're no longer master capable, we'll demote ourselves from MC
5807 if self.op.master_capable == False and node.master_candidate:
5808 self.LogInfo("Demoting from master candidate")
5809 self.op.master_candidate = False
5812 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5813 if self.op.master_candidate:
5814 new_role = self._ROLE_CANDIDATE
5815 elif self.op.drained:
5816 new_role = self._ROLE_DRAINED
5817 elif self.op.offline:
5818 new_role = self._ROLE_OFFLINE
5819 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5820 # False is still in new flags, which means we're un-setting (the
5821 # offline/drained/master-candidate flag)
5822 new_role = self._ROLE_REGULAR
5823 else: # no new flags, nothing, keep old role
5826 self.new_role = new_role
5828 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5829 # Trying to transition out of offline status
5830 # TODO: Use standard RPC runner, but make sure it works when the node is
5831 # still marked offline
5832 result = rpc.BootstrapRunner().call_version([node.name])[node.name]
5834 raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5835 " to report its version: %s" %
5836 (node.name, result.fail_msg),
5839 self.LogWarning("Transitioning node from offline to online state"
5840 " without using re-add. Please make sure the node"
5843 if self.op.secondary_ip:
5844 # Ok even without locking, because this can't be changed by any LU
5845 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5846 master_singlehomed = master.secondary_ip == master.primary_ip
5847 if master_singlehomed and self.op.secondary_ip:
5848 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5849 " homed cluster", errors.ECODE_INVAL)
5851 assert not (frozenset(affected_instances) -
5852 self.owned_locks(locking.LEVEL_INSTANCE))
5855 if affected_instances:
5856 raise errors.OpPrereqError("Cannot change secondary IP address:"
5857 " offline node has instances (%s)"
5858 " configured to use it" %
5859 utils.CommaJoin(affected_instances.keys()))
5861 # On online nodes, check that no instances are running, and that
5862 # the node has the new ip and we can reach it.
5863 for instance in affected_instances.values():
5864 _CheckInstanceState(self, instance, INSTANCE_DOWN,
5865 msg="cannot change secondary ip")
5867 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5868 if master.name != node.name:
5869 # check reachability from master secondary ip to new secondary ip
5870 if not netutils.TcpPing(self.op.secondary_ip,
5871 constants.DEFAULT_NODED_PORT,
5872 source=master.secondary_ip):
5873 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5874 " based ping to node daemon port",
5875 errors.ECODE_ENVIRON)
5877 if self.op.ndparams:
5878 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5879 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5880 self.new_ndparams = new_ndparams
5882 if self.op.hv_state:
5883 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
5884 self.node.hv_state_static)
5886 if self.op.disk_state:
5887 self.new_disk_state = \
5888 _MergeAndVerifyDiskState(self.op.disk_state,
5889 self.node.disk_state_static)
5891 def Exec(self, feedback_fn):
5896 old_role = self.old_role
5897 new_role = self.new_role
5901 if self.op.ndparams:
5902 node.ndparams = self.new_ndparams
5904 if self.op.powered is not None:
5905 node.powered = self.op.powered
5907 if self.op.hv_state:
5908 node.hv_state_static = self.new_hv_state
5910 if self.op.disk_state:
5911 node.disk_state_static = self.new_disk_state
5913 for attr in ["master_capable", "vm_capable"]:
5914 val = getattr(self.op, attr)
5915 if val is not None:
5916 setattr(node, attr, val)
5917 result.append((attr, str(val)))
5919 if new_role != old_role:
5920 # Tell the node to demote itself, if no longer MC and not offline
5921 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5922 msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5923 if msg:
5924 self.LogWarning("Node failed to demote itself: %s", msg)
5926 new_flags = self._R2F[new_role]
5927 for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5928 if of != nf:
5929 result.append((desc, str(nf)))
5930 (node.master_candidate, node.drained, node.offline) = new_flags
5932 # we locked all nodes, we adjust the CP before updating this node
5933 if self.lock_all:
5934 _AdjustCandidatePool(self, [node.name])
5936 if self.op.secondary_ip:
5937 node.secondary_ip = self.op.secondary_ip
5938 result.append(("secondary_ip", self.op.secondary_ip))
5940 # this will trigger configuration file update, if needed
5941 self.cfg.Update(node, feedback_fn)
5943 # this will trigger job queue propagation or cleanup if the mc
5944 # flag changed
5945 if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5946 self.context.ReaddNode(node)
5951 class LUNodePowercycle(NoHooksLU):
5952 """Powercycles a node.
5957 def CheckArguments(self):
5958 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5959 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5960 raise errors.OpPrereqError("The node is the master and the force"
5961 " parameter was not set",
5964 def ExpandNames(self):
5965 """Locking for PowercycleNode.
5967 This is a last-resort option and shouldn't block on other
5968 jobs. Therefore, we grab no locks.
5971 self.needed_locks = {}
5973 def Exec(self, feedback_fn):
5977 result = self.rpc.call_node_powercycle(self.op.node_name,
5978 self.cfg.GetHypervisorType())
5979 result.Raise("Failed to schedule the reboot")
5980 return result.payload
5983 class LUClusterQuery(NoHooksLU):
5984 """Query cluster configuration.
5989 def ExpandNames(self):
5990 self.needed_locks = {}
5992 def Exec(self, feedback_fn):
5993 """Return cluster config.
5996 cluster = self.cfg.GetClusterInfo()
5999 # Filter just for enabled hypervisors
6000 for os_name, hv_dict in cluster.os_hvp.items():
6001 os_hvp[os_name] = {}
6002 for hv_name, hv_params in hv_dict.items():
6003 if hv_name in cluster.enabled_hypervisors:
6004 os_hvp[os_name][hv_name] = hv_params
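# The nested loops above are equivalent to a single dict expression; a
# sketch, assuming the same cluster object:
#
#   os_hvp = dict((os_name,
#                  dict((hv, params) for hv, params in hv_dict.items()
#                       if hv in cluster.enabled_hypervisors))
#                 for os_name, hv_dict in cluster.os_hvp.items())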
6006 # Convert ip_family to ip_version
6007 primary_ip_version = constants.IP4_VERSION
6008 if cluster.primary_ip_family == netutils.IP6Address.family:
6009 primary_ip_version = constants.IP6_VERSION
6012 "software_version": constants.RELEASE_VERSION,
6013 "protocol_version": constants.PROTOCOL_VERSION,
6014 "config_version": constants.CONFIG_VERSION,
6015 "os_api_version": max(constants.OS_API_VERSIONS),
6016 "export_version": constants.EXPORT_VERSION,
6017 "architecture": (platform.architecture()[0], platform.machine()),
6018 "name": cluster.cluster_name,
6019 "master": cluster.master_node,
6020 "default_hypervisor": cluster.primary_hypervisor,
6021 "enabled_hypervisors": cluster.enabled_hypervisors,
6022 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6023 for hypervisor_name in cluster.enabled_hypervisors]),
6025 "beparams": cluster.beparams,
6026 "osparams": cluster.osparams,
6027 "ipolicy": cluster.ipolicy,
6028 "nicparams": cluster.nicparams,
6029 "ndparams": cluster.ndparams,
6030 "candidate_pool_size": cluster.candidate_pool_size,
6031 "master_netdev": cluster.master_netdev,
6032 "master_netmask": cluster.master_netmask,
6033 "use_external_mip_script": cluster.use_external_mip_script,
6034 "volume_group_name": cluster.volume_group_name,
6035 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6036 "file_storage_dir": cluster.file_storage_dir,
6037 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6038 "maintain_node_health": cluster.maintain_node_health,
6039 "ctime": cluster.ctime,
6040 "mtime": cluster.mtime,
6041 "uuid": cluster.uuid,
6042 "tags": list(cluster.GetTags()),
6043 "uid_pool": cluster.uid_pool,
6044 "default_iallocator": cluster.default_iallocator,
6045 "reserved_lvs": cluster.reserved_lvs,
6046 "primary_ip_version": primary_ip_version,
6047 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6048 "hidden_os": cluster.hidden_os,
6049 "blacklisted_os": cluster.blacklisted_os,
6055 class LUClusterConfigQuery(NoHooksLU):
6056 """Return configuration values.
6060 _FIELDS_DYNAMIC = utils.FieldSet()
6061 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
6062 "watcher_pause", "volume_group_name")
6064 def CheckArguments(self):
6065 _CheckOutputFields(static=self._FIELDS_STATIC,
6066 dynamic=self._FIELDS_DYNAMIC,
6067 selected=self.op.output_fields)
6069 def ExpandNames(self):
6070 self.needed_locks = {}
6072 def Exec(self, feedback_fn):
6073 """Dump a representation of the cluster config to the standard output.
6077 for field in self.op.output_fields:
6078 if field == "cluster_name":
6079 entry = self.cfg.GetClusterName()
6080 elif field == "master_node":
6081 entry = self.cfg.GetMasterNode()
6082 elif field == "drain_flag":
6083 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
6084 elif field == "watcher_pause":
6085 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
6086 elif field == "volume_group_name":
6087 entry = self.cfg.GetVGName()
6089 raise errors.ParameterError(field)
6090 values.append(entry)
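# A table-driven sketch of the dispatch above (illustrative only; the
# drain_flag and watcher_pause fields would need small wrapper lambdas):
#
#   getters = {
#     "cluster_name": self.cfg.GetClusterName,
#     "master_node": self.cfg.GetMasterNode,
#     "volume_group_name": self.cfg.GetVGName,
#   }
#   entry = getters[field]()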
6094 class LUInstanceActivateDisks(NoHooksLU):
6095 """Bring up an instance's disks.
6100 def ExpandNames(self):
6101 self._ExpandAndLockInstance()
6102 self.needed_locks[locking.LEVEL_NODE] = []
6103 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6105 def DeclareLocks(self, level):
6106 if level == locking.LEVEL_NODE:
6107 self._LockInstancesNodes()
6109 def CheckPrereq(self):
6110 """Check prerequisites.
6112 This checks that the instance is in the cluster.
6115 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6116 assert self.instance is not None, \
6117 "Cannot retrieve locked instance %s" % self.op.instance_name
6118 _CheckNodeOnline(self, self.instance.primary_node)
6120 def Exec(self, feedback_fn):
6121 """Activate the disks.
6124 disks_ok, disks_info = \
6125 _AssembleInstanceDisks(self, self.instance,
6126 ignore_size=self.op.ignore_size)
6128 raise errors.OpExecError("Cannot activate block devices")
6133 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
6135 """Prepare the block devices for an instance.
6137 This sets up the block devices on all nodes.
6139 @type lu: L{LogicalUnit}
6140 @param lu: the logical unit on whose behalf we execute
6141 @type instance: L{objects.Instance}
6142 @param instance: the instance for whose disks we assemble
6143 @type disks: list of L{objects.Disk} or None
6144 @param disks: which disks to assemble (or all, if None)
6145 @type ignore_secondaries: boolean
6146 @param ignore_secondaries: if true, errors on secondary nodes
6147 won't result in an error return from the function
6148 @type ignore_size: boolean
6149 @param ignore_size: if true, the current known size of the disk
6150 will not be used during the disk activation, useful for cases
6151 when the size is wrong
6152 @return: a tuple of (disks_ok, device_info); device_info is a list of
6153 (host, instance_visible_name, node_visible_name) triples
6154 with the mapping from node devices to instance devices
6159 iname = instance.name
6160 disks = _ExpandCheckDisks(instance, disks)
6162 # With the two-pass mechanism we try to reduce the window of
6163 # opportunity for the race condition of switching DRBD to primary
6164 # before the handshake has occurred, but we do not eliminate it
6166 # The proper fix would be to wait (with some limits) until the
6167 # connection has been made and drbd transitions from WFConnection
6168 # into any other network-connected state (Connected, SyncTarget,
6169 # SyncSource, etc.)
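# A comment-only sketch of that proper fix, with hypothetical helper
# names (_GetDrbdNetState and max_wait are not an existing API in this
# module, just illustration):
#
#   deadline = time.time() + max_wait
#   while time.time() < deadline:
#     if _GetDrbdNetState(node, node_disk) != "WFConnection":
#       break
#     time.sleep(1)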
6171 # 1st pass, assemble on all nodes in secondary mode
6172 for idx, inst_disk in enumerate(disks):
6173 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6175 node_disk = node_disk.Copy()
6176 node_disk.UnsetSize()
6177 lu.cfg.SetDiskID(node_disk, node)
6178 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
6179 msg = result.fail_msg
6180 if msg:
6181 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6182 " (is_primary=False, pass=1): %s",
6183 inst_disk.iv_name, node, msg)
6184 if not ignore_secondaries:
6187 # FIXME: race condition on drbd migration to primary
6189 # 2nd pass, do only the primary node
6190 for idx, inst_disk in enumerate(disks):
6193 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
6194 if node != instance.primary_node:
6197 node_disk = node_disk.Copy()
6198 node_disk.UnsetSize()
6199 lu.cfg.SetDiskID(node_disk, node)
6200 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
6201 msg = result.fail_msg
6202 if msg:
6203 lu.proc.LogWarning("Could not prepare block device %s on node %s"
6204 " (is_primary=True, pass=2): %s",
6205 inst_disk.iv_name, node, msg)
6208 dev_path = result.payload
6210 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6212 # leave the disks configured for the primary node
6213 # this is a workaround that would be fixed better by
6214 # improving the logical/physical id handling
6215 for disk in disks:
6216 lu.cfg.SetDiskID(disk, instance.primary_node)
6218 return disks_ok, device_info
6221 def _StartInstanceDisks(lu, instance, force):
6222 """Start the disks of an instance.
6225 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6226 ignore_secondaries=force)
6228 _ShutdownInstanceDisks(lu, instance)
6229 if force is not None and not force:
6230 lu.proc.LogWarning("", hint="If the message above refers to a"
6232 " you can retry the operation using '--force'.")
6233 raise errors.OpExecError("Disk consistency error")
6236 class LUInstanceDeactivateDisks(NoHooksLU):
6237 """Shutdown an instance's disks.
6242 def ExpandNames(self):
6243 self._ExpandAndLockInstance()
6244 self.needed_locks[locking.LEVEL_NODE] = []
6245 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6247 def DeclareLocks(self, level):
6248 if level == locking.LEVEL_NODE:
6249 self._LockInstancesNodes()
6251 def CheckPrereq(self):
6252 """Check prerequisites.
6254 This checks that the instance is in the cluster.
6257 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6258 assert self.instance is not None, \
6259 "Cannot retrieve locked instance %s" % self.op.instance_name
6261 def Exec(self, feedback_fn):
6262 """Deactivate the disks
6265 instance = self.instance
6267 _ShutdownInstanceDisks(self, instance)
6269 _SafeShutdownInstanceDisks(self, instance)
6272 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6273 """Shutdown block devices of an instance.
6275 This function checks that the instance is not running before calling
6276 _ShutdownInstanceDisks.
6279 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6280 _ShutdownInstanceDisks(lu, instance, disks=disks)
6283 def _ExpandCheckDisks(instance, disks):
6284 """Return the instance disks selected by the disks list
6286 @type disks: list of L{objects.Disk} or None
6287 @param disks: selected disks
6288 @rtype: list of L{objects.Disk}
6289 @return: selected instance disks to act on
6293 return instance.disks
6295 if not set(disks).issubset(instance.disks):
6296 raise errors.ProgrammerError("Can only act on disks belonging to the"
6301 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6302 """Shutdown block devices of an instance.
6304 This does the shutdown on all nodes of the instance.
6306 If ignore_primary is true, errors on the primary node are
6307 ignored.
6311 disks = _ExpandCheckDisks(instance, disks)
6313 for disk in disks:
6314 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6315 lu.cfg.SetDiskID(top_disk, node)
6316 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
6317 msg = result.fail_msg
6318 if msg:
6319 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6320 disk.iv_name, node, msg)
6321 if ((node == instance.primary_node and not ignore_primary) or
6322 (node != instance.primary_node and not result.offline)):
6327 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6328 """Checks if a node has enough free memory.
6330 This function checks if a given node has the needed amount of free
6331 memory. In case the node has less memory or we cannot get the
6332 information from the node, this function raises an OpPrereqError
6335 @type lu: C{LogicalUnit}
6336 @param lu: a logical unit from which we get configuration data
6338 @param node: the node to check
6339 @type reason: C{str}
6340 @param reason: string to use in the error message
6341 @type requested: C{int}
6342 @param requested: the amount of memory in MiB to check for
6343 @type hypervisor_name: C{str}
6344 @param hypervisor_name: the hypervisor to ask for memory stats
6345 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6346 we cannot check the node
6349 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6350 nodeinfo[node].Raise("Can't get data from node %s" % node,
6351 prereq=True, ecode=errors.ECODE_ENVIRON)
6352 (_, _, (hv_info, )) = nodeinfo[node].payload
6354 free_mem = hv_info.get("memory_free", None)
6355 if not isinstance(free_mem, int):
6356 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6357 " was '%s'" % (node, free_mem),
6358 errors.ECODE_ENVIRON)
6359 if requested > free_mem:
6360 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6361 " needed %s MiB, available %s MiB" %
6362 (node, reason, requested, free_mem),
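# Example of the data consumed above (illustrative values): the node's
# payload is unpacked as (_, _, (hv_info, )), and with
#
#   hv_info = {"memory_free": 2048}
#
# a call with requested=4096 raises OpPrereqError, since 4096 > 2048.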
6366 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6367 """Checks if nodes have enough free disk space in the all VGs.
6369 This function check if all given nodes have the needed amount of
6370 free disk. In case any node has less disk or we cannot get the
6371 information from the node, this function raise an OpPrereqError
6374 @type lu: C{LogicalUnit}
6375 @param lu: a logical unit from which we get configuration data
6376 @type nodenames: C{list}
6377 @param nodenames: the list of node names to check
6378 @type req_sizes: C{dict}
6379 @param req_sizes: the hash of vg and corresponding amount of disk in
6380 MiB to check for
6381 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6382 or we cannot check the node
6385 for vg, req_size in req_sizes.items():
6386 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
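# Example call from within an LU (hypothetical VG names): require 10 GiB
# free in "xenvg" and 1 GiB free in "backupvg" on every node in nodenames:
#
#   _CheckNodesFreeDiskPerVG(self, nodenames,
#                            {"xenvg": 10240, "backupvg": 1024})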
6389 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6390 """Checks if nodes have enough free disk space in the specified VG.
6392 This function checks if all given nodes have the needed amount of
6393 free disk. In case any node has less disk or we cannot get the
6394 information from the node, this function raises an OpPrereqError
6397 @type lu: C{LogicalUnit}
6398 @param lu: a logical unit from which we get configuration data
6399 @type nodenames: C{list}
6400 @param nodenames: the list of node names to check
6402 @param vg: the volume group to check
6403 @type requested: C{int}
6404 @param requested: the amount of disk in MiB to check for
6405 @raise errors.OpPrereqError: if the node doesn't have enough disk,
6406 or we cannot check the node
6409 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6410 for node in nodenames:
6411 info = nodeinfo[node]
6412 info.Raise("Cannot get current information from node %s" % node,
6413 prereq=True, ecode=errors.ECODE_ENVIRON)
6414 (_, (vg_info, ), _) = info.payload
6415 vg_free = vg_info.get("vg_free", None)
6416 if not isinstance(vg_free, int):
6417 raise errors.OpPrereqError("Can't compute free disk space on node"
6418 " %s for vg %s, result was '%s'" %
6419 (node, vg, vg_free), errors.ECODE_ENVIRON)
6420 if requested > vg_free:
6421 raise errors.OpPrereqError("Not enough disk space on target node %s"
6422 " vg %s: required %d MiB, available %d MiB" %
6423 (node, vg, requested, vg_free),
6427 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6428 """Checks if nodes have enough physical CPUs
6430 This function checks if all given nodes have the needed number of
6431 physical CPUs. In case any node has fewer CPUs or we cannot get the
6432 information from the node, this function raises an OpPrereqError
6435 @type lu: C{LogicalUnit}
6436 @param lu: a logical unit from which we get configuration data
6437 @type nodenames: C{list}
6438 @param nodenames: the list of node names to check
6439 @type requested: C{int}
6440 @param requested: the minimum acceptable number of physical CPUs
6441 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6442 or we cannot check the node
6445 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6446 for node in nodenames:
6447 info = nodeinfo[node]
6448 info.Raise("Cannot get current information from node %s" % node,
6449 prereq=True, ecode=errors.ECODE_ENVIRON)
6450 (_, _, (hv_info, )) = info.payload
6451 num_cpus = hv_info.get("cpu_total", None)
6452 if not isinstance(num_cpus, int):
6453 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6454 " on node %s, result was '%s'" %
6455 (node, num_cpus), errors.ECODE_ENVIRON)
6456 if requested > num_cpus:
6457 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6458 "required" % (node, num_cpus, requested),
6462 class LUInstanceStartup(LogicalUnit):
6463 """Starts an instance.
6466 HPATH = "instance-start"
6467 HTYPE = constants.HTYPE_INSTANCE
6470 def CheckArguments(self):
6472 if self.op.beparams:
6473 # fill the beparams dict
6474 objects.UpgradeBeParams(self.op.beparams)
6475 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6477 def ExpandNames(self):
6478 self._ExpandAndLockInstance()
6480 def BuildHooksEnv(self):
6483 This runs on master, primary and secondary nodes of the instance.
6487 "FORCE": self.op.force,
6490 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6494 def BuildHooksNodes(self):
6495 """Build hooks nodes.
6498 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6501 def CheckPrereq(self):
6502 """Check prerequisites.
6504 This checks that the instance is in the cluster.
6507 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6508 assert self.instance is not None, \
6509 "Cannot retrieve locked instance %s" % self.op.instance_name
6512 if self.op.hvparams:
6513 # check hypervisor parameter syntax (locally)
6514 cluster = self.cfg.GetClusterInfo()
6515 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6516 filled_hvp = cluster.FillHV(instance)
6517 filled_hvp.update(self.op.hvparams)
6518 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6519 hv_type.CheckParameterSyntax(filled_hvp)
6520 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6522 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6524 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6526 if self.primary_offline and self.op.ignore_offline_nodes:
6527 self.proc.LogWarning("Ignoring offline primary node")
6529 if self.op.hvparams or self.op.beparams:
6530 self.proc.LogWarning("Overridden parameters are ignored")
6532 _CheckNodeOnline(self, instance.primary_node)
6534 bep = self.cfg.GetClusterInfo().FillBE(instance)
6535 bep.update(self.op.beparams)
6537 # check bridges existence
6538 _CheckInstanceBridgesExist(self, instance)
6540 remote_info = self.rpc.call_instance_info(instance.primary_node,
6541 instance.name,
6542 instance.hypervisor)
6543 remote_info.Raise("Error checking node %s" % instance.primary_node,
6544 prereq=True, ecode=errors.ECODE_ENVIRON)
6545 if not remote_info.payload: # not running already
6546 _CheckNodeFreeMemory(self, instance.primary_node,
6547 "starting instance %s" % instance.name,
6548 bep[constants.BE_MAXMEM], instance.hypervisor)
6550 def Exec(self, feedback_fn):
6551 """Start the instance.
6554 instance = self.instance
6555 force = self.op.force
6557 if not self.op.no_remember:
6558 self.cfg.MarkInstanceUp(instance.name)
6560 if self.primary_offline:
6561 assert self.op.ignore_offline_nodes
6562 self.proc.LogInfo("Primary node offline, marked instance as started")
6564 node_current = instance.primary_node
6566 _StartInstanceDisks(self, instance, force)
6568 result = \
6569 self.rpc.call_instance_start(node_current,
6570 (instance, self.op.hvparams,
6571 self.op.beparams),
6572 self.op.startup_paused)
6573 msg = result.fail_msg
6574 if msg:
6575 _ShutdownInstanceDisks(self, instance)
6576 raise errors.OpExecError("Could not start instance: %s" % msg)
6579 class LUInstanceReboot(LogicalUnit):
6580 """Reboot an instance.
6583 HPATH = "instance-reboot"
6584 HTYPE = constants.HTYPE_INSTANCE
6587 def ExpandNames(self):
6588 self._ExpandAndLockInstance()
6590 def BuildHooksEnv(self):
6593 This runs on master, primary and secondary nodes of the instance.
6597 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6598 "REBOOT_TYPE": self.op.reboot_type,
6599 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6602 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6606 def BuildHooksNodes(self):
6607 """Build hooks nodes.
6610 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6613 def CheckPrereq(self):
6614 """Check prerequisites.
6616 This checks that the instance is in the cluster.
6619 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6620 assert self.instance is not None, \
6621 "Cannot retrieve locked instance %s" % self.op.instance_name
6622 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6623 _CheckNodeOnline(self, instance.primary_node)
6625 # check bridges existence
6626 _CheckInstanceBridgesExist(self, instance)
6628 def Exec(self, feedback_fn):
6629 """Reboot the instance.
6632 instance = self.instance
6633 ignore_secondaries = self.op.ignore_secondaries
6634 reboot_type = self.op.reboot_type
6636 remote_info = self.rpc.call_instance_info(instance.primary_node,
6637 instance.name,
6638 instance.hypervisor)
6639 remote_info.Raise("Error checking node %s" % instance.primary_node)
6640 instance_running = bool(remote_info.payload)
6642 node_current = instance.primary_node
6644 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6645 constants.INSTANCE_REBOOT_HARD]:
6646 for disk in instance.disks:
6647 self.cfg.SetDiskID(disk, node_current)
6648 result = self.rpc.call_instance_reboot(node_current, instance,
6649 reboot_type,
6650 self.op.shutdown_timeout)
6651 result.Raise("Could not reboot instance")
6653 if instance_running:
6654 result = self.rpc.call_instance_shutdown(node_current, instance,
6655 self.op.shutdown_timeout)
6656 result.Raise("Could not shutdown instance for full reboot")
6657 _ShutdownInstanceDisks(self, instance)
6659 self.LogInfo("Instance %s was already stopped, starting now",
6661 _StartInstanceDisks(self, instance, ignore_secondaries)
6662 result = self.rpc.call_instance_start(node_current,
6663 (instance, None, None), False)
6664 msg = result.fail_msg
6665 if msg:
6666 _ShutdownInstanceDisks(self, instance)
6667 raise errors.OpExecError("Could not start instance for"
6668 " full reboot: %s" % msg)
6670 self.cfg.MarkInstanceUp(instance.name)
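# Summary of the paths above: SOFT/HARD reboots of a running instance are
# delegated to the node daemon via call_instance_reboot; any other case
# falls back to an explicit shutdown/start cycle, tearing down and
# reassembling the disks in between.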
6673 class LUInstanceShutdown(LogicalUnit):
6674 """Shutdown an instance.
6677 HPATH = "instance-stop"
6678 HTYPE = constants.HTYPE_INSTANCE
6681 def ExpandNames(self):
6682 self._ExpandAndLockInstance()
6684 def BuildHooksEnv(self):
6687 This runs on master, primary and secondary nodes of the instance.
6690 env = _BuildInstanceHookEnvByObject(self, self.instance)
6691 env["TIMEOUT"] = self.op.timeout
6694 def BuildHooksNodes(self):
6695 """Build hooks nodes.
6698 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6701 def CheckPrereq(self):
6702 """Check prerequisites.
6704 This checks that the instance is in the cluster.
6707 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6708 assert self.instance is not None, \
6709 "Cannot retrieve locked instance %s" % self.op.instance_name
6711 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6713 self.primary_offline = \
6714 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6716 if self.primary_offline and self.op.ignore_offline_nodes:
6717 self.proc.LogWarning("Ignoring offline primary node")
6719 _CheckNodeOnline(self, self.instance.primary_node)
6721 def Exec(self, feedback_fn):
6722 """Shutdown the instance.
6725 instance = self.instance
6726 node_current = instance.primary_node
6727 timeout = self.op.timeout
6729 if not self.op.no_remember:
6730 self.cfg.MarkInstanceDown(instance.name)
6732 if self.primary_offline:
6733 assert self.op.ignore_offline_nodes
6734 self.proc.LogInfo("Primary node offline, marked instance as stopped")
6736 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6737 msg = result.fail_msg
6738 if msg:
6739 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6741 _ShutdownInstanceDisks(self, instance)
6744 class LUInstanceReinstall(LogicalUnit):
6745 """Reinstall an instance.
6748 HPATH = "instance-reinstall"
6749 HTYPE = constants.HTYPE_INSTANCE
6752 def ExpandNames(self):
6753 self._ExpandAndLockInstance()
6755 def BuildHooksEnv(self):
6758 This runs on master, primary and secondary nodes of the instance.
6761 return _BuildInstanceHookEnvByObject(self, self.instance)
6763 def BuildHooksNodes(self):
6764 """Build hooks nodes.
6767 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6770 def CheckPrereq(self):
6771 """Check prerequisites.
6773 This checks that the instance is in the cluster and is not running.
6776 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6777 assert instance is not None, \
6778 "Cannot retrieve locked instance %s" % self.op.instance_name
6779 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6780 " offline, cannot reinstall")
6781 for node in instance.secondary_nodes:
6782 _CheckNodeOnline(self, node, "Instance secondary node offline,"
6783 " cannot reinstall")
6785 if instance.disk_template == constants.DT_DISKLESS:
6786 raise errors.OpPrereqError("Instance '%s' has no disks" %
6787 self.op.instance_name,
6789 _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
6791 if self.op.os_type is not None:
6793 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6794 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6795 instance_os = self.op.os_type
6797 instance_os = instance.os
6799 nodelist = list(instance.all_nodes)
6801 if self.op.osparams:
6802 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6803 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6804 self.os_inst = i_osdict # the new dict (without defaults)
6808 self.instance = instance
6810 def Exec(self, feedback_fn):
6811 """Reinstall the instance.
6814 inst = self.instance
6816 if self.op.os_type is not None:
6817 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6818 inst.os = self.op.os_type
6819 # Write to configuration
6820 self.cfg.Update(inst, feedback_fn)
6822 _StartInstanceDisks(self, inst, None)
6824 feedback_fn("Running the instance OS create scripts...")
6825 # FIXME: pass debug option from opcode to backend
6826 result = self.rpc.call_instance_os_add(inst.primary_node,
6827 (inst, self.os_inst), True,
6828 self.op.debug_level)
6829 result.Raise("Could not install OS for instance %s on node %s" %
6830 (inst.name, inst.primary_node))
6832 _ShutdownInstanceDisks(self, inst)
6835 class LUInstanceRecreateDisks(LogicalUnit):
6836 """Recreate an instance's missing disks.
6839 HPATH = "instance-recreate-disks"
6840 HTYPE = constants.HTYPE_INSTANCE
6843 def CheckArguments(self):
6844 # normalise the disk list
6845 self.op.disks = sorted(frozenset(self.op.disks))
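# sorted(frozenset(...)) both de-duplicates and orders the user-supplied
# disk indices, e.g. [2, 0, 2] -> [0, 2].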
6847 def ExpandNames(self):
6848 self._ExpandAndLockInstance()
6849 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6851 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6852 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6854 self.needed_locks[locking.LEVEL_NODE] = []
6856 def DeclareLocks(self, level):
6857 if level == locking.LEVEL_NODE:
6858 # if we replace the nodes, we only need to lock the old primary,
6859 # otherwise we need to lock all nodes for disk re-creation
6860 primary_only = bool(self.op.nodes)
6861 self._LockInstancesNodes(primary_only=primary_only)
6862 elif level == locking.LEVEL_NODE_RES:
6864 self.needed_locks[locking.LEVEL_NODE_RES] = \
6865 self.needed_locks[locking.LEVEL_NODE][:]
6867 def BuildHooksEnv(self):
6870 This runs on master, primary and secondary nodes of the instance.
6873 return _BuildInstanceHookEnvByObject(self, self.instance)
6875 def BuildHooksNodes(self):
6876 """Build hooks nodes.
6879 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6882 def CheckPrereq(self):
6883 """Check prerequisites.
6885 This checks that the instance is in the cluster and is not running.
6888 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6889 assert instance is not None, \
6890 "Cannot retrieve locked instance %s" % self.op.instance_name
6892 if len(self.op.nodes) != len(instance.all_nodes):
6893 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6894 " %d replacement nodes were specified" %
6895 (instance.name, len(instance.all_nodes),
6896 len(self.op.nodes)),
6898 assert instance.disk_template != constants.DT_DRBD8 or \
6899 len(self.op.nodes) == 2
6900 assert instance.disk_template != constants.DT_PLAIN or \
6901 len(self.op.nodes) == 1
6902 primary_node = self.op.nodes[0]
6904 primary_node = instance.primary_node
6905 _CheckNodeOnline(self, primary_node)
6907 if instance.disk_template == constants.DT_DISKLESS:
6908 raise errors.OpPrereqError("Instance '%s' has no disks" %
6909 self.op.instance_name, errors.ECODE_INVAL)
6910 # if we replace nodes *and* the old primary is offline, we don't
6912 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
6913 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
6914 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6915 if not (self.op.nodes and old_pnode.offline):
6916 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
6917 msg="cannot recreate disks")
6919 if not self.op.disks:
6920 self.op.disks = range(len(instance.disks))
6922 for idx in self.op.disks:
6923 if idx >= len(instance.disks):
6924 raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6926 if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6927 raise errors.OpPrereqError("Can't recreate disks partially and"
6928 " change the nodes at the same time",
6930 self.instance = instance
6932 def Exec(self, feedback_fn):
6933 """Recreate the disks.
6936 instance = self.instance
6938 assert (self.owned_locks(locking.LEVEL_NODE) ==
6939 self.owned_locks(locking.LEVEL_NODE_RES))
6942 mods = [] # keeps track of needed logical_id changes
6944 for idx, disk in enumerate(instance.disks):
6945 if idx not in self.op.disks: # disk idx has not been passed in
6948 # update secondaries for disks, if needed
6950 if disk.dev_type == constants.LD_DRBD8:
6951 # need to update the nodes and minors
6952 assert len(self.op.nodes) == 2
6953 assert len(disk.logical_id) == 6 # otherwise disk internals
6955 (_, _, old_port, _, _, old_secret) = disk.logical_id
6956 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6957 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6958 new_minors[0], new_minors[1], old_secret)
6959 assert len(disk.logical_id) == len(new_id)
6960 mods.append((idx, new_id))
6962 # now that we have passed all asserts above, we can apply the mods
6963 # in a single run (to avoid partial changes)
6964 for idx, new_id in mods:
6965 instance.disks[idx].logical_id = new_id
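# For reference: the DRBD8 logical_id handled above is the 6-tuple
# (node_a, node_b, port, minor_a, minor_b, secret); only the nodes and
# minors are replaced, while the port and shared secret are kept.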
6967 # change primary node, if needed
6969 instance.primary_node = self.op.nodes[0]
6970 self.LogWarning("Changing the instance's nodes, you will have to"
6971 " remove any disks left on the older nodes manually")
6974 self.cfg.Update(instance, feedback_fn)
6976 _CreateDisks(self, instance, to_skip=to_skip)
6979 class LUInstanceRename(LogicalUnit):
6980 """Rename an instance.
6983 HPATH = "instance-rename"
6984 HTYPE = constants.HTYPE_INSTANCE
6986 def CheckArguments(self):
6990 if self.op.ip_check and not self.op.name_check:
6991 # TODO: make the ip check more flexible and not depend on the name check
6992 raise errors.OpPrereqError("IP address check requires a name check",
6995 def BuildHooksEnv(self):
6998 This runs on master, primary and secondary nodes of the instance.
7001 env = _BuildInstanceHookEnvByObject(self, self.instance)
7002 env["INSTANCE_NEW_NAME"] = self.op.new_name
7005 def BuildHooksNodes(self):
7006 """Build hooks nodes.
7009 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7012 def CheckPrereq(self):
7013 """Check prerequisites.
7015 This checks that the instance is in the cluster and is not running.
7018 self.op.instance_name = _ExpandInstanceName(self.cfg,
7019 self.op.instance_name)
7020 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7021 assert instance is not None
7022 _CheckNodeOnline(self, instance.primary_node)
7023 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7024 msg="cannot rename")
7025 self.instance = instance
7027 new_name = self.op.new_name
7028 if self.op.name_check:
7029 hostname = netutils.GetHostname(name=new_name)
7030 if hostname.name != new_name:
7031 self.LogInfo("Resolved given name '%s' to '%s'", new_name,
7033 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
7034 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
7035 " same as given hostname '%s'") %
7036 (hostname.name, self.op.new_name),
7038 new_name = self.op.new_name = hostname.name
7039 if (self.op.ip_check and
7040 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7041 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7042 (hostname.ip, new_name),
7043 errors.ECODE_NOTUNIQUE)
7045 instance_list = self.cfg.GetInstanceList()
7046 if new_name in instance_list and new_name != instance.name:
7047 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7048 new_name, errors.ECODE_EXISTS)
7050 def Exec(self, feedback_fn):
7051 """Rename the instance.
7054 inst = self.instance
7055 old_name = inst.name
7057 rename_file_storage = False
7058 if (inst.disk_template in constants.DTS_FILEBASED and
7059 self.op.new_name != inst.name):
7060 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7061 rename_file_storage = True
7063 self.cfg.RenameInstance(inst.name, self.op.new_name)
7064 # Change the instance lock. This is definitely safe while we hold the BGL.
7065 # Otherwise the new lock would have to be added in acquired mode.
7067 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7068 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7070 # re-read the instance from the configuration after rename
7071 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7073 if rename_file_storage:
7074 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7075 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7076 old_file_storage_dir,
7077 new_file_storage_dir)
7078 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7079 " (but the instance has been renamed in Ganeti)" %
7080 (inst.primary_node, old_file_storage_dir,
7081 new_file_storage_dir))
7083 _StartInstanceDisks(self, inst, None)
7085 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7086 old_name, self.op.debug_level)
7087 msg = result.fail_msg
7088 if msg:
7089 msg = ("Could not run OS rename script for instance %s on node %s"
7090 " (but the instance has been renamed in Ganeti): %s" %
7091 (inst.name, inst.primary_node, msg))
7092 self.proc.LogWarning(msg)
7094 _ShutdownInstanceDisks(self, inst)
7099 class LUInstanceRemove(LogicalUnit):
7100 """Remove an instance.
7103 HPATH = "instance-remove"
7104 HTYPE = constants.HTYPE_INSTANCE
7107 def ExpandNames(self):
7108 self._ExpandAndLockInstance()
7109 self.needed_locks[locking.LEVEL_NODE] = []
7110 self.needed_locks[locking.LEVEL_NODE_RES] = []
7111 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7113 def DeclareLocks(self, level):
7114 if level == locking.LEVEL_NODE:
7115 self._LockInstancesNodes()
7116 elif level == locking.LEVEL_NODE_RES:
7118 self.needed_locks[locking.LEVEL_NODE_RES] = \
7119 self.needed_locks[locking.LEVEL_NODE][:]
7121 def BuildHooksEnv(self):
7124 This runs on master, primary and secondary nodes of the instance.
7127 env = _BuildInstanceHookEnvByObject(self, self.instance)
7128 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7131 def BuildHooksNodes(self):
7132 """Build hooks nodes.
7135 nl = [self.cfg.GetMasterNode()]
7136 nl_post = list(self.instance.all_nodes) + nl
7137 return (nl, nl_post)
7139 def CheckPrereq(self):
7140 """Check prerequisites.
7142 This checks that the instance is in the cluster.
7145 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7146 assert self.instance is not None, \
7147 "Cannot retrieve locked instance %s" % self.op.instance_name
7149 def Exec(self, feedback_fn):
7150 """Remove the instance.
7153 instance = self.instance
7154 logging.info("Shutting down instance %s on node %s",
7155 instance.name, instance.primary_node)
7157 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7158 self.op.shutdown_timeout)
7159 msg = result.fail_msg
7160 if msg:
7161 if self.op.ignore_failures:
7162 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7164 raise errors.OpExecError("Could not shutdown instance %s on"
7166 (instance.name, instance.primary_node, msg))
7168 assert (self.owned_locks(locking.LEVEL_NODE) ==
7169 self.owned_locks(locking.LEVEL_NODE_RES))
7170 assert not (set(instance.all_nodes) -
7171 self.owned_locks(locking.LEVEL_NODE)), \
7172 "Not owning correct locks"
7174 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7177 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7178 """Utility function to remove an instance.
7181 logging.info("Removing block devices for instance %s", instance.name)
7183 if not _RemoveDisks(lu, instance):
7184 if not ignore_failures:
7185 raise errors.OpExecError("Can't remove instance's disks")
7186 feedback_fn("Warning: can't remove instance's disks")
7188 logging.info("Removing instance %s out of cluster config", instance.name)
7190 lu.cfg.RemoveInstance(instance.name)
7192 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7193 "Instance lock removal conflict"
7195 # Remove lock for the instance
7196 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7199 class LUInstanceQuery(NoHooksLU):
7200 """Logical unit for querying instances.
7203 # pylint: disable=W0142
7206 def CheckArguments(self):
7207 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7208 self.op.output_fields, self.op.use_locking)
7210 def ExpandNames(self):
7211 self.iq.ExpandNames(self)
7213 def DeclareLocks(self, level):
7214 self.iq.DeclareLocks(self, level)
7216 def Exec(self, feedback_fn):
7217 return self.iq.OldStyleQuery(self)
7220 class LUInstanceFailover(LogicalUnit):
7221 """Failover an instance.
7224 HPATH = "instance-failover"
7225 HTYPE = constants.HTYPE_INSTANCE
7228 def CheckArguments(self):
7229 """Check the arguments.
7232 self.iallocator = getattr(self.op, "iallocator", None)
7233 self.target_node = getattr(self.op, "target_node", None)
7235 def ExpandNames(self):
7236 self._ExpandAndLockInstance()
7238 if self.op.target_node is not None:
7239 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7241 self.needed_locks[locking.LEVEL_NODE] = []
7242 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7244 ignore_consistency = self.op.ignore_consistency
7245 shutdown_timeout = self.op.shutdown_timeout
7246 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7249 ignore_consistency=ignore_consistency,
7250 shutdown_timeout=shutdown_timeout,
7251 ignore_ipolicy=self.op.ignore_ipolicy)
7252 self.tasklets = [self._migrater]
7254 def DeclareLocks(self, level):
7255 if level == locking.LEVEL_NODE:
7256 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7257 if instance.disk_template in constants.DTS_EXT_MIRROR:
7258 if self.op.target_node is None:
7259 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7261 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7262 self.op.target_node]
7263 del self.recalculate_locks[locking.LEVEL_NODE]
7265 self._LockInstancesNodes()
7267 def BuildHooksEnv(self):
7270 This runs on master, primary and secondary nodes of the instance.
7273 instance = self._migrater.instance
7274 source_node = instance.primary_node
7275 target_node = self.op.target_node
7277 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7278 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7279 "OLD_PRIMARY": source_node,
7280 "NEW_PRIMARY": target_node,
7283 if instance.disk_template in constants.DTS_INT_MIRROR:
7284 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7285 env["NEW_SECONDARY"] = source_node
7287 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7289 env.update(_BuildInstanceHookEnvByObject(self, instance))
7293 def BuildHooksNodes(self):
7294 """Build hooks nodes.
7297 instance = self._migrater.instance
7298 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7299 return (nl, nl + [instance.primary_node])
7302 class LUInstanceMigrate(LogicalUnit):
7303 """Migrate an instance.
7305 This is migration without shutting down the instance, as opposed to
7306 failover, which is done with a shutdown.
7309 HPATH = "instance-migrate"
7310 HTYPE = constants.HTYPE_INSTANCE
7313 def ExpandNames(self):
7314 self._ExpandAndLockInstance()
7316 if self.op.target_node is not None:
7317 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7319 self.needed_locks[locking.LEVEL_NODE] = []
7320 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7322 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7323 cleanup=self.op.cleanup,
7325 fallback=self.op.allow_failover,
7326 ignore_ipolicy=self.op.ignore_ipolicy)
7327 self.tasklets = [self._migrater]
7329 def DeclareLocks(self, level):
7330 if level == locking.LEVEL_NODE:
7331 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7332 if instance.disk_template in constants.DTS_EXT_MIRROR:
7333 if self.op.target_node is None:
7334 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7336 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7337 self.op.target_node]
7338 del self.recalculate_locks[locking.LEVEL_NODE]
7340 self._LockInstancesNodes()
7342 def BuildHooksEnv(self):
7345 This runs on master, primary and secondary nodes of the instance.
7348 instance = self._migrater.instance
7349 source_node = instance.primary_node
7350 target_node = self.op.target_node
7351 env = _BuildInstanceHookEnvByObject(self, instance)
7353 "MIGRATE_LIVE": self._migrater.live,
7354 "MIGRATE_CLEANUP": self.op.cleanup,
7355 "OLD_PRIMARY": source_node,
7356 "NEW_PRIMARY": target_node,
7359 if instance.disk_template in constants.DTS_INT_MIRROR:
7360 env["OLD_SECONDARY"] = target_node
7361 env["NEW_SECONDARY"] = source_node
7363 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7367 def BuildHooksNodes(self):
7368 """Build hooks nodes.
7371 instance = self._migrater.instance
7372 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7373 return (nl, nl + [instance.primary_node])
7376 class LUInstanceMove(LogicalUnit):
7377 """Move an instance by data-copying.
7380 HPATH = "instance-move"
7381 HTYPE = constants.HTYPE_INSTANCE
7384 def ExpandNames(self):
7385 self._ExpandAndLockInstance()
7386 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7387 self.op.target_node = target_node
7388 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7389 self.needed_locks[locking.LEVEL_NODE_RES] = []
7390 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7392 def DeclareLocks(self, level):
7393 if level == locking.LEVEL_NODE:
7394 self._LockInstancesNodes(primary_only=True)
7395 elif level == locking.LEVEL_NODE_RES:
7397 self.needed_locks[locking.LEVEL_NODE_RES] = \
7398 self.needed_locks[locking.LEVEL_NODE][:]
7400 def BuildHooksEnv(self):
7403 This runs on master, primary and secondary nodes of the instance.
7407 "TARGET_NODE": self.op.target_node,
7408 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7410 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7413 def BuildHooksNodes(self):
7414 """Build hooks nodes.
7418 self.cfg.GetMasterNode(),
7419 self.instance.primary_node,
7420 self.op.target_node,
7424 def CheckPrereq(self):
7425 """Check prerequisites.
7427 This checks that the instance is in the cluster.
7430 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7431 assert self.instance is not None, \
7432 "Cannot retrieve locked instance %s" % self.op.instance_name
7434 node = self.cfg.GetNodeInfo(self.op.target_node)
7435 assert node is not None, \
7436 "Cannot retrieve locked node %s" % self.op.target_node
7438 self.target_node = target_node = node.name
7440 if target_node == instance.primary_node:
7441 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7442 (instance.name, target_node),
7445 bep = self.cfg.GetClusterInfo().FillBE(instance)
7447 for idx, dsk in enumerate(instance.disks):
7448 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7449 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7450 " cannot copy" % idx, errors.ECODE_STATE)
7452 _CheckNodeOnline(self, target_node)
7453 _CheckNodeNotDrained(self, target_node)
7454 _CheckNodeVmCapable(self, target_node)
7455 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), node.group)
7456 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
7457 ignore=self.op.ignore_ipolicy)
7459 if instance.admin_state == constants.ADMINST_UP:
7460 # check memory requirements on the target node
7461 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7462 instance.name, bep[constants.BE_MAXMEM],
7463 instance.hypervisor)
7465 self.LogInfo("Not checking memory on the secondary node as"
7466 " instance will not be started")
7468 # check bridge existence
7469 _CheckInstanceBridgesExist(self, instance, node=target_node)
7471 def Exec(self, feedback_fn):
7472 """Move an instance.
7474 The move is done by shutting it down on its present node, copying
7475 the data over (slow) and starting it on the new node.
7478 instance = self.instance
7480 source_node = instance.primary_node
7481 target_node = self.target_node
7483 self.LogInfo("Shutting down instance %s on source node %s",
7484 instance.name, source_node)
7486 assert (self.owned_locks(locking.LEVEL_NODE) ==
7487 self.owned_locks(locking.LEVEL_NODE_RES))
7489 result = self.rpc.call_instance_shutdown(source_node, instance,
7490 self.op.shutdown_timeout)
7491 msg = result.fail_msg
7492 if msg:
7493 if self.op.ignore_consistency:
7494 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7495 " Proceeding anyway. Please make sure node"
7496 " %s is down. Error details: %s",
7497 instance.name, source_node, source_node, msg)
7499 raise errors.OpExecError("Could not shutdown instance %s on"
7501 (instance.name, source_node, msg))
7503 # create the target disks
7505 _CreateDisks(self, instance, target_node=target_node)
7506 except errors.OpExecError:
7507 self.LogWarning("Device creation failed, reverting...")
7509 _RemoveDisks(self, instance, target_node=target_node)
7511 self.cfg.ReleaseDRBDMinors(instance.name)
7514 cluster_name = self.cfg.GetClusterInfo().cluster_name
7517 # activate, get path, copy the data over
7518 for idx, disk in enumerate(instance.disks):
7519 self.LogInfo("Copying data for disk %d", idx)
7520 result = self.rpc.call_blockdev_assemble(target_node, disk,
7521 instance.name, True, idx)
7523 self.LogWarning("Can't assemble newly created disk %d: %s",
7524 idx, result.fail_msg)
7525 errs.append(result.fail_msg)
7527 dev_path = result.payload
7528 result = self.rpc.call_blockdev_export(source_node, disk,
7529 target_node, dev_path,
7532 self.LogWarning("Can't copy data over for disk %d: %s",
7533 idx, result.fail_msg)
7534 errs.append(result.fail_msg)
7538 self.LogWarning("Some disks failed to copy, aborting")
7540 _RemoveDisks(self, instance, target_node=target_node)
7542 self.cfg.ReleaseDRBDMinors(instance.name)
7543 raise errors.OpExecError("Errors during disk copy: %s" %
7546 instance.primary_node = target_node
7547 self.cfg.Update(instance, feedback_fn)
7549 self.LogInfo("Removing the disks on the original node")
7550 _RemoveDisks(self, instance, target_node=source_node)
7552 # Only start the instance if it's marked as up
7553 if instance.admin_state == constants.ADMINST_UP:
7554 self.LogInfo("Starting instance %s on node %s",
7555 instance.name, target_node)
7557 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7558 ignore_secondaries=True)
7560 _ShutdownInstanceDisks(self, instance)
7561 raise errors.OpExecError("Can't activate the instance's disks")
7563 result = self.rpc.call_instance_start(target_node,
7564 (instance, None, None), False)
7565 msg = result.fail_msg
7567 _ShutdownInstanceDisks(self, instance)
7568 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7569 (instance.name, target_node, msg))
7572 class LUNodeMigrate(LogicalUnit):
7573 """Migrate all instances from a node.
7576 HPATH = "node-migrate"
7577 HTYPE = constants.HTYPE_NODE
7580 def CheckArguments(self):
7583 def ExpandNames(self):
7584 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7586 self.share_locks = _ShareAll()
7587 self.needed_locks = {
7588 locking.LEVEL_NODE: [self.op.node_name],
7591 def BuildHooksEnv(self):
7594 This runs on the master, the primary and all the secondaries.
7598 "NODE_NAME": self.op.node_name,
7601 def BuildHooksNodes(self):
7602 """Build hooks nodes.
7605 nl = [self.cfg.GetMasterNode()]
7608 def CheckPrereq(self):
7611 def Exec(self, feedback_fn):
7612 # Prepare jobs for migration instances
7614 [opcodes.OpInstanceMigrate(instance_name=inst.name,
7617 iallocator=self.op.iallocator,
7618 target_node=self.op.target_node,
7619 ignore_ipolicy=self.op.ignore_ipolicy)]
7620 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7623 # TODO: Run iallocator in this opcode and pass correct placement options to
7624 # OpInstanceMigrate. Since other jobs can modify the cluster between
7625 # running the iallocator and the actual migration, a good consistency model
7626 # will have to be found.
7628 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7629 frozenset([self.op.node_name]))
7631 return ResultWithJobs(jobs)
7634 class TLMigrateInstance(Tasklet):
7635 """Tasklet class for instance migration.
7638 @ivar live: whether the migration will be done live or non-live;
7639 this variable is initialized only after CheckPrereq has run
7640 @type cleanup: boolean
7641 @ivar cleanup: Whether we clean up from a failed migration
7642 @type iallocator: string
7643 @ivar iallocator: The iallocator used to determine target_node
7644 @type target_node: string
7645 @ivar target_node: If given, the target_node to reallocate the instance to
7646 @type failover: boolean
7647 @ivar failover: Whether operation results in failover or migration
7648 @type fallback: boolean
7649 @ivar fallback: Whether fallback to failover is allowed if migration is not possible
7651 @type ignore_consistency: boolean
7652 @ivar ignore_consistency: Whether we should ignore consistency between source and target node
7654 @type shutdown_timeout: int
7655 @ivar shutdown_timeout: In case of failover, the timeout used for the shutdown
7656 @type ignore_ipolicy: bool
7657 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
7662 _MIGRATION_POLL_INTERVAL = 1 # seconds
7663 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7665 def __init__(self, lu, instance_name, cleanup=False,
7666 failover=False, fallback=False,
7667 ignore_consistency=False,
7668 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
7669 ignore_ipolicy=False):
7670 """Initializes this class.
7673 Tasklet.__init__(self, lu)
7676 self.instance_name = instance_name
7677 self.cleanup = cleanup
7678 self.live = False # will be overridden later
7679 self.failover = failover
7680 self.fallback = fallback
7681 self.ignore_consistency = ignore_consistency
7682 self.shutdown_timeout = shutdown_timeout
7683 self.ignore_ipolicy = ignore_ipolicy
7685 def CheckPrereq(self):
7686 """Check prerequisites.
7688 This checks that the instance is in the cluster.
7691 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7692 instance = self.cfg.GetInstanceInfo(instance_name)
7693 assert instance is not None
7694 self.instance = instance
7695 cluster = self.cfg.GetClusterInfo()
7697 if (not self.cleanup and
7698 not instance.admin_state == constants.ADMINST_UP and
7699 not self.failover and self.fallback):
7700 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7701 " switching to failover")
7702 self.failover = True
7704 if instance.disk_template not in constants.DTS_MIRRORED:
7709 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7710 " %s" % (instance.disk_template, text),
7713 if instance.disk_template in constants.DTS_EXT_MIRROR:
7714 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7716 if self.lu.op.iallocator:
7717 self._RunAllocator()
7719 # We set self.target_node as it is required by
7721 self.target_node = self.lu.op.target_node
7723 # Check that the target node is correct in terms of instance policy
7724 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
7725 ipolicy = _CalculateGroupIPolicy(cluster, nodeinfo.group)
7726 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
7727 ignore=self.ignore_ipolicy)
7729 # self.target_node is already populated, either directly or by the
7731 target_node = self.target_node
7732 if self.target_node == instance.primary_node:
7733 raise errors.OpPrereqError("Cannot migrate instance %s"
7734 " to its primary (%s)" %
7735 (instance.name, instance.primary_node))
7737 if len(self.lu.tasklets) == 1:
7738 # It is safe to release locks only when we're the only tasklet
7740 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7741 keep=[instance.primary_node, self.target_node])
7744 secondary_nodes = instance.secondary_nodes
7745 if not secondary_nodes:
7746 raise errors.ConfigurationError("No secondary node but using"
7747 " %s disk template" %
7748 instance.disk_template)
7749 target_node = secondary_nodes[0]
7750 if self.lu.op.iallocator or (self.lu.op.target_node and
7751 self.lu.op.target_node != target_node):
7753 text = "failed over"
7756 raise errors.OpPrereqError("Instances with disk template %s cannot"
7757 " be %s to arbitrary nodes"
7758 " (neither an iallocator nor a target"
7759 " node can be passed)" %
7760 (instance.disk_template, text),
7762 nodeinfo = self.cfg.GetNodeInfo(target_node)
7763 ipolicy = _CalculateGroupIPolicy(cluster, nodeinfo.group)
7764 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
7765 ignore=self.ignore_ipolicy)
7767 i_be = cluster.FillBE(instance)
7769 # check memory requirements on the secondary node
7770 if not self.failover or instance.admin_state == constants.ADMINST_UP:
7771 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7772 instance.name, i_be[constants.BE_MAXMEM],
7773 instance.hypervisor)
7775 self.lu.LogInfo("Not checking memory on the secondary node as"
7776 " instance will not be started")
7778 # check if failover must be forced instead of migration
7779 if (not self.cleanup and not self.failover and
7780 i_be[constants.BE_ALWAYS_FAILOVER]):
7782 self.lu.LogInfo("Instance configured to always failover; fallback"
7784 self.failover = True
7786 raise errors.OpPrereqError("This instance has been configured to"
7787 " always failover, please allow failover",
7790 # check bridge existence
7791 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7793 if not self.cleanup:
7794 _CheckNodeNotDrained(self.lu, target_node)
7795 if not self.failover:
7796 result = self.rpc.call_instance_migratable(instance.primary_node,
7798 if result.fail_msg and self.fallback:
7799 self.lu.LogInfo("Can't migrate, instance offline, fallback to failover")
7801 self.failover = True
7803 result.Raise("Can't migrate, please use failover",
7804 prereq=True, ecode=errors.ECODE_STATE)
7806 assert not (self.failover and self.cleanup)
7808 if not self.failover:
7809 if self.lu.op.live is not None and self.lu.op.mode is not None:
7810 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7811 " parameters are accepted",
7813 if self.lu.op.live is not None:
7815 self.lu.op.mode = constants.HT_MIGRATION_LIVE
7817 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7818 # reset the 'live' parameter to None so that repeated
7819 # invocations of CheckPrereq do not raise an exception
7820 self.lu.op.live = None
7821 elif self.lu.op.mode is None:
7822 # read the default value from the hypervisor
7823 i_hv = cluster.FillHV(self.instance, skip_globals=False)
7824 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7826 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
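# Decision sketch for the live/mode resolution above (behavior as coded):
#   live and mode both given  -> OpPrereqError (mutually exclusive)
#   live is True              -> mode = HT_MIGRATION_LIVE
#   live is False             -> mode = HT_MIGRATION_NONLIVE
#   live and mode both None   -> mode = hypervisor's HV_MIGRATION_MODE default
# self.live then simply mirrors whether mode == HT_MIGRATION_LIVE.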
7828 # Failover is never live
7831 def _RunAllocator(self):
7832 """Run the allocator based on input opcode.
7835 # FIXME: add a self.ignore_ipolicy option
7836 ial = IAllocator(self.cfg, self.rpc,
7837 mode=constants.IALLOCATOR_MODE_RELOC,
7838 name=self.instance_name,
7839 # TODO See why hail breaks with a single node below
7840 relocate_from=[self.instance.primary_node,
7841 self.instance.primary_node],
7844 ial.Run(self.lu.op.iallocator)
7847 raise errors.OpPrereqError("Can't compute nodes using"
7848 " iallocator '%s': %s" %
7849 (self.lu.op.iallocator, ial.info),
7851 if len(ial.result) != ial.required_nodes:
7852 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7853 " of nodes (%s), required %s" %
7854 (self.lu.op.iallocator, len(ial.result),
7855 ial.required_nodes), errors.ECODE_FAULT)
7856 self.target_node = ial.result[0]
7857 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7858 self.instance_name, self.lu.op.iallocator,
7859 utils.CommaJoin(ial.result))
7861 def _WaitUntilSync(self):
7862 """Poll with custom rpc for disk sync.
7864 This uses our own step-based rpc call.
7867 self.feedback_fn("* wait until resync is done")
7871 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7873 self.instance.disks)
7875 for node, nres in result.items():
7876 nres.Raise("Cannot resync disks on node %s" % node)
7877 node_done, node_percent = nres.payload
7878 all_done = all_done and node_done
7879 if node_percent is not None:
7880 min_percent = min(min_percent, node_percent)
7882 if min_percent < 100:
7883 self.feedback_fn(" - progress: %.1f%%" % min_percent)
7886 def _EnsureSecondary(self, node):
7887 """Demote a node to secondary.
7890 self.feedback_fn("* switching node %s to secondary mode" % node)
7892 for dev in self.instance.disks:
7893 self.cfg.SetDiskID(dev, node)
7895 result = self.rpc.call_blockdev_close(node, self.instance.name,
7896 self.instance.disks)
7897 result.Raise("Cannot change disk to secondary on node %s" % node)
7899 def _GoStandalone(self):
7900 """Disconnect from the network.
7903 self.feedback_fn("* changing into standalone mode")
7904 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7905 self.instance.disks)
7906 for node, nres in result.items():
7907 nres.Raise("Cannot disconnect disks on node %s" % node)
7909 def _GoReconnect(self, multimaster):
7910 """Reconnect to the network.
7916 msg = "single-master"
7917 self.feedback_fn("* changing disks into %s mode" % msg)
7918 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7919 self.instance.disks,
7920 self.instance.name, multimaster)
7921 for node, nres in result.items():
7922 nres.Raise("Cannot change disks config on node %s" % node)
7924 def _ExecCleanup(self):
7925 """Try to cleanup after a failed migration.
7927 The cleanup is done by:
7928 - check that the instance is running only on one node
7929 (and update the config if needed)
7930 - change disks on its secondary node to secondary
7931 - wait until disks are fully synchronized
7932 - disconnect from the network
7933 - change disks into single-master mode
7934 - wait again until disks are fully synchronized (see the step-to-helper sketch below)
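# A sketch of how the steps above map onto the helpers defined in this
# tasklet (grounded in the body below; the demote/sync steps only apply to
# internally mirrored disk templates):
#   - locate the instance:       self.rpc.call_instance_list on all_nodes
#   - demote the stale node:     self._EnsureSecondary(demoted_node)
#   - wait for sync:             self._WaitUntilSync()
#   - disconnect the network:    self._GoStandalone()
#   - single-master and resync:  self._GoReconnect(False) + self._WaitUntilSync()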
7937 instance = self.instance
7938 target_node = self.target_node
7939 source_node = self.source_node
7941 # check running on only one node
7942 self.feedback_fn("* checking where the instance actually runs"
7943 " (if this hangs, the hypervisor might be in a bad state)")
7945 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7946 for node, result in ins_l.items():
7947 result.Raise("Can't contact node %s" % node)
7949 runningon_source = instance.name in ins_l[source_node].payload
7950 runningon_target = instance.name in ins_l[target_node].payload
7952 if runningon_source and runningon_target:
7953 raise errors.OpExecError("Instance seems to be running on two nodes,"
7954 " or the hypervisor is confused; you will have"
7955 " to ensure manually that it runs only on one"
7956 " and restart this operation")
7958 if not (runningon_source or runningon_target):
7959 raise errors.OpExecError("Instance does not seem to be running at all;"
7960 " in this case it's safer to repair by"
7961 " running 'gnt-instance stop' to ensure disk"
7962 " shutdown, and then restarting it")
7964 if runningon_target:
7965 # the migration has actually succeeded, we need to update the config
7966 self.feedback_fn("* instance running on secondary node (%s),"
7967 " updating config" % target_node)
7968 instance.primary_node = target_node
7969 self.cfg.Update(instance, self.feedback_fn)
7970 demoted_node = source_node
7972 self.feedback_fn("* instance confirmed to be running on its"
7973 " primary node (%s)" % source_node)
7974 demoted_node = target_node
7976 if instance.disk_template in constants.DTS_INT_MIRROR:
7977 self._EnsureSecondary(demoted_node)
7979 self._WaitUntilSync()
7980 except errors.OpExecError:
7981 # we ignore here errors, since if the device is standalone, it
7982 # won't be able to sync
7984 self._GoStandalone()
7985 self._GoReconnect(False)
7986 self._WaitUntilSync()
7988 self.feedback_fn("* done")
7990 def _RevertDiskStatus(self):
7991 """Try to revert the disk status after a failed migration.
7994 target_node = self.target_node
7995 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7999 self._EnsureSecondary(target_node)
8000 self._GoStandalone()
8001 self._GoReconnect(False)
8002 self._WaitUntilSync()
8003 except errors.OpExecError, err:
8004 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8005 " please try to recover the instance manually;"
8006 " error '%s'" % str(err))
8008 def _AbortMigration(self):
8009 """Call the hypervisor code to abort a started migration.
8012 instance = self.instance
8013 target_node = self.target_node
8014 source_node = self.source_node
8015 migration_info = self.migration_info
8017 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8021 abort_msg = abort_result.fail_msg
8023 logging.error("Aborting migration failed on target node %s: %s",
8024 target_node, abort_msg)
8025 # Don't raise an exception here, as we still have to try to revert the
8026 # disk status, even if this step failed.
8028 abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
8029 instance, False, self.live)
8030 abort_msg = abort_result.fail_msg
8032 logging.error("Aborting migration failed on source node %s: %s",
8033 source_node, abort_msg)
8035 def _ExecMigration(self):
8036 """Migrate an instance.
8038 The migrate is done by:
8039 - change the disks into dual-master mode
8040 - wait until disks are fully synchronized again
8041 - migrate the instance
8042 - change disks on the new secondary node (the old primary) to secondary
8043 - wait until disks are fully synchronized
8044 - change disks into single-master mode (see the call mapping sketched below)
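# A sketch of how the steps above map onto the calls in the body (for
# DRBD-style templates; DTS_EXT_MIRROR skips the disk reconfiguration steps):
#   - dual-master mode:      _EnsureSecondary + _GoStandalone + _GoReconnect(True)
#   - resync:                _WaitUntilSync()
#   - migrate:               rpc.call_instance_migrate + status polling
#   - demote old primary:    _EnsureSecondary(source_node)
#   - single-master mode:    _GoStandalone() and _GoReconnect(False)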
8047 instance = self.instance
8048 target_node = self.target_node
8049 source_node = self.source_node
8051 # Check for hypervisor version mismatch and warn the user.
8052 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8053 None, [self.instance.hypervisor])
8054 for ninfo in nodeinfo.values():
8055 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8057 (_, _, (src_info, )) = nodeinfo[source_node].payload
8058 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8060 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8061 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8062 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8063 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8064 if src_version != dst_version:
8065 self.feedback_fn("* warning: hypervisor version mismatch between"
8066 " source (%s) and target (%s) node" %
8067 (src_version, dst_version))
8069 self.feedback_fn("* checking disk consistency between source and target")
8070 for dev in instance.disks:
8071 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
8072 raise errors.OpExecError("Disk %s is degraded or not fully"
8073 " synchronized on target node,"
8074 " aborting migration" % dev.iv_name)
8076 # First get the migration information from the remote node
8077 result = self.rpc.call_migration_info(source_node, instance)
8078 msg = result.fail_msg
8080 log_err = ("Failed fetching source migration information from %s: %s" %
8082 logging.error(log_err)
8083 raise errors.OpExecError(log_err)
8085 self.migration_info = migration_info = result.payload
8087 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8088 # Then switch the disks to master/master mode
8089 self._EnsureSecondary(target_node)
8090 self._GoStandalone()
8091 self._GoReconnect(True)
8092 self._WaitUntilSync()
8094 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8095 result = self.rpc.call_accept_instance(target_node,
8098 self.nodes_ip[target_node])
8100 msg = result.fail_msg
8102 logging.error("Instance pre-migration failed, trying to revert"
8103 " disk status: %s", msg)
8104 self.feedback_fn("Pre-migration failed, aborting")
8105 self._AbortMigration()
8106 self._RevertDiskStatus()
8107 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8108 (instance.name, msg))
8110 self.feedback_fn("* migrating instance to %s" % target_node)
8111 result = self.rpc.call_instance_migrate(source_node, instance,
8112 self.nodes_ip[target_node],
8114 msg = result.fail_msg
8116 logging.error("Instance migration failed, trying to revert"
8117 " disk status: %s", msg)
8118 self.feedback_fn("Migration failed, aborting")
8119 self._AbortMigration()
8120 self._RevertDiskStatus()
8121 raise errors.OpExecError("Could not migrate instance %s: %s" %
8122 (instance.name, msg))
8124 self.feedback_fn("* starting memory transfer")
8125 last_feedback = time.time()
8127 result = self.rpc.call_instance_get_migration_status(source_node,
8129 msg = result.fail_msg
8130 ms = result.payload # MigrationStatus instance
8131 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8132 logging.error("Instance migration failed, trying to revert"
8133 " disk status: %s", msg)
8134 self.feedback_fn("Migration failed, aborting")
8135 self._AbortMigration()
8136 self._RevertDiskStatus()
8137 raise errors.OpExecError("Could not migrate instance %s: %s" %
8138 (instance.name, msg))
8140 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8141 self.feedback_fn("* memory transfer complete")
8144 if (utils.TimeoutExpired(last_feedback,
8145 self._MIGRATION_FEEDBACK_INTERVAL) and
8146 ms.transferred_ram is not None):
8147 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8148 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8149 last_feedback = time.time()
8151 time.sleep(self._MIGRATION_POLL_INTERVAL)
8153 result = self.rpc.call_instance_finalize_migration_src(source_node,
8157 msg = result.fail_msg
8159 logging.error("Instance migration succeeded, but finalization failed"
8160 " on the source node: %s", msg)
8161 raise errors.OpExecError("Could not finalize instance migration: %s" %
8164 instance.primary_node = target_node
8166 # distribute new instance config to the other nodes
8167 self.cfg.Update(instance, self.feedback_fn)
8169 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8173 msg = result.fail_msg
8175 logging.error("Instance migration succeeded, but finalization failed"
8176 " on the target node: %s", msg)
8177 raise errors.OpExecError("Could not finalize instance migration: %s" %
8180 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8181 self._EnsureSecondary(source_node)
8182 self._WaitUntilSync()
8183 self._GoStandalone()
8184 self._GoReconnect(False)
8185 self._WaitUntilSync()
8187 self.feedback_fn("* done")
8189 def _ExecFailover(self):
8190 """Failover an instance.
8192 The failover is done by shutting it down on its present node and
8193 starting it on the secondary.
8196 instance = self.instance
8197 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8199 source_node = instance.primary_node
8200 target_node = self.target_node
8202 if instance.admin_state == constants.ADMINST_UP:
8203 self.feedback_fn("* checking disk consistency between source and target")
8204 for dev in instance.disks:
8205 # for drbd, these are drbd over lvm
8206 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
8207 if primary_node.offline:
8208 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8210 (primary_node.name, dev.iv_name, target_node))
8211 elif not self.ignore_consistency:
8212 raise errors.OpExecError("Disk %s is degraded on target node,"
8213 " aborting failover" % dev.iv_name)
8215 self.feedback_fn("* not checking disk consistency as instance is not running")
8218 self.feedback_fn("* shutting down instance on source node")
8219 logging.info("Shutting down instance %s on node %s",
8220 instance.name, source_node)
8222 result = self.rpc.call_instance_shutdown(source_node, instance,
8223 self.shutdown_timeout)
8224 msg = result.fail_msg
8226 if self.ignore_consistency or primary_node.offline:
8227 self.lu.LogWarning("Could not shut down instance %s on node %s,"
8228 " proceeding anyway; please make sure node"
8229 " %s is down; error details: %s",
8230 instance.name, source_node, source_node, msg)
8232 raise errors.OpExecError("Could not shut down instance %s on"
8234 (instance.name, source_node, msg))
8236 self.feedback_fn("* deactivating the instance's disks on source node")
8237 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8238 raise errors.OpExecError("Can't shut down the instance's disks")
8240 instance.primary_node = target_node
8241 # distribute new instance config to the other nodes
8242 self.cfg.Update(instance, self.feedback_fn)
8244 # Only start the instance if it's marked as up
8245 if instance.admin_state == constants.ADMINST_UP:
8246 self.feedback_fn("* activating the instance's disks on target node %s" %
8248 logging.info("Starting instance %s on node %s",
8249 instance.name, target_node)
8251 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8252 ignore_secondaries=True)
8254 _ShutdownInstanceDisks(self.lu, instance)
8255 raise errors.OpExecError("Can't activate the instance's disks")
8257 self.feedback_fn("* starting the instance on the target node %s" %
8259 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8261 msg = result.fail_msg
8263 _ShutdownInstanceDisks(self.lu, instance)
8264 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8265 (instance.name, target_node, msg))
8267 def Exec(self, feedback_fn):
8268 """Perform the migration.
8271 self.feedback_fn = feedback_fn
8272 self.source_node = self.instance.primary_node
8274 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8275 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8276 self.target_node = self.instance.secondary_nodes[0]
8277 # Otherwise self.target_node has been populated either
8278 # directly, or through an iallocator.
8280 self.all_nodes = [self.source_node, self.target_node]
8281 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8282 in self.cfg.GetMultiNodeInfo(self.all_nodes))
8285 feedback_fn("Failover instance %s" % self.instance.name)
8286 self._ExecFailover()
8288 feedback_fn("Migrating instance %s" % self.instance.name)
8291 return self._ExecCleanup()
8293 return self._ExecMigration()
8296 def _CreateBlockDev(lu, node, instance, device, force_create,
8298 """Create a tree of block devices on a given node.
8300 If this device type has to be created on secondaries, create it and all its children.
8303 If not, just recurse to children keeping the same 'force' value.
8305 @param lu: the lu on whose behalf we execute
8306 @param node: the node on which to create the device
8307 @type instance: L{objects.Instance}
8308 @param instance: the instance which owns the device
8309 @type device: L{objects.Disk}
8310 @param device: the device to create
8311 @type force_create: boolean
8312 @param force_create: whether to force creation of this device; this
8313 will be changed to True whenever we find a device which has the
8314 CreateOnSecondary() attribute
8315 @param info: the extra 'metadata' we should attach to the device
8316 (this will be represented as a LVM tag)
8317 @type force_open: boolean
8318 @param force_open: this parameter will be passed to the
8319 L{backend.BlockdevCreate} function where it specifies
8320 whether we run on primary or not, and it affects both
8321 the child assembly and the device's own Open() execution
8324 if device.CreateOnSecondary():
8328 for child in device.children:
8329 _CreateBlockDev(lu, node, instance, child, force_create,
8332 if not force_create:
8335 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
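# Worked example of the recursion above (illustrative): for a DRBD8 disk on a
# secondary node the top-level call starts with force_create=False; since the
# DRBD8 device reports CreateOnSecondary() as true, force_create flips to True
# and both LV children plus the DRBD device itself get created. For a plain LV
# disk on that same node nothing flips the flag, so the function returns
# without creating anything.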
8338 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8339 """Create a single block device on a given node.
8341 This will not recurse over children of the device, so they must be created in advance.
8344 @param lu: the lu on whose behalf we execute
8345 @param node: the node on which to create the device
8346 @type instance: L{objects.Instance}
8347 @param instance: the instance which owns the device
8348 @type device: L{objects.Disk}
8349 @param device: the device to create
8350 @param info: the extra 'metadata' we should attach to the device
8351 (this will be represented as a LVM tag)
8352 @type force_open: boolean
8353 @param force_open: this parameter will be passed to the
8354 L{backend.BlockdevCreate} function where it specifies
8355 whether we run on primary or not, and it affects both
8356 the child assembly and the device own Open() execution
8359 lu.cfg.SetDiskID(device, node)
8360 result = lu.rpc.call_blockdev_create(node, device, device.size,
8361 instance.name, force_open, info)
8362 result.Raise("Can't create block device %s on"
8363 " node %s for instance %s" % (device, node, instance.name))
8364 if device.physical_id is None:
8365 device.physical_id = result.payload
8368 def _GenerateUniqueNames(lu, exts):
8369 """Generate suitable LV names.
8371 This will generate a logical volume name for each of the given extensions.
8376 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8377 results.append("%s%s" % (new_id, val))
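# Illustrative example (the ids shown are made up): calling
# _GenerateUniqueNames(lu, [".disk0", ".disk1"]) returns one name per
# extension, each a freshly generated unique id with the extension appended,
# e.g. ["b2a5c6c6.disk0", "f0f7d6a1.disk1"].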
8381 def _ComputeLDParams(disk_template, disk_params):
8382 """Computes Logical Disk parameters from Disk Template parameters.
8384 @type disk_template: string
8385 @param disk_template: disk template, one of L{constants.DISK_TEMPLATES}
8386 @type disk_params: dict
8387 @param disk_params: disk template parameters; dict(template_name -> parameters)
8389 @return: a list of dicts, one for each node of the disk hierarchy. Each dict
8390 contains the LD parameters of the node. The tree is flattened in-order.
8393 if disk_template not in constants.DISK_TEMPLATES:
8394 raise errors.ProgrammerError("Unknown disk template %s" % disk_template)
8397 dt_params = disk_params[disk_template]
8398 if disk_template == constants.DT_DRBD8:
8400 constants.LDP_RESYNC_RATE: dt_params[constants.DRBD_RESYNC_RATE],
8401 constants.LDP_BARRIERS: dt_params[constants.DRBD_DISK_BARRIERS],
8402 constants.LDP_NO_META_FLUSH: dt_params[constants.DRBD_META_BARRIERS],
8403 constants.LDP_DEFAULT_METAVG: dt_params[constants.DRBD_DEFAULT_METAVG],
8404 constants.LDP_DISK_CUSTOM: dt_params[constants.DRBD_DISK_CUSTOM],
8405 constants.LDP_NET_CUSTOM: dt_params[constants.DRBD_NET_CUSTOM],
8406 constants.LDP_DYNAMIC_RESYNC: dt_params[constants.DRBD_DYNAMIC_RESYNC],
8407 constants.LDP_PLAN_AHEAD: dt_params[constants.DRBD_PLAN_AHEAD],
8408 constants.LDP_FILL_TARGET: dt_params[constants.DRBD_FILL_TARGET],
8409 constants.LDP_DELAY_TARGET: dt_params[constants.DRBD_DELAY_TARGET],
8410 constants.LDP_MAX_RATE: dt_params[constants.DRBD_MAX_RATE],
8411 constants.LDP_MIN_RATE: dt_params[constants.DRBD_MIN_RATE],
8415 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_DRBD8],
8418 result.append(drbd_params)
8422 constants.LDP_STRIPES: dt_params[constants.DRBD_DATA_STRIPES],
8425 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8427 result.append(data_params)
8431 constants.LDP_STRIPES: dt_params[constants.DRBD_META_STRIPES],
8434 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8436 result.append(meta_params)
8438 elif (disk_template == constants.DT_FILE or
8439 disk_template == constants.DT_SHARED_FILE):
8440 result.append(constants.DISK_LD_DEFAULTS[constants.LD_FILE])
8442 elif disk_template == constants.DT_PLAIN:
8444 constants.LDP_STRIPES: dt_params[constants.LV_STRIPES],
8447 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8449 result.append(params)
8451 elif disk_template == constants.DT_BLOCK:
8452 result.append(constants.DISK_LD_DEFAULTS[constants.LD_BLOCKDEV])
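# Shape of the flattened result (per the branches above): for DT_DRBD8 the
# list is [drbd_params, data_params, meta_params], i.e. the in-order traversal
# of a DRBD8 device with its data and metadata LV children; DT_PLAIN,
# DT_FILE/DT_SHARED_FILE and DT_BLOCK yield a single-element list; templates
# with no branch here (e.g. DT_DISKLESS) presumably leave the list empty.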
8457 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8458 iv_name, p_minor, s_minor, drbd_params, data_params,
8460 """Generate a drbd8 device complete with its children.
8463 assert len(vgnames) == len(names) == 2
8464 port = lu.cfg.AllocatePort()
8465 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8467 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8468 logical_id=(vgnames[0], names[0]),
8470 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8471 logical_id=(vgnames[1], names[1]),
8473 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8474 logical_id=(primary, secondary, port,
8477 children=[dev_data, dev_meta],
8478 iv_name=iv_name, params=drbd_params)
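# Resulting device tree, sketched (the elided tail of the DRBD logical_id is
# assumed to carry the minors and the shared secret):
#
#   LD_DRBD8, size=size, logical_id=(primary, secondary, port, ...)
#     +- LD_LV (data), size=size,           logical_id=(vgnames[0], names[0])
#     +- LD_LV (meta), size=DRBD_META_SIZE, logical_id=(vgnames[1], names[1])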
8482 def _GenerateDiskTemplate(lu, template_name,
8483 instance_name, primary_node,
8484 secondary_nodes, disk_info,
8485 file_storage_dir, file_driver,
8486 base_index, feedback_fn, disk_params):
8487 """Generate the entire disk layout for a given template type.
8490 #TODO: compute space requirements
8492 vgname = lu.cfg.GetVGName()
8493 disk_count = len(disk_info)
8495 ld_params = _ComputeLDParams(template_name, disk_params)
8496 if template_name == constants.DT_DISKLESS:
8498 elif template_name == constants.DT_PLAIN:
8499 if len(secondary_nodes) != 0:
8500 raise errors.ProgrammerError("Wrong template configuration")
8502 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8503 for i in range(disk_count)])
8504 for idx, disk in enumerate(disk_info):
8505 disk_index = idx + base_index
8506 vg = disk.get(constants.IDISK_VG, vgname)
8507 feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
8508 disk_dev = objects.Disk(dev_type=constants.LD_LV,
8509 size=disk[constants.IDISK_SIZE],
8510 logical_id=(vg, names[idx]),
8511 iv_name="disk/%d" % disk_index,
8512 mode=disk[constants.IDISK_MODE],
8513 params=ld_params[0])
8514 disks.append(disk_dev)
8515 elif template_name == constants.DT_DRBD8:
8516 drbd_params, data_params, meta_params = ld_params
8517 if len(secondary_nodes) != 1:
8518 raise errors.ProgrammerError("Wrong template configuration")
8519 remote_node = secondary_nodes[0]
8520 minors = lu.cfg.AllocateDRBDMinor(
8521 [primary_node, remote_node] * len(disk_info), instance_name)
8524 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8525 for i in range(disk_count)]):
8526 names.append(lv_prefix + "_data")
8527 names.append(lv_prefix + "_meta")
8528 for idx, disk in enumerate(disk_info):
8529 disk_index = idx + base_index
8530 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8531 data_vg = disk.get(constants.IDISK_VG, vgname)
8532 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8533 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8534 disk[constants.IDISK_SIZE],
8536 names[idx * 2:idx * 2 + 2],
8537 "disk/%d" % disk_index,
8538 minors[idx * 2], minors[idx * 2 + 1],
8539 drbd_params, data_params, meta_params)
8540 disk_dev.mode = disk[constants.IDISK_MODE]
8541 disks.append(disk_dev)
8542 elif template_name == constants.DT_FILE:
8543 if len(secondary_nodes) != 0:
8544 raise errors.ProgrammerError("Wrong template configuration")
8546 opcodes.RequireFileStorage()
8548 for idx, disk in enumerate(disk_info):
8549 disk_index = idx + base_index
8550 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8551 size=disk[constants.IDISK_SIZE],
8552 iv_name="disk/%d" % disk_index,
8553 logical_id=(file_driver,
8554 "%s/disk%d" % (file_storage_dir,
8556 mode=disk[constants.IDISK_MODE],
8557 params=ld_params[0])
8558 disks.append(disk_dev)
8559 elif template_name == constants.DT_SHARED_FILE:
8560 if len(secondary_nodes) != 0:
8561 raise errors.ProgrammerError("Wrong template configuration")
8563 opcodes.RequireSharedFileStorage()
8565 for idx, disk in enumerate(disk_info):
8566 disk_index = idx + base_index
8567 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8568 size=disk[constants.IDISK_SIZE],
8569 iv_name="disk/%d" % disk_index,
8570 logical_id=(file_driver,
8571 "%s/disk%d" % (file_storage_dir,
8573 mode=disk[constants.IDISK_MODE],
8574 params=ld_params[0])
8575 disks.append(disk_dev)
8576 elif template_name == constants.DT_BLOCK:
8577 if len(secondary_nodes) != 0:
8578 raise errors.ProgrammerError("Wrong template configuration")
8580 for idx, disk in enumerate(disk_info):
8581 disk_index = idx + base_index
8582 disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
8583 size=disk[constants.IDISK_SIZE],
8584 logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
8585 disk[constants.IDISK_ADOPT]),
8586 iv_name="disk/%d" % disk_index,
8587 mode=disk[constants.IDISK_MODE],
8588 params=ld_params[0])
8589 disks.append(disk_dev)
8592 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
8596 def _GetInstanceInfoText(instance):
8597 """Compute the text that should be added to the disk's metadata.
8600 return "originstname+%s" % instance.name
8603 def _CalcEta(time_taken, written, total_size):
8604 """Calculates the ETA based on size written and total size.
8606 @param time_taken: The time taken so far
8607 @param written: amount written so far
8608 @param total_size: The total size of data to be written
8609 @return: The remaining time in seconds
8612 avg_time = time_taken / float(written)
8613 return (total_size - written) * avg_time
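# Worked example (illustrative): after 30s spent writing 512 MiB of a total of
# 2048 MiB, the average time per MiB is 30.0 / 512, so the remaining 1536 MiB
# are estimated at (2048 - 512) * (30.0 / 512):
#
#   >>> _CalcEta(30.0, 512, 2048)
#   90.0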
8616 def _WipeDisks(lu, instance):
8617 """Wipes instance disks.
8619 @type lu: L{LogicalUnit}
8620 @param lu: the logical unit on whose behalf we execute
8621 @type instance: L{objects.Instance}
8622 @param instance: the instance whose disks we should create
8623 @return: the success of the wipe
8626 node = instance.primary_node
8628 for device in instance.disks:
8629 lu.cfg.SetDiskID(device, node)
8631 logging.info("Pause sync of instance %s disks", instance.name)
8632 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
8634 for idx, success in enumerate(result.payload):
8636 logging.warn("pause-sync of instance %s for disk %d failed",
8640 for idx, device in enumerate(instance.disks):
8641 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
8642 # MAX_WIPE_CHUNK at max
8643 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8644 constants.MIN_WIPE_CHUNK_PERCENT)
8645 # we _must_ make this an int, otherwise rounding errors will
8647 wipe_chunk_size = int(wipe_chunk_size)
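# Worked example (the constant values here are assumptions for illustration:
# MAX_WIPE_CHUNK = 1024 MiB, MIN_WIPE_CHUNK_PERCENT = 10): a 5000 MiB disk
# gives min(1024, 5000 / 100.0 * 10) = 500 MiB chunks, while a 20480 MiB disk
# would be capped at the 1024 MiB maximum.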
8649 lu.LogInfo("* Wiping disk %d", idx)
8650 logging.info("Wiping disk %d for instance %s, node %s using"
8651 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8656 start_time = time.time()
8658 while offset < size:
8659 wipe_size = min(wipe_chunk_size, size - offset)
8660 logging.debug("Wiping disk %d, offset %s, chunk %s",
8661 idx, offset, wipe_size)
8662 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
8663 result.Raise("Could not wipe disk %d at offset %d for size %d" %
8664 (idx, offset, wipe_size))
8667 if now - last_output >= 60:
8668 eta = _CalcEta(now - start_time, offset, size)
8669 lu.LogInfo(" - done: %.1f%% ETA: %s" %
8670 (offset / float(size) * 100, utils.FormatSeconds(eta)))
8673 logging.info("Resume sync of instance %s disks", instance.name)
8675 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
8677 for idx, success in enumerate(result.payload):
8679 lu.LogWarning("Resume sync of disk %d failed, please have a"
8680 " look at the status and troubleshoot the issue", idx)
8681 logging.warn("resume-sync of instance %s for disk %d failed",
8685 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8686 """Create all disks for an instance.
8688 This abstracts away some work from AddInstance.
8690 @type lu: L{LogicalUnit}
8691 @param lu: the logical unit on whose behalf we execute
8692 @type instance: L{objects.Instance}
8693 @param instance: the instance whose disks we should create
8695 @param to_skip: list of indices to skip
8696 @type target_node: string
8697 @param target_node: if passed, overrides the target node for creation
8699 @return: the success of the creation
8702 info = _GetInstanceInfoText(instance)
8703 if target_node is None:
8704 pnode = instance.primary_node
8705 all_nodes = instance.all_nodes
8710 if instance.disk_template in constants.DTS_FILEBASED:
8711 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8712 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8714 result.Raise("Failed to create directory '%s' on"
8715 " node %s" % (file_storage_dir, pnode))
8717 # Note: this needs to be kept in sync with adding of disks in
8718 # LUInstanceSetParams
8719 for idx, device in enumerate(instance.disks):
8720 if to_skip and idx in to_skip:
8722 logging.info("Creating volume %s for instance %s",
8723 device.iv_name, instance.name)
8725 for node in all_nodes:
8726 f_create = node == pnode
8727 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
8730 def _RemoveDisks(lu, instance, target_node=None):
8731 """Remove all disks for an instance.
8733 This abstracts away some work from `AddInstance()` and
8734 `RemoveInstance()`. Note that in case some of the devices couldn't
8735 be removed, the removal will continue with the other ones (compare
8736 with `_CreateDisks()`).
8738 @type lu: L{LogicalUnit}
8739 @param lu: the logical unit on whose behalf we execute
8740 @type instance: L{objects.Instance}
8741 @param instance: the instance whose disks we should remove
8742 @type target_node: string
8743 @param target_node: used to override the node on which to remove the disks
8745 @return: the success of the removal
8748 logging.info("Removing block devices for instance %s", instance.name)
8751 for device in instance.disks:
8753 edata = [(target_node, device)]
8755 edata = device.ComputeNodeTree(instance.primary_node)
8756 for node, disk in edata:
8757 lu.cfg.SetDiskID(disk, node)
8758 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
8760 lu.LogWarning("Could not remove block device %s on node %s,"
8761 " continuing anyway: %s", device.iv_name, node, msg)
8764 # if this is a DRBD disk, return its port to the pool
8765 if device.dev_type in constants.LDS_DRBD:
8766 tcp_port = device.logical_id[2]
8767 lu.cfg.AddTcpUdpPort(tcp_port)
8769 if instance.disk_template == constants.DT_FILE:
8770 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8774 tgt = instance.primary_node
8775 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
8777 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
8778 file_storage_dir, instance.primary_node, result.fail_msg)
8784 def _ComputeDiskSizePerVG(disk_template, disks):
8785 """Compute disk size requirements in the volume group
8788 def _compute(disks, payload):
8789 """Universal algorithm.
8794 vgs[disk[constants.IDISK_VG]] = \
8795 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
8799 # Required free disk space as a function of disk and swap space
8801 constants.DT_DISKLESS: {},
8802 constants.DT_PLAIN: _compute(disks, 0),
8803 # 128 MB are added for drbd metadata for each disk
8804 constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
8805 constants.DT_FILE: {},
8806 constants.DT_SHARED_FILE: {},
8809 if disk_template not in req_size_dict:
8810 raise errors.ProgrammerError("Disk template '%s' size requirement"
8811 " is unknown" % disk_template)
8813 return req_size_dict[disk_template]
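# Illustrative call (input values hypothetical): two DRBD8 disks of 1024 MiB
# in volume groups "xenvg" and "fastvg" each get charged their data size plus
# DRBD_META_SIZE of metadata:
#
#   _ComputeDiskSizePerVG(constants.DT_DRBD8,
#                         [{constants.IDISK_VG: "xenvg",
#                           constants.IDISK_SIZE: 1024},
#                          {constants.IDISK_VG: "fastvg",
#                           constants.IDISK_SIZE: 1024}])
#   -> {"xenvg": 1024 + DRBD_META_SIZE, "fastvg": 1024 + DRBD_META_SIZE}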
8816 def _ComputeDiskSize(disk_template, disks):
8817 """Compute disk size requirements in the volume group
8820 # Required free disk space as a function of disk and swap space
8822 constants.DT_DISKLESS: None,
8823 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
8824 # 128 MB are added for drbd metadata for each disk
8826 sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
8827 constants.DT_FILE: None,
8828 constants.DT_SHARED_FILE: 0,
8829 constants.DT_BLOCK: 0,
8832 if disk_template not in req_size_dict:
8833 raise errors.ProgrammerError("Disk template '%s' size requirement"
8834 " is unknown" % disk_template)
8836 return req_size_dict[disk_template]
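# Illustrative call (input values hypothetical): for two 1024 MiB disks,
# DT_PLAIN needs 2048 MiB, while DT_DRBD8 needs 2 * (1024 + DRBD_META_SIZE)
# to account for one metadata device per disk; DT_DISKLESS and DT_FILE have
# no volume group requirement at all (None).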
8839 def _FilterVmNodes(lu, nodenames):
8840 """Filters out non-vm_capable nodes from a list.
8842 @type lu: L{LogicalUnit}
8843 @param lu: the logical unit for which we check
8844 @type nodenames: list
8845 @param nodenames: the list of nodes on which we should check
8847 @return: the list of vm-capable nodes
8850 vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
8851 return [name for name in nodenames if name not in vm_nodes]
8854 def _CheckHVParams(lu, nodenames, hvname, hvparams):
8855 """Hypervisor parameter validation.
8857 This function abstracts the hypervisor parameter validation to be
8858 used in both instance create and instance modify.
8860 @type lu: L{LogicalUnit}
8861 @param lu: the logical unit for which we check
8862 @type nodenames: list
8863 @param nodenames: the list of nodes on which we should check
8864 @type hvname: string
8865 @param hvname: the name of the hypervisor we should use
8866 @type hvparams: dict
8867 @param hvparams: the parameters which we need to check
8868 @raise errors.OpPrereqError: if the parameters are not valid
8871 nodenames = _FilterVmNodes(lu, nodenames)
8873 cluster = lu.cfg.GetClusterInfo()
8874 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
8876 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
8877 for node in nodenames:
8881 info.Raise("Hypervisor parameter validation failed on node %s" % node)
8884 def _CheckOSParams(lu, required, nodenames, osname, osparams):
8885 """OS parameters validation.
8887 @type lu: L{LogicalUnit}
8888 @param lu: the logical unit for which we check
8889 @type required: boolean
8890 @param required: whether the validation should fail if the OS is not found
8892 @type nodenames: list
8893 @param nodenames: the list of nodes on which we should check
8894 @type osname: string
8895 @param osname: the name of the OS we should use
8896 @type osparams: dict
8897 @param osparams: the parameters which we need to check
8898 @raise errors.OpPrereqError: if the parameters are not valid
8901 nodenames = _FilterVmNodes(lu, nodenames)
8902 result = lu.rpc.call_os_validate(nodenames, required, osname,
8903 [constants.OS_VALIDATE_PARAMETERS],
8905 for node, nres in result.items():
8906 # we don't check for offline cases since this should be run only
8907 # against the master node and/or an instance's nodes
8908 nres.Raise("OS Parameters validation failed on node %s" % node)
8909 if not nres.payload:
8910 lu.LogInfo("OS %s not found on node %s, validation skipped",
8914 class LUInstanceCreate(LogicalUnit):
8915 """Create an instance.
8918 HPATH = "instance-add"
8919 HTYPE = constants.HTYPE_INSTANCE
8922 def CheckArguments(self):
8926 # do not require name_check to ease forward/backward compatibility
8928 if self.op.no_install and self.op.start:
8929 self.LogInfo("No-installation mode selected, disabling startup")
8930 self.op.start = False
8931 # validate/normalize the instance name
8932 self.op.instance_name = \
8933 netutils.Hostname.GetNormalizedName(self.op.instance_name)
8935 if self.op.ip_check and not self.op.name_check:
8936 # TODO: make the ip check more flexible and not depend on the name check
8937 raise errors.OpPrereqError("Cannot do IP address check without a name"
8938 " check", errors.ECODE_INVAL)
8940 # check nics' parameter names
8941 for nic in self.op.nics:
8942 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8944 # check disks: parameter names and consistent adopt/no-adopt strategy
8945 has_adopt = has_no_adopt = False
8946 for disk in self.op.disks:
8947 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8948 if constants.IDISK_ADOPT in disk:
8952 if has_adopt and has_no_adopt:
8953 raise errors.OpPrereqError("Either all disks are adopted or none is",
8956 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8957 raise errors.OpPrereqError("Disk adoption is not supported for the"
8958 " '%s' disk template" %
8959 self.op.disk_template,
8961 if self.op.iallocator is not None:
8962 raise errors.OpPrereqError("Disk adoption not allowed with an"
8963 " iallocator script", errors.ECODE_INVAL)
8964 if self.op.mode == constants.INSTANCE_IMPORT:
8965 raise errors.OpPrereqError("Disk adoption not allowed for"
8966 " instance import", errors.ECODE_INVAL)
8968 if self.op.disk_template in constants.DTS_MUST_ADOPT:
8969 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8970 " but no 'adopt' parameter given" %
8971 self.op.disk_template,
8974 self.adopt_disks = has_adopt
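# Example of a consistent adoption request (names hypothetical): every disk
# names a device to adopt, e.g.
#   disks=[{constants.IDISK_SIZE: 10240, constants.IDISK_ADOPT: "lv0"},
#          {constants.IDISK_SIZE: 10240, constants.IDISK_ADOPT: "lv1"}]
# Mixing disks with and without IDISK_ADOPT is rejected above.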
8976 # instance name verification
8977 if self.op.name_check:
8978 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8979 self.op.instance_name = self.hostname1.name
8980 # used in CheckPrereq for ip ping check
8981 self.check_ip = self.hostname1.ip
8983 self.check_ip = None
8985 # file storage checks
8986 if (self.op.file_driver and
8987 self.op.file_driver not in constants.FILE_DRIVER):
8988 raise errors.OpPrereqError("Invalid file driver name '%s'" %
8989 self.op.file_driver, errors.ECODE_INVAL)
8991 if self.op.disk_template == constants.DT_FILE:
8992 opcodes.RequireFileStorage()
8993 elif self.op.disk_template == constants.DT_SHARED_FILE:
8994 opcodes.RequireSharedFileStorage()
8996 ### Node/iallocator related checks
8997 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8999 if self.op.pnode is not None:
9000 if self.op.disk_template in constants.DTS_INT_MIRROR:
9001 if self.op.snode is None:
9002 raise errors.OpPrereqError("The networked disk templates need"
9003 " a mirror node", errors.ECODE_INVAL)
9005 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9007 self.op.snode = None
9009 self._cds = _GetClusterDomainSecret()
9011 if self.op.mode == constants.INSTANCE_IMPORT:
9012 # On import force_variant must be True, because if we forced it at
9013 # initial install, our only chance when importing it back is that it works again
9015 self.op.force_variant = True
9017 if self.op.no_install:
9018 self.LogInfo("No-installation mode has no effect during import")
9020 elif self.op.mode == constants.INSTANCE_CREATE:
9021 if self.op.os_type is None:
9022 raise errors.OpPrereqError("No guest OS specified",
9024 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9025 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9026 " installation" % self.op.os_type,
9028 if self.op.disk_template is None:
9029 raise errors.OpPrereqError("No disk template specified",
9032 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9033 # Check handshake to ensure both clusters have the same domain secret
9034 src_handshake = self.op.source_handshake
9035 if not src_handshake:
9036 raise errors.OpPrereqError("Missing source handshake",
9039 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9042 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9045 # Load and check source CA
9046 self.source_x509_ca_pem = self.op.source_x509_ca
9047 if not self.source_x509_ca_pem:
9048 raise errors.OpPrereqError("Missing source X509 CA",
9052 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9054 except OpenSSL.crypto.Error, err:
9055 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9056 (err, ), errors.ECODE_INVAL)
9058 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9059 if errcode is not None:
9060 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9063 self.source_x509_ca = cert
9065 src_instance_name = self.op.source_instance_name
9066 if not src_instance_name:
9067 raise errors.OpPrereqError("Missing source instance name",
9070 self.source_instance_name = \
9071 netutils.GetHostname(name=src_instance_name).name
9074 raise errors.OpPrereqError("Invalid instance creation mode %r" %
9075 self.op.mode, errors.ECODE_INVAL)
9077 def ExpandNames(self):
9078 """ExpandNames for CreateInstance.
9080 Figure out the right locks for instance creation.
9083 self.needed_locks = {}
9085 instance_name = self.op.instance_name
9086 # this is just a preventive check, but someone might still add this
9087 # instance in the meantime, and creation will fail at lock-add time
9088 if instance_name in self.cfg.GetInstanceList():
9089 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9090 instance_name, errors.ECODE_EXISTS)
9092 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9094 if self.op.iallocator:
9095 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9096 # specifying a group on instance creation and then selecting nodes from
9098 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9099 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9101 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9102 nodelist = [self.op.pnode]
9103 if self.op.snode is not None:
9104 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9105 nodelist.append(self.op.snode)
9106 self.needed_locks[locking.LEVEL_NODE] = nodelist
9107 # Lock resources of instance's primary and secondary nodes (copy to
9108 # prevent accidental modification)
9109 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9111 # in case of import lock the source node too
9112 if self.op.mode == constants.INSTANCE_IMPORT:
9113 src_node = self.op.src_node
9114 src_path = self.op.src_path
9116 if src_path is None:
9117 self.op.src_path = src_path = self.op.instance_name
9119 if src_node is None:
9120 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9121 self.op.src_node = None
9122 if os.path.isabs(src_path):
9123 raise errors.OpPrereqError("Importing an instance from a path"
9124 " requires a source node option",
9127 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9128 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9129 self.needed_locks[locking.LEVEL_NODE].append(src_node)
9130 if not os.path.isabs(src_path):
9131 self.op.src_path = src_path = \
9132 utils.PathJoin(constants.EXPORT_DIR, src_path)
9134 def _RunAllocator(self):
9135 """Run the allocator based on input opcode.
9138 nics = [n.ToDict() for n in self.nics]
9139 ial = IAllocator(self.cfg, self.rpc,
9140 mode=constants.IALLOCATOR_MODE_ALLOC,
9141 name=self.op.instance_name,
9142 disk_template=self.op.disk_template,
9145 vcpus=self.be_full[constants.BE_VCPUS],
9146 memory=self.be_full[constants.BE_MAXMEM],
9149 hypervisor=self.op.hypervisor,
9152 ial.Run(self.op.iallocator)
9155 raise errors.OpPrereqError("Can't compute nodes using"
9156 " iallocator '%s': %s" %
9157 (self.op.iallocator, ial.info),
9159 if len(ial.result) != ial.required_nodes:
9160 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9161 " of nodes (%s), required %s" %
9162 (self.op.iallocator, len(ial.result),
9163 ial.required_nodes), errors.ECODE_FAULT)
9164 self.op.pnode = ial.result[0]
9165 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9166 self.op.instance_name, self.op.iallocator,
9167 utils.CommaJoin(ial.result))
9168 if ial.required_nodes == 2:
9169 self.op.snode = ial.result[1]
9171 def BuildHooksEnv(self):
9174 This runs on master, primary and secondary nodes of the instance.
9178 "ADD_MODE": self.op.mode,
9180 if self.op.mode == constants.INSTANCE_IMPORT:
9181 env["SRC_NODE"] = self.op.src_node
9182 env["SRC_PATH"] = self.op.src_path
9183 env["SRC_IMAGES"] = self.src_images
9185 env.update(_BuildInstanceHookEnv(
9186 name=self.op.instance_name,
9187 primary_node=self.op.pnode,
9188 secondary_nodes=self.secondaries,
9189 status=self.op.start,
9190 os_type=self.op.os_type,
9191 minmem=self.be_full[constants.BE_MINMEM],
9192 maxmem=self.be_full[constants.BE_MAXMEM],
9193 vcpus=self.be_full[constants.BE_VCPUS],
9194 nics=_NICListToTuple(self, self.nics),
9195 disk_template=self.op.disk_template,
9196 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9197 for d in self.disks],
9200 hypervisor_name=self.op.hypervisor,
9206 def BuildHooksNodes(self):
9207 """Build hooks nodes.
9210 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9213 def _ReadExportInfo(self):
9214 """Reads the export information from disk.
9216 It will override the opcode source node and path with the actual
9217 information, if these two were not specified before.
9219 @return: the export information
9222 assert self.op.mode == constants.INSTANCE_IMPORT
9224 src_node = self.op.src_node
9225 src_path = self.op.src_path
9227 if src_node is None:
9228 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9229 exp_list = self.rpc.call_export_list(locked_nodes)
9231 for node in exp_list:
9232 if exp_list[node].fail_msg:
9234 if src_path in exp_list[node].payload:
9236 self.op.src_node = src_node = node
9237 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
9241 raise errors.OpPrereqError("No export found for relative path %s" %
9242 src_path, errors.ECODE_INVAL)
9244 _CheckNodeOnline(self, src_node)
9245 result = self.rpc.call_export_info(src_node, src_path)
9246 result.Raise("No export or invalid export found in dir %s" % src_path)
9248 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9249 if not export_info.has_section(constants.INISECT_EXP):
9250 raise errors.ProgrammerError("Corrupted export config",
9251 errors.ECODE_ENVIRON)
9253 ei_version = export_info.get(constants.INISECT_EXP, "version")
9254 if int(ei_version) != constants.EXPORT_VERSION:
9255 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9256 (ei_version, constants.EXPORT_VERSION),
9257 errors.ECODE_ENVIRON)
9260 def _ReadExportParams(self, einfo):
9261 """Use export parameters as defaults.
9263 If the opcode doesn't specify (i.e. override) some instance
9264 parameters, try to use them from the export information, if available.
9268 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9270 if self.op.disk_template is None:
9271 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9272 self.op.disk_template = einfo.get(constants.INISECT_INS,
9274 if self.op.disk_template not in constants.DISK_TEMPLATES:
9275 raise errors.OpPrereqError("Disk template specified in configuration"
9276 " file is not one of the allowed values:"
9277 " %s" % " ".join(constants.DISK_TEMPLATES))
9279 raise errors.OpPrereqError("No disk template specified and the export"
9280 " is missing the disk_template information",
9283 if not self.op.disks:
9285 # TODO: import the disk iv_name too
9286 for idx in range(constants.MAX_DISKS):
9287 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9288 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9289 disks.append({constants.IDISK_SIZE: disk_sz})
9290 self.op.disks = disks
9291 if not disks and self.op.disk_template != constants.DT_DISKLESS:
9292 raise errors.OpPrereqError("No disk info specified and the export"
9293 " is missing the disk information",
9296 if not self.op.nics:
9297 nics = []
9298 for idx in range(constants.MAX_NICS):
9299 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9300 ndict = {}
9301 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9302 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9303 ndict[name] = v
9304 nics.append(ndict)
9305 else:
9306 break
9307 self.op.nics = nics
9309 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9310 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9312 if (self.op.hypervisor is None and
9313 einfo.has_option(constants.INISECT_INS, "hypervisor")):
9314 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9316 if einfo.has_section(constants.INISECT_HYP):
9317 # use the export parameters but do not override the ones
9318 # specified by the user
9319 for name, value in einfo.items(constants.INISECT_HYP):
9320 if name not in self.op.hvparams:
9321 self.op.hvparams[name] = value
9323 if einfo.has_section(constants.INISECT_BEP):
9324 # use the parameters, without overriding
9325 for name, value in einfo.items(constants.INISECT_BEP):
9326 if name not in self.op.beparams:
9327 self.op.beparams[name] = value
9328 # Compatibility for the old "memory" be param
9329 if name == constants.BE_MEMORY:
9330 if constants.BE_MAXMEM not in self.op.beparams:
9331 self.op.beparams[constants.BE_MAXMEM] = value
9332 if constants.BE_MINMEM not in self.op.beparams:
9333 self.op.beparams[constants.BE_MINMEM] = value
9334 else:
9335 # try to read the parameters old style, from the main section
9336 for name in constants.BES_PARAMETERS:
9337 if (name not in self.op.beparams and
9338 einfo.has_option(constants.INISECT_INS, name)):
9339 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9341 if einfo.has_section(constants.INISECT_OSP):
9342 # use the parameters, without overriding
9343 for name, value in einfo.items(constants.INISECT_OSP):
9344 if name not in self.op.osparams:
9345 self.op.osparams[name] = value
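# Worked example of the precedence rules above (made-up values): if the
# export's backend section only contains the legacy "memory = 512" and the
# opcode specifies no memory settings, the compatibility shim seeds both new
# parameters from it:
#
#   self.op.beparams == {}           # before
#   self.op.beparams == {"memory": "512", "maxmem": "512", "minmem": "512"}
#
# Values given explicitly in the opcode always win, since the export is only
# consulted for names not already present in self.op.*params.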
9347 def _RevertToDefaults(self, cluster):
9348 """Revert the instance parameters to the default values.
9352 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9353 for name in self.op.hvparams.keys():
9354 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9355 del self.op.hvparams[name]
9357 be_defs = cluster.SimpleFillBE({})
9358 for name in self.op.beparams.keys():
9359 if name in be_defs and be_defs[name] == self.op.beparams[name]:
9360 del self.op.beparams[name]
9362 nic_defs = cluster.SimpleFillNIC({})
9363 for nic in self.op.nics:
9364 for name in constants.NICS_PARAMETERS:
9365 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9366 del nic[name]
9368 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9369 for name in self.op.osparams.keys():
9370 if name in os_defs and os_defs[name] == self.op.osparams[name]:
9371 del self.op.osparams[name]
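# Illustrative example (hypothetical defaults): with identify_defaults set, a
# value that merely repeats the cluster default is dropped again, so the new
# instance keeps tracking later changes to that default:
#
#   cluster.SimpleFillBE({})          # -> {"vcpus": 1, ...}
#   self.op.beparams == {"vcpus": 1}  # -> reverted to {}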
9373 def _CalculateFileStorageDir(self):
9374 """Calculate final instance file storage dir.
9377 # file storage dir calculation/check
9378 self.instance_file_storage_dir = None
9379 if self.op.disk_template in constants.DTS_FILEBASED:
9380 # build the full file storage dir path
9381 joinargs = []
9383 if self.op.disk_template == constants.DT_SHARED_FILE:
9384 get_fsd_fn = self.cfg.GetSharedFileStorageDir
9385 else:
9386 get_fsd_fn = self.cfg.GetFileStorageDir
9388 cfg_storagedir = get_fsd_fn()
9389 if not cfg_storagedir:
9390 raise errors.OpPrereqError("Cluster file storage dir not defined")
9391 joinargs.append(cfg_storagedir)
9393 if self.op.file_storage_dir is not None:
9394 joinargs.append(self.op.file_storage_dir)
9396 joinargs.append(self.op.instance_name)
9398 # pylint: disable=W0142
9399 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
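# Illustrative sketch (hypothetical paths): for a file-based disk template
# the final directory is <cluster storage dir>[/<op dir>]/<instance name>:
#
#   utils.PathJoin("/srv/ganeti/file-storage", "mydir", "inst1.example.com")
#   # -> "/srv/ganeti/file-storage/mydir/inst1.example.com"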
9401 def CheckPrereq(self): # pylint: disable=R0914
9402 """Check prerequisites.
9405 self._CalculateFileStorageDir()
9407 if self.op.mode == constants.INSTANCE_IMPORT:
9408 export_info = self._ReadExportInfo()
9409 self._ReadExportParams(export_info)
9411 if (not self.cfg.GetVGName() and
9412 self.op.disk_template not in constants.DTS_NOT_LVM):
9413 raise errors.OpPrereqError("Cluster does not support lvm-based"
9414 " instances", errors.ECODE_STATE)
9416 if (self.op.hypervisor is None or
9417 self.op.hypervisor == constants.VALUE_AUTO):
9418 self.op.hypervisor = self.cfg.GetHypervisorType()
9420 cluster = self.cfg.GetClusterInfo()
9421 enabled_hvs = cluster.enabled_hypervisors
9422 if self.op.hypervisor not in enabled_hvs:
9423 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9424 " cluster (%s)" % (self.op.hypervisor,
9425 ",".join(enabled_hvs)),
9428 # Check tag validity
9429 for tag in self.op.tags:
9430 objects.TaggableObject.ValidateTag(tag)
9432 # check hypervisor parameter syntax (locally)
9433 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9434 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9435 self.op.hvparams)
9436 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9437 hv_type.CheckParameterSyntax(filled_hvp)
9438 self.hv_full = filled_hvp
9439 # check that we don't specify global parameters on an instance
9440 _CheckGlobalHvParams(self.op.hvparams)
9442 # fill and remember the beparams dict
9443 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9444 for param, value in self.op.beparams.iteritems():
9445 if value == constants.VALUE_AUTO:
9446 self.op.beparams[param] = default_beparams[param]
9447 objects.UpgradeBeParams(self.op.beparams)
9448 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9449 self.be_full = cluster.SimpleFillBE(self.op.beparams)
9451 # build os parameters
9452 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9454 # now that hvp/bep are in final format, let's reset to defaults,
9455 # if requested
9456 if self.op.identify_defaults:
9457 self._RevertToDefaults(cluster)
9459 # NIC buildup
9460 self.nics = []
9461 for idx, nic in enumerate(self.op.nics):
9462 nic_mode_req = nic.get(constants.INIC_MODE, None)
9463 nic_mode = nic_mode_req
9464 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9465 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9467 # in routed mode, for the first nic, the default ip is 'auto'
9468 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9469 default_ip_mode = constants.VALUE_AUTO
9470 else:
9471 default_ip_mode = constants.VALUE_NONE
9473 # ip validity checks
9474 ip = nic.get(constants.INIC_IP, default_ip_mode)
9475 if ip is None or ip.lower() == constants.VALUE_NONE:
9476 nic_ip = None
9477 elif ip.lower() == constants.VALUE_AUTO:
9478 if not self.op.name_check:
9479 raise errors.OpPrereqError("IP address set to auto but name checks"
9480 " have been skipped",
9481 errors.ECODE_INVAL)
9482 nic_ip = self.hostname1.ip
9483 else:
9484 if not netutils.IPAddress.IsValid(ip):
9485 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9486 errors.ECODE_INVAL)
9487 nic_ip = ip
9489 # TODO: check the ip address for uniqueness
9490 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9491 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9492 errors.ECODE_INVAL)
9494 # MAC address verification
9495 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9496 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9497 mac = utils.NormalizeAndValidateMac(mac)
9499 try:
9500 self.cfg.ReserveMAC(mac, self.proc.GetECId())
9501 except errors.ReservationError:
9502 raise errors.OpPrereqError("MAC address %s already in use"
9503 " in cluster" % mac,
9504 errors.ECODE_NOTUNIQUE)
9506 # Build nic parameters
9507 link = nic.get(constants.INIC_LINK, None)
9508 if link == constants.VALUE_AUTO:
9509 link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9510 nicparams = {}
9511 if nic_mode_req:
9512 nicparams[constants.NIC_MODE] = nic_mode
9513 if link:
9514 nicparams[constants.NIC_LINK] = link
9516 check_params = cluster.SimpleFillNIC(nicparams)
9517 objects.NIC.CheckParameterSyntax(check_params)
9518 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9520 # disk checks/pre-build
9521 default_vg = self.cfg.GetVGName()
9522 self.disks = []
9523 for disk in self.op.disks:
9524 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9525 if mode not in constants.DISK_ACCESS_SET:
9526 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9527 mode, errors.ECODE_INVAL)
9528 size = disk.get(constants.IDISK_SIZE, None)
9529 if size is None:
9530 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9531 try:
9532 size = int(size)
9533 except (TypeError, ValueError):
9534 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9535 errors.ECODE_INVAL)
9537 data_vg = disk.get(constants.IDISK_VG, default_vg)
9538 new_disk = {
9539 constants.IDISK_SIZE: size,
9540 constants.IDISK_MODE: mode,
9541 constants.IDISK_VG: data_vg,
9542 }
9543 if constants.IDISK_METAVG in disk:
9544 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9545 if constants.IDISK_ADOPT in disk:
9546 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9547 self.disks.append(new_disk)
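# At this point every entry of self.disks is a normalized dict; for a 10 GiB
# read-write LVM disk it would look like (hypothetical values):
#
#   {constants.IDISK_SIZE: 10240,            # MiB
#    constants.IDISK_MODE: constants.DISK_RDWR,
#    constants.IDISK_VG: "xenvg"}
#
# with optional IDISK_METAVG/IDISK_ADOPT keys copied through as-is.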
9549 if self.op.mode == constants.INSTANCE_IMPORT:
9550 disk_images = []
9551 for idx in range(len(self.disks)):
9552 option = "disk%d_dump" % idx
9553 if export_info.has_option(constants.INISECT_INS, option):
9554 # FIXME: are the old os-es, disk sizes, etc. useful?
9555 export_name = export_info.get(constants.INISECT_INS, option)
9556 image = utils.PathJoin(self.op.src_path, export_name)
9557 disk_images.append(image)
9558 else:
9559 disk_images.append(False)
9561 self.src_images = disk_images
9563 old_name = export_info.get(constants.INISECT_INS, "name")
9564 if self.op.instance_name == old_name:
9565 for idx, nic in enumerate(self.nics):
9566 if nic.mac == constants.VALUE_AUTO:
9567 nic_mac_ini = "nic%d_mac" % idx
9568 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9570 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9572 # ip ping checks (we use the same ip that was resolved in ExpandNames)
9573 if self.op.ip_check:
9574 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9575 raise errors.OpPrereqError("IP %s of instance %s already in use" %
9576 (self.check_ip, self.op.instance_name),
9577 errors.ECODE_NOTUNIQUE)
9579 #### mac address generation
9580 # By generating here the mac address both the allocator and the hooks get
9581 # the real final mac address rather than the 'auto' or 'generate' value.
9582 # There is a race condition between the generation and the instance object
9583 # creation, which means that we know the mac is valid now, but we're not
9584 # sure it will be when we actually add the instance. If things go bad
9585 # adding the instance will abort because of a duplicate mac, and the
9586 # creation job will fail.
9587 for nic in self.nics:
9588 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9589 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9593 if self.op.iallocator is not None:
9594 self._RunAllocator()
9596 # Release all unneeded node locks
9597 _ReleaseLocks(self, locking.LEVEL_NODE,
9598 keep=filter(None, [self.op.pnode, self.op.snode,
9599 self.op.src_node]))
9601 #### node related checks
9603 # check primary node
9604 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9605 assert self.pnode is not None, \
9606 "Cannot retrieve locked node %s" % self.op.pnode
9607 if pnode.offline:
9608 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9609 pnode.name, errors.ECODE_STATE)
9610 if pnode.drained:
9611 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9612 pnode.name, errors.ECODE_STATE)
9613 if not pnode.vm_capable:
9614 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9615 " '%s'" % pnode.name, errors.ECODE_STATE)
9617 self.secondaries = []
9619 # mirror node verification
9620 if self.op.disk_template in constants.DTS_INT_MIRROR:
9621 if self.op.snode == pnode.name:
9622 raise errors.OpPrereqError("The secondary node cannot be the"
9623 " primary node", errors.ECODE_INVAL)
9624 _CheckNodeOnline(self, self.op.snode)
9625 _CheckNodeNotDrained(self, self.op.snode)
9626 _CheckNodeVmCapable(self, self.op.snode)
9627 self.secondaries.append(self.op.snode)
9629 snode = self.cfg.GetNodeInfo(self.op.snode)
9630 if pnode.group != snode.group:
9631 self.LogWarning("The primary and secondary nodes are in two"
9632 " different node groups; the disk parameters"
9633 " from the first disk's node group will be"
9636 nodenames = [pnode.name] + self.secondaries
9638 # Verify instance specs
9639 ispec = {
9640 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
9641 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
9642 constants.ISPEC_DISK_COUNT: len(self.disks),
9643 constants.ISPEC_DISK_SIZE: [disk[constants.IDISK_SIZE] for disk in self.disks],
9644 constants.ISPEC_NIC_COUNT: len(self.nics),
9645 }
9647 ipolicy = _CalculateGroupIPolicy(cluster, pnode.group)
9648 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
9649 if not self.op.ignore_ipolicy and res:
9650 raise errors.OpPrereqError(("Instance allocation to group %s violates"
9651 " policy: %s") % (pnode.group,
9652 utils.CommaJoin(res)),
9653 errors.ECODE_INVAL)
9655 # disk parameters (not customizable at instance or node level)
9656 # just use the primary node parameters, ignoring the secondary.
9657 self.diskparams = self.cfg.GetNodeGroup(pnode.group).diskparams
9659 if not self.adopt_disks:
9660 # Check lv size requirements, if not adopting
9661 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9662 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9664 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9665 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9666 disk[constants.IDISK_ADOPT])
9667 for disk in self.disks])
9668 if len(all_lvs) != len(self.disks):
9669 raise errors.OpPrereqError("Duplicate volume names given for adoption",
9670 errors.ECODE_INVAL)
9671 for lv_name in all_lvs:
9672 try:
9673 # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
9674 # to ReserveLV use the same syntax
9675 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9676 except errors.ReservationError:
9677 raise errors.OpPrereqError("LV named %s used by another instance" %
9678 lv_name, errors.ECODE_NOTUNIQUE)
9680 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9681 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9683 node_lvs = self.rpc.call_lv_list([pnode.name],
9684 vg_names.payload.keys())[pnode.name]
9685 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9686 node_lvs = node_lvs.payload
9688 delta = all_lvs.difference(node_lvs.keys())
9689 if delta:
9690 raise errors.OpPrereqError("Missing logical volume(s): %s" %
9691 utils.CommaJoin(delta),
9692 errors.ECODE_INVAL)
9693 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9694 if online_lvs:
9695 raise errors.OpPrereqError("Online logical volumes found, cannot"
9696 " adopt: %s" % utils.CommaJoin(online_lvs),
9697 errors.ECODE_STATE)
9698 # update the size of disk based on what is found
9699 for dsk in self.disks:
9700 dsk[constants.IDISK_SIZE] = \
9701 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9702 dsk[constants.IDISK_ADOPT])][0]))
9704 elif self.op.disk_template == constants.DT_BLOCK:
9705 # Normalize and de-duplicate device paths
9706 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9707 for disk in self.disks])
9708 if len(all_disks) != len(self.disks):
9709 raise errors.OpPrereqError("Duplicate disk names given for adoption",
9710 errors.ECODE_INVAL)
9711 baddisks = [d for d in all_disks
9712 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9713 if baddisks:
9714 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9715 " cannot be adopted" %
9716 (", ".join(baddisks),
9717 constants.ADOPTABLE_BLOCKDEV_ROOT),
9718 errors.ECODE_INVAL)
9720 node_disks = self.rpc.call_bdev_sizes([pnode.name],
9721 list(all_disks))[pnode.name]
9722 node_disks.Raise("Cannot get block device information from node %s" %
9723 pnode.name)
9724 node_disks = node_disks.payload
9725 delta = all_disks.difference(node_disks.keys())
9726 if delta:
9727 raise errors.OpPrereqError("Missing block device(s): %s" %
9728 utils.CommaJoin(delta),
9729 errors.ECODE_INVAL)
9730 for dsk in self.disks:
9731 dsk[constants.IDISK_SIZE] = \
9732 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
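# Sketch of the adoption flow above (payload layout as used by this code,
# names made up): for LV adoption the lv_list payload maps "vg/lv" to a
# tuple whose first element is the size in MiB and whose third element is
# the online flag, hence e.g.:
#
#   node_lvs["xenvg/mylv"]  # -> (10240.0, ..., False)
#
# and the declared disk size is overwritten with int(float(...)) of it; block
# device adoption does the same with the bdev_sizes payload keyed by path.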
9734 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
9736 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
9737 # check OS parameters (remotely)
9738 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
9740 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
9742 # memory check on primary node
9743 #TODO(dynmem): use MINMEM for checking
9744 if self.op.start:
9745 _CheckNodeFreeMemory(self, self.pnode.name,
9746 "creating instance %s" % self.op.instance_name,
9747 self.be_full[constants.BE_MAXMEM],
9748 self.op.hypervisor)
9750 self.dry_run_result = list(nodenames)
9752 def Exec(self, feedback_fn):
9753 """Create and add the instance to the cluster.
9756 instance = self.op.instance_name
9757 pnode_name = self.pnode.name
9759 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
9760 self.owned_locks(locking.LEVEL_NODE)), \
9761 "Node locks differ from node resource locks"
9763 ht_kind = self.op.hypervisor
9764 if ht_kind in constants.HTS_REQ_PORT:
9765 network_port = self.cfg.AllocatePort()
9766 else:
9767 network_port = None
9769 disks = _GenerateDiskTemplate(self,
9770 self.op.disk_template,
9771 instance, pnode_name,
9772 self.secondaries,
9773 self.disks,
9774 self.instance_file_storage_dir,
9775 self.op.file_driver,
9776 0,
9777 feedback_fn,
9778 self.diskparams)
9780 iobj = objects.Instance(name=instance, os=self.op.os_type,
9781 primary_node=pnode_name,
9782 nics=self.nics, disks=disks,
9783 disk_template=self.op.disk_template,
9784 admin_state=constants.ADMINST_DOWN,
9785 network_port=network_port,
9786 beparams=self.op.beparams,
9787 hvparams=self.op.hvparams,
9788 hypervisor=self.op.hypervisor,
9789 osparams=self.op.osparams,
9790 )
9792 if self.op.tags:
9793 for tag in self.op.tags:
9794 iobj.AddTag(tag)
9796 if self.adopt_disks:
9797 if self.op.disk_template == constants.DT_PLAIN:
9798 # rename LVs to the newly-generated names; we need to construct
9799 # 'fake' LV disks with the old data, plus the new unique_id
9800 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
9801 rename_to = []
9802 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
9803 rename_to.append(t_dsk.logical_id)
9804 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
9805 self.cfg.SetDiskID(t_dsk, pnode_name)
9806 result = self.rpc.call_blockdev_rename(pnode_name,
9807 zip(tmp_disks, rename_to))
9808 result.Raise("Failed to rename adopted LVs")
9809 else:
9810 feedback_fn("* creating instance disks...")
9811 try:
9812 _CreateDisks(self, iobj)
9813 except errors.OpExecError:
9814 self.LogWarning("Device creation failed, reverting...")
9815 try:
9816 _RemoveDisks(self, iobj)
9817 finally:
9818 self.cfg.ReleaseDRBDMinors(instance)
9819 raise
9821 feedback_fn("adding instance %s to cluster config" % instance)
9823 self.cfg.AddInstance(iobj, self.proc.GetECId())
9825 # Declare that we don't want to remove the instance lock anymore, as we've
9826 # added the instance to the config
9827 del self.remove_locks[locking.LEVEL_INSTANCE]
9829 if self.op.mode == constants.INSTANCE_IMPORT:
9830 # Release unused nodes
9831 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
9832 else:
9833 # Release all nodes
9834 _ReleaseLocks(self, locking.LEVEL_NODE)
9836 disk_abort = False
9837 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
9838 feedback_fn("* wiping instance disks...")
9839 try:
9840 _WipeDisks(self, iobj)
9841 except errors.OpExecError, err:
9842 logging.exception("Wiping disks failed")
9843 self.LogWarning("Wiping instance disks failed (%s)", err)
9844 disk_abort = True
9846 if disk_abort:
9847 # Something is already wrong with the disks, don't do anything else
9848 pass
9849 elif self.op.wait_for_sync:
9850 disk_abort = not _WaitForSync(self, iobj)
9851 elif iobj.disk_template in constants.DTS_INT_MIRROR:
9852 # make sure the disks are not degraded (still sync-ing is ok)
9853 feedback_fn("* checking mirrors status")
9854 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
9855 else:
9856 disk_abort = False
9858 if disk_abort:
9859 _RemoveDisks(self, iobj)
9860 self.cfg.RemoveInstance(iobj.name)
9861 # Make sure the instance lock gets removed
9862 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
9863 raise errors.OpExecError("There are some degraded disks for"
9864 " this instance")
9866 # Release all node resource locks
9867 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
9869 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
9870 if self.op.mode == constants.INSTANCE_CREATE:
9871 if not self.op.no_install:
9872 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
9873 not self.op.wait_for_sync)
9874 if pause_sync:
9875 feedback_fn("* pausing disk sync to install instance OS")
9876 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9877 (iobj.disks, iobj), True)
9878 for idx, success in enumerate(result.payload):
9879 if not success:
9880 logging.warn("pause-sync of instance %s for disk %d failed",
9881 instance, idx)
9883 feedback_fn("* running the instance OS create scripts...")
9884 # FIXME: pass debug option from opcode to backend
9885 os_add_result = \
9886 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
9887 self.op.debug_level)
9888 if pause_sync:
9889 feedback_fn("* resuming disk sync")
9890 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9891 (iobj.disks, iobj), False)
9892 for idx, success in enumerate(result.payload):
9893 if not success:
9894 logging.warn("resume-sync of instance %s for disk %d failed",
9895 instance, idx)
9897 os_add_result.Raise("Could not add os for instance %s"
9898 " on node %s" % (instance, pnode_name))
9900 elif self.op.mode == constants.INSTANCE_IMPORT:
9901 feedback_fn("* running the instance OS import scripts...")
9903 transfers = []
9905 for idx, image in enumerate(self.src_images):
9906 if not image:
9907 continue
9909 # FIXME: pass debug option from opcode to backend
9910 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
9911 constants.IEIO_FILE, (image, ),
9912 constants.IEIO_SCRIPT,
9913 (iobj.disks[idx], idx),
9914 None)
9915 transfers.append(dt)
9917 import_result = \
9918 masterd.instance.TransferInstanceData(self, feedback_fn,
9919 self.op.src_node, pnode_name,
9920 self.pnode.secondary_ip,
9921 iobj, transfers)
9922 if not compat.all(import_result):
9923 self.LogWarning("Some disks for instance %s on node %s were not"
9924 " imported successfully" % (instance, pnode_name))
9926 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9927 feedback_fn("* preparing remote import...")
9928 # The source cluster will stop the instance before attempting to make a
9929 # connection. In some cases stopping an instance can take a long time,
9930 # hence the shutdown timeout is added to the connection timeout.
9931 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
9932 self.op.source_shutdown_timeout)
9933 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9935 assert iobj.primary_node == self.pnode.name
9936 disk_results = \
9937 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
9938 self.source_x509_ca,
9939 self._cds, timeouts)
9940 if not compat.all(disk_results):
9941 # TODO: Should the instance still be started, even if some disks
9942 # failed to import (valid for local imports, too)?
9943 self.LogWarning("Some disks for instance %s on node %s were not"
9944 " imported successfully" % (instance, pnode_name))
9946 # Run rename script on newly imported instance
9947 assert iobj.name == instance
9948 feedback_fn("Running rename script for %s" % instance)
9949 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
9950 self.source_instance_name,
9951 self.op.debug_level)
9952 if result.fail_msg:
9953 self.LogWarning("Failed to run rename script for %s on node"
9954 " %s: %s" % (instance, pnode_name, result.fail_msg))
9956 else:
9957 # also checked in the prereq part
9958 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
9959 % self.op.mode)
9961 assert not self.owned_locks(locking.LEVEL_NODE_RES)
9963 if self.op.start:
9964 iobj.admin_state = constants.ADMINST_UP
9965 self.cfg.Update(iobj, feedback_fn)
9966 logging.info("Starting instance %s on node %s", instance, pnode_name)
9967 feedback_fn("* starting instance...")
9968 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
9969 False)
9970 result.Raise("Could not start instance")
9972 return list(iobj.all_nodes)
9975 class LUInstanceConsole(NoHooksLU):
9976 """Connect to an instance's console.
9978 This is somewhat special in that it returns the command line that
9979 you need to run on the master node in order to connect to the
9980 console.
9982 """
9983 REQ_BGL = False
9985 def ExpandNames(self):
9986 self.share_locks = _ShareAll()
9987 self._ExpandAndLockInstance()
9989 def CheckPrereq(self):
9990 """Check prerequisites.
9992 This checks that the instance is in the cluster.
9994 """
9995 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9996 assert self.instance is not None, \
9997 "Cannot retrieve locked instance %s" % self.op.instance_name
9998 _CheckNodeOnline(self, self.instance.primary_node)
10000 def Exec(self, feedback_fn):
10001 """Connect to the console of an instance
10004 instance = self.instance
10005 node = instance.primary_node
10007 node_insts = self.rpc.call_instance_list([node],
10008 [instance.hypervisor])[node]
10009 node_insts.Raise("Can't get node information from %s" % node)
10011 if instance.name not in node_insts.payload:
10012 if instance.admin_state == constants.ADMINST_UP:
10013 state = constants.INSTST_ERRORDOWN
10014 elif instance.admin_state == constants.ADMINST_DOWN:
10015 state = constants.INSTST_ADMINDOWN
10016 else:
10017 state = constants.INSTST_ADMINOFFLINE
10018 raise errors.OpExecError("Instance %s is not running (state %s)" %
10019 (instance.name, state))
10021 logging.debug("Connecting to console of %s on %s", instance.name, node)
10023 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10026 def _GetInstanceConsole(cluster, instance):
10027 """Returns console information for an instance.
10029 @type cluster: L{objects.Cluster}
10030 @type instance: L{objects.Instance}
10031 @rtype: dict
10033 """
10034 hyper = hypervisor.GetHypervisor(instance.hypervisor)
10035 # beparams and hvparams are passed separately, to avoid editing the
10036 # instance and then saving the defaults in the instance itself.
10037 hvparams = cluster.FillHV(instance)
10038 beparams = cluster.FillBE(instance)
10039 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10041 assert console.instance == instance.name
10042 assert console.Validate()
10044 return console.ToDict()
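# Illustrative sketch (field names as defined by objects.InstanceConsole, an
# assumption here): the dict returned above carries the connection details,
# e.g. for an SSH-based console:
#
#   {"instance": "inst1.example.com", "kind": "ssh",
#    "host": "node1.example.com", "command": ["ssh", "..."]}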
10047 class LUInstanceReplaceDisks(LogicalUnit):
10048 """Replace the disks of an instance.
10051 HPATH = "mirrors-replace"
10052 HTYPE = constants.HTYPE_INSTANCE
10053 REQ_BGL = False
10055 def CheckArguments(self):
10056 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
10057 self.op.iallocator)
10059 def ExpandNames(self):
10060 self._ExpandAndLockInstance()
10062 assert locking.LEVEL_NODE not in self.needed_locks
10063 assert locking.LEVEL_NODE_RES not in self.needed_locks
10064 assert locking.LEVEL_NODEGROUP not in self.needed_locks
10066 assert self.op.iallocator is None or self.op.remote_node is None, \
10067 "Conflicting options"
10069 if self.op.remote_node is not None:
10070 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10072 # Warning: do not remove the locking of the new secondary here
10073 # unless DRBD8.AddChildren is changed to work in parallel;
10074 # currently it doesn't since parallel invocations of
10075 # FindUnusedMinor will conflict
10076 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10077 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10078 else:
10079 self.needed_locks[locking.LEVEL_NODE] = []
10080 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10082 if self.op.iallocator is not None:
10083 # iallocator will select a new node in the same group
10084 self.needed_locks[locking.LEVEL_NODEGROUP] = []
10086 self.needed_locks[locking.LEVEL_NODE_RES] = []
10088 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10089 self.op.iallocator, self.op.remote_node,
10090 self.op.disks, False, self.op.early_release)
10092 self.tasklets = [self.replacer]
10094 def DeclareLocks(self, level):
10095 if level == locking.LEVEL_NODEGROUP:
10096 assert self.op.remote_node is None
10097 assert self.op.iallocator is not None
10098 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10100 self.share_locks[locking.LEVEL_NODEGROUP] = 1
10101 # Lock all groups used by instance optimistically; this requires going
10102 # via the node before it's locked, requiring verification later on
10103 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10104 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10106 elif level == locking.LEVEL_NODE:
10107 if self.op.iallocator is not None:
10108 assert self.op.remote_node is None
10109 assert not self.needed_locks[locking.LEVEL_NODE]
10111 # Lock member nodes of all locked groups
10112 self.needed_locks[locking.LEVEL_NODE] = [node_name
10113 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10114 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
10115 else:
10116 self._LockInstancesNodes()
10117 elif level == locking.LEVEL_NODE_RES:
10119 self.needed_locks[locking.LEVEL_NODE_RES] = \
10120 self.needed_locks[locking.LEVEL_NODE]
10122 def BuildHooksEnv(self):
10123 """Build hooks env.
10125 This runs on the master, the primary and all the secondaries.
10127 """
10128 instance = self.replacer.instance
10129 env = {
10130 "MODE": self.op.mode,
10131 "NEW_SECONDARY": self.op.remote_node,
10132 "OLD_SECONDARY": instance.secondary_nodes[0],
10133 }
10134 env.update(_BuildInstanceHookEnvByObject(self, instance))
10135 return env
10137 def BuildHooksNodes(self):
10138 """Build hooks nodes.
10141 instance = self.replacer.instance
10143 self.cfg.GetMasterNode(),
10144 instance.primary_node,
10146 if self.op.remote_node is not None:
10147 nl.append(self.op.remote_node)
10150 def CheckPrereq(self):
10151 """Check prerequisites.
10154 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
10155 self.op.iallocator is None)
10157 # Verify if node group locks are still correct
10158 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10159 if owned_groups:
10160 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
10162 return LogicalUnit.CheckPrereq(self)
10165 class TLReplaceDisks(Tasklet):
10166 """Replaces disks for an instance.
10168 Note: Locking is not within the scope of this class.
10170 """
10171 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10172 disks, delay_iallocator, early_release):
10173 """Initializes this class.
10176 Tasklet.__init__(self, lu)
10178 # Parameters
10179 self.instance_name = instance_name
10180 self.mode = mode
10181 self.iallocator_name = iallocator_name
10182 self.remote_node = remote_node
10183 self.disks = disks
10184 self.delay_iallocator = delay_iallocator
10185 self.early_release = early_release
10188 self.instance = None
10189 self.new_node = None
10190 self.target_node = None
10191 self.other_node = None
10192 self.remote_node_info = None
10193 self.node_secondary_ip = None
10195 @staticmethod
10196 def CheckArguments(mode, remote_node, iallocator):
10197 """Helper function for users of this class.
10199 """
10200 # check for valid parameter combination
10201 if mode == constants.REPLACE_DISK_CHG:
10202 if remote_node is None and iallocator is None:
10203 raise errors.OpPrereqError("When changing the secondary either an"
10204 " iallocator script must be used or the"
10205 " new node given", errors.ECODE_INVAL)
10207 if remote_node is not None and iallocator is not None:
10208 raise errors.OpPrereqError("Give either the iallocator or the new"
10209 " secondary, not both", errors.ECODE_INVAL)
10211 elif remote_node is not None or iallocator is not None:
10212 # Not replacing the secondary
10213 raise errors.OpPrereqError("The iallocator and new node options can"
10214 " only be used when changing the"
10215 " secondary node", errors.ECODE_INVAL)
10217 @staticmethod
10218 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10219 """Compute a new secondary node using an IAllocator.
10221 """
10222 ial = IAllocator(lu.cfg, lu.rpc,
10223 mode=constants.IALLOCATOR_MODE_RELOC,
10224 name=instance_name,
10225 relocate_from=list(relocate_from))
10227 ial.Run(iallocator_name)
10229 if not ial.success:
10230 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10231 " %s" % (iallocator_name, ial.info),
10232 errors.ECODE_NORES)
10234 if len(ial.result) != ial.required_nodes:
10235 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10236 " of nodes (%s), required %s" %
10237 (iallocator_name,
10238 len(ial.result), ial.required_nodes),
10239 errors.ECODE_FAULT)
10241 remote_node_name = ial.result[0]
10243 lu.LogInfo("Selected new secondary for instance '%s': %s",
10244 instance_name, remote_node_name)
10246 return remote_node_name
10248 def _FindFaultyDisks(self, node_name):
10249 """Wrapper for L{_FindFaultyInstanceDisks}.
10252 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10255 def _CheckDisksActivated(self, instance):
10256 """Checks if the instance disks are activated.
10258 @param instance: The instance to check disks
10259 @return: True if they are activated, False otherwise
10261 """
10262 nodes = instance.all_nodes
10264 for idx, dev in enumerate(instance.disks):
10265 for node in nodes:
10266 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10267 self.cfg.SetDiskID(dev, node)
10269 result = self.rpc.call_blockdev_find(node, dev)
10271 if result.offline:
10272 continue
10273 elif result.fail_msg or not result.payload:
10274 return False
10276 return True
10278 def CheckPrereq(self):
10279 """Check prerequisites.
10281 This checks that the instance is in the cluster.
10283 """
10284 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10285 assert instance is not None, \
10286 "Cannot retrieve locked instance %s" % self.instance_name
10288 if instance.disk_template != constants.DT_DRBD8:
10289 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10290 " instances", errors.ECODE_INVAL)
10292 if len(instance.secondary_nodes) != 1:
10293 raise errors.OpPrereqError("The instance has a strange layout,"
10294 " expected one secondary but found %d" %
10295 len(instance.secondary_nodes),
10296 errors.ECODE_FAULT)
10298 if not self.delay_iallocator:
10299 self._CheckPrereq2()
10301 def _CheckPrereq2(self):
10302 """Check prerequisites, second part.
10304 This function should always be part of CheckPrereq. It was separated and is
10305 now called from Exec because during node evacuation iallocator was only
10306 called with an unmodified cluster model, not taking planned changes into
10307 account.
10309 """
10310 instance = self.instance
10311 secondary_node = instance.secondary_nodes[0]
10313 if self.iallocator_name is None:
10314 remote_node = self.remote_node
10315 else:
10316 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10317 instance.name, instance.secondary_nodes)
10319 if remote_node is None:
10320 self.remote_node_info = None
10321 else:
10322 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10323 "Remote node '%s' is not locked" % remote_node
10325 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10326 assert self.remote_node_info is not None, \
10327 "Cannot retrieve locked node %s" % remote_node
10329 if remote_node == self.instance.primary_node:
10330 raise errors.OpPrereqError("The specified node is the primary node of"
10331 " the instance", errors.ECODE_INVAL)
10333 if remote_node == secondary_node:
10334 raise errors.OpPrereqError("The specified node is already the"
10335 " secondary node of the instance",
10336 errors.ECODE_INVAL)
10338 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10339 constants.REPLACE_DISK_CHG):
10340 raise errors.OpPrereqError("Cannot specify disks to be replaced",
10341 errors.ECODE_INVAL)
10343 if self.mode == constants.REPLACE_DISK_AUTO:
10344 if not self._CheckDisksActivated(instance):
10345 raise errors.OpPrereqError("Please run activate-disks on instance %s"
10346 " first" % self.instance_name,
10347 errors.ECODE_STATE)
10348 faulty_primary = self._FindFaultyDisks(instance.primary_node)
10349 faulty_secondary = self._FindFaultyDisks(secondary_node)
10351 if faulty_primary and faulty_secondary:
10352 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10353 " one node and can not be repaired"
10354 " automatically" % self.instance_name,
10355 errors.ECODE_STATE)
10357 if faulty_primary:
10358 self.disks = faulty_primary
10359 self.target_node = instance.primary_node
10360 self.other_node = secondary_node
10361 check_nodes = [self.target_node, self.other_node]
10362 elif faulty_secondary:
10363 self.disks = faulty_secondary
10364 self.target_node = secondary_node
10365 self.other_node = instance.primary_node
10366 check_nodes = [self.target_node, self.other_node]
10367 else:
10368 self.disks = []
10369 check_nodes = []
10371 else:
10372 # Non-automatic modes
10373 if self.mode == constants.REPLACE_DISK_PRI:
10374 self.target_node = instance.primary_node
10375 self.other_node = secondary_node
10376 check_nodes = [self.target_node, self.other_node]
10378 elif self.mode == constants.REPLACE_DISK_SEC:
10379 self.target_node = secondary_node
10380 self.other_node = instance.primary_node
10381 check_nodes = [self.target_node, self.other_node]
10383 elif self.mode == constants.REPLACE_DISK_CHG:
10384 self.new_node = remote_node
10385 self.other_node = instance.primary_node
10386 self.target_node = secondary_node
10387 check_nodes = [self.new_node, self.other_node]
10389 _CheckNodeNotDrained(self.lu, remote_node)
10390 _CheckNodeVmCapable(self.lu, remote_node)
10392 old_node_info = self.cfg.GetNodeInfo(secondary_node)
10393 assert old_node_info is not None
10394 if old_node_info.offline and not self.early_release:
10395 # doesn't make sense to delay the release
10396 self.early_release = True
10397 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10398 " early-release mode", secondary_node)
10400 else:
10401 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10402 self.mode)
10404 # If not specified all disks should be replaced
10405 if not self.disks:
10406 self.disks = range(len(self.instance.disks))
10408 # TODO: compute disk parameters
10409 primary_node_info = self.cfg.GetNodeInfo(instance.primary_node)
10410 secondary_node_info = self.cfg.GetNodeInfo(secondary_node)
10411 if primary_node_info.group != secondary_node_info.group:
10412 self.lu.LogInfo("The instance primary and secondary nodes are in two"
10413 " different node groups; the disk parameters of the"
10414 " primary node's group will be applied.")
10416 self.diskparams = self.cfg.GetNodeGroup(primary_node_info.group).diskparams
10418 for node in check_nodes:
10419 _CheckNodeOnline(self.lu, node)
10421 touched_nodes = frozenset(node_name for node_name in [self.new_node,
10422 self.other_node,
10423 self.target_node]
10424 if node_name is not None)
10426 # Release unneeded node and node resource locks
10427 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10428 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10430 # Release any owned node group
10431 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10432 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10434 # Check whether disks are valid
10435 for disk_idx in self.disks:
10436 instance.FindDisk(disk_idx)
10438 # Get secondary node IP addresses
10439 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10440 in self.cfg.GetMultiNodeInfo(touched_nodes))
10442 def Exec(self, feedback_fn):
10443 """Execute disk replacement.
10445 This dispatches the disk replacement to the appropriate handler.
10447 """
10448 if self.delay_iallocator:
10449 self._CheckPrereq2()
10451 if __debug__:
10452 # Verify owned locks before starting operation
10453 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10454 assert set(owned_nodes) == set(self.node_secondary_ip), \
10455 ("Incorrect node locks, owning %s, expected %s" %
10456 (owned_nodes, self.node_secondary_ip.keys()))
10457 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10458 self.lu.owned_locks(locking.LEVEL_NODE_RES))
10460 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10461 assert list(owned_instances) == [self.instance_name], \
10462 "Instance '%s' not locked" % self.instance_name
10464 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10465 "Should not own any node group lock at this point"
10467 if not self.disks:
10468 feedback_fn("No disks need replacement")
10469 return
10471 feedback_fn("Replacing disk(s) %s for %s" %
10472 (utils.CommaJoin(self.disks), self.instance.name))
10474 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10476 # Activate the instance disks if we're replacing them on a down instance
10477 if activate_disks:
10478 _StartInstanceDisks(self.lu, self.instance, True)
10480 try:
10481 # Should we replace the secondary node?
10482 if self.new_node is not None:
10483 fn = self._ExecDrbd8Secondary
10484 else:
10485 fn = self._ExecDrbd8DiskOnly
10487 result = fn(feedback_fn)
10488 finally:
10489 # Deactivate the instance disks if we're replacing them on a
10490 # down instance
10491 if activate_disks:
10492 _SafeShutdownInstanceDisks(self.lu, self.instance)
10494 assert not self.lu.owned_locks(locking.LEVEL_NODE)
10496 if __debug__:
10497 # Verify owned locks
10498 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10499 nodes = frozenset(self.node_secondary_ip)
10500 assert ((self.early_release and not owned_nodes) or
10501 (not self.early_release and not (set(owned_nodes) - nodes))), \
10502 ("Not owning the correct locks, early_release=%s, owned=%r,"
10503 " nodes=%r" % (self.early_release, owned_nodes, nodes))
10507 def _CheckVolumeGroup(self, nodes):
10508 self.lu.LogInfo("Checking volume groups")
10510 vgname = self.cfg.GetVGName()
10512 # Make sure volume group exists on all involved nodes
10513 results = self.rpc.call_vg_list(nodes)
10514 if not results:
10515 raise errors.OpExecError("Can't list volume groups on the nodes")
10517 for node in nodes:
10518 res = results[node]
10519 res.Raise("Error checking node %s" % node)
10520 if vgname not in res.payload:
10521 raise errors.OpExecError("Volume group '%s' not found on node %s" %
10522 (vgname, node))
10524 def _CheckDisksExistence(self, nodes):
10525 # Check disk existence
10526 for idx, dev in enumerate(self.instance.disks):
10527 if idx not in self.disks:
10528 continue
10530 for node in nodes:
10531 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10532 self.cfg.SetDiskID(dev, node)
10534 result = self.rpc.call_blockdev_find(node, dev)
10536 msg = result.fail_msg
10537 if msg or not result.payload:
10538 if not msg:
10539 msg = "disk not found"
10540 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10541 (idx, node, msg))
10543 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10544 for idx, dev in enumerate(self.instance.disks):
10545 if idx not in self.disks:
10546 continue
10548 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10549 (idx, node_name))
10551 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
10552 ldisk=ldisk):
10553 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10554 " replace disks for instance %s" %
10555 (node_name, self.instance.name))
10557 def _CreateNewStorage(self, node_name):
10558 """Create new storage on the primary or secondary node.
10560 This is only used for same-node replaces, not for changing the
10561 secondary node, hence we don't want to modify the existing disk.
10563 """
10564 iv_names = {}
10566 for idx, dev in enumerate(self.instance.disks):
10567 if idx not in self.disks:
10568 continue
10570 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10572 self.cfg.SetDiskID(dev, node_name)
10574 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10575 names = _GenerateUniqueNames(self.lu, lv_names)
10577 _, data_p, meta_p = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
10579 vg_data = dev.children[0].logical_id[0]
10580 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10581 logical_id=(vg_data, names[0]), params=data_p)
10582 vg_meta = dev.children[1].logical_id[0]
10583 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10584 logical_id=(vg_meta, names[1]), params=meta_p)
10586 new_lvs = [lv_data, lv_meta]
10587 old_lvs = [child.Copy() for child in dev.children]
10588 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10590 # we pass force_create=True to force the LVM creation
10591 for new_lv in new_lvs:
10592 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
10593 _GetInstanceInfoText(self.instance), False)
10595 return iv_names
10597 def _CheckDevices(self, node_name, iv_names):
10598 for name, (dev, _, _) in iv_names.iteritems():
10599 self.cfg.SetDiskID(dev, node_name)
10601 result = self.rpc.call_blockdev_find(node_name, dev)
10603 msg = result.fail_msg
10604 if msg or not result.payload:
10605 if not msg:
10606 msg = "disk not found"
10607 raise errors.OpExecError("Can't find DRBD device %s: %s" %
10608 (name, msg))
10610 if result.payload.is_degraded:
10611 raise errors.OpExecError("DRBD device %s is degraded!" % name)
10613 def _RemoveOldStorage(self, node_name, iv_names):
10614 for name, (_, old_lvs, _) in iv_names.iteritems():
10615 self.lu.LogInfo("Remove logical volumes for %s" % name)
10617 for lv in old_lvs:
10618 self.cfg.SetDiskID(lv, node_name)
10620 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10621 if msg:
10622 self.lu.LogWarning("Can't remove old LV: %s" % msg,
10623 hint="remove unused LVs manually")
10625 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10626 """Replace a disk on the primary or secondary for DRBD 8.
10628 The algorithm for replace is quite complicated:
10630 1. for each disk to be replaced:
10632 1. create new LVs on the target node with unique names
10633 1. detach old LVs from the drbd device
10634 1. rename old LVs to name_replaced.<time_t>
10635 1. rename new LVs to old LVs
10636 1. attach the new LVs (with the old names now) to the drbd device
10638 1. wait for sync across all devices
10640 1. for each modified disk:
10642 1. remove old LVs (which have the name name_replaced.<time_t>)
10644 Failures are not very well handled.
10646 """
10647 steps_total = 6
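# Illustrative sketch of the rename dance in step 4 below (hypothetical LV
# names, temp_suffix = 1234567890):
#
#   old data LV "xenvg/abc.disk0_data" -> "xenvg/abc.disk0_data_replaced-1234567890"
#   new data LV "xenvg/def.disk0_data" -> "xenvg/abc.disk0_data"
#
# i.e. the DRBD device keeps its child names while the backing storage is
# swapped; _RemoveOldStorage() later deletes the "*_replaced-*" volumes.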
10649 # Step: check device activation
10650 self.lu.LogStep(1, steps_total, "Check device existence")
10651 self._CheckDisksExistence([self.other_node, self.target_node])
10652 self._CheckVolumeGroup([self.target_node, self.other_node])
10654 # Step: check other node consistency
10655 self.lu.LogStep(2, steps_total, "Check peer consistency")
10656 self._CheckDisksConsistency(self.other_node,
10657 self.other_node == self.instance.primary_node,
10660 # Step: create new storage
10661 self.lu.LogStep(3, steps_total, "Allocate new storage")
10662 iv_names = self._CreateNewStorage(self.target_node)
10664 # Step: for each lv, detach+rename*2+attach
10665 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10666 for dev, old_lvs, new_lvs in iv_names.itervalues():
10667 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10669 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10670 old_lvs)
10671 result.Raise("Can't detach drbd from local storage on node"
10672 " %s for device %s" % (self.target_node, dev.iv_name))
10674 #cfg.Update(instance)
10676 # ok, we created the new LVs, so now we know we have the needed
10677 # storage; as such, we proceed on the target node to rename
10678 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
10679 # using the assumption that logical_id == physical_id (which in
10680 # turn is the unique_id on that node)
10682 # FIXME(iustin): use a better name for the replaced LVs
10683 temp_suffix = int(time.time())
10684 ren_fn = lambda d, suff: (d.physical_id[0],
10685 d.physical_id[1] + "_replaced-%s" % suff)
10687 # Build the rename list based on what LVs exist on the node
10688 rename_old_to_new = []
10689 for to_ren in old_lvs:
10690 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
10691 if not result.fail_msg and result.payload:
10692 # device exists
10693 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
10695 self.lu.LogInfo("Renaming the old LVs on the target node")
10696 result = self.rpc.call_blockdev_rename(self.target_node,
10697 rename_old_to_new)
10698 result.Raise("Can't rename old LVs on node %s" % self.target_node)
10700 # Now we rename the new LVs to the old LVs
10701 self.lu.LogInfo("Renaming the new LVs on the target node")
10702 rename_new_to_old = [(new, old.physical_id)
10703 for old, new in zip(old_lvs, new_lvs)]
10704 result = self.rpc.call_blockdev_rename(self.target_node,
10705 rename_new_to_old)
10706 result.Raise("Can't rename new LVs on node %s" % self.target_node)
10708 # Intermediate steps of in memory modifications
10709 for old, new in zip(old_lvs, new_lvs):
10710 new.logical_id = old.logical_id
10711 self.cfg.SetDiskID(new, self.target_node)
10713 # We need to modify old_lvs so that removal later removes the
10714 # right LVs, not the newly added ones; note that old_lvs is a
10715 # copy here
10716 for disk in old_lvs:
10717 disk.logical_id = ren_fn(disk, temp_suffix)
10718 self.cfg.SetDiskID(disk, self.target_node)
10720 # Now that the new lvs have the old name, we can add them to the device
10721 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
10722 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
10723 new_lvs)
10724 msg = result.fail_msg
10725 if msg:
10726 for new_lv in new_lvs:
10727 msg2 = self.rpc.call_blockdev_remove(self.target_node,
10728 new_lv).fail_msg
10729 if msg2:
10730 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
10731 hint=("cleanup manually the unused logical"
10732 " volumes"))
10733 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
10735 cstep = itertools.count(5)
10737 if self.early_release:
10738 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10739 self._RemoveOldStorage(self.target_node, iv_names)
10740 # TODO: Check if releasing locks early still makes sense
10741 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10742 else:
10743 # Release all resource locks except those used by the instance
10744 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10745 keep=self.node_secondary_ip.keys())
10747 # Release all node locks while waiting for sync
10748 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10750 # TODO: Can the instance lock be downgraded here? Take the optional disk
10751 # shutdown in the caller into consideration.
10754 # This can fail as the old devices are degraded and _WaitForSync
10755 # does a combined result over all disks, so we don't check its return value
10756 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10757 _WaitForSync(self.lu, self.instance)
10759 # Check all devices manually
10760 self._CheckDevices(self.instance.primary_node, iv_names)
10762 # Step: remove old storage
10763 if not self.early_release:
10764 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10765 self._RemoveOldStorage(self.target_node, iv_names)
10767 def _ExecDrbd8Secondary(self, feedback_fn):
10768 """Replace the secondary node for DRBD 8.
10770 The algorithm for replace is quite complicated:
10771 - for all disks of the instance:
10772 - create new LVs on the new node with same names
10773 - shutdown the drbd device on the old secondary
10774 - disconnect the drbd network on the primary
10775 - create the drbd device on the new secondary
10776 - network attach the drbd on the primary, using an artifice:
10777 the drbd code for Attach() will connect to the network if it
10778 finds a device which is connected to the good local disks but
10779 not network enabled
10780 - wait for sync across all devices
10781 - remove all disks from the old secondary
10783 Failures are not very well handled.
10785 """
10786 steps_total = 6
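# Reminder of the DRBD8 logical_id layout unpacked in step 4 below:
#
#   (node_a, node_b, port, minor_a, minor_b, secret)
#
# new_alone_id deliberately carries port=None so the device is first brought
# up on the new node without networking; new_net_id, with the real port, is
# what the instance's disks are switched to for the final attach.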
10788 pnode = self.instance.primary_node
10790 # Step: check device activation
10791 self.lu.LogStep(1, steps_total, "Check device existence")
10792 self._CheckDisksExistence([self.instance.primary_node])
10793 self._CheckVolumeGroup([self.instance.primary_node])
10795 # Step: check other node consistency
10796 self.lu.LogStep(2, steps_total, "Check peer consistency")
10797 self._CheckDisksConsistency(self.instance.primary_node, True, True)
10799 # Step: create new storage
10800 self.lu.LogStep(3, steps_total, "Allocate new storage")
10801 for idx, dev in enumerate(self.instance.disks):
10802 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
10803 (self.new_node, idx))
10804 # we pass force_create=True to force LVM creation
10805 for new_lv in dev.children:
10806 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
10807 _GetInstanceInfoText(self.instance), False)
10809 # Step 4: drbd minors and drbd setup changes
10810 # after this, we must manually remove the drbd minors on both the
10811 # error and the success paths
10812 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10813 minors = self.cfg.AllocateDRBDMinor([self.new_node
10814 for dev in self.instance.disks],
10815 self.instance.name)
10816 logging.debug("Allocated minors %r", minors)
10818 iv_names = {}
10819 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
10820 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
10821 (self.new_node, idx))
10822 # create new devices on new_node; note that we create two IDs:
10823 # one without port, so the drbd will be activated without
10824 # networking information on the new node at this stage, and one
10825 # with network, for the latter activation in step 4
10826 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
10827 if self.instance.primary_node == o_node1:
10828 p_minor = o_minor1
10829 else:
10830 assert self.instance.primary_node == o_node2, "Three-node instance?"
10831 p_minor = o_minor2
10833 new_alone_id = (self.instance.primary_node, self.new_node, None,
10834 p_minor, new_minor, o_secret)
10835 new_net_id = (self.instance.primary_node, self.new_node, o_port,
10836 p_minor, new_minor, o_secret)
10838 iv_names[idx] = (dev, dev.children, new_net_id)
10839 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
10840 new_net_id)
10841 drbd_params, _, _ = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
10842 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
10843 logical_id=new_alone_id,
10844 children=dev.children,
10845 size=dev.size,
10846 params=drbd_params)
10847 try:
10848 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
10849 _GetInstanceInfoText(self.instance), False)
10850 except errors.GenericError:
10851 self.cfg.ReleaseDRBDMinors(self.instance.name)
10852 raise
10854 # We have new devices, shutdown the drbd on the old secondary
10855 for idx, dev in enumerate(self.instance.disks):
10856 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
10857 self.cfg.SetDiskID(dev, self.target_node)
10858 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
10859 if msg:
10860 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
10861 " node: %s" % (idx, msg),
10862 hint=("Please cleanup this device manually as"
10863 " soon as possible"))
10865 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
10866 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
10867 self.instance.disks)[pnode]
10869 msg = result.fail_msg
10870 if msg:
10871 # detaches didn't succeed (unlikely)
10872 self.cfg.ReleaseDRBDMinors(self.instance.name)
10873 raise errors.OpExecError("Can't detach the disks from the network on"
10874 " old node: %s" % (msg,))
10876 # if we managed to detach at least one, we update all the disks of
10877 # the instance to point to the new secondary
10878 self.lu.LogInfo("Updating instance configuration")
10879 for dev, _, new_logical_id in iv_names.itervalues():
10880 dev.logical_id = new_logical_id
10881 self.cfg.SetDiskID(dev, self.instance.primary_node)
10883 self.cfg.Update(self.instance, feedback_fn)
10885 # Release all node locks (the configuration has been updated)
10886 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10888 # and now perform the drbd attach
10889 self.lu.LogInfo("Attaching primary drbds to new secondary"
10890 " (standalone => connected)")
10891 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
10892 self.new_node],
10893 self.node_secondary_ip,
10894 self.instance.disks,
10895 self.instance.name,
10896 False)
10897 for to_node, to_result in result.items():
10898 msg = to_result.fail_msg
10899 if msg:
10900 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
10901 to_node, msg,
10902 hint=("please do a gnt-instance info to see the"
10903 " status of disks"))
10905 cstep = itertools.count(5)
10907 if self.early_release:
10908 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10909 self._RemoveOldStorage(self.target_node, iv_names)
10910 # TODO: Check if releasing locks early still makes sense
10911 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10912 else:
10913 # Release all resource locks except those used by the instance
10914 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10915 keep=self.node_secondary_ip.keys())
10917 # TODO: Can the instance lock be downgraded here? Take the optional disk
10918 # shutdown in the caller into consideration.
10921 # This can fail as the old devices are degraded and _WaitForSync
10922 # does a combined result over all disks, so we don't check its return value
10923 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10924 _WaitForSync(self.lu, self.instance)
10926 # Check all devices manually
10927 self._CheckDevices(self.instance.primary_node, iv_names)
10929 # Step: remove old storage
10930 if not self.early_release:
10931 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10932 self._RemoveOldStorage(self.target_node, iv_names)
10935 class LURepairNodeStorage(NoHooksLU):
10936 """Repairs the volume group on a node.
10941 def CheckArguments(self):
10942 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10944 storage_type = self.op.storage_type
10946 if (constants.SO_FIX_CONSISTENCY not in
10947 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
10948 raise errors.OpPrereqError("Storage units of type '%s' can not be"
10949 " repaired" % storage_type,
10950 errors.ECODE_INVAL)
10952 def ExpandNames(self):
10953 self.needed_locks = {
10954 locking.LEVEL_NODE: [self.op.node_name],
10955 }
10957 def _CheckFaultyDisks(self, instance, node_name):
10958 """Ensure faulty disks abort the opcode or at least warn."""
10959 try:
10960 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
10961 node_name, True):
10962 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
10963 " node '%s'" % (instance.name, node_name),
10964 errors.ECODE_STATE)
10965 except errors.OpPrereqError, err:
10966 if self.op.ignore_consistency:
10967 self.proc.LogWarning(str(err.args[0]))
10968 else:
10969 raise
10971 def CheckPrereq(self):
10972 """Check prerequisites.
10975 # Check whether any instance on this node has faulty disks
10976 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
10977 if inst.admin_state != constants.ADMINST_UP:
10979 check_nodes = set(inst.all_nodes)
10980 check_nodes.discard(self.op.node_name)
10981 for inst_node_name in check_nodes:
10982 self._CheckFaultyDisks(inst, inst_node_name)
10984 def Exec(self, feedback_fn):
10985 feedback_fn("Repairing storage unit '%s' on %s ..." %
10986 (self.op.name, self.op.node_name))
10988 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
10989 result = self.rpc.call_storage_execute(self.op.node_name,
10990 self.op.storage_type, st_args,
10992 constants.SO_FIX_CONSISTENCY)
10993 result.Raise("Failed to repair storage unit '%s' on %s" %
10994 (self.op.name, self.op.node_name))
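
# Illustrative sketch (not part of the original module): a repair would
# normally be requested through the corresponding opcode; the field names
# below follow the conventions used in this file and should be checked
# against opcodes.py:
#
#   op = opcodes.OpRepairNodeStorage(node_name="node1.example.com",
#                                    storage_type=constants.ST_LVM_VG,
#                                    name="xenvg",
#                                    ignore_consistency=False)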


class LUNodeEvacuate(NoHooksLU):
  """Evacuates instances off a list of nodes.

  """
  REQ_BGL = False

  _MODE2IALLOCATOR = {
    constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
    constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
    constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
    }
  assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
  assert (frozenset(_MODE2IALLOCATOR.values()) ==
          constants.IALLOCATOR_NEVAC_MODES)

  def CheckArguments(self):
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      assert self.op.remote_node

      if self.op.remote_node == self.op.node_name:
        raise errors.OpPrereqError("Can not use evacuated node as a new"
                                   " secondary node", errors.ECODE_INVAL)

      if self.op.mode != constants.NODE_EVAC_SEC:
        raise errors.OpPrereqError("Without the use of an iallocator only"
                                   " secondary instances can be evacuated",
                                   errors.ECODE_INVAL)

    # Declare locks
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

    # Determine nodes (via group) optimistically, needs verification once locks
    # have been acquired
    self.lock_nodes = self._DetermineNodes()

  def _DetermineNodes(self):
    """Gets the list of nodes to operate on.

    """
    if self.op.remote_node is None:
      # Iallocator will choose any node(s) in the same group
      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
    else:
      group_nodes = frozenset([self.op.remote_node])

    # Determine nodes to be locked
    return set([self.op.node_name]) | group_nodes

  def _DetermineInstances(self):
    """Builds list of instances to operate on.

    """
    assert self.op.mode in constants.NODE_EVAC_MODES

    if self.op.mode == constants.NODE_EVAC_PRI:
      # Primary instances only
      inst_fn = _GetNodePrimaryInstances
      assert self.op.remote_node is None, \
        "Evacuating primary instances requires iallocator"
    elif self.op.mode == constants.NODE_EVAC_SEC:
      # Secondary instances only
      inst_fn = _GetNodeSecondaryInstances
    else:
      # All instances
      assert self.op.mode == constants.NODE_EVAC_ALL
      inst_fn = _GetNodeInstances
      # TODO: In 2.6, change the iallocator interface to take an evacuation mode
      # instead of requiring exclusivity
      raise errors.OpPrereqError("Due to an issue with the iallocator"
                                 " interface it is not possible to evacuate"
                                 " all instances at once; specify explicitly"
                                 " whether to evacuate primary or secondary"
                                 " instances",
                                 errors.ECODE_INVAL)

    return inst_fn(self.cfg, self.op.node_name)

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        set(i.name for i in self._DetermineInstances())

    elif level == locking.LEVEL_NODEGROUP:
      # Lock node groups for all potential target nodes optimistically, needs
      # verification once nodes have been acquired
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)

    elif level == locking.LEVEL_NODE:
      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes

  def CheckPrereq(self):
    # Verify locks
    owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
    owned_nodes = self.owned_locks(locking.LEVEL_NODE)
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)

    need_nodes = self._DetermineNodes()

    if not owned_nodes.issuperset(need_nodes):
      raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
                                 " locks were acquired, current nodes are"
                                 " '%s', used to be '%s'; retry the"
                                 " operation" %
                                 (self.op.node_name,
                                  utils.CommaJoin(need_nodes),
                                  utils.CommaJoin(owned_nodes)),
                                 errors.ECODE_STATE)

    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
    if owned_groups != wanted_groups:
      raise errors.OpExecError("Node groups changed since locks were acquired,"
                               " current groups are '%s', used to be '%s';"
                               " retry the operation" %
                               (utils.CommaJoin(wanted_groups),
                                utils.CommaJoin(owned_groups)))

    # Determine affected instances
    self.instances = self._DetermineInstances()
    self.instance_names = [i.name for i in self.instances]

    if set(self.instance_names) != owned_instances:
      raise errors.OpExecError("Instances on node '%s' changed since locks"
                               " were acquired, current instances are '%s',"
                               " used to be '%s'; retry the operation" %
                               (self.op.node_name,
                                utils.CommaJoin(self.instance_names),
                                utils.CommaJoin(owned_instances)))

    if self.instance_names:
      self.LogInfo("Evacuating instances from node '%s': %s",
                   self.op.node_name,
                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
    else:
      self.LogInfo("No instances to evacuate from node '%s'",
                   self.op.node_name)

    if self.op.remote_node is not None:
      for i in self.instances:
        if i.primary_node == self.op.remote_node:
          raise errors.OpPrereqError("Node %s is the primary node of"
                                     " instance %s, cannot use it as"
                                     " secondary" %
                                     (self.op.remote_node, i.name),
                                     errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)

    if not self.instance_names:
      # No instances to evacuate
      jobs = []

    elif self.op.iallocator is not None:
      # TODO: Implement relocation to other group
      ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
                       evac_mode=self._MODE2IALLOCATOR[self.op.mode],
                       instances=list(self.instance_names))

      ial.Run(self.op.iallocator)

      if not ial.success:
        raise errors.OpPrereqError("Can't compute node evacuation using"
                                   " iallocator '%s': %s" %
                                   (self.op.iallocator, ial.info),
                                   errors.ECODE_NORES)

      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)

    elif self.op.remote_node is not None:
      assert self.op.mode == constants.NODE_EVAC_SEC
      jobs = [
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
                                        remote_node=self.op.remote_node,
                                        disks=[],
                                        mode=constants.REPLACE_DISK_CHG,
                                        early_release=self.op.early_release)]
        for instance_name in self.instance_names
        ]

    else:
      raise errors.ProgrammerError("No iallocator or remote node")

    return ResultWithJobs(jobs)
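
# Example (illustrative) of the value handed to ResultWithJobs above: one
# inner list per job to be submitted, each holding the opcodes executed
# sequentially within that job, e.g. for two instances evacuated via their
# secondary nodes:
#
#   jobs = [
#     [opcodes.OpInstanceReplaceDisks(instance_name="inst1", ...)],
#     [opcodes.OpInstanceReplaceDisks(instance_name="inst2", ...)],
#     ]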


def _SetOpEarlyRelease(early_release, op):
  """Sets C{early_release} flag on opcodes if available.

  """
  try:
    op.early_release = early_release
  except AttributeError:
    assert not isinstance(op, opcodes.OpInstanceReplaceDisks)

  return op


def _NodeEvacDest(use_nodes, group, nodes):
  """Returns group or nodes depending on caller's choice.

  """
  if use_nodes:
    return utils.CommaJoin(nodes)
  else:
    return group


def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
  """Unpacks the result of change-group and node-evacuate iallocator requests.

  Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
  L{constants.IALLOCATOR_MODE_CHG_GROUP}.

  @type lu: L{LogicalUnit}
  @param lu: Logical unit instance
  @type alloc_result: tuple/list
  @param alloc_result: Result from iallocator
  @type early_release: bool
  @param early_release: Whether to release locks early if possible
  @type use_nodes: bool
  @param use_nodes: Whether to display node names instead of groups

  """
  (moved, failed, jobs) = alloc_result

  if failed:
    failreason = utils.CommaJoin("%s (%s)" % (name, reason)
                                 for (name, reason) in failed)
    lu.LogWarning("Unable to evacuate instances %s", failreason)
    raise errors.OpExecError("Unable to evacuate instances %s" % failreason)

  if moved:
    lu.LogInfo("Instances to be moved: %s",
               utils.CommaJoin("%s (to %s)" %
                               (name, _NodeEvacDest(use_nodes, group, nodes))
                               for (name, group, nodes) in moved))

  return [map(compat.partial(_SetOpEarlyRelease, early_release),
              map(opcodes.OpCode.LoadOpCode, ops))
          for ops in jobs]
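
# The alloc_result unpacked above has, illustratively, this shape (the
# serialized opcodes in the jobs element are rebuilt via OpCode.LoadOpCode
# and, where the opcode supports it, get the early_release flag set):
#
#   alloc_result = (
#     [("inst1", "target-group-uuid", ["node3.example.com"])],  # moved
#     [],                                                       # failed
#     [[{"OP_ID": "OP_INSTANCE_REPLACE_DISKS", ...}]],          # jobs
#     )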


class LUInstanceGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE][:]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "DISK": self.op.disk,
      "AMOUNT": self.op.amount,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    nodenames = list(instance.all_nodes)
    for node in nodenames:
      _CheckNodeOnline(self, node)

    self.instance = instance

    if instance.disk_template not in constants.DTS_GROWABLE:
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing", errors.ECODE_INVAL)

    self.disk = instance.FindDisk(self.op.disk)

    if instance.disk_template not in (constants.DT_FILE,
                                      constants.DT_SHARED_FILE):
      # TODO: check the free disk space for file, when that feature will be
      # supported
      _CheckNodesFreeDiskPerVG(self, nodenames,
                               self.disk.ComputeGrowth(self.op.amount))

  def Exec(self, feedback_fn):
    """Execute disk grow.

    """
    instance = self.instance
    disk = self.disk

    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block device to grow")

    feedback_fn("Growing disk %s of instance '%s' by %s" %
                (self.op.disk, instance.name,
                 utils.FormatUnit(self.op.amount, "h")))

    # First run all grow ops in dry-run mode
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
      result.Raise("Grow request failed to node %s" % node)

    # We know that (as far as we can test) operations across different
    # nodes will succeed, time to run it for real
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
      result.Raise("Grow request failed to node %s" % node)

      # TODO: Rewrite code to work properly
      # DRBD goes into sync mode for a short amount of time after executing the
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
      # calling "resize" in sync mode fails. Sleeping for a short amount of
      # time is a work-around.
      time.sleep(5)

    disk.RecordGrow(self.op.amount)
    self.cfg.Update(instance, feedback_fn)

    # Changes have been recorded, release node lock
    _ReleaseLocks(self, locking.LEVEL_NODE)

    # Downgrade lock while waiting for sync
    self.glm.downgrade(locking.LEVEL_INSTANCE)

    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
      if disk_abort:
        self.proc.LogWarning("Disk sync-ing has not returned a good"
                             " status; please check the instance")
      if instance.admin_state != constants.ADMINST_UP:
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
    elif instance.admin_state != constants.ADMINST_UP:
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
                           " not supposed to be running because no wait for"
                           " sync mode was requested")

    assert self.owned_locks(locking.LEVEL_NODE_RES)
    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
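
# Illustrative only (see opcodes.py for the authoritative field list):
# growing disk 0 of an instance by 1 GiB would be requested with an opcode
# along these lines:
#
#   op = opcodes.OpInstanceGrowDisk(instance_name="inst1.example.com",
#                                   disk=0, amount=1024,
#                                   wait_for_sync=True)
#
# The dry-run pass above means the grow is only executed for real once all
# nodes have confirmed they can perform it.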


class LUInstanceQueryData(NoHooksLU):
  """Query runtime instance data.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

    # Use locking if requested or when non-static information is wanted
    if not (self.op.static or self.op.use_locking):
      self.LogWarning("Non-static data requested, locks need to be acquired")
      self.op.use_locking = True

    if self.op.instances or not self.op.use_locking:
      # Expand instance names right here
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
    else:
      # Will use acquired locks
      self.wanted_names = None

    if self.op.use_locking:
      self.share_locks = _ShareAll()

      if self.wanted_names is None:
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
      else:
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names

      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if self.op.use_locking and level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      assert self.op.use_locking, "Locking was not used"
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)

    self.wanted_instances = \
      map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))

  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Returns the status of a block device

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_find(node, dev)
    if result.offline:
      return None

    result.Raise("Can't compute disk status for %s" % instance_name)

    status = result.payload
    if status is None:
      return None

    return (status.dev_path, status.major, status.minor,
            status.sync_percent, status.estimated_time,
            status.is_degraded, status.ldisk_status)
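
  # Illustrative example of the tuple returned above:
  #   ("/dev/drbd0", 147, 0, 95.2, 12, False, constants.LDS_OKAY)
  # i.e. (dev_path, major, minor, sync_percent, estimated_time,
  #       is_degraded, ldisk_status).
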
  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = dev.logical_id[0]

    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
                                              instance.name, dev)
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)

    if dev.children:
      dev_children = map(compat.partial(self._ComputeDiskStatus,
                                        instance, snode),
                         dev.children)
    else:
      dev_children = []

    return {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,
      "mode": dev.mode,
      "size": dev.size,
      }

  def Exec(self, feedback_fn):
    """Gather and return data"""
    result = {}

    cluster = self.cfg.GetClusterInfo()

    pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
                                          for i in self.wanted_instances)
    for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
      if self.op.static or pnode.offline:
        remote_state = None
        if pnode.offline:
          self.LogWarning("Primary node %s is marked offline, returning static"
                          " information only for instance %s" %
                          (pnode.name, instance.name))
      else:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        remote_info = remote_info.payload
        if remote_info and "state" in remote_info:
          remote_state = "up"
        else:
          if instance.admin_state == constants.ADMINST_UP:
            remote_state = "down"
          else:
            remote_state = instance.admin_state

      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
                  instance.disks)

      result[instance.name] = {
        "name": instance.name,
        "config_state": instance.admin_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "snodes": instance.secondary_nodes,
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disk_template": instance.disk_template,
        "disks": disks,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "os_instance": instance.osparams,
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

    return result


class LUInstanceSetParams(LogicalUnit):
  """Modifies an instance's parameters.

  """
  HPATH = "instance-modify"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    if not (self.op.nics or self.op.disks or self.op.disk_template or
            self.op.hvparams or self.op.beparams or self.op.os_name or
            self.op.online_inst or self.op.offline_inst):
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)

    if self.op.hvparams:
      _CheckGlobalHvParams(self.op.hvparams)

    # Disk validation
    disk_addremove = 0
    for disk_op, disk_dict in self.op.disks:
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
      if disk_op == constants.DDM_REMOVE:
        disk_addremove += 1
        continue
      elif disk_op == constants.DDM_ADD:
        disk_addremove += 1
      else:
        if not isinstance(disk_op, int):
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
        if not isinstance(disk_dict, dict):
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

      if disk_op == constants.DDM_ADD:
        mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
        if mode not in constants.DISK_ACCESS_SET:
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
                                     errors.ECODE_INVAL)
        size = disk_dict.get(constants.IDISK_SIZE, None)
        if size is None:
          raise errors.OpPrereqError("Required disk parameter size missing",
                                     errors.ECODE_INVAL)
        try:
          size = int(size)
        except (TypeError, ValueError), err:
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
                                     str(err), errors.ECODE_INVAL)
        disk_dict[constants.IDISK_SIZE] = size
      else:
        # modification of disk
        if constants.IDISK_SIZE in disk_dict:
          raise errors.OpPrereqError("Disk size change not possible, use"
                                     " grow-disk", errors.ECODE_INVAL)

    if disk_addremove > 1:
      raise errors.OpPrereqError("Only one disk add or remove operation"
                                 " supported at a time", errors.ECODE_INVAL)

    if self.op.disks and self.op.disk_template is not None:
      raise errors.OpPrereqError("Disk template conversion and other disk"
                                 " changes not supported at the same time",
                                 errors.ECODE_INVAL)

    if (self.op.disk_template and
        self.op.disk_template in constants.DTS_INT_MIRROR and
        self.op.remote_node is None):
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
                                 " one requires specifying a secondary node",
                                 errors.ECODE_INVAL)

    # NIC validation
    nic_addremove = 0
    for nic_op, nic_dict in self.op.nics:
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
      if nic_op == constants.DDM_REMOVE:
        nic_addremove += 1
        continue
      elif nic_op == constants.DDM_ADD:
        nic_addremove += 1
      else:
        if not isinstance(nic_op, int):
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
        if not isinstance(nic_dict, dict):
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

      # nic_dict should be a dict
      nic_ip = nic_dict.get(constants.INIC_IP, None)
      if nic_ip is not None:
        if nic_ip.lower() == constants.VALUE_NONE:
          nic_dict[constants.INIC_IP] = None
        else:
          if not netutils.IPAddress.IsValid(nic_ip):
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
                                       errors.ECODE_INVAL)

      nic_bridge = nic_dict.get("bridge", None)
      nic_link = nic_dict.get(constants.INIC_LINK, None)
      if nic_bridge and nic_link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time", errors.ECODE_INVAL)
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
        nic_dict["bridge"] = None
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
        nic_dict[constants.INIC_LINK] = None

      if nic_op == constants.DDM_ADD:
        nic_mac = nic_dict.get(constants.INIC_MAC, None)
        if nic_mac is None:
          nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO

      if constants.INIC_MAC in nic_dict:
        nic_mac = nic_dict[constants.INIC_MAC]
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)

        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
                                     " modifying an existing nic",
                                     errors.ECODE_INVAL)

    if nic_addremove > 1:
      raise errors.OpPrereqError("Only one NIC add or remove operation"
                                 " supported at a time", errors.ECODE_INVAL)
  def ExpandNames(self):
    self._ExpandAndLockInstance()
    # Can't even acquire node locks in shared mode as upcoming changes in
    # Ganeti 2.6 will start to modify the node object on disk conversion
    self.needed_locks[locking.LEVEL_NODE] = []
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
      if self.op.disk_template and self.op.remote_node:
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
    elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE][:]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, primary and secondaries.

    """
    args = dict()
    if constants.BE_MINMEM in self.be_new:
      args["minmem"] = self.be_new[constants.BE_MINMEM]
    if constants.BE_MAXMEM in self.be_new:
      args["maxmem"] = self.be_new[constants.BE_MAXMEM]
    if constants.BE_VCPUS in self.be_new:
      args["vcpus"] = self.be_new[constants.BE_VCPUS]
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
    # information at all.

    if self.op.nics:
      args["nics"] = []
      nic_override = dict(self.op.nics)
      for idx, nic in enumerate(self.instance.nics):
        if idx in nic_override:
          this_nic_override = nic_override[idx]
        else:
          this_nic_override = {}
        if constants.INIC_IP in this_nic_override:
          ip = this_nic_override[constants.INIC_IP]
        else:
          ip = nic.ip
        if constants.INIC_MAC in this_nic_override:
          mac = this_nic_override[constants.INIC_MAC]
        else:
          mac = nic.mac
        if idx in self.nic_pnew:
          nicparams = self.nic_pnew[idx]
        else:
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args["nics"].append((ip, mac, mode, link))
      if constants.DDM_ADD in nic_override:
        ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
        mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
        nicparams = self.nic_pnew[constants.DDM_ADD]
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args["nics"].append((ip, mac, mode, link))
      elif constants.DDM_REMOVE in nic_override:
        del args["nics"][-1]

    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
    if self.op.disk_template:
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    # checking the new params on the primary/secondary nodes

    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    cluster = self.cluster = self.cfg.GetClusterInfo()
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    pnode = instance.primary_node
    nodelist = list(instance.all_nodes)
    pnode_info = self.cfg.GetNodeInfo(pnode)
    self.diskparams = self.cfg.GetNodeGroup(pnode_info.group).diskparams

    # OS change
    if self.op.os_name and not self.op.force:
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
                      self.op.force_variant)
      instance_os = self.op.os_name
    else:
      instance_os = instance.os

    if self.op.disk_template:
      if instance.disk_template == self.op.disk_template:
        raise errors.OpPrereqError("Instance already has disk template %s" %
                                   instance.disk_template, errors.ECODE_INVAL)

      if (instance.disk_template,
          self.op.disk_template) not in self._DISK_CONVERSIONS:
        raise errors.OpPrereqError("Unsupported disk template conversion from"
                                   " %s to %s" % (instance.disk_template,
                                                  self.op.disk_template),
                                   errors.ECODE_INVAL)
      _CheckInstanceState(self, instance, INSTANCE_DOWN,
                          msg="cannot change disk template")
      if self.op.disk_template in constants.DTS_INT_MIRROR:
        if self.op.remote_node == pnode:
          raise errors.OpPrereqError("Given new secondary node %s is the same"
                                     " as the primary node of the instance" %
                                     self.op.remote_node, errors.ECODE_STATE)
        _CheckNodeOnline(self, self.op.remote_node)
        _CheckNodeNotDrained(self, self.op.remote_node)
        # FIXME: here we assume that the old instance type is DT_PLAIN
        assert instance.disk_template == constants.DT_PLAIN
        disks = [{constants.IDISK_SIZE: d.size,
                  constants.IDISK_VG: d.logical_id[0]}
                 for d in instance.disks]
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)

        snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
        ipolicy = _CalculateGroupIPolicy(cluster, snode_info.group)
        _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
                                ignore=self.op.ignore_ipolicy)
        if pnode_info.group != snode_info.group:
          self.LogWarning("The primary and secondary nodes are in two"
                          " different node groups; the disk parameters"
                          " from the first disk's node group will be"
                          " used")

    # hvparams processing
    if self.op.hvparams:
      hv_type = instance.hypervisor
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)

      # local check
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
      self.hv_proposed = self.hv_new = hv_new # the new actual values
      self.hv_inst = i_hvdict # the new dict (without defaults)
    else:
      self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
                                              instance.hvparams)
      self.hv_new = self.hv_inst = {}

    # beparams processing
    if self.op.beparams:
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
                                   use_none=True)
      objects.UpgradeBeParams(i_bedict)
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
      be_new = cluster.SimpleFillBE(i_bedict)
      self.be_proposed = self.be_new = be_new # the new actual values
      self.be_inst = i_bedict # the new dict (without defaults)
    else:
      self.be_new = self.be_inst = {}
      self.be_proposed = cluster.SimpleFillBE(instance.beparams)
    be_old = cluster.FillBE(instance)

    # CPU param validation -- checking every time a parameter is
    # changed to cover all cases where either CPU mask or vcpus have
    # changed
    if (constants.BE_VCPUS in self.be_proposed and
        constants.HV_CPU_MASK in self.hv_proposed):
      cpu_list = \
        utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
      # Verify mask is consistent with number of vCPUs. Can skip this
      # test if only 1 entry in the CPU mask, which means same mask
      # is applied to all vCPUs.
      if (len(cpu_list) > 1 and
          len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
        raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
                                   " CPU mask [%s]" %
                                   (self.be_proposed[constants.BE_VCPUS],
                                    self.hv_proposed[constants.HV_CPU_MASK]),
                                   errors.ECODE_INVAL)

      # Only perform this test if a new CPU mask is given
      if constants.HV_CPU_MASK in self.hv_new:
        # Calculate the largest CPU number requested
        max_requested_cpu = max(map(max, cpu_list))
        # Check that all of the instance's nodes have enough physical CPUs to
        # satisfy the requested CPU mask
        _CheckNodesPhysicalCPUs(self, instance.all_nodes,
                                max_requested_cpu + 1, instance.hypervisor)

    # osparams processing
    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = {}

    self.warn = []

    #TODO(dynmem): do the appropriate check involving MINMEM
    if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
        be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
      mem_check_list = [pnode]
      if be_new[constants.BE_AUTO_BALANCE]:
        # either we changed auto_balance to yes or it was from before
        mem_check_list.extend(instance.secondary_nodes)
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
                                                  instance.hypervisor)
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
                                         [instance.hypervisor])
      pninfo = nodeinfo[pnode]
      msg = pninfo.fail_msg
      if msg:
        # Assume the primary node is unreachable and go ahead
        self.warn.append("Can't get info from primary node %s: %s" %
                         (pnode, msg))
      else:
        (_, _, (pnhvinfo, )) = pninfo.payload
        if not isinstance(pnhvinfo.get("memory_free", None), int):
          self.warn.append("Node data from primary node %s doesn't contain"
                           " free memory information" % pnode)
        elif instance_info.fail_msg:
          self.warn.append("Can't get instance runtime information: %s" %
                           instance_info.fail_msg)
        else:
          if instance_info.payload:
            current_mem = int(instance_info.payload["memory"])
          else:
            # Assume instance not running
            # (there is a slight race condition here, but it's not very
            # probable, and we have no other way to check)
            # TODO: Describe race condition
            current_mem = 0
          #TODO(dynmem): do the appropriate check involving MINMEM
          miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
                      pnhvinfo["memory_free"])
          if miss_mem > 0:
            raise errors.OpPrereqError("This change will prevent the instance"
                                       " from starting, due to %d MB of memory"
                                       " missing on its primary node" %
                                       miss_mem,
                                       errors.ECODE_NORES)

      if be_new[constants.BE_AUTO_BALANCE]:
        for node, nres in nodeinfo.items():
          if node not in instance.secondary_nodes:
            continue
          nres.Raise("Can't get info from secondary node %s" % node,
                     prereq=True, ecode=errors.ECODE_STATE)
          (_, _, (nhvinfo, )) = nres.payload
          if not isinstance(nhvinfo.get("memory_free", None), int):
            raise errors.OpPrereqError("Secondary node %s didn't return free"
                                       " memory information" % node,
                                       errors.ECODE_STATE)
          #TODO(dynmem): do the appropriate check involving MINMEM
          elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
            raise errors.OpPrereqError("This change will prevent the instance"
                                       " from failover to its secondary node"
                                       " %s, due to not enough memory" % node,
                                       errors.ECODE_STATE)

    # NIC processing
    self.nic_pnew = {}
    self.nic_pinst = {}
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        if not instance.nics:
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
                                     errors.ECODE_INVAL)
        continue
      if nic_op != constants.DDM_ADD:
        # an existing nic
        if not instance.nics:
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
                                     " no NICs" % nic_op,
                                     errors.ECODE_INVAL)
        if nic_op < 0 or nic_op >= len(instance.nics):
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
                                     " are 0 to %d" %
                                     (nic_op, len(instance.nics) - 1),
                                     errors.ECODE_INVAL)
        old_nic_params = instance.nics[nic_op].nicparams
        old_nic_ip = instance.nics[nic_op].ip
      else:
        old_nic_params = {}
        old_nic_ip = None

      update_params_dict = dict([(key, nic_dict[key])
                                 for key in constants.NICS_PARAMETERS
                                 if key in nic_dict])

      if "bridge" in nic_dict:
        update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]

      new_nic_params = _GetUpdatedParams(old_nic_params,
                                         update_params_dict)
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
      self.nic_pinst[nic_op] = new_nic_params
      self.nic_pnew[nic_op] = new_filled_nic_params
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]

      if new_nic_mode == constants.NIC_MODE_BRIDGED:
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
        if msg:
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
          if self.op.force:
            self.warn.append(msg)
          else:
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
      if new_nic_mode == constants.NIC_MODE_ROUTED:
        if constants.INIC_IP in nic_dict:
          nic_ip = nic_dict[constants.INIC_IP]
        else:
          nic_ip = old_nic_ip
        if nic_ip is None:
          raise errors.OpPrereqError("Cannot set the nic ip to None"
                                     " on a routed nic", errors.ECODE_INVAL)
      if constants.INIC_MAC in nic_dict:
        nic_mac = nic_dict[constants.INIC_MAC]
        if nic_mac is None:
          raise errors.OpPrereqError("Cannot set the nic mac to None",
                                     errors.ECODE_INVAL)
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          # otherwise generate the mac
          nic_dict[constants.INIC_MAC] = \
            self.cfg.GenerateMAC(self.proc.GetECId())
        else:
          # or validate/reserve the current one
          try:
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
          except errors.ReservationError:
            raise errors.OpPrereqError("MAC address %s already in use"
                                       " in cluster" % nic_mac,
                                       errors.ECODE_NOTUNIQUE)

    # DISK processing
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Disk operations not supported for"
                                 " diskless instances",
                                 errors.ECODE_INVAL)
    for disk_op, _ in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        if len(instance.disks) == 1:
          raise errors.OpPrereqError("Cannot remove the last disk of"
                                     " an instance", errors.ECODE_INVAL)
        _CheckInstanceState(self, instance, INSTANCE_DOWN,
                            msg="cannot remove disks")

      if (disk_op == constants.DDM_ADD and
          len(instance.disks) >= constants.MAX_DISKS):
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
                                   " add more" % constants.MAX_DISKS,
                                   errors.ECODE_STATE)
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
        # an existing disk
        if disk_op < 0 or disk_op >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
                                     " are 0 to %d" %
                                     (disk_op, len(instance.disks)),
                                     errors.ECODE_INVAL)

    # disabling the instance
    if self.op.offline_inst:
      _CheckInstanceState(self, instance, INSTANCE_DOWN,
                          msg="cannot change instance state to offline")

    # enabling the instance
    if self.op.online_inst:
      _CheckInstanceState(self, instance, INSTANCE_OFFLINE,
                          msg="cannot make instance go online")

  def _ConvertPlainToDrbd(self, feedback_fn):
    """Converts an instance from plain to drbd.

    """
    feedback_fn("Converting template to drbd")
    instance = self.instance
    pnode = instance.primary_node
    snode = self.op.remote_node

    assert instance.disk_template == constants.DT_PLAIN

    # create a fake disk info for _GenerateDiskTemplate
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
                  constants.IDISK_VG: d.logical_id[0]}
                 for d in instance.disks]
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
                                      instance.name, pnode, [snode],
                                      disk_info, None, None, 0, feedback_fn,
                                      self.diskparams)
    info = _GetInstanceInfoText(instance)
    feedback_fn("Creating additional volumes...")
    # first, create the missing data and meta devices
    for disk in new_disks:
      # unfortunately this is... not too nice
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
                            info, True)
      for child in disk.children:
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
    # at this stage, all new LVs have been created, we can rename the
    # old ones
    feedback_fn("Renaming original volumes...")
    rename_list = [(o, n.children[0].logical_id)
                   for (o, n) in zip(instance.disks, new_disks)]
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
    result.Raise("Failed to rename original LVs")

    feedback_fn("Initializing DRBD devices...")
    # all child devices are in place, we can now create the DRBD devices
    for disk in new_disks:
      for node in [pnode, snode]:
        f_create = node == pnode
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)

    # at this point, the instance has been modified
    instance.disk_template = constants.DT_DRBD8
    instance.disks = new_disks
    self.cfg.Update(instance, feedback_fn)

    # Release node locks while waiting for sync
    _ReleaseLocks(self, locking.LEVEL_NODE)

    # disks are created, waiting for sync
    disk_abort = not _WaitForSync(self, instance,
                                  oneshot=not self.op.wait_for_sync)
    if disk_abort:
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance, please cleanup manually")

    # Node resource locks will be released by caller

  def _ConvertDrbdToPlain(self, feedback_fn):
    """Converts an instance from drbd to plain.

    """
    instance = self.instance

    assert len(instance.secondary_nodes) == 1
    assert instance.disk_template == constants.DT_DRBD8

    pnode = instance.primary_node
    snode = instance.secondary_nodes[0]
    feedback_fn("Converting template to plain")

    old_disks = instance.disks
    new_disks = [d.children[0] for d in old_disks]

    # copy over size and mode
    for parent, child in zip(old_disks, new_disks):
      child.size = parent.size
      child.mode = parent.mode

    # update instance structure
    instance.disks = new_disks
    instance.disk_template = constants.DT_PLAIN
    self.cfg.Update(instance, feedback_fn)

    # Release locks in case removing disks takes a while
    _ReleaseLocks(self, locking.LEVEL_NODE)

    feedback_fn("Removing volumes on the secondary node...")
    for disk in old_disks:
      self.cfg.SetDiskID(disk, snode)
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
      if msg:
        self.LogWarning("Could not remove block device %s on node %s,"
                        " continuing anyway: %s", disk.iv_name, snode, msg)

    feedback_fn("Removing unneeded volumes on the primary node...")
    for idx, disk in enumerate(old_disks):
      meta = disk.children[1]
      self.cfg.SetDiskID(meta, pnode)
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
      if msg:
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
                        " continuing anyway: %s", idx, pnode, msg)

    # this is a DRBD disk, return its port to the pool
    for disk in old_disks:
      tcp_port = disk.logical_id[2]
      self.cfg.AddTcpUdpPort(tcp_port)

    # Node resource locks will be released by caller

  def Exec(self, feedback_fn):
    """Modifies an instance.

    All parameters take effect only at the next restart of the instance.

    """
    # Process here the warnings from CheckPrereq, as we don't have a
    # feedback_fn there.
    for warn in self.warn:
      feedback_fn("WARNING: %s" % warn)

    assert ((self.op.disk_template is None) ^
            bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
      "Not owning any node resource locks"

    result = []
    instance = self.instance
    # disk changes
    for disk_op, disk_dict in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        # remove the last disk
        device = instance.disks.pop()
        device_idx = len(instance.disks)
        for node, disk in device.ComputeNodeTree(instance.primary_node):
          self.cfg.SetDiskID(disk, node)
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
          if msg:
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
                            " continuing anyway", device_idx, node, msg)
        result.append(("disk/%d" % device_idx, "remove"))

        # if this is a DRBD disk, return its port to the pool
        if device.dev_type in constants.LDS_DRBD:
          tcp_port = device.logical_id[2]
          self.cfg.AddTcpUdpPort(tcp_port)
      elif disk_op == constants.DDM_ADD:
        # add a new disk
        if instance.disk_template in (constants.DT_FILE,
                                      constants.DT_SHARED_FILE):
          file_driver, file_path = instance.disks[0].logical_id
          file_path = os.path.dirname(file_path)
        else:
          file_driver = file_path = None
        disk_idx_base = len(instance.disks)
        new_disk = _GenerateDiskTemplate(self,
                                         instance.disk_template,
                                         instance.name, instance.primary_node,
                                         instance.secondary_nodes,
                                         [disk_dict],
                                         file_path,
                                         file_driver,
                                         disk_idx_base,
                                         feedback_fn,
                                         self.diskparams)[0]
        instance.disks.append(new_disk)
        info = _GetInstanceInfoText(instance)

        logging.info("Creating volume %s for instance %s",
                     new_disk.iv_name, instance.name)
        # Note: this needs to be kept in sync with _CreateDisks
        #HARDCODE
        for node in instance.all_nodes:
          f_create = node == instance.primary_node
          try:
            _CreateBlockDev(self, node, instance, new_disk,
                            f_create, info, f_create)
          except errors.OpExecError, err:
            self.LogWarning("Failed to create volume %s (%s) on"
                            " node %s: %s",
                            new_disk.iv_name, new_disk, node, err)
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
                       (new_disk.size, new_disk.mode)))
      else:
        # change a given disk
        instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
        result.append(("disk.mode/%d" % disk_op,
                       disk_dict[constants.IDISK_MODE]))

    if self.op.disk_template:
      if __debug__:
        check_nodes = set(instance.all_nodes)
        if self.op.remote_node:
          check_nodes.add(self.op.remote_node)
        for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
          owned = self.owned_locks(level)
          assert not (check_nodes - owned), \
            ("Not owning the correct locks, owning %r, expected at least %r" %
             (owned, check_nodes))

      r_shut = _ShutdownInstanceDisks(self, instance)
      if not r_shut:
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
                                 " proceed with disk template conversion")
      mode = (instance.disk_template, self.op.disk_template)
      try:
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
      except:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise
      result.append(("disk_template", self.op.disk_template))

      assert instance.disk_template == self.op.disk_template, \
        ("Expected disk template '%s', found '%s'" %
         (self.op.disk_template, instance.disk_template))

    # Release node and resource locks if there are any (they might already have
    # been released during disk conversion)
    _ReleaseLocks(self, locking.LEVEL_NODE)
    _ReleaseLocks(self, locking.LEVEL_NODE_RES)

    # NIC changes
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        # remove the last nic
        del instance.nics[-1]
        result.append(("nic.%d" % len(instance.nics), "remove"))
      elif nic_op == constants.DDM_ADD:
        # mac and bridge should be set, by now
        mac = nic_dict[constants.INIC_MAC]
        ip = nic_dict.get(constants.INIC_IP, None)
        nicparams = self.nic_pinst[constants.DDM_ADD]
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
        instance.nics.append(new_nic)
        result.append(("nic.%d" % (len(instance.nics) - 1),
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
                       (new_nic.mac, new_nic.ip,
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
                        )))
      else:
        for key in (constants.INIC_MAC, constants.INIC_IP):
          if key in nic_dict:
            setattr(instance.nics[nic_op], key, nic_dict[key])
        if nic_op in self.nic_pinst:
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
        for key, val in nic_dict.iteritems():
          result.append(("nic.%s/%d" % (key, nic_op), val))

    # hvparams changes
    if self.op.hvparams:
      instance.hvparams = self.hv_inst
      for key, val in self.op.hvparams.iteritems():
        result.append(("hv/%s" % key, val))

    # beparams changes
    if self.op.beparams:
      instance.beparams = self.be_inst
      for key, val in self.op.beparams.iteritems():
        result.append(("be/%s" % key, val))

    # OS change
    if self.op.os_name:
      instance.os = self.op.os_name

    # osparams changes
    if self.op.osparams:
      instance.osparams = self.os_inst
      for key, val in self.op.osparams.iteritems():
        result.append(("os/%s" % key, val))

    # online/offline instance
    if self.op.online_inst:
      self.cfg.MarkInstanceDown(instance.name)
      result.append(("admin_state", constants.ADMINST_DOWN))
    if self.op.offline_inst:
      self.cfg.MarkInstanceOffline(instance.name)
      result.append(("admin_state", constants.ADMINST_OFFLINE))

    self.cfg.Update(instance, feedback_fn)

    assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
                self.owned_locks(locking.LEVEL_NODE)), \
      "All node locks should have been released by now"

    return result

  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }
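
  # _DISK_CONVERSIONS holds plain functions rather than bound methods, which
  # is why Exec invokes them with the instance passed explicitly, e.g.
  # (illustrative):
  #
  #   mode = (constants.DT_PLAIN, constants.DT_DRBD8)
  #   self._DISK_CONVERSIONS[mode](self, feedback_fn)  # _ConvertPlainToDrbd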


class LUInstanceChangeGroup(LogicalUnit):
  HPATH = "instance-change-group"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

    self._ExpandAndLockInstance()

    if self.op.target_groups:
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                                  self.op.target_groups)
    else:
      self.req_target_uuids = None

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if self.req_target_uuids:
        lock_groups = set(self.req_target_uuids)

        # Lock all groups used by instance optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
        lock_groups.update(instance_groups)
      else:
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET

      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups

    elif level == locking.LEVEL_NODE:
      if self.req_target_uuids:
        # Lock all nodes used by instances
        self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
        self._LockInstancesNodes()

        # Lock all nodes in all potential target groups
        lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
                       self.cfg.GetInstanceNodeGroups(self.op.instance_name))
        member_nodes = [node_name
                        for group in lock_groups
                        for node_name in self.cfg.GetNodeGroup(group).members]
        self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
      else:
        # Lock all nodes as all groups are potential targets
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert (self.req_target_uuids is None or
            owned_groups.issuperset(self.req_target_uuids))
    assert owned_instances == set([self.op.instance_name])

    # Get instance information
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)

    # Check if node groups for locked instance are still correct
    assert owned_nodes.issuperset(self.instance.all_nodes), \
      ("Instance %s's nodes changed while we kept the lock" %
       self.op.instance_name)

    inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
                                           owned_groups)

    if self.req_target_uuids:
      # User requested specific target groups
      self.target_uuids = self.req_target_uuids
    else:
      # All groups except those used by the instance are potential targets
      self.target_uuids = owned_groups - inst_groups

    conflicting_groups = self.target_uuids & inst_groups
    if conflicting_groups:
      raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
                                 " used by the instance '%s'" %
                                 (utils.CommaJoin(conflicting_groups),
                                  self.op.instance_name),
                                 errors.ECODE_INVAL)

    if not self.target_uuids:
      raise errors.OpPrereqError("There are no possible target groups",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    assert self.target_uuids

    env = {
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert instances == [self.op.instance_name], "Instance not locked"

    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
                     instances=instances, target_groups=list(self.target_uuids))

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute solution for changing group of"
                                 " instance '%s' using iallocator '%s': %s" %
                                 (self.op.instance_name, self.op.iallocator,
                                  ial.info),
                                 errors.ECODE_NORES)

    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for changing group of"
                 " instance '%s'", len(jobs), self.op.instance_name)

    return ResultWithJobs(jobs)


class LUBackupQuery(NoHooksLU):
  """Query the exports list

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node.

    """
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
    rpcresult = self.rpc.call_export_list(self.nodes)
    result = {}
    for node in rpcresult:
      if rpcresult[node].fail_msg:
        result[node] = False
      else:
        result[node] = rpcresult[node].payload

    return result
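
# Illustrative return value of LUBackupQuery.Exec: node names mapped either
# to the list of exports found on that node or to False when the node could
# not be contacted, e.g.:
#
#   {"node1.example.com": ["inst1.example.com"], "node2.example.com": False}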


class LUBackupPrepare(NoHooksLU):
  """Prepares an instance for an export and returns useful information.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self._cds = _GetClusterDomainSecret()

  def Exec(self, feedback_fn):
    """Prepares an instance for an export.

    """
    instance = self.instance

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      salt = utils.GenerateSecret(8)

      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
      result = self.rpc.call_x509_cert_create(instance.primary_node,
                                              constants.RIE_CERT_VALIDITY)
      result.Raise("Can't create X509 key and certificate on %s" % result.node)

      (name, cert_pem) = result.payload

      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                             cert_pem)

      return {
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
                          salt),
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
        }

    return None
12587 class LUBackupExport(LogicalUnit):
12588 """Export an instance to an image in the cluster.
12591 HPATH = "instance-export"
12592 HTYPE = constants.HTYPE_INSTANCE
12595 def CheckArguments(self):
12596 """Check the arguments.
12599 self.x509_key_name = self.op.x509_key_name
12600 self.dest_x509_ca_pem = self.op.destination_x509_ca
12602 if self.op.mode == constants.EXPORT_MODE_REMOTE:
12603 if not self.x509_key_name:
12604 raise errors.OpPrereqError("Missing X509 key name for encryption",
12605 errors.ECODE_INVAL)
12607 if not self.dest_x509_ca_pem:
12608 raise errors.OpPrereqError("Missing destination X509 CA",
12609 errors.ECODE_INVAL)
12611 def ExpandNames(self):
12612 self._ExpandAndLockInstance()
12614 # Lock all nodes for local exports
12615 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12616 # FIXME: lock only instance primary and destination node
12618 # Sad but true, for now we have do lock all nodes, as we don't know where
12619 # the previous export might be, and in this LU we search for it and
12620 # remove it from its current node. In the future we could fix this by:
12621 # - making a tasklet to search (share-lock all), then create the
12622 # new one, then one to remove, after
12623 # - removing the removal operation altogether
12624 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12626 def DeclareLocks(self, level):
12627 """Last minute lock declaration."""
12628 # All nodes are locked anyway, so nothing to do here.
12630 def BuildHooksEnv(self):
12631 """Build hooks env.
12633 This will run on the master, primary node and target node.
12637 "EXPORT_MODE": self.op.mode,
12638 "EXPORT_NODE": self.op.target_node,
12639 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
12640 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
12641 # TODO: Generic function for boolean env variables
12642 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
12645 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12649 def BuildHooksNodes(self):
12650 """Build hooks nodes.
12653 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
12655 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12656 nl.append(self.op.target_node)
12660 def CheckPrereq(self):
12661 """Check prerequisites.
12663 This checks that the instance and node names are valid.
12666 instance_name = self.op.instance_name
12668 self.instance = self.cfg.GetInstanceInfo(instance_name)
12669 assert self.instance is not None, \
12670 "Cannot retrieve locked instance %s" % self.op.instance_name
12671 _CheckNodeOnline(self, self.instance.primary_node)
12673 if (self.op.remove_instance and
12674 self.instance.admin_state == constants.ADMINST_UP and
12675 not self.op.shutdown):
12676       raise errors.OpPrereqError("Cannot remove instance without shutting it"
12677                                  " down first", errors.ECODE_STATE)
12679 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12680 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
12681 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
12682 assert self.dst_node is not None
12684 _CheckNodeOnline(self, self.dst_node.name)
12685 _CheckNodeNotDrained(self, self.dst_node.name)
12688 self.dest_disk_info = None
12689 self.dest_x509_ca = None
12691 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
12692 self.dst_node = None
12694 if len(self.op.target_node) != len(self.instance.disks):
12695 raise errors.OpPrereqError(("Received destination information for %s"
12696 " disks, but instance %s has %s disks") %
12697 (len(self.op.target_node), instance_name,
12698 len(self.instance.disks)),
12699 errors.ECODE_INVAL)
12701 cds = _GetClusterDomainSecret()
12703       # Check X509 key name
12704       try:
12705         (key_name, hmac_digest, hmac_salt) = self.x509_key_name
12706       except (TypeError, ValueError), err:
12707 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
12709 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
12710 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
12711 errors.ECODE_INVAL)
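      # Illustrative round-trip (editor's sketch, not part of the control flow
      # here): LUBackupPrepare.Exec builds the key name tuple roughly as
      #
      #   salt = utils.GenerateSecret(8)
      #   x509_key_name = (name, utils.Sha1Hmac(cds, name, salt=salt), salt)
      #
      # so the VerifySha1Hmac() call above simply recomputes the digest from
      # the submitted name and salt and compares it with the submitted digest.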
12713       # Load and verify CA
12714       try:
12715         (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
12716 except OpenSSL.crypto.Error, err:
12717 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
12718 (err, ), errors.ECODE_INVAL)
12720 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
12721 if errcode is not None:
12722 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
12723 (msg, ), errors.ECODE_INVAL)
12725 self.dest_x509_ca = cert
12727       # Verify target information
12728       disk_info = []
12729       for idx, disk_data in enumerate(self.op.target_node):
12730         try:
12731           (host, port, magic) = \
12732 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
12733 except errors.GenericError, err:
12734 raise errors.OpPrereqError("Target info for disk %s: %s" %
12735 (idx, err), errors.ECODE_INVAL)
12737 disk_info.append((host, port, magic))
12739 assert len(disk_info) == len(self.op.target_node)
12740 self.dest_disk_info = disk_info
12742     else:
12743       raise errors.ProgrammerError("Unhandled export mode %r" %
12744                                    self.op.mode)
12746 # instance disk type verification
12747 # TODO: Implement export support for file-based disks
12748 for disk in self.instance.disks:
12749 if disk.dev_type == constants.LD_FILE:
12750 raise errors.OpPrereqError("Export not supported for instances with"
12751 " file-based disks", errors.ECODE_INVAL)
12753 def _CleanupExports(self, feedback_fn):
12754 """Removes exports of current instance from all other nodes.
12756 If an instance in a cluster with nodes A..D was exported to node C, its
12757 exports will be removed from the nodes A, B and D.
12760 assert self.op.mode != constants.EXPORT_MODE_REMOTE
12762 nodelist = self.cfg.GetNodeList()
12763 nodelist.remove(self.dst_node.name)
12765 # on one-node clusters nodelist will be empty after the removal
12766 # if we proceed the backup would be removed because OpBackupQuery
12767 # substitutes an empty list with the full cluster node list.
12768     iname = self.instance.name
12769     if nodelist:
12770       feedback_fn("Removing old exports for instance %s" % iname)
12771       exportlist = self.rpc.call_export_list(nodelist)
12772       for node in exportlist:
12773         if exportlist[node].fail_msg:
12774           continue
12775         if iname in exportlist[node].payload:
12776           msg = self.rpc.call_export_remove(node, iname).fail_msg
12777           if msg:
12778             self.LogWarning("Could not remove older export for instance %s"
12779                             " on node %s: %s", iname, node, msg)
12781 def Exec(self, feedback_fn):
12782 """Export an instance to an image in the cluster.
12785 assert self.op.mode in constants.EXPORT_MODES
12787 instance = self.instance
12788 src_node = instance.primary_node
12790 if self.op.shutdown:
12791 # shutdown the instance, but not the disks
12792 feedback_fn("Shutting down instance %s" % instance.name)
12793 result = self.rpc.call_instance_shutdown(src_node, instance,
12794 self.op.shutdown_timeout)
12795 # TODO: Maybe ignore failures if ignore_remove_failures is set
12796 result.Raise("Could not shutdown instance %s on"
12797 " node %s" % (instance.name, src_node))
12799 # set the disks ID correctly since call_instance_start needs the
12800 # correct drbd minor to create the symlinks
12801 for disk in instance.disks:
12802 self.cfg.SetDiskID(disk, src_node)
12804     activate_disks = (instance.admin_state != constants.ADMINST_UP)
12806     if activate_disks:
12807       # Activate the instance disks if we're exporting a stopped instance
12808       feedback_fn("Activating disks for %s" % instance.name)
12809       _StartInstanceDisks(self, instance, None)
12811     try:
12812       helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
12813                                                      instance)
12815       helper.CreateSnapshots()
12816       try:
12817 if (self.op.shutdown and
12818 instance.admin_state == constants.ADMINST_UP and
12819 not self.op.remove_instance):
12820 assert not activate_disks
12821 feedback_fn("Starting instance %s" % instance.name)
12822 result = self.rpc.call_instance_start(src_node,
12823 (instance, None, None), False)
12824           msg = result.fail_msg
12825           if msg:
12826             feedback_fn("Failed to start instance: %s" % msg)
12827 _ShutdownInstanceDisks(self, instance)
12828 raise errors.OpExecError("Could not start instance: %s" % msg)
12830 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12831 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
12832 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
12833 connect_timeout = constants.RIE_CONNECT_TIMEOUT
12834 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
12836 (key_name, _, _) = self.x509_key_name
12838           dest_ca_pem = \
12839             OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
12840                                             self.dest_x509_ca)
12842           (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
12843                                                      key_name, dest_ca_pem,
12844                                                      timeouts)
12846       finally:
12847         helper.Cleanup()
12848       # Check for backwards compatibility
12849 assert len(dresults) == len(instance.disks)
12850 assert compat.all(isinstance(i, bool) for i in dresults), \
12851 "Not all results are boolean: %r" % dresults
12853     finally:
12854       if activate_disks:
12855         feedback_fn("Deactivating disks for %s" % instance.name)
12856 _ShutdownInstanceDisks(self, instance)
12858     if not (compat.all(dresults) and fin_resu):
12859       failures = []
12860       if not fin_resu:
12861         failures.append("export finalization")
12862       if not compat.all(dresults):
12863         fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
12864                                if not dsk)
12865         failures.append("disk export: disk(s) %s" % fdsk)
12867       raise errors.OpExecError("Export failed, errors in %s" %
12868                                utils.CommaJoin(failures))
12870 # At this point, the export was successful, we can cleanup/finish
12872 # Remove instance if requested
12873 if self.op.remove_instance:
12874 feedback_fn("Removing instance %s" % instance.name)
12875 _RemoveInstance(self, feedback_fn, instance,
12876 self.op.ignore_remove_failures)
12878 if self.op.mode == constants.EXPORT_MODE_LOCAL:
12879 self._CleanupExports(feedback_fn)
12881 return fin_resu, dresults
12884 class LUBackupRemove(NoHooksLU):
12885 """Remove exports related to the named instance.
12890 def ExpandNames(self):
12891 self.needed_locks = {}
12892 # We need all nodes to be locked in order for RemoveExport to work, but we
12893 # don't need to lock the instance itself, as nothing will happen to it (and
12894     # we can remove exports even for a removed instance)
12895 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12897 def Exec(self, feedback_fn):
12898 """Remove any export.
12901 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
12902 # If the instance was not found we'll try with the name that was passed in.
12903 # This will only work if it was an FQDN, though.
12904     fqdn_warn = False
12905     if not instance_name:
12906       fqdn_warn = True
12907       instance_name = self.op.instance_name
12909 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
12910     exportlist = self.rpc.call_export_list(locked_nodes)
12911     found = False
12912     for node in exportlist:
12913       msg = exportlist[node].fail_msg
12914       if msg:
12915         self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
12916         continue
12917       if instance_name in exportlist[node].payload:
12918         found = True
12919         result = self.rpc.call_export_remove(node, instance_name)
12920         msg = result.fail_msg
12921         if msg:
12922           logging.error("Could not remove export for instance %s"
12923                         " on node %s: %s", instance_name, node, msg)
12925 if fqdn_warn and not found:
12926       feedback_fn("Export not found. If trying to remove an export belonging"
12927                   " to a deleted instance please use its Fully Qualified"
12928                   " Domain Name.")
12931 class LUGroupAdd(LogicalUnit):
12932 """Logical unit for creating node groups.
12935 HPATH = "group-add"
12936 HTYPE = constants.HTYPE_GROUP
12939 def ExpandNames(self):
12940 # We need the new group's UUID here so that we can create and acquire the
12941 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
12942 # that it should not check whether the UUID exists in the configuration.
12943 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
12944 self.needed_locks = {}
12945 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12947 def CheckPrereq(self):
12948 """Check prerequisites.
12950     This checks that the given group name is not an existing node group
12951     already.
12954     try:
12955       existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12956     except errors.OpPrereqError:
12957       pass
12958     else:
12959       raise errors.OpPrereqError("Desired group name '%s' already exists as a"
12960 " node group (UUID: %s)" %
12961 (self.op.group_name, existing_uuid),
12962 errors.ECODE_EXISTS)
12964 if self.op.ndparams:
12965 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12967     if self.op.hv_state:
12968       self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
12969     else:
12970       self.new_hv_state = None
12972     if self.op.disk_state:
12973       self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
12974     else:
12975       self.new_disk_state = None
12977     if self.op.diskparams:
12978       for templ in constants.DISK_TEMPLATES:
12979         if templ not in self.op.diskparams:
12980           self.op.diskparams[templ] = {}
12981         utils.ForceDictType(self.op.diskparams[templ], constants.DISK_DT_TYPES)
12982     else:
12983       self.op.diskparams = self.cfg.GetClusterInfo().diskparams
12985 if self.op.ipolicy:
12986 cluster = self.cfg.GetClusterInfo()
12987 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
12988 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy)
12990 def BuildHooksEnv(self):
12991 """Build hooks env.
12994     return {
12995       "GROUP_NAME": self.op.group_name,
12996       }
12998 def BuildHooksNodes(self):
12999 """Build hooks nodes.
13002 mn = self.cfg.GetMasterNode()
13003 return ([mn], [mn])
13005 def Exec(self, feedback_fn):
13006 """Add the node group to the cluster.
13009 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
13010 uuid=self.group_uuid,
13011 alloc_policy=self.op.alloc_policy,
13012 ndparams=self.op.ndparams,
13013 diskparams=self.op.diskparams,
13014 ipolicy=self.op.ipolicy,
13015 hv_state_static=self.new_hv_state,
13016 disk_state_static=self.new_disk_state)
13018 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
13019 del self.remove_locks[locking.LEVEL_NODEGROUP]
13022 class LUGroupAssignNodes(NoHooksLU):
13023 """Logical unit for assigning nodes to groups.
13028 def ExpandNames(self):
13029 # These raise errors.OpPrereqError on their own:
13030 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13031 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
13033 # We want to lock all the affected nodes and groups. We have readily
13034 # available the list of nodes, and the *destination* group. To gather the
13035 # list of "source" groups, we need to fetch node information later on.
13036 self.needed_locks = {
13037 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
13038       locking.LEVEL_NODE: self.op.nodes,
13039       }
13041 def DeclareLocks(self, level):
13042 if level == locking.LEVEL_NODEGROUP:
13043 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
13045 # Try to get all affected nodes' groups without having the group or node
13046 # lock yet. Needs verification later in the code flow.
13047 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
13049 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
13051 def CheckPrereq(self):
13052 """Check prerequisites.
13055 assert self.needed_locks[locking.LEVEL_NODEGROUP]
13056 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
13057 frozenset(self.op.nodes))
13059 expected_locks = (set([self.group_uuid]) |
13060 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
13061 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
13062 if actual_locks != expected_locks:
13063 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
13064 " current groups are '%s', used to be '%s'" %
13065 (utils.CommaJoin(expected_locks),
13066 utils.CommaJoin(actual_locks)))
13068 self.node_data = self.cfg.GetAllNodesInfo()
13069 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13070 instance_data = self.cfg.GetAllInstancesInfo()
13072 if self.group is None:
13073 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13074 (self.op.group_name, self.group_uuid))
13076 (new_splits, previous_splits) = \
13077 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
13078 for node in self.op.nodes],
13079 self.node_data, instance_data)
13081     if new_splits:
13082       fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
13084       if not self.op.force:
13085         raise errors.OpExecError("The following instances get split by this"
13086                                  " change and --force was not given: %s" %
13087                                  fmt_new_splits)
13088       else:
13089         self.LogWarning("This operation will split the following instances: %s",
13090                         fmt_new_splits)
13092 if previous_splits:
13093 self.LogWarning("In addition, these already-split instances continue"
13094 " to be split across groups: %s",
13095 utils.CommaJoin(utils.NiceSort(previous_splits)))
13097 def Exec(self, feedback_fn):
13098 """Assign nodes to a new group.
13101 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
13103 self.cfg.AssignGroupNodes(mods)
13105   @staticmethod
13106   def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
13107 """Check for split instances after a node assignment.
13109 This method considers a series of node assignments as an atomic operation,
13110     and returns information about split instances after applying the set of
13111     changes.
13113 In particular, it returns information about newly split instances, and
13114 instances that were already split, and remain so after the change.
13116     Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
13117     considered.
13119 @type changes: list of (node_name, new_group_uuid) pairs.
13120 @param changes: list of node assignments to consider.
13121 @param node_data: a dict with data for all nodes
13122 @param instance_data: a dict with all instances to consider
13123 @rtype: a two-tuple
13124     @return: a list of instances that were previously okay and end up split as a
13125       consequence of this change, and a list of instances that were previously
13126       split and that this change does not fix.
13129 changed_nodes = dict((node, group) for node, group in changes
13130 if node_data[node].group != group)
13132 all_split_instances = set()
13133 previously_split_instances = set()
13135 def InstanceNodes(instance):
13136 return [instance.primary_node] + list(instance.secondary_nodes)
13138 for inst in instance_data.values():
13139       if inst.disk_template not in constants.DTS_INT_MIRROR:
13140         continue
13142       instance_nodes = InstanceNodes(inst)
13144 if len(set(node_data[node].group for node in instance_nodes)) > 1:
13145 previously_split_instances.add(inst.name)
13147 if len(set(changed_nodes.get(node, node_data[node].group)
13148 for node in instance_nodes)) > 1:
13149 all_split_instances.add(inst.name)
13151 return (list(all_split_instances - previously_split_instances),
13152 list(previously_split_instances & all_split_instances))
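  # Worked example (hypothetical data): nodes "A" and "B" start out in group
  # "g1" and host a DRBD instance "inst1" with nodes (A, B). Applying
  # changes = [("A", "g2")] makes inst1 newly split, so the method returns
  # (["inst1"], []); moving both nodes in the same call, with
  # changes = [("A", "g2"), ("B", "g2")], keeps the instance whole and
  # returns ([], []).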
13155 class _GroupQuery(_QueryBase):
13156 FIELDS = query.GROUP_FIELDS
13158 def ExpandNames(self, lu):
13159 lu.needed_locks = {}
13161 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
13162 self._cluster = lu.cfg.GetClusterInfo()
13163 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
13165     if not self.names:
13166       self.wanted = [name_to_uuid[name]
13167                      for name in utils.NiceSort(name_to_uuid.keys())]
13168     else:
13169       # Accept names to be either names or UUIDs.
13170       missing = []
13171       self.wanted = []
13172       all_uuid = frozenset(self._all_groups.keys())
13174 for name in self.names:
13175 if name in all_uuid:
13176 self.wanted.append(name)
13177 elif name in name_to_uuid:
13178 self.wanted.append(name_to_uuid[name])
13179         else:
13180           missing.append(name)
13182       if missing:
13183         raise errors.OpPrereqError("Some groups do not exist: %s" %
13184 utils.CommaJoin(missing),
13185 errors.ECODE_NOENT)
13187   def DeclareLocks(self, lu, level):
13188     pass
13190 def _GetQueryData(self, lu):
13191 """Computes the list of node groups and their attributes.
13194 do_nodes = query.GQ_NODE in self.requested_data
13195 do_instances = query.GQ_INST in self.requested_data
13197 group_to_nodes = None
13198 group_to_instances = None
13200 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
13201 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
13202 # latter GetAllInstancesInfo() is not enough, for we have to go through
13203 # instance->node. Hence, we will need to process nodes even if we only need
13204 # instance information.
13205 if do_nodes or do_instances:
13206 all_nodes = lu.cfg.GetAllNodesInfo()
13207       group_to_nodes = dict((uuid, []) for uuid in self.wanted)
13208       node_to_group = {}
13210 for node in all_nodes.values():
13211 if node.group in group_to_nodes:
13212 group_to_nodes[node.group].append(node.name)
13213 node_to_group[node.name] = node.group
13215       if do_instances:
13216         all_instances = lu.cfg.GetAllInstancesInfo()
13217 group_to_instances = dict((uuid, []) for uuid in self.wanted)
13219 for instance in all_instances.values():
13220 node = instance.primary_node
13221 if node in node_to_group:
13222 group_to_instances[node_to_group[node]].append(instance.name)
13224         if not do_nodes:
13225           # Do not pass on node information if it was not requested.
13226           group_to_nodes = None
13228 return query.GroupQueryData(self._cluster,
13229 [self._all_groups[uuid]
13230 for uuid in self.wanted],
13231 group_to_nodes, group_to_instances)
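  # For illustration (hypothetical names), the mappings built above have this
  # shape for two requested groups:
  #
  #   group_to_nodes = {"uuid-1": ["node1", "node2"], "uuid-2": ["node3"]}
  #   group_to_instances = {"uuid-1": ["inst1"], "uuid-2": []}
  #
  # Note that an instance is attributed only to the group of its primary node.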
13234 class LUGroupQuery(NoHooksLU):
13235 """Logical unit for querying node groups.
13240 def CheckArguments(self):
13241 self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
13242 self.op.output_fields, False)
13244 def ExpandNames(self):
13245 self.gq.ExpandNames(self)
13247 def DeclareLocks(self, level):
13248 self.gq.DeclareLocks(self, level)
13250 def Exec(self, feedback_fn):
13251 return self.gq.OldStyleQuery(self)
13254 class LUGroupSetParams(LogicalUnit):
13255 """Modifies the parameters of a node group.
13258 HPATH = "group-modify"
13259 HTYPE = constants.HTYPE_GROUP
13262   def CheckArguments(self):
13263     all_changes = [
13264       self.op.ndparams,
13265       self.op.diskparams,
13266       self.op.alloc_policy,
13267       self.op.hv_state,
13268       self.op.disk_state,
13269       self.op.ipolicy,
13270       ]
13272 if all_changes.count(None) == len(all_changes):
13273 raise errors.OpPrereqError("Please pass at least one modification",
13274 errors.ECODE_INVAL)
13276 def ExpandNames(self):
13277 # This raises errors.OpPrereqError on its own:
13278 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13280 self.needed_locks = {
13281       locking.LEVEL_NODEGROUP: [self.group_uuid],
13282       }
13284 def CheckPrereq(self):
13285 """Check prerequisites.
13288 self.group = self.cfg.GetNodeGroup(self.group_uuid)
13290 if self.group is None:
13291 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13292 (self.op.group_name, self.group_uuid))
13294 if self.op.ndparams:
13295 new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
13296 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
13297 self.new_ndparams = new_ndparams
13299 if self.op.diskparams:
13300 self.new_diskparams = dict()
13301 for templ in constants.DISK_TEMPLATES:
13302 if templ not in self.op.diskparams:
13303 self.op.diskparams[templ] = {}
13304 new_templ_params = _GetUpdatedParams(self.group.diskparams[templ],
13305 self.op.diskparams[templ])
13306 utils.ForceDictType(new_templ_params, constants.DISK_DT_TYPES)
13307 self.new_diskparams[templ] = new_templ_params
13309 if self.op.hv_state:
13310 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
13311 self.group.hv_state_static)
13313 if self.op.disk_state:
13314 self.new_disk_state = \
13315 _MergeAndVerifyDiskState(self.op.disk_state,
13316 self.group.disk_state_static)
13318     if self.op.ipolicy:
13319       g_ipolicy = {}
13320       for key, value in self.op.ipolicy.iteritems():
13321         g_ipolicy[key] = _GetUpdatedParams(self.group.ipolicy.get(key, {}),
13322                                            value,
13323                                            use_none=True)
13324         utils.ForceDictType(g_ipolicy[key], constants.ISPECS_PARAMETER_TYPES)
13325 self.new_ipolicy = g_ipolicy
13326 objects.InstancePolicy.CheckParameterSyntax(self.new_ipolicy)
13328 def BuildHooksEnv(self):
13329 """Build hooks env.
13332     return {
13333       "GROUP_NAME": self.op.group_name,
13334       "NEW_ALLOC_POLICY": self.op.alloc_policy,
13335       }
13337 def BuildHooksNodes(self):
13338 """Build hooks nodes.
13341 mn = self.cfg.GetMasterNode()
13342 return ([mn], [mn])
13344 def Exec(self, feedback_fn):
13345 """Modifies the node group.
13349     result = []
13350     if self.op.ndparams:
13351 self.group.ndparams = self.new_ndparams
13352 result.append(("ndparams", str(self.group.ndparams)))
13354 if self.op.diskparams:
13355 self.group.diskparams = self.new_diskparams
13356 result.append(("diskparams", str(self.group.diskparams)))
13358 if self.op.alloc_policy:
13359 self.group.alloc_policy = self.op.alloc_policy
13361 if self.op.hv_state:
13362 self.group.hv_state_static = self.new_hv_state
13364 if self.op.disk_state:
13365 self.group.disk_state_static = self.new_disk_state
13367 if self.op.ipolicy:
13368 self.group.ipolicy = self.new_ipolicy
13370     self.cfg.Update(self.group, feedback_fn)
13372     return result
13374 class LUGroupRemove(LogicalUnit):
13375 HPATH = "group-remove"
13376 HTYPE = constants.HTYPE_GROUP
13379 def ExpandNames(self):
13380     # This raises errors.OpPrereqError on its own:
13381 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13382 self.needed_locks = {
13383       locking.LEVEL_NODEGROUP: [self.group_uuid],
13384       }
13386 def CheckPrereq(self):
13387 """Check prerequisites.
13389     This checks that the given group name exists as a node group, that it is
13390     empty (i.e., contains no nodes), and that it is not the last group of the
13391     cluster.
13394 # Verify that the group is empty.
13395     group_nodes = [node.name
13396                    for node in self.cfg.GetAllNodesInfo().values()
13397                    if node.group == self.group_uuid]
13399     if group_nodes:
13400       raise errors.OpPrereqError("Group '%s' not empty, has the following"
13401                                  " nodes: %s" %
13402                                  (self.op.group_name,
13403                                   utils.CommaJoin(utils.NiceSort(group_nodes))),
13404 errors.ECODE_STATE)
13406 # Verify the cluster would not be left group-less.
13407 if len(self.cfg.GetNodeGroupList()) == 1:
13408 raise errors.OpPrereqError("Group '%s' is the only group,"
13409 " cannot be removed" %
13410 self.op.group_name,
13411 errors.ECODE_STATE)
13413 def BuildHooksEnv(self):
13414 """Build hooks env.
13417     return {
13418       "GROUP_NAME": self.op.group_name,
13419       }
13421 def BuildHooksNodes(self):
13422 """Build hooks nodes.
13425 mn = self.cfg.GetMasterNode()
13426 return ([mn], [mn])
13428 def Exec(self, feedback_fn):
13429 """Remove the node group.
13433 self.cfg.RemoveNodeGroup(self.group_uuid)
13434 except errors.ConfigurationError:
13435 raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
13436 (self.op.group_name, self.group_uuid))
13438 self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13441 class LUGroupRename(LogicalUnit):
13442 HPATH = "group-rename"
13443 HTYPE = constants.HTYPE_GROUP
13446 def ExpandNames(self):
13447 # This raises errors.OpPrereqError on its own:
13448 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13450 self.needed_locks = {
13451 locking.LEVEL_NODEGROUP: [self.group_uuid],
13454 def CheckPrereq(self):
13455 """Check prerequisites.
13457 Ensures requested new name is not yet used.
13460     try:
13461       new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
13462     except errors.OpPrereqError:
13463       pass
13464     else:
13465       raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
13466 " node group (UUID: %s)" %
13467 (self.op.new_name, new_name_uuid),
13468 errors.ECODE_EXISTS)
13470 def BuildHooksEnv(self):
13471 """Build hooks env.
13474     return {
13475       "OLD_NAME": self.op.group_name,
13476       "NEW_NAME": self.op.new_name,
13477       }
13479 def BuildHooksNodes(self):
13480 """Build hooks nodes.
13483 mn = self.cfg.GetMasterNode()
13485 all_nodes = self.cfg.GetAllNodesInfo()
13486 all_nodes.pop(mn, None)
13488     run_nodes = [mn]
13489     run_nodes.extend(node.name for node in all_nodes.values()
13490 if node.group == self.group_uuid)
13492 return (run_nodes, run_nodes)
13494 def Exec(self, feedback_fn):
13495 """Rename the node group.
13498     group = self.cfg.GetNodeGroup(self.group_uuid)
13500     if group is None:
13501       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13502 (self.op.group_name, self.group_uuid))
13504 group.name = self.op.new_name
13505 self.cfg.Update(group, feedback_fn)
13507 return self.op.new_name
13510 class LUGroupEvacuate(LogicalUnit):
13511 HPATH = "group-evacuate"
13512 HTYPE = constants.HTYPE_GROUP
13515 def ExpandNames(self):
13516 # This raises errors.OpPrereqError on its own:
13517 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13519 if self.op.target_groups:
13520 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
13521 self.op.target_groups)
13522     else:
13523       self.req_target_uuids = []
13525 if self.group_uuid in self.req_target_uuids:
13526 raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
13527 " as a target group (targets are %s)" %
13528                                  (self.group_uuid,
13529                                   utils.CommaJoin(self.req_target_uuids)),
13530 errors.ECODE_INVAL)
13532 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
13534 self.share_locks = _ShareAll()
13535 self.needed_locks = {
13536 locking.LEVEL_INSTANCE: [],
13537 locking.LEVEL_NODEGROUP: [],
13538       locking.LEVEL_NODE: [],
13539       }
13541 def DeclareLocks(self, level):
13542 if level == locking.LEVEL_INSTANCE:
13543 assert not self.needed_locks[locking.LEVEL_INSTANCE]
13545 # Lock instances optimistically, needs verification once node and group
13546 # locks have been acquired
13547 self.needed_locks[locking.LEVEL_INSTANCE] = \
13548 self.cfg.GetNodeGroupInstances(self.group_uuid)
13550 elif level == locking.LEVEL_NODEGROUP:
13551 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13553 if self.req_target_uuids:
13554 lock_groups = set([self.group_uuid] + self.req_target_uuids)
13556 # Lock all groups used by instances optimistically; this requires going
13557 # via the node before it's locked, requiring verification later on
13558 lock_groups.update(group_uuid
13559 for instance_name in
13560                              self.owned_locks(locking.LEVEL_INSTANCE)
13561                            for group_uuid in
13562                              self.cfg.GetInstanceNodeGroups(instance_name))
13563       else:
13564         # No target groups, need to lock all of them
13565         lock_groups = locking.ALL_SET
13567 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13569 elif level == locking.LEVEL_NODE:
13570 # This will only lock the nodes in the group to be evacuated which
13571 # contain actual instances
13572 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13573 self._LockInstancesNodes()
13575 # Lock all nodes in group to be evacuated and target groups
13576 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13577 assert self.group_uuid in owned_groups
13578 member_nodes = [node_name
13579 for group in owned_groups
13580 for node_name in self.cfg.GetNodeGroup(group).members]
13581 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13583 def CheckPrereq(self):
13584 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13585 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13586 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13588 assert owned_groups.issuperset(self.req_target_uuids)
13589 assert self.group_uuid in owned_groups
13591 # Check if locked instances are still correct
13592 _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
13594 # Get instance information
13595 self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
13597 # Check if node groups for locked instances are still correct
13598 for instance_name in owned_instances:
13599 inst = self.instances[instance_name]
13600 assert owned_nodes.issuperset(inst.all_nodes), \
13601 "Instance %s's nodes changed while we kept the lock" % instance_name
13603       inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
13604                                              owned_groups)
13606 assert self.group_uuid in inst_groups, \
13607 "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
13609 if self.req_target_uuids:
13610 # User requested specific target groups
13611 self.target_uuids = self.req_target_uuids
13612     else:
13613       # All groups except the one to be evacuated are potential targets
13614 self.target_uuids = [group_uuid for group_uuid in owned_groups
13615 if group_uuid != self.group_uuid]
13617 if not self.target_uuids:
13618 raise errors.OpPrereqError("There are no possible target groups",
13619 errors.ECODE_INVAL)
13621 def BuildHooksEnv(self):
13622 """Build hooks env.
13625     return {
13626       "GROUP_NAME": self.op.group_name,
13627       "TARGET_GROUPS": " ".join(self.target_uuids),
13628       }
13630 def BuildHooksNodes(self):
13631 """Build hooks nodes.
13634 mn = self.cfg.GetMasterNode()
13636 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
13638 run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
13640 return (run_nodes, run_nodes)
13642 def Exec(self, feedback_fn):
13643 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13645 assert self.group_uuid not in self.target_uuids
13647 ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
13648 instances=instances, target_groups=self.target_uuids)
13650 ial.Run(self.op.iallocator)
13652 if not ial.success:
13653 raise errors.OpPrereqError("Can't compute group evacuation using"
13654 " iallocator '%s': %s" %
13655 (self.op.iallocator, ial.info),
13656 errors.ECODE_NORES)
13658 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13660 self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
13661 len(jobs), self.op.group_name)
13663 return ResultWithJobs(jobs)
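    # _LoadNodeEvacResult turns the iallocator reply into a list of job
    # definitions, each itself a list of opcodes; a sketch with hypothetical
    # parameters:
    #
    #   jobs = [[opcodes.OpInstanceMigrate(instance_name="inst1")],
    #           [opcodes.OpInstanceMigrate(instance_name="inst2")]]
    #   return ResultWithJobs(jobs)
    #
    # Each inner list is then submitted as one job on the LU's behalf.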
13666 class TagsLU(NoHooksLU): # pylint: disable=W0223
13667 """Generic tags LU.
13669 This is an abstract class which is the parent of all the other tags LUs.
13672 def ExpandNames(self):
13673 self.group_uuid = None
13674 self.needed_locks = {}
13675 if self.op.kind == constants.TAG_NODE:
13676 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
13677 self.needed_locks[locking.LEVEL_NODE] = self.op.name
13678 elif self.op.kind == constants.TAG_INSTANCE:
13679 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
13680 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
13681 elif self.op.kind == constants.TAG_NODEGROUP:
13682       self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
13683       self.needed_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13684 # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
13685 # not possible to acquire the BGL based on opcode parameters)
13687 def CheckPrereq(self):
13688 """Check prerequisites.
13691 if self.op.kind == constants.TAG_CLUSTER:
13692 self.target = self.cfg.GetClusterInfo()
13693 elif self.op.kind == constants.TAG_NODE:
13694 self.target = self.cfg.GetNodeInfo(self.op.name)
13695 elif self.op.kind == constants.TAG_INSTANCE:
13696 self.target = self.cfg.GetInstanceInfo(self.op.name)
13697 elif self.op.kind == constants.TAG_NODEGROUP:
13698 self.target = self.cfg.GetNodeGroup(self.group_uuid)
13699     else:
13700       raise errors.OpPrereqError("Wrong tag type requested (%s)" %
13701 str(self.op.kind), errors.ECODE_INVAL)
13704 class LUTagsGet(TagsLU):
13705 """Returns the tags of a given object.
13710 def ExpandNames(self):
13711 TagsLU.ExpandNames(self)
13713 # Share locks as this is only a read operation
13714 self.share_locks = _ShareAll()
13716 def Exec(self, feedback_fn):
13717 """Returns the tag list.
13720 return list(self.target.GetTags())
13723 class LUTagsSearch(NoHooksLU):
13724 """Searches the tags for a given pattern.
13729 def ExpandNames(self):
13730 self.needed_locks = {}
13732 def CheckPrereq(self):
13733 """Check prerequisites.
13735 This checks the pattern passed for validity by compiling it.
13739 self.re = re.compile(self.op.pattern)
13740 except re.error, err:
13741 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
13742 (self.op.pattern, err), errors.ECODE_INVAL)
13744 def Exec(self, feedback_fn):
13745 """Returns the tag list.
13748     cfg = self.cfg
13749     tgts = [("/cluster", cfg.GetClusterInfo())]
13750 ilist = cfg.GetAllInstancesInfo().values()
13751 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
13752 nlist = cfg.GetAllNodesInfo().values()
13753 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
13754 tgts.extend(("/nodegroup/%s" % n.name, n)
13755 for n in cfg.GetAllNodeGroupsInfo().values())
13756     results = []
13757     for path, target in tgts:
13758       for tag in target.GetTags():
13759         if self.re.search(tag):
13760           results.append((path, tag))
13762     return results
13764 class LUTagsSet(TagsLU):
13765 """Sets a tag on a given object.
13770 def CheckPrereq(self):
13771 """Check prerequisites.
13773 This checks the type and length of the tag name and value.
13776 TagsLU.CheckPrereq(self)
13777 for tag in self.op.tags:
13778 objects.TaggableObject.ValidateTag(tag)
13780   def Exec(self, feedback_fn):
13781     """Sets the tag.
13784     try:
13785       for tag in self.op.tags:
13786         self.target.AddTag(tag)
13787 except errors.TagError, err:
13788 raise errors.OpExecError("Error while setting tag: %s" % str(err))
13789 self.cfg.Update(self.target, feedback_fn)
13792 class LUTagsDel(TagsLU):
13793 """Delete a list of tags from a given object.
13798 def CheckPrereq(self):
13799 """Check prerequisites.
13801 This checks that we have the given tag.
13804 TagsLU.CheckPrereq(self)
13805 for tag in self.op.tags:
13806 objects.TaggableObject.ValidateTag(tag)
13807 del_tags = frozenset(self.op.tags)
13808 cur_tags = self.target.GetTags()
13810     diff_tags = del_tags - cur_tags
13811     if diff_tags:
13812       diff_names = ("'%s'" % i for i in sorted(diff_tags))
13813 raise errors.OpPrereqError("Tag(s) %s not found" %
13814 (utils.CommaJoin(diff_names), ),
13815 errors.ECODE_NOENT)
13817 def Exec(self, feedback_fn):
13818 """Remove the tag from the object.
13821 for tag in self.op.tags:
13822 self.target.RemoveTag(tag)
13823 self.cfg.Update(self.target, feedback_fn)
13826 class LUTestDelay(NoHooksLU):
13827 """Sleep for a specified amount of time.
13829   This LU sleeps on the master and/or nodes for a specified amount of
13830   time.
13835 def ExpandNames(self):
13836 """Expand names and set required locks.
13838 This expands the node list, if any.
13841 self.needed_locks = {}
13842 if self.op.on_nodes:
13843 # _GetWantedNodes can be used here, but is not always appropriate to use
13844 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
13845 # more information.
13846 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
13847 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
13849 def _TestDelay(self):
13850 """Do the actual sleep.
13853 if self.op.on_master:
13854 if not utils.TestDelay(self.op.duration):
13855 raise errors.OpExecError("Error during master delay test")
13856 if self.op.on_nodes:
13857 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
13858 for node, node_result in result.items():
13859 node_result.Raise("Failure during rpc call to node %s" % node)
13861 def Exec(self, feedback_fn):
13862 """Execute the test delay opcode, with the wanted repetitions.
13865     if self.op.repeat == 0:
13866       self._TestDelay()
13867     else:
13868       top_value = self.op.repeat - 1
13869       for i in range(self.op.repeat):
13870         self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
13871         self._TestDelay()
13874 class LUTestJqueue(NoHooksLU):
13875 """Utility LU to test some aspects of the job queue.
13880 # Must be lower than default timeout for WaitForJobChange to see whether it
13881 # notices changed jobs
13882 _CLIENT_CONNECT_TIMEOUT = 20.0
13883 _CLIENT_CONFIRM_TIMEOUT = 60.0
13885   @classmethod
13886   def _NotifyUsingSocket(cls, cb, errcls):
13887 """Opens a Unix socket and waits for another program to connect.
13890 @param cb: Callback to send socket name to client
13891 @type errcls: class
13892 @param errcls: Exception class to use for errors
13895 # Using a temporary directory as there's no easy way to create temporary
13896 # sockets without writing a custom loop around tempfile.mktemp and
13898     tmpdir = tempfile.mkdtemp()
13899     try:
13900 tmpsock = utils.PathJoin(tmpdir, "sock")
13902 logging.debug("Creating temporary socket at %s", tmpsock)
13903       sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
13904       try:
13905         sock.bind(tmpsock)
13906         sock.listen(1)
13908         # Send details to client
13909         cb(tmpsock)
13911         # Wait for client to connect before continuing
13912         sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
13913         try:
13914           (conn, _) = sock.accept()
13915         except socket.error, err:
13916           raise errcls("Client didn't connect in time (%s)" % err)
13917       finally:
13918         sock.close()
13919     finally:
13920       # Remove as soon as client is connected
13921 shutil.rmtree(tmpdir)
13923     # Wait for client to close
13924     try:
13925       try:
13926         # pylint: disable=E1101
13927         # Instance of '_socketobject' has no ... member
13928         conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
13929         conn.recv(1)
13930 except socket.error, err:
13931         raise errcls("Client failed to confirm notification (%s)" % err)
13932     finally:
13933       conn.close()
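    # A minimal client for this notification protocol might look as follows
    # (editor's sketch; the real test client lives outside this module). The
    # socket path arrives via the ELOG_JQUEUE_TEST log entry sent by
    # _SendNotification below:
    #
    #   client = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    #   client.connect(sockname)  # unblocks sock.accept() above
    #   client.close()            # ends the confirmation wait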
13935 def _SendNotification(self, test, arg, sockname):
13936 """Sends a notification to the client.
13939 @param test: Test name
13940 @param arg: Test argument (depends on test)
13941 @type sockname: string
13942 @param sockname: Socket path
13945 self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
13947 def _Notify(self, prereq, test, arg):
13948 """Notifies the client of a test.
13951 @param prereq: Whether this is a prereq-phase test
13953 @param test: Test name
13954 @param arg: Test argument (depends on test)
13957     if prereq:
13958       errcls = errors.OpPrereqError
13959     else:
13960       errcls = errors.OpExecError
13962     return self._NotifyUsingSocket(compat.partial(self._SendNotification,
13963                                                   test, arg),
13964                                    errcls)
13966 def CheckArguments(self):
13967 self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
13968 self.expandnames_calls = 0
13970 def ExpandNames(self):
13971 checkargs_calls = getattr(self, "checkargs_calls", 0)
13972 if checkargs_calls < 1:
13973 raise errors.ProgrammerError("CheckArguments was not called")
13975 self.expandnames_calls += 1
13977 if self.op.notify_waitlock:
13978 self._Notify(True, constants.JQT_EXPANDNAMES, None)
13980 self.LogInfo("Expanding names")
13982 # Get lock on master node (just to get a lock, not for a particular reason)
13983 self.needed_locks = {
13984       locking.LEVEL_NODE: self.cfg.GetMasterNode(),
13985       }
13987 def Exec(self, feedback_fn):
13988 if self.expandnames_calls < 1:
13989 raise errors.ProgrammerError("ExpandNames was not called")
13991 if self.op.notify_exec:
13992 self._Notify(False, constants.JQT_EXEC, None)
13994 self.LogInfo("Executing")
13996 if self.op.log_messages:
13997 self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
13998 for idx, msg in enumerate(self.op.log_messages):
13999 self.LogInfo("Sending log message %s", idx + 1)
14000 feedback_fn(constants.JQT_MSGPREFIX + msg)
14001 # Report how many test messages have been sent
14002 self._Notify(False, constants.JQT_LOGMSG, idx + 1)
14004     if self.op.fail:
14005       raise errors.OpExecError("Opcode failure was requested")
14007     return True
14010 class IAllocator(object):
14011 """IAllocator framework.
14013   An IAllocator instance has several sets of attributes:
14014 - cfg that is needed to query the cluster
14015 - input data (all members of the _KEYS class attribute are required)
14016 - four buffer attributes (in|out_data|text), that represent the
14017 input (to the external script) in text and data structure format,
14018 and the output from it, again in two formats
14019     - the result variables from the script (success, info, nodes) for
14020       easy usage
14023 # pylint: disable=R0902
14024 # lots of instance attributes
14026   def __init__(self, cfg, rpc_runner, mode, **kwargs):
14027     self.cfg = cfg
14028     self.rpc = rpc_runner
14029 # init buffer variables
14030 self.in_text = self.out_text = self.in_data = self.out_data = None
14031     # init all input fields so that pylint is happy
14032     self.mode = mode
14033     self.memory = self.disks = self.disk_template = None
14034 self.os = self.tags = self.nics = self.vcpus = None
14035 self.hypervisor = None
14036     self.relocate_from = None
14037     self.name = None
14038     self.instances = None
14039 self.evac_mode = None
14040 self.target_groups = []
14042 self.required_nodes = None
14043 # init result fields
14044 self.success = self.info = self.result = None
14046     try:
14047       (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
14048     except KeyError:
14049       raise errors.ProgrammerError("Unknown mode '%s' passed to the"
14050 " IAllocator" % self.mode)
14052     keyset = [n for (n, _) in keydata]
14054     for key in kwargs:
14055       if key not in keyset:
14056 raise errors.ProgrammerError("Invalid input parameter '%s' to"
14057 " IAllocator" % key)
14058       setattr(self, key, kwargs[key])
14060     for key in keyset:
14061       if key not in kwargs:
14062 raise errors.ProgrammerError("Missing input parameter '%s' to"
14063 " IAllocator" % key)
14064 self._BuildInputData(compat.partial(fn, self), keydata)
14066 def _ComputeClusterData(self):
14067 """Compute the generic allocator input data.
14069 This is the data that is independent of the actual operation.
14072     cfg = self.cfg
14073     cluster_info = cfg.GetClusterInfo()
14075     data = {
14076       "version": constants.IALLOCATOR_VERSION,
14077       "cluster_name": cfg.GetClusterName(),
14078       "cluster_tags": list(cluster_info.GetTags()),
14079       "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
14080       # we don't have job IDs
14081       }
14082 ninfo = cfg.GetAllNodesInfo()
14083 iinfo = cfg.GetAllInstancesInfo().values()
14084 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
14087 node_list = [n.name for n in ninfo.values() if n.vm_capable]
14089 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
14090 hypervisor_name = self.hypervisor
14091 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
14092 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
14093     else:
14094       hypervisor_name = cluster_info.primary_hypervisor
14096     node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
14097                                         [hypervisor_name])
14098     node_iinfo = \
14099       self.rpc.call_all_instances_info(node_list,
14100 cluster_info.enabled_hypervisors)
14102 data["nodegroups"] = self._ComputeNodeGroupData(cfg)
14104 config_ndata = self._ComputeBasicNodeData(ninfo)
14105 data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
14106 i_list, config_ndata)
14107 assert len(data["nodes"]) == len(ninfo), \
14108 "Incomplete node data computed"
14110 data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
14112 self.in_data = data
14114   @staticmethod
14115   def _ComputeNodeGroupData(cfg):
14116 """Compute node groups data.
14119 ng = dict((guuid, {
14120 "name": gdata.name,
14121 "alloc_policy": gdata.alloc_policy,
14122       })
14123       for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
14125     return ng
14127   @staticmethod
14128   def _ComputeBasicNodeData(node_cfg):
14129 """Compute global node data.
14132 @returns: a dict of name: (node dict, node config)
14135 # fill in static (config-based) values
14136 node_results = dict((ninfo.name, {
14137 "tags": list(ninfo.GetTags()),
14138 "primary_ip": ninfo.primary_ip,
14139 "secondary_ip": ninfo.secondary_ip,
14140 "offline": ninfo.offline,
14141 "drained": ninfo.drained,
14142 "master_candidate": ninfo.master_candidate,
14143 "group": ninfo.group,
14144 "master_capable": ninfo.master_capable,
14145 "vm_capable": ninfo.vm_capable,
14147 for ninfo in node_cfg.values())
14149 return node_results
14151   @staticmethod
14152   def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
14153                               node_results):
14154 """Compute global node data.
14156 @param node_results: the basic node structures as filled from the config
14159     # TODO(dynmem): compute the right data on MAX and MIN memory
14160 # make a copy of the current dict
14161 node_results = dict(node_results)
14162 for nname, nresult in node_data.items():
14163 assert nname in node_results, "Missing basic data for node %s" % nname
14164 ninfo = node_cfg[nname]
14166 if not (ninfo.offline or ninfo.drained):
14167 nresult.Raise("Can't get data for node %s" % nname)
14168         node_iinfo[nname].Raise("Can't get node instance info from node %s" %
14169                                 nname)
14170 remote_info = _MakeLegacyNodeInfo(nresult.payload)
14172 for attr in ["memory_total", "memory_free", "memory_dom0",
14173 "vg_size", "vg_free", "cpu_total"]:
14174 if attr not in remote_info:
14175 raise errors.OpExecError("Node '%s' didn't return attribute"
14176 " '%s'" % (nname, attr))
14177 if not isinstance(remote_info[attr], int):
14178           raise errors.OpExecError("Node '%s' returned invalid value"
14179                                    " for '%s': %s" %
14180                                    (nname, attr, remote_info[attr]))
14181 # compute memory used by primary instances
14182 i_p_mem = i_p_up_mem = 0
14183 for iinfo, beinfo in i_list:
14184 if iinfo.primary_node == nname:
14185 i_p_mem += beinfo[constants.BE_MAXMEM]
14186             if iinfo.name not in node_iinfo[nname].payload:
14187               i_used_mem = 0
14188             else:
14189               i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
14190 i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
14191 remote_info["memory_free"] -= max(0, i_mem_diff)
14193 if iinfo.admin_state == constants.ADMINST_UP:
14194 i_p_up_mem += beinfo[constants.BE_MAXMEM]
14196         # compute memory used by instances
14197         pnr_dyn = {
14198           "total_memory": remote_info["memory_total"],
14199 "reserved_memory": remote_info["memory_dom0"],
14200 "free_memory": remote_info["memory_free"],
14201 "total_disk": remote_info["vg_size"],
14202 "free_disk": remote_info["vg_free"],
14203 "total_cpus": remote_info["cpu_total"],
14204 "i_pri_memory": i_p_mem,
14205           "i_pri_up_memory": i_p_up_mem,
14206           }
14207         pnr_dyn.update(node_results[nname])
14208 node_results[nname] = pnr_dyn
14210 return node_results
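  # Arithmetic sketch (hypothetical numbers): for a primary instance with
  # BE_MAXMEM=1024 that the hypervisor reports as actually using 512, the
  # code above subtracts max(0, 1024 - 512) from the node's reported free
  # memory, while i_pri_memory still counts the full 1024; the allocator
  # therefore sees committed rather than current usage.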
14212   @staticmethod
14213   def _ComputeInstanceData(cluster_info, i_list):
14214 """Compute global instance data.
14217     instance_data = {}
14218     for iinfo, beinfo in i_list:
14219       nic_data = []
14220       for nic in iinfo.nics:
14221         filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
14222         nic_dict = {
14223           "mac": nic.mac,
14224           "ip": nic.ip,
14225           "mode": filled_params[constants.NIC_MODE],
14226           "link": filled_params[constants.NIC_LINK],
14227           }
14228 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
14229 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
14230 nic_data.append(nic_dict)
14231       pir = {
14232         "tags": list(iinfo.GetTags()),
14233 "admin_state": iinfo.admin_state,
14234 "vcpus": beinfo[constants.BE_VCPUS],
14235         "memory": beinfo[constants.BE_MAXMEM],
14236         "os": iinfo.os,
14237         "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
14238         "nics": nic_data,
14239 "disks": [{constants.IDISK_SIZE: dsk.size,
14240 constants.IDISK_MODE: dsk.mode}
14241 for dsk in iinfo.disks],
14242 "disk_template": iinfo.disk_template,
14243         "hypervisor": iinfo.hypervisor,
14244         }
14245       pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
14246                                                  pir["disks"])
14247 instance_data[iinfo.name] = pir
14249 return instance_data
14251 def _AddNewInstance(self):
14252 """Add new instance data to allocator structure.
14254     This in combination with _ComputeClusterData will create the
14255 correct structure needed as input for the allocator.
14257     The checks for the completeness of the opcode must have already been
14258     done.
14261 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
14263 if self.disk_template in constants.DTS_INT_MIRROR:
14264       self.required_nodes = 2
14265     else:
14266       self.required_nodes = 1
14268     request = {
14269       "name": self.name,
14270       "disk_template": self.disk_template,
14271       "tags": self.tags,
14272       "os": self.os,
14273       "vcpus": self.vcpus,
14274 "memory": self.memory,
14275 "disks": self.disks,
14276 "disk_space_total": disk_space,
14277       "nics": self.nics,
14278       "required_nodes": self.required_nodes,
14279       "hypervisor": self.hypervisor,
14280       }
14282     return request
14284 def _AddRelocateInstance(self):
14285 """Add relocate instance data to allocator structure.
14287     This in combination with _ComputeClusterData will create the
14288 correct structure needed as input for the allocator.
14290     The checks for the completeness of the opcode must have already been
14291     done.
14294 instance = self.cfg.GetInstanceInfo(self.name)
14295 if instance is None:
14296 raise errors.ProgrammerError("Unknown instance '%s' passed to"
14297 " IAllocator" % self.name)
14299 if instance.disk_template not in constants.DTS_MIRRORED:
14300 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
14301 errors.ECODE_INVAL)
14303 if instance.disk_template in constants.DTS_INT_MIRROR and \
14304 len(instance.secondary_nodes) != 1:
14305 raise errors.OpPrereqError("Instance has not exactly one secondary node",
14306 errors.ECODE_STATE)
14308 self.required_nodes = 1
14309 disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
14310 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
14312     request = {
14313       "name": self.name,
14314       "disk_space_total": disk_space,
14315       "required_nodes": self.required_nodes,
14316       "relocate_from": self.relocate_from,
14317       }
14318     return request
14320 def _AddNodeEvacuate(self):
14321 """Get data for node-evacuate requests.
14324     return {
14325       "instances": self.instances,
14326       "evac_mode": self.evac_mode,
14327       }
14329 def _AddChangeGroup(self):
14330     """Get data for group-change requests.
14333     return {
14334       "instances": self.instances,
14335       "target_groups": self.target_groups,
14336       }
14338 def _BuildInputData(self, fn, keydata):
14339 """Build input data structures.
14342     self._ComputeClusterData()
14344     request = fn()
14345     request["type"] = self.mode
14346 for keyname, keytype in keydata:
14347 if keyname not in request:
14348         raise errors.ProgrammerError("Request parameter %s is missing" %
14349                                      keyname)
14350 val = request[keyname]
14351 if not keytype(val):
14352 raise errors.ProgrammerError("Request parameter %s doesn't pass"
14353 " validation, value %s, expected"
14354 " type %s" % (keyname, val, keytype))
14355 self.in_data["request"] = request
14357 self.in_text = serializer.Dump(self.in_data)
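  # Sketch of the validation driven by keydata (hypothetical entry): for a
  # ("memory", ht.TInt) pair the request must carry an integer "memory" field:
  #
  #   keyname, keytype = "memory", ht.TInt
  #   keytype(128)    # True: value accepted
  #   keytype("128")  # False: the check above raises ProgrammerError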
14359 _STRING_LIST = ht.TListOf(ht.TString)
14360 _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
14361 # pylint: disable=E1101
14362 # Class '...' has no 'OP_ID' member
14363 "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
14364 opcodes.OpInstanceMigrate.OP_ID,
14365                          opcodes.OpInstanceReplaceDisks.OP_ID])
14366     })))
14368   _NEVAC_MOVED = \
14369     ht.TListOf(ht.TAnd(ht.TIsLength(3),
14370                        ht.TItems([ht.TNonEmptyString,
14371                                   ht.TNonEmptyString,
14372                                   ht.TListOf(ht.TNonEmptyString),
14373                                  ])))
14374   _NEVAC_FAILED = \
14375     ht.TListOf(ht.TAnd(ht.TIsLength(2),
14376                        ht.TItems([ht.TNonEmptyString,
14377                                   ht.TMaybeString,
14378                                  ])))
14379   _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
14380 ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
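  # For illustration, a node-evacuation result accepted by _NEVAC_RESULT is a
  # three-element list of moved instances, failed instances and jobs
  # (hypothetical values):
  #
  #   [[["inst1", "group-uuid", ["node3"]]],        # matches _NEVAC_MOVED
  #    [["inst2", "disk template not mirrored"]],   # matches _NEVAC_FAILED
  #    [[{"OP_ID": opcodes.OpInstanceMigrate.OP_ID}]]]  # matches _JOB_LIST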
14382   _MODE_DATA = {
14383     constants.IALLOCATOR_MODE_ALLOC:
14384       (_AddNewInstance,
14385        [
14386         ("name", ht.TString),
14387 ("memory", ht.TInt),
14388 ("disks", ht.TListOf(ht.TDict)),
14389 ("disk_template", ht.TString),
14390 ("os", ht.TString),
14391 ("tags", _STRING_LIST),
14392 ("nics", ht.TListOf(ht.TDict)),
14393 ("vcpus", ht.TInt),
14394         ("hypervisor", ht.TString),
14395         ], ht.TList),
14396     constants.IALLOCATOR_MODE_RELOC:
14397 (_AddRelocateInstance,
14398        [("name", ht.TString), ("relocate_from", _STRING_LIST)],
14399        ht.TList),
14400     constants.IALLOCATOR_MODE_NODE_EVAC:
14401 (_AddNodeEvacuate, [
14402 ("instances", _STRING_LIST),
14403         ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
14404         ], _NEVAC_RESULT),
14405     constants.IALLOCATOR_MODE_CHG_GROUP:
14406 (_AddChangeGroup, [
14407 ("instances", _STRING_LIST),
14408         ("target_groups", _STRING_LIST),
14409         ], _NEVAC_RESULT),
14410     }
14412 def Run(self, name, validate=True, call_fn=None):
14413 """Run an instance allocator and return the results.
14416 if call_fn is None:
14417 call_fn = self.rpc.call_iallocator_runner
14419 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
14420 result.Raise("Failure while running the iallocator script")
14422 self.out_text = result.payload
14423     if validate:
14424       self._ValidateResult()
14426 def _ValidateResult(self):
14427 """Process the allocator results.
14429 This will process and if successful save the result in
14430 self.out_data and the other parameters.
14433     try:
14434       rdict = serializer.Load(self.out_text)
14435 except Exception, err:
14436 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
14438 if not isinstance(rdict, dict):
14439 raise errors.OpExecError("Can't parse iallocator results: not a dict")
14441     # TODO: remove backwards compatibility in later versions
14442     if "nodes" in rdict and "result" not in rdict:
14443       rdict["result"] = rdict["nodes"]
14444       del rdict["nodes"]
14446 for key in "success", "info", "result":
14447 if key not in rdict:
14448 raise errors.OpExecError("Can't parse iallocator results:"
14449 " missing key '%s'" % key)
14450 setattr(self, key, rdict[key])
14452 if not self._result_check(self.result):
14453 raise errors.OpExecError("Iallocator returned invalid result,"
14454 " expected %s, got %s" %
14455 (self._result_check, self.result),
14456 errors.ECODE_INVAL)
14458 if self.mode == constants.IALLOCATOR_MODE_RELOC:
14459 assert self.relocate_from is not None
14460 assert self.required_nodes == 1
14462 node2group = dict((name, ndata["group"])
14463 for (name, ndata) in self.in_data["nodes"].items())
14465 fn = compat.partial(self._NodesToGroups, node2group,
14466 self.in_data["nodegroups"])
14468 instance = self.cfg.GetInstanceInfo(self.name)
14469 request_groups = fn(self.relocate_from + [instance.primary_node])
14470 result_groups = fn(rdict["result"] + [instance.primary_node])
14472 if self.success and not set(result_groups).issubset(request_groups):
14473 raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
14474 " differ from original groups (%s)" %
14475 (utils.CommaJoin(result_groups),
14476 utils.CommaJoin(request_groups)))
14478 elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
14479 assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
14481 self.out_data = rdict
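    # A minimal reply passing the checks above would look like this
    # (hypothetical values):
    #
    #   {"success": True, "info": "allocation ok", "result": ["node1", "node2"]}
    #
    # where "result" must additionally satisfy the mode-specific _result_check
    # type (e.g. _NEVAC_RESULT for node evacuation).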
14483   @staticmethod
14484   def _NodesToGroups(node2group, groups, nodes):
14485 """Returns a list of unique group names for a list of nodes.
14487 @type node2group: dict
14488 @param node2group: Map from node name to group UUID
14489     @type groups: dict
14490     @param groups: Group information
14491     @type nodes: list
14492     @param nodes: Node names
14495     result = set()
14497     for node in nodes:
14498       try:
14499         group_uuid = node2group[node]
14500       except KeyError:
14501         # Ignore unknown node
14502         pass
14503       else:
14504         try:
14505           group = groups[group_uuid]
14506         except KeyError:
14507           # Can't find group, let's use UUID
14508           group_name = group_uuid
14509         else:
14510           group_name = group["name"]
14512         result.add(group_name)
14514 return sorted(result)
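  # Doctest-style illustration (hypothetical data): unknown nodes are skipped
  # and a group missing from the group map falls back to its UUID:
  #
  #   >>> node2group = {"node1": "uuid-a", "node2": "uuid-b"}
  #   >>> groups = {"uuid-a": {"name": "group-a"}}
  #   >>> IAllocator._NodesToGroups(node2group, groups,
  #   ...                           ["node1", "node2", "unknown"])
  #   ['group-a', 'uuid-b']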
14517 class LUTestAllocator(NoHooksLU):
14518 """Run allocator tests.
14520 This LU runs the allocator tests
14523 def CheckPrereq(self):
14524 """Check prerequisites.
14526     This checks the opcode parameters depending on the requested direction and mode.
14529 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
14530 for attr in ["memory", "disks", "disk_template",
14531 "os", "tags", "nics", "vcpus"]:
14532 if not hasattr(self.op, attr):
14533 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
14534 attr, errors.ECODE_INVAL)
14535 iname = self.cfg.ExpandInstanceName(self.op.name)
14536 if iname is not None:
14537 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
14538 iname, errors.ECODE_EXISTS)
14539 if not isinstance(self.op.nics, list):
14540 raise errors.OpPrereqError("Invalid parameter 'nics'",
14541 errors.ECODE_INVAL)
14542 if not isinstance(self.op.disks, list):
14543 raise errors.OpPrereqError("Invalid parameter 'disks'",
14544 errors.ECODE_INVAL)
14545 for row in self.op.disks:
14546 if (not isinstance(row, dict) or
14547 constants.IDISK_SIZE not in row or
14548 not isinstance(row[constants.IDISK_SIZE], int) or
14549 constants.IDISK_MODE not in row or
14550 row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
14551 raise errors.OpPrereqError("Invalid contents of the 'disks'"
14552 " parameter", errors.ECODE_INVAL)
14553 if self.op.hypervisor is None:
14554 self.op.hypervisor = self.cfg.GetHypervisorType()
14555 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
14556 fname = _ExpandInstanceName(self.cfg, self.op.name)
14557 self.op.name = fname
14558 self.relocate_from = \
14559 list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
14560 elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
14561 constants.IALLOCATOR_MODE_NODE_EVAC):
14562 if not self.op.instances:
14563 raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
14564 self.op.instances = _GetWantedInstances(self, self.op.instances)
14565     else:
14566       raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
14567 self.op.mode, errors.ECODE_INVAL)
14569 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
14570 if self.op.allocator is None:
14571 raise errors.OpPrereqError("Missing allocator name",
14572 errors.ECODE_INVAL)
14573 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
14574 raise errors.OpPrereqError("Wrong allocator test '%s'" %
14575 self.op.direction, errors.ECODE_INVAL)
14577 def Exec(self, feedback_fn):
14578 """Run the allocator test.
14581 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
14582       ial = IAllocator(self.cfg, self.rpc,
14583                        mode=self.op.mode,
14584                        name=self.op.name,
14585                        memory=self.op.memory,
14586                        disks=self.op.disks,
14587                        disk_template=self.op.disk_template,
14588                        os=self.op.os,
14589                        tags=self.op.tags,
14590                        nics=self.op.nics,
14591                        vcpus=self.op.vcpus,
14592                        hypervisor=self.op.hypervisor,
14593                        )
14594 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
14595       ial = IAllocator(self.cfg, self.rpc,
14596                        mode=self.op.mode,
14597                        name=self.op.name,
14598                        relocate_from=list(self.relocate_from),
14599                        )
14600 elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
14601       ial = IAllocator(self.cfg, self.rpc,
14602                        mode=self.op.mode,
14603 instances=self.op.instances,
14604 target_groups=self.op.target_groups)
14605 elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
14606       ial = IAllocator(self.cfg, self.rpc,
14607                        mode=self.op.mode,
14608 instances=self.op.instances,
14609 evac_mode=self.op.evac_mode)
14611       raise errors.ProgrammerError("Unhandled mode %s in"
14612                                    " LUTestAllocator.Exec" % self.op.mode)
14614 if self.op.direction == constants.IALLOCATOR_DIR_IN:
14615       result = ial.in_text
14616     else:
14617       ial.Run(self.op.allocator, validate=False)
14618       result = ial.out_text
14620     return result
14622 #: Query type implementations
14623 _QUERY_IMPL = {
14624   constants.QR_INSTANCE: _InstanceQuery,
14625   constants.QR_NODE: _NodeQuery,
14626   constants.QR_GROUP: _GroupQuery,
14627   constants.QR_OS: _OsQuery,
14628   }
14630 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
14633 def _GetQueryImplementation(name):
14634   """Returns the implementation for a query type.
14636 @param name: Query type, must be one of L{constants.QR_VIA_OP}
14639   try:
14640     return _QUERY_IMPL[name]
14641   except KeyError:
14642     raise errors.OpPrereqError("Unknown query resource '%s'" % name,
14643 errors.ECODE_INVAL)