# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module

import re
import logging
import copy
import itertools

import OpenSSL

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import opcodes
from ganeti import rpc

import ganeti.masterd.instance # pylint: disable=W0611


#: Size of DRBD meta block device
_DRBD_META_SIZE = 128

INSTANCE_UP = [constants.ADMINST_UP]
INSTANCE_DOWN = [constants.ADMINST_DOWN]
INSTANCE_OFFLINE = [constants.ADMINST_OFFLINE]
INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
79 """Data container for LU results with jobs.
81 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
82 by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
83 contained in the C{jobs} attribute and include the job IDs in the opcode
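
  A minimal usage sketch (illustrative only; C{OpTestDelay} merely stands in
  for whatever follow-up opcodes an LU wants to submit, and the keyword
  argument name is invented)::

    # at the end of an LU's Exec method
    return ResultWithJobs([[opcodes.OpTestDelay(duration=0)]],
                          other_result="initial work done")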

  """
  def __init__(self, jobs, **kwargs):
    """Initializes this class.

    Additional return values can be specified as keyword arguments.

    @type jobs: list of lists of L{opcodes.OpCode}
    @param jobs: A list of lists of opcode objects

    """
    self.jobs = jobs
    self.other = kwargs


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc_runner):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.glm = context.glm
    # readability alias
    self.owned_locks = context.glm.list_owned
    self.context = context
    self.rpc = rpc_runner
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    # logging
    self.Log = processor.Log # pylint: disable=C0103
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
    self.LogStep = processor.LogStep # pylint: disable=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    self.CheckArguments()

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.

    """

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.
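
    For example, to acquire all node locks in shared mode (an illustrative
    sketch, not taken from a specific LU)::

      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
      self.share_locks[locking.LEVEL_NODE] = 1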

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level.

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.
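
    A typical override recomputes node locks once the instance locks are held
    (an illustrative sketch; see also L{_LockInstancesNodes} below)::

      def DeclareLocks(self, level):
        if level == locking.LEVEL_NODE:
          self._LockInstancesNodes()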

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      pass

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    @rtype: dict
    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
      will extend the environment with additional variables. If no environment
      should be defined, an empty dictionary should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.

    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. No nodes should be returned as an
      empty list (and not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks. By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the unused-argument and "could
    # be a function" warnings
    # pylint: disable=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False,
                          level=locking.LEVEL_NODE):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances
    @param level: Which lock level to use for locking nodes

    """
    assert level in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
      self.needed_locks[level] = wanted_nodes
    elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
      self.needed_locks[level].extend(wanted_nodes)
    else:
      raise errors.ProgrammerError("Unknown recalculation mode")

    del self.recalculate_locks[level]


class NoHooksLU(LogicalUnit): # pylint: disable=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLu.

    This just raises an error.

    """
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")

  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.

    """
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
435 """Tasklet base class.
437 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
438 they can mix legacy code with tasklets. Locking needs to be done in the LU,
439 tasklets know nothing about locks.
441 Subclasses must follow these rules:
442 - Implement CheckPrereq
446 def __init__(self, lu):
453 def CheckPrereq(self):
454 """Check prerequisites for this tasklets.
456 This method should check whether the prerequisites for the execution of
457 this tasklet are fulfilled. It can do internode communication, but it
458 should be idempotent - no cluster or system changes are allowed.
460 The method should raise errors.OpPrereqError in case something is not
461 fulfilled. Its return value is ignored.
463 This method should also update all parameters to their canonical form if it
464 hasn't been done before.
469 def Exec(self, feedback_fn):
470 """Execute the tasklet.
472 This method should implement the actual work. It should raise
473 errors.OpExecError for failures that are somewhat dealt with in code, or
477 raise NotImplementedError
481 """Base for query utility classes.
484 #: Attribute holding field definitions
487 def __init__(self, qfilter, fields, use_locking):
488 """Initializes this class.
491 self.use_locking = use_locking
493 self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
495 self.requested_data = self.query.RequestedData()
496 self.names = self.query.RequestedNames()
498 # Sort only if no names were requested
499 self.sort_by_name = not self.names
501 self.do_locking = None

  def _GetNames(self, lu, all_names, lock_level):
    """Helper function to determine names asked for in the query.

    """
    if self.do_locking:
      names = lu.owned_locks(lock_level)
    else:
      names = all_names

    if self.wanted == locking.ALL_SET:
      assert not self.names
      # caller didn't specify names, so ordering is not important
      return utils.NiceSort(names)

    # caller specified names and we must keep the same order

    assert not self.do_locking or lu.glm.is_owned(lock_level)

    missing = set(self.wanted).difference(names)
    if missing:
      raise errors.OpExecError("Some items were removed before retrieving"
                               " their data: %s" % missing)

    # Return expanded names
    return self.wanted

  def ExpandNames(self, lu):
    """Expand names for this query.

    See L{LogicalUnit.ExpandNames}.

    """
    raise NotImplementedError()

  def DeclareLocks(self, lu, level):
    """Declare locks for this query.

    See L{LogicalUnit.DeclareLocks}.

    """
    raise NotImplementedError()

  def _GetQueryData(self, lu):
    """Collects all data for this query.

    @return: Query data object

    """
    raise NotImplementedError()

  def NewStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
                                  sort_by_name=self.sort_by_name)

  def OldStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return self.query.OldStyleQuery(self._GetQueryData(lu),
                                    sort_by_name=self.sort_by_name)
570 """Returns a dict declaring all lock levels shared.
573 return dict.fromkeys(locking.LEVELS, 1)


def _MakeLegacyNodeInfo(data):
  """Formats the data returned by L{rpc.RpcRunner.call_node_info}.

  Converts the data into a single dictionary. This is fine for most use cases,
  but some require information from more than one volume group or hypervisor.

  """
  (bootid, (vg_info, ), (hv_info, )) = data

  return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
    "bootid": bootid,
    })


def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
  """Checks if the owned node groups are still correct for an instance.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type instance_name: string
  @param instance_name: Instance name
  @type owned_groups: set or frozenset
  @param owned_groups: List of currently owned node groups

  """
  inst_groups = cfg.GetInstanceNodeGroups(instance_name)

  if not owned_groups.issuperset(inst_groups):
    raise errors.OpPrereqError("Instance %s's node groups changed since"
                               " locks were acquired, current groups"
                               " are '%s', owning groups '%s'; retry the"
                               " operation" %
                               (instance_name,
                                utils.CommaJoin(inst_groups),
                                utils.CommaJoin(owned_groups)),
                               errors.ECODE_STATE)

  return inst_groups


def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
  """Checks if the instances in a node group are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type group_uuid: string
  @param group_uuid: Node group UUID
  @type owned_instances: set or frozenset
  @param owned_instances: List of currently owned instances

  """
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
  if owned_instances != wanted_instances:
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
                               " locks were acquired, wanted '%s', have '%s';"
                               " retry the operation" %
                               (group_uuid,
                                utils.CommaJoin(wanted_instances),
                                utils.CommaJoin(owned_instances)),
                               errors.ECODE_STATE)

  return wanted_instances


def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise

  """
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary
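
  Example (illustrative values)::

    >>> _GetUpdatedParams({"a": 1, "b": 2},
    ...                   {"b": constants.VALUE_DEFAULT, "c": 3})
    {'a': 1, 'c': 3}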

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy


def _UpdateAndVerifySubDict(base, updates, type_check):
  """Updates and verifies a dict with sub dicts of the same type.

  @param base: The dict with the old data
  @param updates: The dict with the new data
  @param type_check: Dict suitable to ForceDictType to verify correct types
  @returns: A new dict with updated and verified values
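
  Example (illustrative; hypervisor name and parameter chosen arbitrarily)::

    >>> _UpdateAndVerifySubDict(
    ...   {"xen-pvm": {"bootloader_path": "/bin/a"}},
    ...   {"xen-pvm": {"bootloader_path": "/bin/b"}},
    ...   {"bootloader_path": constants.VTYPE_STRING})
    {'xen-pvm': {'bootloader_path': '/bin/b'}}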

  """
  def fn(old, value):
    new = _GetUpdatedParams(old, value)
    utils.ForceDictType(new, type_check)
    return new

  ret = copy.deepcopy(base)
  ret.update(dict((key, fn(base.get(key, {}), value))
                  for key, value in updates.items()))
  return ret


def _MergeAndVerifyHvState(op_input, obj_input):
  """Combines the hv state from an opcode with the one of the object.

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict

  """
  if op_input:
    invalid_hvs = set(op_input) - constants.HYPER_TYPES
    if invalid_hvs:
      raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
                                 " %s" % utils.CommaJoin(invalid_hvs),
                                 errors.ECODE_INVAL)
    if obj_input is None:
      obj_input = {}
    type_check = constants.HVSTS_PARAMETER_TYPES
    return _UpdateAndVerifySubDict(obj_input, op_input, type_check)

  return None


def _MergeAndVerifyDiskState(op_input, obj_input):
  """Combines the disk state from an opcode with the one of the object.

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict

  """
  if op_input:
    invalid_dst = set(op_input) - constants.DS_VALID_TYPES
    if invalid_dst:
      raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
                                 utils.CommaJoin(invalid_dst),
                                 errors.ECODE_INVAL)
    type_check = constants.DSS_PARAMETER_TYPES
    if obj_input is None:
      obj_input = {}
    return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
                                              type_check))
                for key, value in op_input.items())

  return None


def _ReleaseLocks(lu, level, names=None, keep=None):
  """Releases locks owned by an LU.

  @type lu: L{LogicalUnit}
  @param level: Lock level
  @type names: list or None
  @param names: Names of locks to release
  @type keep: list or None
  @param keep: Names of locks to retain
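
  For example, to keep only the lock on the node being worked on and release
  all other node locks (an illustrative sketch)::

    _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.node_name])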

  """
  assert not (keep is not None and names is not None), \
    "Only one of the 'names' and the 'keep' parameters can be given"

  if names is not None:
    should_release = names.__contains__
  elif keep:
    should_release = lambda name: name not in keep
  else:
    should_release = None

  owned = lu.owned_locks(level)
  if not owned:
    # Not owning any lock at this level, do nothing
    pass

  elif should_release:
    retain = []
    release = []

    # Determine which locks to release
    for name in owned:
      if should_release(name):
        release.append(name)
      else:
        retain.append(name)

    assert len(lu.owned_locks(level)) == (len(retain) + len(release))

    # Release just some locks
    lu.glm.release(level, names=release)

    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
  else:
    # Release everything
    lu.glm.release(level)

    assert not lu.glm.is_owned(level), "No locks should be owned"


def _MapInstanceDisksToNodes(instances):
  """Creates a map from (node, volume) to instance name.

  @type instances: list of L{objects.Instance}
  @rtype: dict; tuple of (node name, volume name) as key, instance name as
      value
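
  The resulting shape looks like (illustrative)::

    {("node1.example.com", "xenvg/disk0"): "instance1.example.com", ...}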

  """
  return dict(((node, vol), inst.name)
              for inst in instances
              for (node, vols) in inst.MapLVsByNode().items()
              for vol in vols)


def _RunPostHook(lu, node_name):
  """Runs the post-hook for an opcode on a single node.

  """
  hm = lu.proc.BuildHooksManager(lu)
  try:
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
  except:
    # pylint: disable=W0702
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global parameters.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node does not support the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceState(lu, instance, req_states, msg=None):
  """Ensure that an instance is in one of the required states.

  @param lu: the LU on behalf of which we make the check
  @param instance: the instance to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the instance is not in the required state

  """
  if msg is None:
    msg = "can't use instance from outside %s states" % ", ".join(req_states)
  if instance.admin_state not in req_states:
    raise errors.OpPrereqError("Instance %s is marked to be %s, %s" %
                               (instance.name, instance.admin_state, msg),
                               errors.ECODE_STATE)

  if constants.ADMINST_UP not in req_states:
    pnode = instance.primary_node
    ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
    ins_l.Raise("Can't contact node %s for instance information" % pnode,
                prereq=True, ecode=errors.ECODE_ENVIRON)

    if instance.name in ins_l.payload:
      raise errors.OpPrereqError("Instance %s is running, %s" %
                                 (instance.name, msg), errors.ECODE_STATE)


def _CheckMinMaxSpecs(name, ipolicy, value):
  """Checks if value is in the desired range.

  @param name: name of the parameter for which we perform the check
  @param ipolicy: dictionary containing min, max and std values
  @param value: actual value that we want to use
  @return: None or element not meeting the criteria
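
  Example (illustrative ipolicy fragment; "disk-size" is the literal value of
  C{constants.ISPEC_DISK_SIZE})::

    >>> ipolicy = {constants.ISPECS_MIN: {"disk-size": 128},
    ...            constants.ISPECS_MAX: {"disk-size": 1024}}
    >>> _CheckMinMaxSpecs("disk-size", ipolicy, 2048)
    'disk-size value 2048 is not in range [128, 1024]'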

  """
  if value in [None, constants.VALUE_AUTO]:
    return None
  max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
  min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
  if value > max_v or min_v > value:
    return ("%s value %s is not in range [%s, %s]" %
            (name, value, min_v, max_v))
  return None


def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
                                 nic_count, disk_sizes,
                                 _check_spec_fn=_CheckMinMaxSpecs):
  """Verifies ipolicy against provided specs.

  @type ipolicy: dict
  @param ipolicy: The ipolicy
  @type mem_size: int
  @param mem_size: The memory size
  @type cpu_count: int
  @param cpu_count: Used cpu cores
  @type disk_count: int
  @param disk_count: Number of disks used
  @type nic_count: int
  @param nic_count: Number of nics used
  @type disk_sizes: list of ints
  @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
  @param _check_spec_fn: The checking function (unittest only)
  @return: A list of violations, or an empty list if no violations are found
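
  Example call (illustrative values: 2048 MB of memory, 2 VCPUs, one 512 MB
  disk, one NIC)::

    violations = _ComputeIPolicySpecViolation(ipolicy, 2048, 2, 1, 1, [512])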

  """
  assert disk_count == len(disk_sizes)

  test_settings = [
    (constants.ISPEC_MEM_SIZE, mem_size),
    (constants.ISPEC_CPU_COUNT, cpu_count),
    (constants.ISPEC_DISK_COUNT, disk_count),
    (constants.ISPEC_NIC_COUNT, nic_count),
    ] + map((lambda d: (constants.ISPEC_DISK_SIZE, d)), disk_sizes)

  return filter(None,
                (_check_spec_fn(name, ipolicy, value)
                 for (name, value) in test_settings))


def _ComputeIPolicyInstanceViolation(ipolicy, instance,
                                     _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance meets the specs of ipolicy.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance: L{objects.Instance}
  @param instance: The instance to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
  cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
  disk_count = len(instance.disks)
  disk_sizes = [disk.size for disk in instance.disks]
  nic_count = len(instance.nics)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes)


def _ComputeIPolicyInstanceSpecViolation(ipolicy, instance_spec,
                                         _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance specs meet the specs of ipolicy.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance_spec: dict
  @param instance_spec: The instance spec to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
  cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
  disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
  disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
  nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes)


def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
                                 target_group,
                                 _compute_fn=_ComputeIPolicyInstanceViolation):
  """Compute if instance meets the specs of the new target group.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param current_group: The current group of the instance
  @param target_group: The new group of the instance
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  if current_group == target_group:
    return []
  else:
    return _compute_fn(ipolicy, instance)


def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
                            _compute_fn=_ComputeIPolicyNodeViolation):
  """Checks that the target node is correct in terms of instance policy.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param node: The new node to relocate
  @param ignore: Ignore violations of the ipolicy
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
  res = _compute_fn(ipolicy, instance, primary_node.group, node.group)

  if res:
    msg = ("Instance does not meet target node group's (%s) instance"
           " policy: %s") % (node.group, utils.CommaJoin(res))
    if ignore:
      lu.LogWarning(msg)
    else:
      raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          minmem, maxmem, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name, tags):
  """Builds instance related env variables for hooks.

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: string
  @param status: the desired status of the instance
  @type minmem: string
  @param minmem: the minimum memory size of the instance
  @type maxmem: string
  @param maxmem: the maximum memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @type tags: list
  @param tags: list of instance tags as strings
  @rtype: dict
  @return: the hook environment for this instance
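
  An illustrative excerpt of the result (values invented for this sketch)::

    {"OP_TARGET": "instance1.example.com",
     "INSTANCE_NAME": "instance1.example.com",
     "INSTANCE_NIC_COUNT": 1,
     "INSTANCE_NIC0_MAC": "aa:00:00:98:ac:cf",
     ...}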
1231 "INSTANCE_NAME": name,
1232 "INSTANCE_PRIMARY": primary_node,
1233 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1234 "INSTANCE_OS_TYPE": os_type,
1235 "INSTANCE_STATUS": status,
1236 "INSTANCE_MINMEM": minmem,
1237 "INSTANCE_MAXMEM": maxmem,
1238 # TODO(2.7) remove deprecated "memory" value
1239 "INSTANCE_MEMORY": maxmem,
1240 "INSTANCE_VCPUS": vcpus,
1241 "INSTANCE_DISK_TEMPLATE": disk_template,
1242 "INSTANCE_HYPERVISOR": hypervisor_name,
1245 nic_count = len(nics)
1246 for idx, (ip, mac, mode, link) in enumerate(nics):
1249 env["INSTANCE_NIC%d_IP" % idx] = ip
1250 env["INSTANCE_NIC%d_MAC" % idx] = mac
1251 env["INSTANCE_NIC%d_MODE" % idx] = mode
1252 env["INSTANCE_NIC%d_LINK" % idx] = link
1253 if mode == constants.NIC_MODE_BRIDGED:
1254 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1258 env["INSTANCE_NIC_COUNT"] = nic_count
1261 disk_count = len(disks)
1262 for idx, (size, mode) in enumerate(disks):
1263 env["INSTANCE_DISK%d_SIZE" % idx] = size
1264 env["INSTANCE_DISK%d_MODE" % idx] = mode
1268 env["INSTANCE_DISK_COUNT"] = disk_count
1273 env["INSTANCE_TAGS"] = " ".join(tags)
1275 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1276 for key, value in source.items():
1277 env["INSTANCE_%s_%s" % (kind, key)] = value


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    "name": instance.name,
    "primary_node": instance.primary_node,
    "secondary_nodes": instance.secondary_nodes,
    "os_type": instance.os,
    "status": instance.admin_state,
    "maxmem": bep[constants.BE_MAXMEM],
    "minmem": bep[constants.BE_MINMEM],
    "vcpus": bep[constants.BE_VCPUS],
    "nics": _NICListToTuple(lu, instance.nics),
    "disk_template": instance.disk_template,
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
    "bep": bep,
    "hvp": hvp,
    "hypervisor_name": instance.hypervisor,
    "tags": instance.tags,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max with one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should


def _CalculateGroupIPolicy(cluster, group):
  """Calculate instance policy for group.

  """
  return cluster.SimpleFillIPolicy(group.ipolicy)


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  variant = objects.OS.GetVariant(name)
  if not os_obj.supported_variants:
    if variant:
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                                 " passed)" % (os_obj.name, variant),
                                 errors.ECODE_INVAL)
    return
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot
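
  For example (an illustrative sketch; slot names vary by opcode), an LU's
  CheckArguments could run::

    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")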

  """
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both an iallocator and a node",
                               errors.ECODE_INVAL)
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found;"
                                 " please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator", errors.ECODE_INVAL)


def _GetDefaultIAllocator(cfg, iallocator):
  """Decides on which iallocator to use.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration object
  @type iallocator: string or None
  @param iallocator: Iallocator specified in opcode
  @rtype: string
  @return: Iallocator name

  """
  if not iallocator:
    # Use default iallocator
    iallocator = cfg.GetDefaultIAllocator()

  if not iallocator:
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
                               " opcode nor as a cluster-wide default",
                               errors.ECODE_INVAL)

  return iallocator


class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
    }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
    }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master_params = self.cfg.GetMasterNetworkParameters()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master_params.name)

    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
    if result.fail_msg:
      self.LogWarning("Error disabling the master IP address: %s",
                      result.fail_msg)

    return master_params.name


def _VerifyCertificate(filename):
  """Verifies a certificate for L{LUClusterVerifyConfig}.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable=W0703
    return (LUClusterVerifyConfig.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)


def _GetAllHypervisorParameters(cluster, instances):
  """Compute the set of all hypervisor parameters.

  @type cluster: L{objects.Cluster}
  @param cluster: the cluster object
  @type instances: list of L{objects.Instance}
  @param instances: additional instances from which to obtain parameters
  @rtype: list of (origin, hypervisor, parameters)
  @return: a list with all parameters found, indicating the hypervisor they
       apply to, and the origin (can be "cluster", "os X", or "instance Y")

  """
  hvp_data = []

  for hv_name in cluster.enabled_hypervisors:
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))

  for os_name, os_hvp in cluster.os_hvp.items():
    for hv_name, hv_params in os_hvp.items():
      if hv_params:
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
        hvp_data.append(("os %s" % os_name, hv_name, full_params))

  # TODO: collapse identical parameter values in a single one
  for instance in instances:
    if instance.hvparams:
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
                       cluster.FillHV(instance)))

  return hvp_data


class _VerifyErrors(object):
  """Mix-in for cluster/group verify LUs.

  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
  self.op and self._feedback_fn to be available.)

  """

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.
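
    With C{error_codes} enabled the reported line looks like (illustrative)::

      ERROR:ENODEVERSION:node:node1.example.com:incompatible protocol versions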

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt, _ = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + str(item)
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101

  def _ErrorIf(self, cond, ecode, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = (bool(cond)
            or self.op.debug_simulate_errors) # pylint: disable=E1101

    # If the error code is in the list of ignored errors, demote the error to a
    # warning
    (_, etxt, _) = ecode
    if etxt in self.op.ignore_errors: # pylint: disable=E1101
      kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING

    if cond:
      self._Error(ecode, *args, **kwargs)

    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond


class LUClusterVerify(NoHooksLU):
  """Submits all jobs necessary to verify the cluster.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    jobs = []

    if self.op.group_name:
      groups = [self.op.group_name]
      depends_fn = lambda: None
    else:
      groups = self.cfg.GetNodeGroupList()

      # Verify global configuration
      jobs.append([
        opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors),
        ])

      # Always depend on global verification
      depends_fn = lambda: [(-len(jobs), [])]

    jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
                                              ignore_errors=self.op.ignore_errors,
                                              depends=depends_fn())]
                for group in groups)

    # Fix up all parameters
    for op in itertools.chain(*jobs): # pylint: disable=W0142
      op.debug_simulate_errors = self.op.debug_simulate_errors
      op.verbose = self.op.verbose
      op.error_codes = self.op.error_codes

      try:
        op.skip_checks = self.op.skip_checks
      except AttributeError:
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)

    return ResultWithJobs(jobs)


class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
  """Verifies the cluster config.

  """
  REQ_BGL = False

  def _VerifyHVP(self, hvp_data):
    """Verifies locally the syntax of the hypervisor parameters.

    """
    for item, hv_name, hv_params in hvp_data:
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
             (hv_name, item))
      try:
        hv_class = hypervisor.GetHypervisor(hv_name)
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
        hv_class.CheckParameterSyntax(hv_params)
      except errors.GenericError, err:
        self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))

  def ExpandNames(self):
    # Information can be safely retrieved as the BGL is acquired in exclusive
    # mode
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    """
    self.bad = False
    self._feedback_fn = feedback_fn

    feedback_fn("* Verifying cluster config")

    for msg in self.cfg.VerifyConfig():
      self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)

    feedback_fn("* Verifying cluster certificate files")

    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)

    feedback_fn("* Verifying hypervisor parameters")

    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
                                                self.all_inst_info.values()))

    feedback_fn("* Verifying all nodes belong to an existing group")

    # We do this verification here because, should this bogus circumstance
    # occur, it would never be caught by VerifyGroup, which only acts on
    # nodes/instances reachable from existing node groups.

    dangling_nodes = set(node.name for node in self.all_node_info.values()
                         if node.group not in self.all_group_info)

    dangling_instances = {}
    no_node_instances = []

    for inst in self.all_inst_info.values():
      if inst.primary_node in dangling_nodes:
        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
      elif inst.primary_node not in self.all_node_info:
        no_node_instances.append(inst.name)

    pretty_dangling = [
        "%s (%s)" %
        (node.name,
         utils.CommaJoin(dangling_instances.get(node.name,
                                                ["no instances"])))
        for node in dangling_nodes]

    self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
                  None,
                  "the following nodes (and their instances) belong to a non"
                  " existing group: %s", utils.CommaJoin(pretty_dangling))

    self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
                  None,
                  "the following instances have a non-existing primary-node:"
                  " %s", utils.CommaJoin(no_node_instances))

    return not self.bad


class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
  """Verifies the status of a node group.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  _HOOKS_INDENT_RE = re.compile("^", re.M)

  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @type name: string
    @ivar name: the node name to which this object refers
    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dictionary of {primary-node: list of instances} for all
        instances for which this node is secondary (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)
    @type os_fail: boolean
    @ivar os_fail: whether the RPC call didn't return valid OS data
    @type oslist: list
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
    @type vm_capable: boolean
    @ivar vm_capable: whether the node can host instances

    """
    def __init__(self, offline=False, name=None, vm_capable=True):
      self.name = name
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.vm_capable = vm_capable
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
      self.os_fail = False
      self.oslist = {}

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    # Get instances in node group; this is unsafe and needs verification later
    inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: inst_names,
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      locking.LEVEL_NODE: [],
    }

    self.share_locks = _ShareAll()

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      # Get members of node group; this is unsafe and needs verification later
      nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)

      all_inst_info = self.cfg.GetAllInstancesInfo()

      # In Exec(), we warn about mirrored instances that have primary and
      # secondary living in separate node groups. To fully verify that
      # volumes for these instances are healthy, we will need to do an
      # extra call to their secondaries. We ensure here those nodes will
      # be locked.
      for inst in self.owned_locks(locking.LEVEL_INSTANCE):
        # Important: access only the instances whose lock is owned
        if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
          nodes.update(all_inst_info[inst].secondary_nodes)

      self.needed_locks[locking.LEVEL_NODE] = nodes

  def CheckPrereq(self):
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
    self.group_info = self.cfg.GetNodeGroup(self.group_uuid)

    group_nodes = set(self.group_info.members)
    group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)

    unlocked_nodes = \
      group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))

    unlocked_instances = \
      group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))

    if unlocked_nodes:
      raise errors.OpPrereqError("Missing lock for nodes: %s" %
                                 utils.CommaJoin(unlocked_nodes))

    if unlocked_instances:
      raise errors.OpPrereqError("Missing lock for instances: %s" %
                                 utils.CommaJoin(unlocked_instances))

    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()

    self.my_node_names = utils.NiceSort(group_nodes)
    self.my_inst_names = utils.NiceSort(group_instances)

    self.my_node_info = dict((name, self.all_node_info[name])
                             for name in self.my_node_names)

    self.my_inst_info = dict((name, self.all_inst_info[name])
                             for name in self.my_inst_names)

    # We detect here the nodes that will need the extra RPC calls for verifying
    # split LV volumes; they should be locked.
    extra_lv_nodes = set()

    for inst in self.my_inst_info.values():
      if inst.disk_template in constants.DTS_INT_MIRROR:
        group = self.my_node_info[inst.primary_node].group
        for nname in inst.secondary_nodes:
          if self.all_node_info[nname].group != group:
            extra_lv_nodes.add(nname)

    unlocked_lv_nodes = \
      extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))

    if unlocked_lv_nodes:
      raise errors.OpPrereqError("Missing node locks for LV check: %s" %
                                 utils.CommaJoin(unlocked_lv_nodes))
    self.extra_lv_nodes = list(extra_lv_nodes)
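
  # Note on the locking pattern above (summary, not new behaviour):
  # ExpandNames/DeclareLocks collect node and instance names from the
  # configuration *before* the locks are actually held, so CheckPrereq
  # re-reads the group members and verifies that every name is really locked.
  # For example, an instance added to the group between ExpandNames and lock
  # acquisition would appear in group_instances but not in
  # owned_locks(locking.LEVEL_INSTANCE), and the LU then aborts with
  # OpPrereqError instead of verifying against stale data.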

  def _VerifyNode(self, ninfo, nresult):
    """Perform some basic validation on data returned from a node.

      - check the result data structure is well formed and has all the
        mandatory fields
      - check ganeti version

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @rtype: boolean
    @return: whether overall this call was successful (and we can expect
         reasonable values in the response)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, constants.CV_ENODERPC, node,
             "unable to verify node: no data returned")
    if test:
      return False

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, constants.CV_ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return False

    test = local_version != remote_version[0]
    _ErrorIf(test, constants.CV_ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return False

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  constants.CV_ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if ninfo.vm_capable and isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, constants.CV_ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
    if ninfo.vm_capable and isinstance(hvp_result, list):
      for item, hv_name, hv_result in hvp_result:
        _ErrorIf(True, constants.CV_ENODEHV, node,
                 "hypervisor %s parameter verify failure (source %s): %s",
                 hv_name, item, hv_result)

    test = nresult.get(constants.NV_NODESETUP,
                       ["Missing NODESETUP results"])
    _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True

  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, constants.CV_ENODETIME, node,
               "Node returned invalid time")
      return

    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

    _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)

  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    if vg_name is None:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = not vglist
    _ErrorIf(test, constants.CV_ENODELVM, node,
             "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)

    # check pv names
    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
    if not test:
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PV)
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, constants.CV_ENODELVM, node,
                 "Invalid character ':' in PV '%s' of VG '%s'",
                 pvname, owner_vg)

  def _VerifyNodeBridges(self, ninfo, nresult, bridges):
    """Check the node bridges.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param bridges: the expected list of bridges

    """
    if not bridges:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    missing = nresult.get(constants.NV_BRIDGES, None)
    test = not isinstance(missing, list)
    _ErrorIf(test, constants.CV_ENODENET, node,
             "did not return valid bridge information")
    if not test:
      _ErrorIf(bool(missing), constants.CV_ENODENET, node,
               "missing bridges: %s" % utils.CommaJoin(sorted(missing)))

  def _VerifyNodeUserScripts(self, ninfo, nresult):
    """Check the results of user scripts presence and executability on the node

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name

    test = constants.NV_USERSCRIPTS not in nresult
    self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
                  "did not return user scripts information")

    broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
    if not test:
      self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
                    "user scripts not present or not executable: %s" %
                    utils.CommaJoin(sorted(broken_scripts)))

  def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the node network connectivity results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, constants.CV_ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if nresult[constants.NV_NODELIST]:
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
          _ErrorIf(True, constants.CV_ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, constants.CV_ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if nresult[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, constants.CV_ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, nresult[constants.NV_NODENETTEST][anode])

    test = constants.NV_MASTERIP not in nresult
    _ErrorIf(test, constants.CV_ENODENET, node,
             "node hasn't returned node master IP reachability data")
    if not test:
      if not nresult[constants.NV_MASTERIP]:
        if node == self.master_node:
          msg = "the master node cannot reach the master IP (not configured?)"
        else:
          msg = "cannot reach the master IP"
        _ErrorIf(True, constants.CV_ENODENET, node, msg)

  def _VerifyInstancePolicy(self, instance):
    """Verify instance specs against instance policy set on node group level.

    """
    cluster = self.cfg.GetClusterInfo()
    full_beparams = cluster.FillBE(instance)
    ipolicy = cluster.SimpleFillIPolicy(self.group_info.ipolicy)

    mem_size = full_beparams.get(constants.BE_MAXMEM, None)
    cpu_count = full_beparams.get(constants.BE_VCPUS, None)
    disk_count = len(instance.disks)
    disk_sizes = [disk.size for disk in instance.disks]
    nic_count = len(instance.nics)

    test_settings = [
      (constants.ISPEC_MEM_SIZE, mem_size),
      (constants.ISPEC_CPU_COUNT, cpu_count),
      (constants.ISPEC_DISK_COUNT, disk_count),
      (constants.ISPEC_NIC_COUNT, nic_count),
      ] + map((lambda d: (constants.ISPEC_DISK_SIZE, d)), disk_sizes)

    for (name, value) in test_settings:
      test_result = _CheckMinMaxSpecs(name, ipolicy, value)
      self._ErrorIf(test_result is not None,
                    constants.CV_EINSTANCEPOLICY, instance.name,
                    test_result)
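
  # Illustrative example (values invented): with a group ipolicy whose memory
  # bounds are min=128 and max=32768 MiB, an instance with BE_MAXMEM of 65536
  # makes _CheckMinMaxSpecs return an error string, which is then reported
  # above as a CV_EINSTANCEPOLICY error against the instance name.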

  def _VerifyInstance(self, instance, instanceconfig, node_image,
                      diskstatus):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), self.group_info)
    err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
    _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, err)

    for node in node_vol_should:
      n_img = node_image[node]
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
      for volume in node_vol_should[node]:
        test = volume not in n_img.volumes
        _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_state == constants.ADMINST_UP:
      pri_img = node_image[node_current]
      test = instance not in pri_img.instances and not pri_img.offline
      _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    diskdata = [(nname, success, status, idx)
                for (nname, disks) in diskstatus.items()
                for idx, (success, status) in enumerate(disks)]

    for nname, success, bdev_status, idx in diskdata:
      # the 'ghost node' construction in Exec() ensures that we have a
      # known node object
      snode = node_image[nname]
      bad_snode = snode.ghost or snode.offline
      _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
               not success and not bad_snode,
               constants.CV_EINSTANCEFAULTYDISK, instance,
               "couldn't retrieve status for disk/%s on %s: %s",
               idx, nname, bdev_status)
      _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
                success and bdev_status.ldisk_status == constants.LDS_FAULTY),
               constants.CV_EINSTANCEFAULTYDISK, instance,
               "disk/%s on %s is faulty", idx, nname)

  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    @type reserved: L{ganeti.utils.FieldSet}
    @param reserved: a FieldSet of reserved volume names

    """
    for node, n_img in node_image.items():
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # skip non-healthy nodes
        continue
      for volume in n_img.volumes:
        test = ((node not in node_vol_should or
                 volume not in node_vol_should[node]) and
                not reserved.Matches(volume))
        self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
                      "volume %s is unknown", volume)

  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the
    instances it was primary for.

    """
    cluster_info = self.cfg.GetClusterInfo()
    for node, n_img in node_image.items():
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all instances it is
      # supposed to should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well
      # as up ones, considering that even if they're down someone
      # might want to start them even in the event of a node failure.
      if n_img.offline:
        # we're skipping offline nodes from the N+1 warning, since
        # most likely we don't have good memory information from them;
        # we already list instances living on such nodes, and that's
        # enough warning
        continue
      #TODO(dynmem): use MINMEM for checking
      #TODO(dynmem): also consider ballooning out other instances
      for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
          bep = cluster_info.FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MAXMEM]
        test = n_img.mfree < needed_mem
        self._ErrorIf(test, constants.CV_ENODEN1, node,
                      "not enough memory to accommodate instance failovers"
                      " should node %s fail (%dMiB needed, %dMiB available)",
                      prinode, needed_mem, n_img.mfree)
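
  # Worked example (illustrative numbers): if node A is listed in n_img.sbp
  # for primary node B with two auto-balanced instances whose BE_MAXMEM is
  # 2048 and 4096 MiB, A must report mfree >= 6144 MiB; anything less raises
  # a CV_ENODEN1 error naming B as the node whose failure cannot be absorbed.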

  @classmethod
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
                   (files_all, files_opt, files_mc, files_vm)):
    """Verifies file checksums collected from all nodes.

    @param errorif: Callback for reporting errors
    @param nodeinfo: List of L{objects.Node} objects
    @param master_node: Name of master node
    @param all_nvinfo: RPC results

    """
    # Define functions determining which nodes to consider for a file
    files2nodefn = [
      (files_all, None),
      (files_mc, lambda node: (node.master_candidate or
                               node.name == master_node)),
      (files_vm, lambda node: node.vm_capable),
      ]

    # Build mapping from filename to list of nodes which should have the file
    nodefiles = {}
    for (files, fn) in files2nodefn:
      if fn is None:
        filenodes = nodeinfo
      else:
        filenodes = filter(fn, nodeinfo)
      nodefiles.update((filename,
                        frozenset(map(operator.attrgetter("name"), filenodes)))
                       for filename in files)

    assert set(nodefiles) == (files_all | files_mc | files_vm)

    fileinfo = dict((filename, {}) for filename in nodefiles)
    ignore_nodes = set()

    for node in nodeinfo:
      if node.offline:
        ignore_nodes.add(node.name)
        continue

      nresult = all_nvinfo[node.name]

      if nresult.fail_msg or not nresult.payload:
        node_files = None
      else:
        node_files = nresult.payload.get(constants.NV_FILELIST, None)

      test = not (node_files and isinstance(node_files, dict))
      errorif(test, constants.CV_ENODEFILECHECK, node.name,
              "Node did not return file checksum data")
      if test:
        ignore_nodes.add(node.name)
        continue

      # Build per-checksum mapping from filename to nodes having it
      for (filename, checksum) in node_files.items():
        assert filename in nodefiles
        fileinfo[filename].setdefault(checksum, set()).add(node.name)

    for (filename, checksums) in fileinfo.items():
      assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"

      # Nodes having the file
      with_file = frozenset(node_name
                            for nodes in fileinfo[filename].values()
                            for node_name in nodes) - ignore_nodes

      expected_nodes = nodefiles[filename] - ignore_nodes

      # Nodes missing file
      missing_file = expected_nodes - with_file

      if filename in files_opt:
        # All or no nodes
        errorif(missing_file and missing_file != expected_nodes,
                constants.CV_ECLUSTERFILECHECK, None,
                "File %s is optional, but it must exist on all or no"
                " nodes (not found on %s)",
                filename, utils.CommaJoin(utils.NiceSort(missing_file)))
      else:
        errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
                "File %s is missing from node(s) %s", filename,
                utils.CommaJoin(utils.NiceSort(missing_file)))

        # Warn if a node has a file it shouldn't
        unexpected = with_file - expected_nodes
        errorif(unexpected,
                constants.CV_ECLUSTERFILECHECK, None,
                "File %s should not exist on node(s) %s",
                filename, utils.CommaJoin(utils.NiceSort(unexpected)))

      # See if there are multiple versions of the file
      test = len(checksums) > 1
      if test:
        variants = ["variant %s on %s" %
                    (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
                    for (idx, (checksum, nodes)) in
                    enumerate(sorted(checksums.items()))]
      else:
        variants = []

      errorif(test, constants.CV_ECLUSTERFILECHECK, None,
              "File %s found with %s different checksums (%s)",
              filename, len(checksums), "; ".join(variants))
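
  # Data-shape note: fileinfo maps {filename: {checksum: set(node names)}};
  # a file that is in sync cluster-wide therefore has exactly one checksum
  # key, and more than one key triggers the "variant N on ..." report above.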

  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
                      drbd_map):
    """Verifies the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_helper: the configured DRBD usermode helper
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    if drbd_helper:
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
      test = (helper_result is None)
      _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
               "no drbd usermode helper returned")
      if helper_result:
        status, payload = helper_result
        test = not status
        _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
                 "drbd usermode helper check unsuccessful: %s", payload)
        test = status and (payload != drbd_helper)
        _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
                 "wrong drbd usermode helper: %s", payload)

    # compute the DRBD minors
    node_drbd = {}
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
      # ghost instance should not be running, but otherwise we
      # don't give double warnings (both ghost instance and
      # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name,
                            instance.admin_state == constants.ADMINST_UP)

    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, constants.CV_ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
    if test:
      # we cannot check drbd status
      return

    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, constants.CV_ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, constants.CV_ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)

  def _UpdateNodeOS(self, ninfo, nresult, nimg):
    """Builds the node OS structures.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    remote_os = nresult.get(constants.NV_OSLIST, None)
    test = (not isinstance(remote_os, list) or
            not compat.all(isinstance(v, list) and len(v) == 7
                           for v in remote_os))

    _ErrorIf(test, constants.CV_ENODEOS, node,
             "node hasn't returned valid OS data")

    nimg.os_fail = test
    if test:
      return

    os_dict = {}

    for (name, os_path, status, diagnose,
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:

      if name not in os_dict:
        os_dict[name] = []

      # parameters is a list of lists instead of list of tuples due to
      # JSON lacking a real tuple type, fix it:
      parameters = [tuple(v) for v in parameters]
      os_dict[name].append((os_path, status, diagnose,
                            set(variants), set(parameters), set(api_ver)))

    nimg.oslist = os_dict
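
  # Data-shape note: nimg.oslist maps each OS name to a list of
  # (path, status, diagnose, set(variants), set(parameters), set(api_versions))
  # tuples, one per directory the OS was found in; entry 0 is the variant that
  # shadows the others, which is what _VerifyNodeOS below checks against.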

  def _VerifyNodeOS(self, ninfo, nimg, base):
    """Verifies the node OS list.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nimg: the node image object
    @param base: the 'template' node we match against (e.g. from the master)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"

    beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
    for os_name, os_data in nimg.oslist.items():
      assert os_data, "Empty OS status for OS %s?!" % os_name
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
      _ErrorIf(not f_status, constants.CV_ENODEOS, node,
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
      _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
               "OS '%s' has multiple entries (first one shadows the rest): %s",
               os_name, utils.CommaJoin([v[0] for v in os_data]))
      # comparisons with the 'base' image
      test = os_name not in base.oslist
      _ErrorIf(test, constants.CV_ENODEOS, node,
               "Extra OS %s not present on reference node (%s)",
               os_name, base.name)
      if test:
        continue
      assert base.oslist[os_name], "Base node has empty OS status?"
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
      if not b_status:
        # base OS is invalid, skipping
        continue
      for kind, a, b in [("API version", f_api, b_api),
                         ("variants list", f_var, b_var),
                         ("parameters", beautify_params(f_param),
                          beautify_params(b_param))]:
        _ErrorIf(a != b, constants.CV_ENODEOS, node,
                 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
                 kind, os_name, base.name,
                 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))

    # check any missing OSes
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
    _ErrorIf(missing, constants.CV_ENODEOS, node,
             "OSes present on reference node %s but missing on this node: %s",
             base.name, utils.CommaJoin(missing))

  def _VerifyOob(self, ninfo, nresult):
    """Verifies out of band functionality of a node.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    # We just have to verify the paths on master and/or master candidates
    # as the oob helper is invoked on the master
    if ((ninfo.master_candidate or ninfo.master_capable) and
        constants.NV_OOB_PATHS in nresult):
      for path_result in nresult[constants.NV_OOB_PATHS]:
        self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)

  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
    """Verifies and updates the node volume data.

    This function will update a L{NodeImage}'s internal structures
    with data from the remote call.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    if vg_name is None:
      pass
    elif isinstance(lvdata, basestring):
      _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
               utils.SafeEncode(lvdata))
    elif not isinstance(lvdata, dict):
      _ErrorIf(True, constants.CV_ENODELVM, node,
               "rpc call to node failed (lvlist)")
    else:
      nimg.volumes = lvdata
      nimg.lvm_fail = False

  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
    """Verifies and updates the node instance list.

    If the listing was successful, then updates this node's instance
    list. Otherwise, it marks the RPC call as failed for the instance
    list.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    idata = nresult.get(constants.NV_INSTANCELIST, None)
    test = not isinstance(idata, list)
    self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
                  "rpc call to node failed (instancelist): %s",
                  utils.SafeEncode(str(idata)))
    if test:
      nimg.hyp_fail = True
    else:
      nimg.instances = idata

  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
    """Verifies and computes a node information map.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # try to read free memory (from the hypervisor)
    hv_info = nresult.get(constants.NV_HVINFO, None)
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
    _ErrorIf(test, constants.CV_ENODEHV, node,
             "rpc call to node failed (hvinfo)")
    if not test:
      try:
        nimg.mfree = int(hv_info["memory_free"])
      except (ValueError, TypeError):
        _ErrorIf(True, constants.CV_ENODERPC, node,
                 "node returned invalid nodeinfo, check hypervisor")

    # FIXME: devise a free space model for file based instances as well
    if vg_name is not None:
      test = (constants.NV_VGLIST not in nresult or
              vg_name not in nresult[constants.NV_VGLIST])
      _ErrorIf(test, constants.CV_ENODELVM, node,
               "node didn't return data for the volume group '%s'"
               " - it is either missing or broken", vg_name)
      if not test:
        try:
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
        except (ValueError, TypeError):
          _ErrorIf(True, constants.CV_ENODERPC, node,
                   "node returned invalid LVM info, check LVM status")

  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
    """Gets per-disk status information for all instances.

    @type nodelist: list of strings
    @param nodelist: Node names
    @type node_image: dict of (name, L{objects.Node})
    @param node_image: Node objects
    @type instanceinfo: dict of (name, L{objects.Instance})
    @param instanceinfo: Instance objects
    @rtype: {instance: {node: [(success, payload)]}}
    @return: a dictionary of per-instance dictionaries with nodes as
        keys and disk information as values; the disk information is a
        list of tuples (success, payload)

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    node_disks = {}
    node_disks_devonly = {}
    diskless_instances = set()
    diskless = constants.DT_DISKLESS

    for nname in nodelist:
      node_instances = list(itertools.chain(node_image[nname].pinst,
                                            node_image[nname].sinst))
      diskless_instances.update(inst for inst in node_instances
                                if instanceinfo[inst].disk_template == diskless)
      disks = [(inst, disk)
               for inst in node_instances
               for disk in instanceinfo[inst].disks]

      if not disks:
        # No need to collect data
        continue

      node_disks[nname] = disks

      # Creating copies as SetDiskID below will modify the objects and that can
      # lead to incorrect data returned from nodes
      devonly = [dev.Copy() for (_, dev) in disks]

      for dev in devonly:
        self.cfg.SetDiskID(dev, nname)

      node_disks_devonly[nname] = devonly

    assert len(node_disks) == len(node_disks_devonly)

    # Collect data from all nodes with disks
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
                                                          node_disks_devonly)

    assert len(result) == len(node_disks)

    instdisk = {}

    for (nname, nres) in result.items():
      disks = node_disks[nname]

      if nres.offline:
        # No data from this node
        data = len(disks) * [(False, "node offline")]
      else:
        msg = nres.fail_msg
        _ErrorIf(msg, constants.CV_ENODERPC, nname,
                 "while getting disk information: %s", msg)
        if msg:
          # No data from this node
          data = len(disks) * [(False, msg)]
        else:
          data = []
          for idx, i in enumerate(nres.payload):
            if isinstance(i, (tuple, list)) and len(i) == 2:
              data.append(i)
            else:
              logging.warning("Invalid result from node %s, entry %d: %s",
                              nname, idx, i)
              data.append((False, "Invalid result from the remote node"))

      for ((inst, _), status) in zip(disks, data):
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)

    # Add empty entries for diskless instances.
    for inst in diskless_instances:
      assert inst not in instdisk
      instdisk[inst] = {}

    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
                      compat.all(isinstance(s, (tuple, list)) and
                                 len(s) == 2 for s in statuses)
                      for inst, nnames in instdisk.items()
                      for nname, statuses in nnames.items())
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"

    return instdisk
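
  # Illustrative result (shape only): for a mirrored instance "inst1" on
  # primary "n1" and secondary "n2", instdisk would look like
  # {"inst1": {"n1": [(True, status), ...], "n2": [(True, status), ...]}},
  # while a diskless instance maps to an empty dict, as the assertions above
  # require.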

  @staticmethod
  def _SshNodeSelector(group_uuid, all_nodes):
    """Create endless iterators for all potential SSH check hosts.

    """
    nodes = [node for node in all_nodes
             if (node.group != group_uuid and
                 not node.offline)]
    keyfunc = operator.attrgetter("group")

    return map(itertools.cycle,
               [sorted(map(operator.attrgetter("name"), names))
                for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
                                                  keyfunc)])

  @classmethod
  def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
    """Choose which nodes should talk to which other nodes.

    We will make nodes contact all nodes in their group, and one node from
    every other group.

    @warning: This algorithm has a known issue if one node group is much
      smaller than others (e.g. just one node). In such a case all other
      nodes will talk to the single node.

    """
    online_nodes = sorted(node.name for node in group_nodes if not node.offline)
    sel = cls._SshNodeSelector(group_uuid, all_nodes)

    return (online_nodes,
            dict((name, sorted([i.next() for i in sel]))
                 for name in online_nodes))
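
  # Worked example (illustrative): with node groups A = {n1, n2} and
  # B = {n3}, verifying group A makes n1 and n2 each check SSH connectivity
  # to all online nodes of A plus one node chosen from every other group
  # (here n3), matching the docstring above; the itertools.cycle round-robin
  # spreads the choices when the other groups have more members.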

  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks run in the post phase only; a hook failure is
    logged in the verify output and makes the verification fail.

    """
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
      }

    env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
               for node in self.my_node_info.values())

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], self.my_node_names)

  def Exec(self, feedback_fn):
    """Verify integrity of the node group, performing various tests on nodes.

    """
    # This method has too many local variables. pylint: disable=R0914
    feedback_fn("* Verifying group '%s'" % self.group_info.name)

    if not self.my_node_names:
      # empty node group
      feedback_fn("* Empty node group, skipping verification")
      return True

    self.bad = False
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn

    vg_name = self.cfg.GetVGName()
    drbd_helper = self.cfg.GetDRBDHelper()
    cluster = self.cfg.GetClusterInfo()
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
    hypervisors = cluster.enabled_hypervisors
    node_data_list = [self.my_node_info[name] for name in self.my_node_names]

    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    i_offline = 0 # Count of offline instances
    n_offline = 0 # Count of offline nodes
    n_drained = 0 # Count of nodes being drained
    node_vol_should = {}

    # FIXME: verify OS list

    # File verification
    filemap = _ComputeAncillaryFiles(cluster, False)

    # do local checksums
    master_node = self.master_node = self.cfg.GetMasterNode()
    master_ip = self.cfg.GetMasterIP()

    feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))

    user_scripts = []
    if self.cfg.GetUseExternalMipScript():
      user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)

    node_verify_param = {
      constants.NV_FILELIST:
        utils.UniqueSequence(filename
                             for files in filemap
                             for filename in files),
      constants.NV_NODELIST:
        self._SelectSshCheckNodes(node_data_list, self.group_uuid,
                                  self.all_node_info.values()),
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_HVPARAMS:
        _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
                                  node.secondary_ip)
                                 for node in node_data_list
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      constants.NV_NODESETUP: None,
      constants.NV_TIME: None,
      constants.NV_MASTERIP: (master_node, master_ip),
      constants.NV_OSLIST: None,
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
      constants.NV_USERSCRIPTS: user_scripts,
      }
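
    # Each NV_* key requests one specific piece of data from the node-verify
    # RPC (handled on the node side by backend.VerifyNode, at the time of
    # writing); the checks are driven entirely by this dictionary, so keys
    # added only conditionally below (LVM, DRBD, bridges, OOB paths) are
    # simply not collected when not applicable.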

    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_PVLIST] = [vg_name]
      node_verify_param[constants.NV_DRBDLIST] = None

    if drbd_helper:
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper

    # bridge checks
    # FIXME: this needs to be changed per node-group, not cluster-wide
    bridges = set()
    default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
    if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
      bridges.add(default_nicpp[constants.NIC_LINK])
    for instance in self.my_inst_info.values():
      for nic in instance.nics:
        full_nic = cluster.SimpleFillNIC(nic.nicparams)
        if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          bridges.add(full_nic[constants.NIC_LINK])

    if bridges:
      node_verify_param[constants.NV_BRIDGES] = list(bridges)

    # Build our expected cluster state
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
                                                 name=node.name,
                                                 vm_capable=node.vm_capable))
                      for node in node_data_list)

    # Gather OOB paths
    oob_paths = []
    for node in self.all_node_info.values():
      path = _SupportsOob(self.cfg, node)
      if path and path not in oob_paths:
        oob_paths.append(path)

    if oob_paths:
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths

    for instance in self.my_inst_names:
      inst_config = self.my_inst_info[instance]

      for nname in inst_config.all_nodes:
        if nname not in node_image:
          gnode = self.NodeImage(name=nname)
          gnode.ghost = (nname not in self.all_node_info)
          node_image[nname] = gnode

      inst_config.MapLVsByNode(node_vol_should)

      pnode = inst_config.primary_node
      node_image[pnode].pinst.append(instance)

      for snode in inst_config.secondary_nodes:
        nimg = node_image[snode]
        nimg.sinst.append(instance)
        if pnode not in nimg.sbp:
          nimg.sbp[pnode] = []
        nimg.sbp[pnode].append(instance)

    # At this point, we have the in-memory data structures complete,
    # except for the runtime information, which we'll gather next

    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
    nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
                                           node_verify_param,
                                           self.cfg.GetClusterName())
    nvinfo_endtime = time.time()

    if self.extra_lv_nodes and vg_name is not None:
      extra_lv_nvinfo = \
          self.rpc.call_node_verify(self.extra_lv_nodes,
                                    {constants.NV_LVLIST: vg_name},
                                    self.cfg.GetClusterName())
    else:
      extra_lv_nvinfo = {}

    all_drbd_map = self.cfg.ComputeDRBDMap()

    feedback_fn("* Gathering disk information (%s nodes)" %
                len(self.my_node_names))
    instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
                                     self.my_inst_info)

    feedback_fn("* Verifying configuration file consistency")

    # If not all nodes are being checked, we need to make sure the master node
    # and a non-checked vm_capable node are in the list.
    absent_nodes = set(self.all_node_info).difference(self.my_node_info)
    if absent_nodes:
      vf_nvinfo = all_nvinfo.copy()
      vf_node_info = list(self.my_node_info.values())
      additional_nodes = []
      if master_node not in self.my_node_info:
        additional_nodes.append(master_node)
        vf_node_info.append(self.all_node_info[master_node])
      # Add the first vm_capable node we find which is not included
      for node in absent_nodes:
        nodeinfo = self.all_node_info[node]
        if nodeinfo.vm_capable and not nodeinfo.offline:
          additional_nodes.append(node)
          vf_node_info.append(self.all_node_info[node])
          break

      key = constants.NV_FILELIST
      vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
                                                 {key: node_verify_param[key]},
                                                 self.cfg.GetClusterName()))
    else:
      vf_nvinfo = all_nvinfo
      vf_node_info = self.my_node_info.values()

    self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)

    feedback_fn("* Verifying node status")

    refos_img = None

    for node_i in node_data_list:
      node = node_i.name
      nimg = node_image[node]

      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline += 1
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained += 1
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
               msg)
      if msg:
        nimg.rpc_fail = True
        continue

      nresult = all_nvinfo[node].payload

      nimg.call_ok = self._VerifyNode(node_i, nresult)
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
      self._VerifyNodeNetwork(node_i, nresult)
      self._VerifyNodeUserScripts(node_i, nresult)
      self._VerifyOob(node_i, nresult)

      if nimg.vm_capable:
        self._VerifyNodeLVM(node_i, nresult, vg_name)
        self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
                             all_drbd_map)

        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
        self._UpdateNodeInstances(node_i, nresult, nimg)
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
        self._UpdateNodeOS(node_i, nresult, nimg)

        if not nimg.os_fail:
          if refos_img is None:
            refos_img = nimg
          self._VerifyNodeOS(node_i, nimg, refos_img)
        self._VerifyNodeBridges(node_i, nresult, bridges)

        # Check whether all running instances are primary for the node. (This
        # can no longer be done from _VerifyInstance below, since some of the
        # wrong instances could be from other node groups.)
        non_primary_inst = set(nimg.instances).difference(nimg.pinst)

        for inst in non_primary_inst:
          # FIXME: investigate best way to handle offline insts
          # (inst is an instance *name* here, so look the object up before
          # checking its admin state)
          inst_config = self.all_inst_info.get(inst)
          if (inst_config is not None and
              inst_config.admin_state == constants.ADMINST_OFFLINE):
            if verbose:
              feedback_fn("* Skipping offline instance %s" % inst)
            i_offline += 1
            continue
          test = inst in self.all_inst_info
          _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
                   "instance should not run on node %s", node_i.name)
          _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
                   "node is running unknown instance %s", inst)

    for node, result in extra_lv_nvinfo.items():
      self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
                              node_image[node], vg_name)

    feedback_fn("* Verifying instance status")
    for instance in self.my_inst_names:
      if verbose:
        feedback_fn("* Verifying instance %s" % instance)
      inst_config = self.my_inst_info[instance]
      self._VerifyInstance(instance, inst_config, node_image,
                           instdisk[instance])
      inst_nodes_offline = []

      pnode = inst_config.primary_node
      pnode_img = node_image[pnode]
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
               constants.CV_ENODERPC, pnode, "instance %s, connection to"
               " primary node failed", instance)

      _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
               pnode_img.offline,
               constants.CV_EINSTANCEBADNODE, instance,
               "instance is marked as running and lives on offline node %s",
               inst_config.primary_node)

      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
      # FIXME: does not support file-backed instances
      if not inst_config.secondary_nodes:
        i_non_redundant.append(instance)

      _ErrorIf(len(inst_config.secondary_nodes) > 1,
               constants.CV_EINSTANCELAYOUT,
               instance, "instance has multiple secondary nodes: %s",
               utils.CommaJoin(inst_config.secondary_nodes),
               code=self.ETYPE_WARNING)

      if inst_config.disk_template in constants.DTS_INT_MIRROR:
        pnode = inst_config.primary_node
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
        instance_groups = {}

        for node in instance_nodes:
          instance_groups.setdefault(self.all_node_info[node].group,
                                     []).append(node)

        pretty_list = [
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
          # Sort so that we always list the primary node first.
          for group, nodes in sorted(instance_groups.items(),
                                     key=lambda (_, nodes): pnode in nodes,
                                     reverse=True)]

        self._ErrorIf(len(instance_groups) > 1,
                      constants.CV_EINSTANCESPLITGROUPS,
                      instance, "instance has primary and secondary nodes in"
                      " different groups: %s", utils.CommaJoin(pretty_list),
                      code=self.ETYPE_WARNING)

      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)

      for snode in inst_config.secondary_nodes:
        s_img = node_image[snode]
        _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
                 snode, "instance %s, connection to secondary node failed",
                 instance)
        if s_img.offline:
          inst_nodes_offline.append(snode)

      # warn that the instance lives on offline nodes
      _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
               "instance has offline secondary node(s) %s",
               utils.CommaJoin(inst_nodes_offline))
      # ... or ghost/non-vm_capable nodes
      for node in inst_config.all_nodes:
        _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
                 instance, "instance lives on ghost node %s", node)
        _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
                 instance, "instance lives on non-vm_capable node %s", node)

    feedback_fn("* Verifying orphan volumes")
    reserved = utils.FieldSet(*cluster.reserved_lvs)

    # We will get spurious "unknown volume" warnings if any node of this group
    # is secondary for an instance whose primary is in another group. To avoid
    # them, we find these instances and add their volumes to node_vol_should.
    for inst in self.all_inst_info.values():
      for secondary in inst.secondary_nodes:
        if (secondary in self.my_node_info
            and inst.name not in self.my_inst_info):
          inst.MapLVsByNode(node_vol_should)
          break

    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)

    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
      feedback_fn("* Verifying N+1 Memory redundancy")
      self._VerifyNPlusOneMemory(node_image, self.my_inst_info)

    feedback_fn("* Other Notes")
    if i_non_redundant:
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
                  % len(i_non_redundant))

    if i_non_a_balanced:
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
                  % len(i_non_a_balanced))

    if i_offline:
      feedback_fn("  - NOTICE: %d offline instance(s) found." % i_offline)

    if n_offline:
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)

    if n_drained:
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)

    return not self.bad

  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result.

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # We only really run POST phase hooks, only for non-empty groups,
    # and are only interested in their results
    if not self.my_node_names:
      # empty node group
      pass
    elif phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      feedback_fn("* Hooks Results")
      assert hooks_results, "invalid result from hooks"

      for node_name in hooks_results:
        res = hooks_results[node_name]
        msg = res.fail_msg
        test = msg and not res.offline
        self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
                      "Communication failure in hooks execution: %s", msg)
        if res.offline or msg:
          # No need to investigate payload if node is offline or gave
          # an error.
          continue
        for script, hkr, output in res.payload:
          test = hkr == constants.HKR_FAIL
          self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
                        "Script %s failed, output:", script)
          if test:
            output = self._HOOKS_INDENT_RE.sub("      ", output)
            feedback_fn("%s" % output)
            lu_result = False

    return lu_result


class LUClusterVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
      }

  def Exec(self, feedback_fn):
    group_names = self.owned_locks(locking.LEVEL_NODEGROUP)

    # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
    return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
                           for group in group_names])


class LUGroupVerifyDisks(NoHooksLU):
  """Verifies the status of all disks in a node group.

  """
  REQ_BGL = False

  def ExpandNames(self):
    # Raises errors.OpPrereqError on its own if group can't be found
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        set([self.group_uuid] +
            # Lock all groups used by instances optimistically; this requires
            # going via the node before it's locked, requiring verification
            # later on
            [group_uuid
             for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
             for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be verified which contain
      # actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be verified
      assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
      member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)

  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))

    # Check if node groups for locked instances are still correct
    for (instance_name, inst) in self.instances.items():
      assert owned_nodes.issuperset(inst.all_nodes), \
        "Instance %s's nodes changed while we kept the lock" % instance_name

      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
                                             owned_groups)

      assert self.group_uuid in inst_groups, \
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    res_nodes = {}
    res_instances = set()
    res_missing = {}

    nv_dict = _MapInstanceDisksToNodes([inst
            for inst in self.instances.values()
            if inst.admin_state == constants.ADMINST_UP])

    if nv_dict:
      nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
                             set(self.cfg.GetVmCapableNodeList()))

      node_lvs = self.rpc.call_lv_list(nodes, [])

      for (node, node_res) in node_lvs.items():
        if node_res.offline:
          continue

        msg = node_res.fail_msg
        if msg:
          logging.warning("Error enumerating LVs on node %s: %s", node, msg)
          res_nodes[node] = msg
          continue

        for lv_name, (_, _, lv_online) in node_res.payload.items():
          inst = nv_dict.pop((node, lv_name), None)
          if not (lv_online or inst is None):
            res_instances.add(inst)

      # any leftover items in nv_dict are missing LVs, let's arrange the data
      # better
      for key, inst in nv_dict.iteritems():
        res_missing.setdefault(inst, []).append(list(key))

    return (res_nodes, list(res_instances), res_missing)


class LUClusterRepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  """
  REQ_BGL = False

  def ExpandNames(self):
    if self.op.instances:
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
      self.needed_locks = {
        locking.LEVEL_NODE_RES: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE_RES: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    self.share_locks = {
      locking.LEVEL_NODE_RES: 1,
      locking.LEVEL_INSTANCE: 0,
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True, level=level)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)

    self.wanted_instances = \
      map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object

    """
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False
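
  # Worked example (illustrative): for a DRBD8 disk of size 10240 MiB whose
  # data LV child records only 10000 MiB, _EnsureChildSizes logs the mismatch,
  # bumps the child to 10240 and returns True, so the caller knows the
  # configuration needs to be written back.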

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
      pnode = instance.primary_node
      if pnode not in per_node_disks:
        per_node_disks[pnode] = []
      for idx, disk in enumerate(instance.disks):
        per_node_disks[pnode].append((instance, idx, disk))

    assert not (frozenset(per_node_disks.keys()) -
                self.owned_locks(locking.LEVEL_NODE_RES)), \
      "Not owning correct locks"
    assert not self.owned_locks(locking.LEVEL_NODE)

    changed = []
    for node, dskl in per_node_disks.items():
      newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsize(node, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsize call to node"
                        " %s, ignoring", node)
        continue
      if len(result.payload) != len(dskl):
        logging.warning("Invalid result from node %s: len(dksl)=%d,"
                        " result.payload=%s", node, len(dskl), result.payload)
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
        size = size >> 20
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))

    return changed
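
  # Return-shape note (illustrative): "changed" is a list of
  # (instance_name, disk_index, new_size) tuples; sizes here are in MiB,
  # assuming the byte-to-MiB shift (size >> 20) applied to the
  # blockdev_getsize payload above is correct.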


class LUClusterRename(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    """
    hostname = netutils.GetHostname(name=self.op.name,
                                    family=self.cfg.GetPrimaryIPFamily())

    new_name = hostname.name
    self.ip = new_ip = hostname.ip
    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)
    if new_ip != old_ip:
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                   " reachable on the network" %
                                   new_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    """
    clustername = self.op.name
    new_ip = self.ip

    # shutdown the master IP
    master_params = self.cfg.GetMasterNetworkParameters()
    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
    result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = clustername
      cluster.master_ip = new_ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetOnlineNodeList()
      try:
        node_list.remove(master_params.name)
      except ValueError:
        pass
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
    finally:
      master_params.ip = new_ip
      result = self.rpc.call_node_activate_master_ip(master_params.name,
                                                     master_params, ems)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)

    return clustername


def _ValidateNetmask(cfg, netmask):
  """Checks if a netmask is valid.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type netmask: int
  @param netmask: the netmask to be verified
  @raise errors.OpPrereqError: if the validation fails

  """
  ip_family = cfg.GetPrimaryIPFamily()
  try:
    ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
  except errors.ProgrammerError:
    raise errors.OpPrereqError("Invalid primary ip family: %s." %
                               ip_family, errors.ECODE_INVAL)
  if not ipcls.ValidateNetmask(netmask):
    raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
                               (netmask), errors.ECODE_INVAL)
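

# Usage sketch (illustrative): on an IPv4 cluster _ValidateNetmask(cfg, 24)
# passes while _ValidateNetmask(cfg, 64) raises OpPrereqError, since the
# netmask is interpreted as a CIDR prefix length for the cluster's primary
# IP family.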


class LUClusterSetParams(LogicalUnit):
  """Change the parameters of the cluster.

  """
  HPATH = "cluster-modify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  def CheckArguments(self):
    """Check parameters

    """
    if self.op.uid_pool:
      uidpool.CheckUidPool(self.op.uid_pool)

    if self.op.add_uids:
      uidpool.CheckUidPool(self.op.add_uids)

    if self.op.remove_uids:
      uidpool.CheckUidPool(self.op.remove_uids)

    if self.op.master_netmask is not None:
      _ValidateNetmask(self.cfg, self.op.master_netmask)

    if self.op.diskparams:
      for dt_params in self.op.diskparams.values():
        utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)

  def ExpandNames(self):
    # FIXME: in the future maybe other cluster params won't require checking on
    # all nodes to be modified.
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      }
    self.share_locks[locking.LEVEL_NODE] = 1

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_VG_NAME": self.op.vg_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the given params don't conflict and
    if the given volume group is valid.

    """
    if self.op.vg_name is not None and not self.op.vg_name:
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
                                   " instances exist", errors.ECODE_INVAL)

    if self.op.drbd_helper is not None and not self.op.drbd_helper:
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
        raise errors.OpPrereqError("Cannot disable drbd helper while"
                                   " drbd-based instances exist",
                                   errors.ECODE_INVAL)

    node_list = self.owned_locks(locking.LEVEL_NODE)

    # if vg_name not None, checks given volume group on all nodes
    if self.op.vg_name:
      vglist = self.rpc.call_vg_list(node_list)
      for node in node_list:
        msg = vglist[node].fail_msg
        if msg:
          # ignoring down node
          self.LogWarning("Error while gathering data on node %s"
                          " (ignoring node): %s", node, msg)
          continue
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
                                              self.op.vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          raise errors.OpPrereqError("Error on node '%s': %s" %
                                     (node, vgstatus), errors.ECODE_ENVIRON)

    if self.op.drbd_helper:
      # checks given drbd helper on all nodes
      helpers = self.rpc.call_drbd_helper(node_list)
      for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
        if ninfo.offline:
          self.LogInfo("Not checking drbd helper on offline node %s", node)
          continue
        msg = helpers[node].fail_msg
        if msg:
          raise errors.OpPrereqError("Error checking drbd helper on node"
                                     " '%s': %s" % (node, msg),
                                     errors.ECODE_ENVIRON)
        node_helper = helpers[node].payload
        if node_helper != self.op.drbd_helper:
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
                                     (node, node_helper), errors.ECODE_ENVIRON)

    self.cluster = cluster = self.cfg.GetClusterInfo()
    # validate params changes
    if self.op.beparams:
      objects.UpgradeBeParams(self.op.beparams)
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)

      # TODO: we need a more general way to handle resetting
      # cluster-level parameters to default values
      if self.new_ndparams["oob_program"] == "":
        self.new_ndparams["oob_program"] = \
          constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]

    if self.op.hv_state:
      new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
                                            self.cluster.hv_state_static)
      self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
                               for hv, values in new_hv_state.items())

    if self.op.disk_state:
      new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
                                                self.cluster.disk_state_static)
      self.new_disk_state = \
        dict((storage, dict((name, cluster.SimpleFillDiskState(values))
                            for name, values in svalues.items()))
             for storage, svalues in new_disk_state.items())

    if self.op.ipolicy:
      ipolicy = {}
      for key, value in self.op.ipolicy.items():
        utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
        ipolicy[key] = _GetUpdatedParams(cluster.ipolicy.get(key, {}),
                                         value)
      objects.InstancePolicy.CheckParameterSyntax(ipolicy)
      self.new_ipolicy = ipolicy

    if self.op.nicparams:
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
      nic_errors = []

      # check all instances for consistency
      for instance in self.cfg.GetAllInstancesInfo().values():
        for nic_idx, nic in enumerate(instance.nics):
          params_copy = copy.deepcopy(nic.nicparams)
          params_filled = objects.FillDict(self.new_nicparams, params_copy)

          # check parameter syntax
          try:
            objects.NIC.CheckParameterSyntax(params_filled)
          except errors.ConfigurationError, err:
            nic_errors.append("Instance %s, nic/%d: %s" %
                              (instance.name, nic_idx, err))

          # if we're moving instances to routed, check that they have an ip
          target_mode = params_filled[constants.NIC_MODE]
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
            nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
                              " address" % (instance.name, nic_idx))
      if nic_errors:
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
                                   "\n".join(nic_errors))

    # hypervisor list/parameters
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
    if self.op.hvparams:
      for hv_name, hv_dict in self.op.hvparams.items():
        if hv_name not in self.new_hvparams:
          self.new_hvparams[hv_name] = hv_dict
        else:
          self.new_hvparams[hv_name].update(hv_dict)

    # disk template parameters
    self.new_diskparams = objects.FillDict(cluster.diskparams, {})
    if self.op.diskparams:
      for dt_name, dt_params in self.op.diskparams.items():
        # note: membership must be tested against the merged dict, not
        # against self.op.diskparams (which trivially contains dt_name)
        if dt_name not in self.new_diskparams:
          self.new_diskparams[dt_name] = dt_params
        else:
          self.new_diskparams[dt_name].update(dt_params)

    # os hypervisor parameters
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
    if self.op.os_hvp:
      for os_name, hvs in self.op.os_hvp.items():
        if os_name not in self.new_os_hvp:
          self.new_os_hvp[os_name] = hvs
        else:
          for hv_name, hv_dict in hvs.items():
            if hv_name not in self.new_os_hvp[os_name]:
              self.new_os_hvp[os_name][hv_name] = hv_dict
            else:
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
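
    # Sketch of the merged structure (names purely illustrative): os_hvp maps
    # an OS name to per-hypervisor overrides, e.g.
    #   {"debian-image": {"kvm": {"kernel_path": "/boot/vmlinuz-kvmU"}}}
    # so the nested loop above merges new overrides on top of existing ones.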

    # os parameters
    self.new_osp = objects.FillDict(cluster.osparams, {})
    if self.op.osparams:
      for os_name, osp in self.op.osparams.items():
        if os_name not in self.new_osp:
          self.new_osp[os_name] = {}
        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
                                                  use_none=True)

        if not self.new_osp[os_name]:
          # we removed all parameters
          del self.new_osp[os_name]
        else:
          # check the parameter validity (remote check)
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
                         os_name, self.new_osp[os_name])

    # changes to the hypervisor list
    if self.op.enabled_hypervisors is not None:
      self.hv_list = self.op.enabled_hypervisors
      for hv in self.hv_list:
        # if the hypervisor doesn't already exist in the cluster
        # hvparams, we initialize it to empty, and then (in both
        # cases) we make sure to fill the defaults, as we might not
        # have a complete defaults list if the hypervisor wasn't
        # enabled before
        if hv not in new_hvp:
          new_hvp[hv] = {}
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
    else:
      self.hv_list = cluster.enabled_hypervisors

    if self.op.hvparams or self.op.enabled_hypervisors is not None:
      # either the enabled list has changed, or the parameters have, validate
      for hv_name, hv_params in self.new_hvparams.items():
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
            (self.op.enabled_hypervisors and
             hv_name in self.op.enabled_hypervisors)):
          # either this is a new hypervisor, or its parameters have changed
          hv_class = hypervisor.GetHypervisor(hv_name)
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          hv_class.CheckParameterSyntax(hv_params)
          _CheckHVParams(self, node_list, hv_name, hv_params)

    if self.op.os_hvp:
      # no need to check any newly-enabled hypervisors, since the
      # defaults have already been checked in the above code-block
      for os_name, os_hvp in self.new_os_hvp.items():
        for hv_name, hv_params in os_hvp.items():
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          # we need to fill in the new os_hvp on top of the actual hv_p
          cluster_defaults = self.new_hvparams.get(hv_name, {})
          new_osp = objects.FillDict(cluster_defaults, hv_params)
          hv_class = hypervisor.GetHypervisor(hv_name)
          hv_class.CheckParameterSyntax(new_osp)
          _CheckHVParams(self, node_list, hv_name, new_osp)

    if self.op.default_iallocator:
      alloc_script = utils.FindFile(self.op.default_iallocator,
                                    constants.IALLOCATOR_SEARCH_PATH,
                                    os.X_OK)
      if alloc_script is None:
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
                                   " specified" % self.op.default_iallocator,
                                   errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Change the parameters of the cluster.

    """
    if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      if not new_volume:
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
    if self.op.drbd_helper is not None:
      new_helper = self.op.drbd_helper
      if not new_helper:
        new_helper = None
      if new_helper != self.cfg.GetDRBDHelper():
        self.cfg.SetDRBDHelper(new_helper)
      else:
        feedback_fn("Cluster DRBD helper already in desired state,"
                    " not changing")
    if self.op.hvparams:
      self.cluster.hvparams = self.new_hvparams
    if self.op.os_hvp:
      self.cluster.os_hvp = self.new_os_hvp
    if self.op.enabled_hypervisors is not None:
      self.cluster.hvparams = self.new_hvparams
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
    if self.op.beparams:
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
    if self.op.nicparams:
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
    if self.op.ipolicy:
      self.cluster.ipolicy = self.new_ipolicy
    if self.op.osparams:
      self.cluster.osparams = self.new_osp
    if self.op.ndparams:
      self.cluster.ndparams = self.new_ndparams
    if self.op.diskparams:
      self.cluster.diskparams = self.new_diskparams
    if self.op.hv_state:
      self.cluster.hv_state_static = self.new_hv_state
    if self.op.disk_state:
      self.cluster.disk_state_static = self.new_disk_state

    if self.op.candidate_pool_size is not None:
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
      # we need to update the pool size here, otherwise the save will fail
      _AdjustCandidatePool(self, [])

    if self.op.maintain_node_health is not None:
      if self.op.maintain_node_health and not constants.ENABLE_CONFD:
        feedback_fn("Note: CONFD was disabled at build time, node health"
                    " maintenance is not useful (still enabling it)")
      self.cluster.maintain_node_health = self.op.maintain_node_health

    if self.op.prealloc_wipe_disks is not None:
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks

    if self.op.add_uids is not None:
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)

    if self.op.remove_uids is not None:
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)

    if self.op.uid_pool is not None:
      self.cluster.uid_pool = self.op.uid_pool

    if self.op.default_iallocator is not None:
      self.cluster.default_iallocator = self.op.default_iallocator

    if self.op.reserved_lvs is not None:
      self.cluster.reserved_lvs = self.op.reserved_lvs

    if self.op.use_external_mip_script is not None:
      self.cluster.use_external_mip_script = self.op.use_external_mip_script

    def helper_os(aname, mods, desc):
      desc += " OS list"
      lst = getattr(self.cluster, aname)
      for key, val in mods:
        if key == constants.DDM_ADD:
          if val in lst:
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
          else:
            lst.append(val)
        elif key == constants.DDM_REMOVE:
          if val in lst:
            lst.remove(val)
          else:
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
        else:
          raise errors.ProgrammerError("Invalid modification '%s'" % key)

    if self.op.hidden_os:
      helper_os("hidden_os", self.op.hidden_os, "hidden")

    if self.op.blacklisted_os:
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
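
    # Illustrative example (OS name invented): the modification lists passed
    # to helper_os are (action, os_name) pairs, e.g.
    #   helper_os("hidden_os", [(constants.DDM_ADD, "lenny-image")], "hidden")
    # would hide "lenny-image" unless it is already in the hidden OS list.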

    if self.op.master_netdev:
      master_params = self.cfg.GetMasterNetworkParameters()
      ems = self.cfg.GetUseExternalMipScript()
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
                  self.cluster.master_netdev)
      result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                       master_params, ems)
      result.Raise("Could not disable the master ip")
      feedback_fn("Changing master_netdev from %s to %s" %
                  (master_params.netdev, self.op.master_netdev))
      self.cluster.master_netdev = self.op.master_netdev

    if self.op.master_netmask:
      master_params = self.cfg.GetMasterNetworkParameters()
      feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
      result = self.rpc.call_node_change_master_netmask(master_params.name,
                                                        master_params.netmask,
                                                        self.op.master_netmask,
                                                        master_params.ip,
                                                        master_params.netdev)
      if result.fail_msg:
        msg = "Could not change the master IP netmask: %s" % result.fail_msg
        feedback_fn(msg)

      self.cluster.master_netmask = self.op.master_netmask

    self.cfg.Update(self.cluster, feedback_fn)

    if self.op.master_netdev:
      master_params = self.cfg.GetMasterNetworkParameters()
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
                  self.op.master_netdev)
      ems = self.cfg.GetUseExternalMipScript()
      result = self.rpc.call_node_activate_master_ip(master_params.name,
                                                     master_params, ems)
      if result.fail_msg:
        self.LogWarning("Could not re-enable the master ip on"
                        " the master, please restart manually: %s",
                        result.fail_msg)


def _UploadHelper(lu, nodes, fname):
  """Helper for uploading a file and showing warnings.

  """
  if os.path.exists(fname):
    result = lu.rpc.call_upload_file(nodes, fname)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        msg = ("Copy of file %s to node %s failed: %s" %
               (fname, to_node, msg))
        lu.proc.LogWarning(msg)


def _ComputeAncillaryFiles(cluster, redist):
  """Compute files external to Ganeti which need to be consistent.

  @type redist: boolean
  @param redist: Whether to include files which need to be redistributed

  """
  # Compute files for all nodes
  files_all = set([
    constants.SSH_KNOWN_HOSTS_FILE,
    constants.CONFD_HMAC_KEY,
    constants.CLUSTER_DOMAIN_SECRET_FILE,
    constants.SPICE_CERT_FILE,
    constants.SPICE_CACERT_FILE,
    constants.RAPI_USERS_FILE,
    ])

  if not redist:
    files_all.update(constants.ALL_CERT_FILES)
    files_all.update(ssconf.SimpleStore().GetFileList())
  else:
    # we need to ship at least the RAPI certificate
    files_all.add(constants.RAPI_CERT_FILE)

  if cluster.modify_etc_hosts:
    files_all.add(constants.ETC_HOSTS)

  # Files which are optional, these must:
  # - be present in one other category as well
  # - either exist or not exist on all nodes of that category (mc, vm all)
  files_opt = set([
    constants.RAPI_USERS_FILE,
    ])

  # Files which should only be on master candidates
  files_mc = set()

  if not redist:
    files_mc.add(constants.CLUSTER_CONF_FILE)

    # FIXME: this should also be replicated but Ganeti doesn't support files_mc
    # replication
    files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)

  # Files which should only be on VM-capable nodes
  files_vm = set(filename
    for hv_name in cluster.enabled_hypervisors
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])

  files_opt |= set(filename
    for hv_name in cluster.enabled_hypervisors
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])

  # Filenames in each category must be unique
  all_files_set = files_all | files_mc | files_vm
  assert (len(all_files_set) ==
          sum(map(len, [files_all, files_mc, files_vm]))), \
    "Found file listed in more than one file list"

  # Optional files must be present in one other category
  assert all_files_set.issuperset(files_opt), \
    "Optional file not in a different required list"

  return (files_all, files_opt, files_mc, files_vm)


def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to
  @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
  # Gather target nodes
  cluster = lu.cfg.GetClusterInfo()
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())

  online_nodes = lu.cfg.GetOnlineNodeList()
  vm_nodes = lu.cfg.GetVmCapableNodeList()

  if additional_nodes is not None:
    online_nodes.extend(additional_nodes)
    if additional_vm:
      vm_nodes.extend(additional_nodes)

  # Never distribute to master node
  for nodelist in [online_nodes, vm_nodes]:
    if master_info.name in nodelist:
      nodelist.remove(master_info.name)

  # Gather file lists
  (files_all, _, files_mc, files_vm) = \
    _ComputeAncillaryFiles(cluster, True)

  # Never re-distribute configuration file from here
  assert not (constants.CLUSTER_CONF_FILE in files_all or
              constants.CLUSTER_CONF_FILE in files_vm)
  assert not files_mc, "Master candidates not handled in this function"

  filemap = [
    (online_nodes, files_all),
    (vm_nodes, files_vm),
    ]

  # Upload the files
  for (node_list, files) in filemap:
    for fname in files:
      _UploadHelper(lu, node_list, fname)
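

# Usage sketch (mirroring the calls made elsewhere in this module): after
# changing cluster-wide files, a LU invokes e.g.
#   _RedistributeAncillaryFiles(self)
# or, when a node was just added and is not yet in the configuration,
#   _RedistributeAncillaryFiles(self, additional_nodes=[node],
#                               additional_vm=self.op.vm_capable)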


class LUClusterRedistConf(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      }
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)


class LUClusterActivateMasterIp(NoHooksLU):
  """Activate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Activate the master IP.

    """
    master_params = self.cfg.GetMasterNetworkParameters()
    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_activate_master_ip(master_params.name,
                                                   master_params, ems)
    result.Raise("Could not activate the master IP")


class LUClusterDeactivateMasterIp(NoHooksLU):
  """Deactivate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Deactivate the master IP.

    """
    master_params = self.cfg.GetMasterNetworkParameters()
    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
    result.Raise("Could not deactivate the master IP")


def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  """
  if not instance.disks or disks is not None and not disks:
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    retries = 0
    rstats = rstats.payload
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                      node, disks[i].iv_name)
        continue

      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (disks[i].iv_name, mstat.sync_percent, rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded
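

# Typical call pattern (sketch based on usage elsewhere in this module):
# after creating or activating mirrored disks, a LU does something like
#   disk_abort = not _WaitForSync(lu, instance)
# and then decides whether to warn or abort if disks are still degraded.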


def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)

  return result
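

# Illustrative use (an assumption, following the DRBD replace-disks flow):
# a caller can test only the local storage side of a mirror with
#   _CheckDiskConsistency(lu, dev, node_name, False, ldisk=True)
# which checks ldisk_status instead of the overall is_degraded flag.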


class LUOobCommand(NoHooksLU):
  """Logical unit for OOB handling.

  """
  _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)

  def ExpandNames(self):
    """Gather locks we need.

    """
    if self.op.node_names:
      self.op.node_names = _GetWantedNodes(self, self.op.node_names)
      lock_names = self.op.node_names
    else:
      lock_names = locking.ALL_SET

    self.needed_locks = {
      locking.LEVEL_NODE: lock_names,
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - OOB is supported

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.nodes = []
    self.master_node = self.cfg.GetMasterNode()

    assert self.op.power_delay >= 0.0

    if self.op.node_names:
      if (self.op.command in self._SKIP_MASTER and
          self.master_node in self.op.node_names):
        master_node_obj = self.cfg.GetNodeInfo(self.master_node)
        master_oob_handler = _SupportsOob(self.cfg, master_node_obj)

        if master_oob_handler:
          additional_text = ("run '%s %s %s' if you want to operate on the"
                             " master regardless") % (master_oob_handler,
                                                      self.op.command,
                                                      self.master_node)
        else:
          additional_text = "it does not support out-of-band operations"

        raise errors.OpPrereqError(("Operating on the master node %s is not"
                                    " allowed for %s; %s") %
                                   (self.master_node, self.op.command,
                                    additional_text), errors.ECODE_INVAL)
    else:
      self.op.node_names = self.cfg.GetNodeList()
      if self.op.command in self._SKIP_MASTER:
        self.op.node_names.remove(self.master_node)

    if self.op.command in self._SKIP_MASTER:
      assert self.master_node not in self.op.node_names

    for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
      if node is None:
        raise errors.OpPrereqError("Node %s not found" % node_name,
                                   errors.ECODE_NOENT)
      else:
        self.nodes.append(node)

      if (not self.op.ignore_status and
          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
                                    " not marked offline") % node_name,
                                   errors.ECODE_STATE)

  def Exec(self, feedback_fn):
    """Execute OOB and return result if we expect any.

    """
    master_node = self.master_node
    ret = []

    for idx, node in enumerate(utils.NiceSort(self.nodes,
                                              key=lambda node: node.name)):
      node_entry = [(constants.RS_NORMAL, node.name)]
      ret.append(node_entry)

      oob_program = _SupportsOob(self.cfg, node)

      if not oob_program:
        node_entry.append((constants.RS_UNAVAIL, None))
        continue

      logging.info("Executing out-of-band command '%s' using '%s' on %s",
                   self.op.command, oob_program, node.name)
      result = self.rpc.call_run_oob(master_node, oob_program,
                                     self.op.command, node.name,
                                     self.op.timeout)

      if result.fail_msg:
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
                        node.name, result.fail_msg)
        node_entry.append((constants.RS_NODATA, None))
      else:
        try:
          self._CheckPayload(result)
        except errors.OpExecError, err:
          self.LogWarning("Payload returned by node '%s' is not valid: %s",
                          node.name, err)
          node_entry.append((constants.RS_NODATA, None))
        else:
          if self.op.command == constants.OOB_HEALTH:
            # For health we should log important events
            for item, status in result.payload:
              if status in [constants.OOB_STATUS_WARNING,
                            constants.OOB_STATUS_CRITICAL]:
                self.LogWarning("Item '%s' on node '%s' has status '%s'",
                                item, node.name, status)

          if self.op.command == constants.OOB_POWER_ON:
            node.powered = True
          elif self.op.command == constants.OOB_POWER_OFF:
            node.powered = False
          elif self.op.command == constants.OOB_POWER_STATUS:
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
            if powered != node.powered:
              logging.warning(("Recorded power state (%s) of node '%s' does"
                               " not match actual power state (%s)"),
                              node.powered, node.name, powered)

          # For configuration changing commands we should update the node
          if self.op.command in (constants.OOB_POWER_ON,
                                 constants.OOB_POWER_OFF):
            self.cfg.Update(node, feedback_fn)

          node_entry.append((constants.RS_NORMAL, result.payload))

          if (self.op.command == constants.OOB_POWER_ON and
              idx < len(self.nodes) - 1):
            time.sleep(self.op.power_delay)

    return ret
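
  # Shape of the data returned by Exec (sketch derived from the code above):
  # one entry per node, e.g. for a successful power-status query
  #   [[(constants.RS_NORMAL, "node1"),
  #     (constants.RS_NORMAL, {constants.OOB_POWER_STATUS_POWERED: True})]]
  # with RS_UNAVAIL or RS_NODATA taking the payload's place on errors.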

  def _CheckPayload(self, result):
    """Checks if the payload is valid.

    @param result: RPC result
    @raises errors.OpExecError: If payload is not valid

    """
    errs = []
    if self.op.command == constants.OOB_HEALTH:
      if not isinstance(result.payload, list):
        errs.append("command 'health' is expected to return a list but got %s" %
                    type(result.payload))
      else:
        for item, status in result.payload:
          if status not in constants.OOB_STATUSES:
            errs.append("health item '%s' has invalid status '%s'" %
                        (item, status))

    if self.op.command == constants.OOB_POWER_STATUS:
      if not isinstance(result.payload, dict):
        errs.append("power-status is expected to return a dict but got %s" %
                    type(result.payload))

    if self.op.command in [
        constants.OOB_POWER_ON,
        constants.OOB_POWER_OFF,
        constants.OOB_POWER_CYCLE,
        ]:
      if result.payload is not None:
        errs.append("%s is expected to not return payload but got '%s'" %
                    (self.op.command, result.payload))

    if errs:
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
                               utils.CommaJoin(errs))
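
  # Expected payload shapes checked above (summary, not new behavior):
  #   "health"       -> list of (item, status) pairs, status in OOB_STATUSES
  #   "power-status" -> dict, e.g. {constants.OOB_POWER_STATUS_POWERED: True}
  #   power-on/off/cycle -> no payload (None)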


class _OsQuery(_QueryBase):
  FIELDS = query.OS_FIELDS

  def ExpandNames(self, lu):
    # Lock all nodes in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    lu.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    # The following variables interact with _QueryBase._GetNames
    if self.names:
      self.wanted = self.names
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = self.use_locking

  def DeclareLocks(self, lu, level):
    pass

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and tuples of (path, status, diagnose,
        variants, parameters, api_versions) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "", [], [])]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for (name, path, status, diagnose, variants,
           params, api_versions) in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        # convert params from [name, help] to (name, help)
        params = [tuple(v) for v in params]
        all_os[name][node_name].append((path, status, diagnose,
                                        variants, params, api_versions))
    return all_os

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    # Locking is not used
    assert not (compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) or
                self.do_locking or self.use_locking)

    valid_nodes = [node.name
                   for node in lu.cfg.GetAllNodesInfo().values()
                   if not node.offline and node.vm_capable]
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
    cluster = lu.cfg.GetClusterInfo()

    data = {}

    for (os_name, os_data) in pol.items():
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
                          hidden=(os_name in cluster.hidden_os),
                          blacklisted=(os_name in cluster.blacklisted_os))

      variants = set()
      parameters = set()
      api_versions = set()

      for idx, osl in enumerate(os_data.values()):
        info.valid = bool(info.valid and osl and osl[0][1])
        if not info.valid:
          break

        (node_variants, node_params, node_api) = osl[0][3:6]
        if idx == 0:
          # First entry
          variants.update(node_variants)
          parameters.update(node_params)
          api_versions.update(node_api)
        else:
          # Filter out inconsistent values
          variants.intersection_update(node_variants)
          parameters.intersection_update(node_params)
          api_versions.intersection_update(node_api)

      info.variants = list(variants)
      info.parameters = list(parameters)
      info.api_versions = list(api_versions)

      data[os_name] = info

    # Prepare data in requested order
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
            if name in data]


class LUOsDiagnose(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  REQ_BGL = False

  @staticmethod
  def _BuildFilter(fields, names):
    """Builds a filter for querying OSes.

    """
    name_filter = qlang.MakeSimpleFilter("name", names)

    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
    # respective field is not requested
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
                     for fname in ["hidden", "blacklisted"]
                     if fname not in fields]
    if "valid" not in fields:
      status_filter.append([qlang.OP_TRUE, "valid"])

    if status_filter:
      status_filter.insert(0, qlang.OP_AND)
    else:
      status_filter = None

    if name_filter and status_filter:
      return [qlang.OP_AND, name_filter, status_filter]
    elif name_filter:
      return name_filter
    else:
      return status_filter
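
  # Example of a generated filter (illustrative; the exact name sub-filter
  # depends on qlang.MakeSimpleFilter): asking for only the "name" field of
  # one OS yields roughly
  #   [OP_AND, <name filter>,
  #    [OP_AND, [OP_NOT, [OP_TRUE, "hidden"]],
  #     [OP_NOT, [OP_TRUE, "blacklisted"]], [OP_TRUE, "valid"]]]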

  def CheckArguments(self):
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
                       self.op.output_fields, False)

  def ExpandNames(self):
    self.oq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.oq.OldStyleQuery(self)


class LUNodeRemove(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      logging.warning("Node '%s', which is about to be removed, was not found"
                      " in the list of all nodes", self.op.node_name)
    return (all_nodes, all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node, failover to another"
                                 " node is required", errors.ECODE_INVAL)

    for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first" % instance_name,
                                   errors.ECODE_INVAL)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
      "Not owning BGL"

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    _RunPostHook(self, node.name)

    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)

    # Remove node from our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_REMOVE,
                                              node.name, None)
      result.Raise("Can't update hosts file with new host data")
      _RedistributeAncillaryFiles(self)


class _NodeQuery(_QueryBase):
  FIELDS = query.NODE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedNodes(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.NQ_LIVE in self.requested_data)

    if self.do_locking:
      # If any non-static field is requested we need to lock the nodes
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    all_info = lu.cfg.GetAllNodesInfo()

    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)

    # Gather data as requested
    if query.NQ_LIVE in self.requested_data:
      # filter out non-vm_capable nodes
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]

      node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
                                        [lu.cfg.GetHypervisorType()])
      live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
                       for (name, nresult) in node_data.items()
                       if not nresult.fail_msg and nresult.payload)
    else:
      live_data = None

    if query.NQ_INST in self.requested_data:
      node_to_primary = dict([(name, set()) for name in nodenames])
      node_to_secondary = dict([(name, set()) for name in nodenames])

      inst_data = lu.cfg.GetAllInstancesInfo()

      for inst in inst_data.values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)
    else:
      node_to_primary = None
      node_to_secondary = None

    if query.NQ_OOB in self.requested_data:
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
                         for name, node in all_info.iteritems())
    else:
      oob_support = None

    if query.NQ_GROUP in self.requested_data:
      groups = lu.cfg.GetAllNodeGroupsInfo()
    else:
      groups = {}

    return query.NodeQueryData([all_info[name] for name in nodenames],
                               live_data, lu.cfg.GetMasterNode(),
                               node_to_primary, node_to_secondary, groups,
                               oob_support, lu.cfg.GetClusterInfo())


class LUNodeQuery(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
                         self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.nq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.nq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.nq.OldStyleQuery(self)


class LUNodeQueryvols(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {}

    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    nodenames = self.owned_locks(locking.LEVEL_NODE)
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = self.cfg.GetAllInstancesInfo()
    vol2inst = _MapInstanceDisksToNodes(ilist.values())

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = sorted(nresult.payload,
                         key=operator.itemgetter("dev"))

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol["dev"]
          elif field == "vg":
            val = vol["vg"]
          elif field == "name":
            val = vol["name"]
          elif field == "size":
            val = int(float(vol["size"]))
          elif field == "instance":
            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output


class LUNodeQueryStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  REQ_BGL = False

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {}

    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    self.nodes = self.owned_locks(locking.LEVEL_NODE)

    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields

    # Never ask for node or type as it's only known to the LU
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
      while extra in fields:
        fields.remove(extra)

    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]

        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(val)

        result.append(out)

    return result


class _InstanceQuery(_QueryBase):
  FIELDS = query.INSTANCE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedInstances(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.IQ_LIVE in self.requested_data)
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      lu.needed_locks[locking.LEVEL_NODEGROUP] = []
      lu.needed_locks[locking.LEVEL_NODE] = []
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.do_grouplocks = (self.do_locking and
                          query.IQ_NODES in self.requested_data)

  def DeclareLocks(self, lu, level):
    if self.do_locking:
      if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
        assert not lu.needed_locks[locking.LEVEL_NODEGROUP]

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lu.needed_locks[locking.LEVEL_NODEGROUP] = \
          set(group_uuid
              for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
              for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
      elif level == locking.LEVEL_NODE:
        lu._LockInstancesNodes() # pylint: disable=W0212

  @staticmethod
  def _CheckGroupLocks(lu):
    owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))

    # Check if node groups for locked instances are still correct
    for instance_name in owned_instances:
      _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)

  def _GetQueryData(self, lu):
    """Computes the list of instances and their attributes.

    """
    if self.do_grouplocks:
      self._CheckGroupLocks(lu)

    cluster = lu.cfg.GetClusterInfo()
    all_info = lu.cfg.GetAllInstancesInfo()

    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)

    instance_list = [all_info[name] for name in instance_names]
    nodes = frozenset(itertools.chain(*(inst.all_nodes
                                        for inst in instance_list)))
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
    bad_nodes = []
    offline_nodes = []
    wrongnode_inst = set()

    # Gather data as requested
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
      live_data = {}
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          assert result.fail_msg
          offline_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        elif result.payload:
          for inst in result.payload:
            if inst in all_info:
              if all_info[inst].primary_node == name:
                live_data.update(result.payload)
              else:
                wrongnode_inst.add(inst)
            else:
              # orphan instance; we don't list it here as we don't
              # handle this case yet in the output of instance listing
              logging.warning("Orphan instance '%s' found on node %s",
                              inst, name)
        # else no instance is alive
    else:
      live_data = {}

    if query.IQ_DISKUSAGE in self.requested_data:
      disk_usage = dict((inst.name,
                         _ComputeDiskSize(inst.disk_template,
                                          [{constants.IDISK_SIZE: disk.size}
                                           for disk in inst.disks]))
                        for inst in instance_list)
    else:
      disk_usage = None

    if query.IQ_CONSOLE in self.requested_data:
      consinfo = {}
      for inst in instance_list:
        if inst.name in live_data:
          # Instance is running
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
        else:
          consinfo[inst.name] = None
      assert set(consinfo.keys()) == set(instance_names)
    else:
      consinfo = None

    if query.IQ_NODES in self.requested_data:
      node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
                                            instance_list)))
      nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
      groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
                    for uuid in set(map(operator.attrgetter("group"),
                                        nodes.values())))
    else:
      nodes = None
      groups = None

    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
                                   disk_usage, offline_nodes, bad_nodes,
                                   live_data, wrongnode_inst, consinfo,
                                   nodes, groups)


class LUQuery(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    qcls = _GetQueryImplementation(self.op.what)

    self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)

  def ExpandNames(self):
    self.impl.ExpandNames(self)

  def DeclareLocks(self, level):
    self.impl.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.impl.NewStyleQuery(self)


class LUQueryFields(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.qcls = _GetQueryImplementation(self.op.what)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)


class LUNodeModifyStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(self.op.node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))
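

# Illustrative example (a sketch, parameters invented): with LVM physical
# volumes the only modifiable field is "allocatable", so an opcode carrying
#   storage_type=constants.ST_LVM_PV, changes={constants.SF_ALLOCATABLE: False}
# would mark the given PV as non-allocatable on the node.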


class LUNodeAdd(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _NFLAGS = ["master_capable", "vm_capable"]

  def CheckArguments(self):
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
    # validate/normalize the node name
    self.hostname = netutils.GetHostname(name=self.op.node_name,
                                         family=self.primary_ip_family)
    self.op.node_name = self.hostname.name

    if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
      raise errors.OpPrereqError("Cannot readd the master node",
                                 errors.ECODE_STATE)

    if self.op.readd and self.op.group:
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
                                 " being readded", errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # Exclude added node
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
    post_nodes = pre_nodes + [self.op.node_name, ]

    return (pre_nodes, post_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) matches the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    cfg = self.cfg
    hostname = self.hostname
    node = hostname.name
    primary_ip = self.op.primary_ip = hostname.ip
    if self.op.secondary_ip is None:
      if self.primary_ip_family == netutils.IP6Address.family:
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
                                   " IPv4 address must be given as secondary",
                                   errors.ECODE_INVAL)
      self.op.secondary_ip = primary_ip

    secondary_ip = self.op.secondary_ip
    if not netutils.IP4Address.IsValid(secondary_ip):
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                 " address" % secondary_ip, errors.ECODE_INVAL)

    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)

    self.changed_primary_ip = False

    for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
      if self.op.readd and node == existing_node_name:
        if existing_node.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # After this 'if' block, None is no longer a valid value for the
    # _capable op attributes
    if self.op.readd:
      old_node = self.cfg.GetNodeInfo(node)
      assert old_node is not None, "Can't retrieve locked node %s" % node
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, getattr(old_node, attr))
    else:
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, True)

    if self.op.readd and not self.op.vm_capable:
      pri, sec = cfg.GetNodeInstances(node)
      if pri or sec:
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
                                   " flag set to false, but it already holds"
                                   " instances" % node,
                                   errors.ECODE_STATE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no secondary ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a secondary ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)

    # checks reachability
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                              source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to node daemon port",
                                   errors.ECODE_ENVIRON)

    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    if self.op.master_capable:
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
    else:
      self.master_candidate = False

    if self.op.readd:
      self.new_node = old_node
    else:
      node_group = cfg.LookupNodeGroup(self.op.group)
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False,
                                   group=node_group)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

    if self.op.hv_state:
      self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)

    if self.op.disk_state:
      self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    new_node = self.new_node
    node = new_node.name

    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
      "Not owning BGL"

    # We are adding a new node, so we assume it's powered
    new_node.powered = True

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # copy the master/vm_capable flags
    for attr in self._NFLAGS:
      setattr(new_node, attr, getattr(self.op, attr))

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    if self.op.ndparams:
      new_node.ndparams = self.op.ndparams
    else:
      new_node.ndparams = {}

    if self.op.hv_state:
      new_node.hv_state_static = self.new_hv_state

    if self.op.disk_state:
      new_node.disk_state_static = self.new_disk_state

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result.payload))

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_ADD,
                                              self.hostname.name,
                                              self.hostname.ip)
      result.Raise("Can't update hosts file with new host data")

    if new_node.secondary_ip != new_node.primary_ip:
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
                               False)

    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: ([node], {}),
      # TODO: do a node-net-test as well?
      }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
                                  additional_vm=self.op.vm_capable)
      self.context.AddNode(new_node, self.proc.GetECId())


class LUNodeSetParams(LogicalUnit):
  """Modifies the parameters of a node.

  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
      to the node role (as _ROLE_*)
  @cvar _R2F: a dictionary from node role to tuples of flags
  @cvar _FLAGS: a list of attribute names corresponding to the flags

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
  _F2R = {
    (True, False, False): _ROLE_CANDIDATE,
    (False, True, False): _ROLE_DRAINED,
    (False, False, True): _ROLE_OFFLINE,
    (False, False, False): _ROLE_REGULAR,
    }
  _R2F = dict((v, k) for k, v in _F2R.items())
  _FLAGS = ["master_candidate", "drained", "offline"]
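
  # Example (illustrative): an offline node corresponds to the flag tuple
  # (master_candidate=False, drained=False, offline=True), so
  # _F2R[(False, False, True)] == _ROLE_OFFLINE, while _R2F[_ROLE_OFFLINE]
  # recovers the tuple again.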

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
                self.op.master_capable, self.op.vm_capable,
                self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
                self.op.disk_state]
    if all_mods.count(None) == len(all_mods):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we might be demoting from MC
    self.might_demote = (self.op.master_candidate == False or
                         self.op.offline == True or
                         self.op.drained == True or
                         self.op.master_capable == False)

    if self.op.secondary_ip:
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                   " address" % self.op.secondary_ip,
                                   errors.ECODE_INVAL)

    self.lock_all = self.op.auto_promote and self.might_demote
    self.lock_instances = self.op.secondary_ip is not None
5665 def _InstanceFilter(self, instance):
5666 """Filter for getting affected instances.
5669 return (instance.disk_template in constants.DTS_INT_MIRROR and
5670 self.op.node_name in instance.all_nodes)
  def ExpandNames(self):
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5678 # Since modifying a node can have severe effects on currently running
5679 # operations the resource lock is at least acquired in shared mode
5680 self.needed_locks[locking.LEVEL_NODE_RES] = \
5681 self.needed_locks[locking.LEVEL_NODE]
5683 # Get node resource and instance locks in shared mode; they are not used
5684 # for anything but read-only access
5685 self.share_locks[locking.LEVEL_NODE_RES] = 1
5686 self.share_locks[locking.LEVEL_INSTANCE] = 1
5688 if self.lock_instances:
5689 self.needed_locks[locking.LEVEL_INSTANCE] = \
5690 frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }
5707 def BuildHooksNodes(self):
5708 """Build hooks nodes.
5711 nl = [self.cfg.GetMasterNode(), self.op.node_name]
5714 def CheckPrereq(self):
5715 """Check prerequisites.
    This only checks the instance list against the existing names.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5722 if self.lock_instances:
5723 affected_instances = \
5724 self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5726 # Verify instance locks
5727 owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5728 wanted_instances = frozenset(affected_instances.keys())
5729 if wanted_instances - owned_instances:
5730 raise errors.OpPrereqError("Instances affected by changing node %s's"
5731 " secondary IP address have changed since"
5732 " locks were acquired, wanted '%s', have"
5733 " '%s'; retry the operation" %
5735 utils.CommaJoin(wanted_instances),
5736 utils.CommaJoin(owned_instances)),
5739 affected_instances = None
5741 if (self.op.master_candidate is not None or
5742 self.op.drained is not None or
5743 self.op.offline is not None):
5744 # we can't change the master's node flags
5745 if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via master-failover",
                                   errors.ECODE_INVAL)
5750 if self.op.master_candidate and not node.master_capable:
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
                                 " it a master candidate" % node.name,
                                 errors.ECODE_STATE)
    if self.op.vm_capable == False:
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
      if ipri or isec:
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
                                   " the vm_capable flag" % node.name,
                                   errors.ECODE_STATE)
5762 if node.master_candidate and self.might_demote and not self.lock_all:
5763 assert not self.op.auto_promote, "auto_promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
5766 (mc_remaining, mc_should, _) = \
5767 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5768 if mc_remaining < mc_should:
5769 raise errors.OpPrereqError("Not enough master candidates, please"
5770 " pass auto promote option to allow"
5771 " promotion", errors.ECODE_STATE)
5773 self.old_flags = old_flags = (node.master_candidate,
5774 node.drained, node.offline)
5775 assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5776 self.old_role = old_role = self._F2R[old_flags]
5778 # Check for ineffective changes
5779 for attr in self._FLAGS:
5780 if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5781 self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5782 setattr(self.op, attr, None)
5784 # Past this point, any flag change to False means a transition
5785 # away from the respective state, as only real changes are kept
5787 # TODO: We might query the real power state if it supports OOB
5788 if _SupportsOob(self.cfg, node):
5789 if self.op.offline is False and not (node.powered or
5790 self.op.powered == True):
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
                                    " offline status can be reset") %
                                   self.op.node_name)
5794 elif self.op.powered is not None:
5795 raise errors.OpPrereqError(("Unable to change powered state for node %s"
5796 " as it does not support out-of-band"
5797 " handling") % self.op.node_name)
5799 # If we're being deofflined/drained, we'll MC ourself if needed
5800 if (self.op.drained == False or self.op.offline == False or
5801 (self.op.master_capable and not node.master_capable)):
5802 if _DecideSelfPromotion(self):
5803 self.op.master_candidate = True
5804 self.LogInfo("Auto-promoting node to master candidate")
5806 # If we're no longer master capable, we'll demote ourselves from MC
5807 if self.op.master_capable == False and node.master_candidate:
5808 self.LogInfo("Demoting from master candidate")
5809 self.op.master_candidate = False
5812 assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5813 if self.op.master_candidate:
5814 new_role = self._ROLE_CANDIDATE
5815 elif self.op.drained:
5816 new_role = self._ROLE_DRAINED
5817 elif self.op.offline:
5818 new_role = self._ROLE_OFFLINE
5819 elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
      # False is still in new flags, which means we're un-setting (the
      # offline) flag
      new_role = self._ROLE_REGULAR
    else: # no new flags, nothing, keep old role
      new_role = old_role

    self.new_role = new_role
5828 if old_role == self._ROLE_OFFLINE and new_role != old_role:
5829 # Trying to transition out of offline status
5830 # TODO: Use standard RPC runner, but make sure it works when the node is
5831 # still marked offline
      result = rpc.BootstrapRunner().call_version([node.name])[node.name]
      if result.fail_msg:
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
                                   " to report its version: %s" %
                                   (node.name, result.fail_msg),
                                   errors.ECODE_STATE)
      else:
        self.LogWarning("Transitioning node from offline to online state"
                        " without using re-add. Please make sure the node"
                        " is healthy!")
5843 if self.op.secondary_ip:
5844 # Ok even without locking, because this can't be changed by any LU
5845 master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5846 master_singlehomed = master.secondary_ip == master.primary_ip
5847 if master_singlehomed and self.op.secondary_ip:
5848 raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5849 " homed cluster", errors.ECODE_INVAL)
5851 assert not (frozenset(affected_instances) -
5852 self.owned_locks(locking.LEVEL_INSTANCE))
      if node.offline:
        if affected_instances:
5856 raise errors.OpPrereqError("Cannot change secondary IP address:"
5857 " offline node has instances (%s)"
5858 " configured to use it" %
5859 utils.CommaJoin(affected_instances.keys()))
      else:
        # On online nodes, check that no instances are running, and that
        # the node has the new ip and we can reach it.
5863 for instance in affected_instances.values():
5864 _CheckInstanceState(self, instance, INSTANCE_DOWN,
5865 msg="cannot change secondary ip")
5867 _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5868 if master.name != node.name:
5869 # check reachability from master secondary ip to new secondary ip
5870 if not netutils.TcpPing(self.op.secondary_ip,
5871 constants.DEFAULT_NODED_PORT,
5872 source=master.secondary_ip):
5873 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5874 " based ping to node daemon port",
5875 errors.ECODE_ENVIRON)
5877 if self.op.ndparams:
5878 new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5879 utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5880 self.new_ndparams = new_ndparams
5882 if self.op.hv_state:
5883 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
5884 self.node.hv_state_static)
5886 if self.op.disk_state:
5887 self.new_disk_state = \
5888 _MergeAndVerifyDiskState(self.op.disk_state,
5889 self.node.disk_state_static)
  def Exec(self, feedback_fn):
    """Modifies a node.

    """
    node = self.node
    old_role = self.old_role
    new_role = self.new_role

    result = []
5901 if self.op.ndparams:
5902 node.ndparams = self.new_ndparams
5904 if self.op.powered is not None:
5905 node.powered = self.op.powered
5907 if self.op.hv_state:
5908 node.hv_state_static = self.new_hv_state
5910 if self.op.disk_state:
5911 node.disk_state_static = self.new_disk_state
5913 for attr in ["master_capable", "vm_capable"]:
      val = getattr(self.op, attr)
      if val is not None:
        setattr(node, attr, val)
        result.append((attr, str(val)))
5919 if new_role != old_role:
5920 # Tell the node to demote itself, if no longer MC and not offline
5921 if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s", msg)
5926 new_flags = self._R2F[new_role]
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
        if of != nf:
          result.append((desc, str(nf)))
5930 (node.master_candidate, node.drained, node.offline) = new_flags
      # we locked all nodes, we adjust the CP before updating this node
      if self.lock_all:
        _AdjustCandidatePool(self, [node.name])
5936 if self.op.secondary_ip:
5937 node.secondary_ip = self.op.secondary_ip
5938 result.append(("secondary_ip", self.op.secondary_ip))
5940 # this will trigger configuration file update, if needed
5941 self.cfg.Update(node, feedback_fn)
    # this will trigger job queue propagation or cleanup if the mc
    # flag changed
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
      self.context.ReaddNode(node)

    return result
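# Illustrative sketch (not part of the original module): how LUNodeSetParams'
# _F2R/_R2F tables translate between the (master_candidate, drained, offline)
# flag tuple and a single role constant. The function name is hypothetical.
def _ExampleNodeRoleRoundTrip():
  """Shows the flag-tuple <-> role round-trip used when modifying a node."""
  flags = (False, True, False)                  # a drained node
  role = LUNodeSetParams._F2R[flags]
  assert role == LUNodeSetParams._ROLE_DRAINED
  # _R2F inverts the mapping, yielding the flags to store on the node object
  assert LUNodeSetParams._R2F[role] == flags
  return role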
5951 class LUNodePowercycle(NoHooksLU):
5952 """Powercycles a node.
5957 def CheckArguments(self):
5958 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5959 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)
5964 def ExpandNames(self):
5965 """Locking for PowercycleNode.
5967 This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
5971 self.needed_locks = {}
  def Exec(self, feedback_fn):
    """Reboot the node.

    """
5977 result = self.rpc.call_node_powercycle(self.op.node_name,
5978 self.cfg.GetHypervisorType())
5979 result.Raise("Failed to schedule the reboot")
5980 return result.payload
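# Illustrative sketch (not part of the original module): how a client-side
# request for the LU above is typically expressed; node name is hypothetical.
def _ExamplePowercycleOpcode():
  """Builds the opcode that LUNodePowercycle processes."""
  return opcodes.OpNodePowercycle(node_name="node1.example.com",
                                  force=False)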
5983 class LUClusterQuery(NoHooksLU):
5984 """Query cluster configuration.
5989 def ExpandNames(self):
5990 self.needed_locks = {}
5992 def Exec(self, feedback_fn):
5993 """Return cluster config.
5996 cluster = self.cfg.GetClusterInfo()
    os_hvp = {}
    # Filter just for enabled hypervisors
6000 for os_name, hv_dict in cluster.os_hvp.items():
6001 os_hvp[os_name] = {}
6002 for hv_name, hv_params in hv_dict.items():
6003 if hv_name in cluster.enabled_hypervisors:
6004 os_hvp[os_name][hv_name] = hv_params
6006 # Convert ip_family to ip_version
6007 primary_ip_version = constants.IP4_VERSION
6008 if cluster.primary_ip_family == netutils.IP6Address.family:
6009 primary_ip_version = constants.IP6_VERSION
6012 "software_version": constants.RELEASE_VERSION,
6013 "protocol_version": constants.PROTOCOL_VERSION,
6014 "config_version": constants.CONFIG_VERSION,
6015 "os_api_version": max(constants.OS_API_VERSIONS),
6016 "export_version": constants.EXPORT_VERSION,
6017 "architecture": (platform.architecture()[0], platform.machine()),
6018 "name": cluster.cluster_name,
6019 "master": cluster.master_node,
6020 "default_hypervisor": cluster.primary_hypervisor,
6021 "enabled_hypervisors": cluster.enabled_hypervisors,
6022 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
6023 for hypervisor_name in cluster.enabled_hypervisors]),
6025 "beparams": cluster.beparams,
6026 "osparams": cluster.osparams,
6027 "ipolicy": cluster.ipolicy,
6028 "nicparams": cluster.nicparams,
6029 "ndparams": cluster.ndparams,
6030 "candidate_pool_size": cluster.candidate_pool_size,
6031 "master_netdev": cluster.master_netdev,
6032 "master_netmask": cluster.master_netmask,
6033 "use_external_mip_script": cluster.use_external_mip_script,
6034 "volume_group_name": cluster.volume_group_name,
6035 "drbd_usermode_helper": cluster.drbd_usermode_helper,
6036 "file_storage_dir": cluster.file_storage_dir,
6037 "shared_file_storage_dir": cluster.shared_file_storage_dir,
6038 "maintain_node_health": cluster.maintain_node_health,
6039 "ctime": cluster.ctime,
6040 "mtime": cluster.mtime,
6041 "uuid": cluster.uuid,
6042 "tags": list(cluster.GetTags()),
6043 "uid_pool": cluster.uid_pool,
6044 "default_iallocator": cluster.default_iallocator,
6045 "reserved_lvs": cluster.reserved_lvs,
6046 "primary_ip_version": primary_ip_version,
6047 "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
6048 "hidden_os": cluster.hidden_os,
6049 "blacklisted_os": cluster.blacklisted_os,
6055 class LUClusterConfigQuery(NoHooksLU):
6056 """Return configuration values.
6060 _FIELDS_DYNAMIC = utils.FieldSet()
6061 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
6062 "watcher_pause", "volume_group_name")
6064 def CheckArguments(self):
6065 _CheckOutputFields(static=self._FIELDS_STATIC,
6066 dynamic=self._FIELDS_DYNAMIC,
6067 selected=self.op.output_fields)
6069 def ExpandNames(self):
6070 self.needed_locks = {}
6072 def Exec(self, feedback_fn):
6073 """Dump a representation of the cluster config to the standard output.
6077 for field in self.op.output_fields:
6078 if field == "cluster_name":
6079 entry = self.cfg.GetClusterName()
6080 elif field == "master_node":
6081 entry = self.cfg.GetMasterNode()
6082 elif field == "drain_flag":
6083 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
6084 elif field == "watcher_pause":
6085 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
6086 elif field == "volume_group_name":
6087 entry = self.cfg.GetVGName()
      else:
        raise errors.ParameterError(field)
      values.append(entry)

    return values
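# Illustrative sketch (not part of the original module): the per-field
# dispatch above preserves the order of self.op.output_fields, so a caller
# can pair names with values. The field values shown are hypothetical.
def _ExamplePairConfigFields(fields, values):
  """Pairs queried field names with the values returned by the LU.

  E.g. fields = ["cluster_name", "drain_flag"]
       values = ["cluster.example.com", False]

  """
  return dict(zip(fields, values))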
6094 class LUInstanceActivateDisks(NoHooksLU):
6095 """Bring up an instance's disks.
6100 def ExpandNames(self):
6101 self._ExpandAndLockInstance()
6102 self.needed_locks[locking.LEVEL_NODE] = []
6103 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6105 def DeclareLocks(self, level):
6106 if level == locking.LEVEL_NODE:
6107 self._LockInstancesNodes()
6109 def CheckPrereq(self):
6110 """Check prerequisites.
    This checks that the instance is in the cluster.

    """
6115 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6116 assert self.instance is not None, \
6117 "Cannot retrieve locked instance %s" % self.op.instance_name
6118 _CheckNodeOnline(self, self.instance.primary_node)
6120 def Exec(self, feedback_fn):
6121 """Activate the disks.
6124 disks_ok, disks_info = \
6125 _AssembleInstanceDisks(self, self.instance,
6126 ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
6135 """Prepare the block devices for an instance.
6137 This sets up the block devices on all nodes.
6139 @type lu: L{LogicalUnit}
6140 @param lu: the logical unit on whose behalf we execute
6141 @type instance: L{objects.Instance}
6142 @param instance: the instance for whose disks we assemble
6143 @type disks: list of L{objects.Disk} or None
6144 @param disks: which disks to assemble (or all, if None)
6145 @type ignore_secondaries: boolean
6146 @param ignore_secondaries: if true, errors on secondary nodes
6147 won't result in an error return from the function
6148 @type ignore_size: boolean
6149 @param ignore_size: if true, the current known size of the disk
6150 will not be used during the disk activation, useful for cases
6151 when the size is wrong
  @return: a pair (disks_ok, device_info); disks_ok is False if the
      operation failed, and device_info is a list of triples
      (host, instance_visible_name, node_visible_name)
      with the mapping from node devices to instance devices

  """
  device_info = []
  disks_ok = True
  iname = instance.name
6160 disks = _ExpandCheckDisks(instance, disks)
6162 # With the two passes mechanism we try to reduce the window of
6163 # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it
6166 # The proper fix would be to wait (with some limits) until the
6167 # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)
6171 # 1st pass, assemble on all nodes in secondary mode
6172 for idx, inst_disk in enumerate(disks):
6173 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
6177 lu.cfg.SetDiskID(node_disk, node)
6178 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False
6187 # FIXME: race condition on drbd migration to primary
6189 # 2nd pass, do only the primary node
  for idx, inst_disk in enumerate(disks):
    dev_path = None
6193 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
6199 lu.cfg.SetDiskID(node_disk, node)
6200 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload
6210 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
6212 # leave the disks configured for the primary node
6213 # this is a workaround that would be fixed better by
6214 # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)
6218 return disks_ok, device_info
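# Illustrative sketch (not part of the original module): typical consumption
# of the (disks_ok, device_info) pair returned above; the helper name is
# hypothetical.
def _ExampleLogAssembledDisks(lu, instance):
  """Logs the node-level device path backing each instance disk."""
  disks_ok, device_info = _AssembleInstanceDisks(lu, instance)
  if not disks_ok:
    raise errors.OpExecError("Cannot activate block devices")
  for node, iv_name, dev_path in device_info:
    lu.LogInfo("Disk %s of instance %s lives at %s on node %s",
               iv_name, instance.name, dev_path, node)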
6221 def _StartInstanceDisks(lu, instance, force):
6222 """Start the disks of an instance.
6225 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
6226 ignore_secondaries=force)
6228 _ShutdownInstanceDisks(lu, instance)
6229 if force is not None and not force:
    lu.proc.LogWarning("", hint="If the message above refers to a"
                       " secondary node,"
                       " you can retry the operation using '--force'.")
6233 raise errors.OpExecError("Disk consistency error")
6236 class LUInstanceDeactivateDisks(NoHooksLU):
6237 """Shutdown an instance's disks.
6242 def ExpandNames(self):
6243 self._ExpandAndLockInstance()
6244 self.needed_locks[locking.LEVEL_NODE] = []
6245 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6247 def DeclareLocks(self, level):
6248 if level == locking.LEVEL_NODE:
6249 self._LockInstancesNodes()
6251 def CheckPrereq(self):
6252 """Check prerequisites.
    This checks that the instance is in the cluster.

    """
6257 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6258 assert self.instance is not None, \
6259 "Cannot retrieve locked instance %s" % self.op.instance_name
6261 def Exec(self, feedback_fn):
6262 """Deactivate the disks
6265 instance = self.instance
6267 _ShutdownInstanceDisks(self, instance)
6269 _SafeShutdownInstanceDisks(self, instance)
6272 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
6273 """Shutdown block devices of an instance.
6275 This function checks if an instance is running, before calling
  _ShutdownInstanceDisks.

  """
6279 _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
6280 _ShutdownInstanceDisks(lu, instance, disks=disks)
6283 def _ExpandCheckDisks(instance, disks):
6284 """Return the instance disks selected by the disks list
6286 @type disks: list of L{objects.Disk} or None
6287 @param disks: selected disks
6288 @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on

  """
  if disks is None:
    return instance.disks
6295 if not set(disks).issubset(instance.disks):
    raise errors.ProgrammerError("Can only act on disks belonging to the"
                                 " target instance")

  return disks
6301 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
6302 """Shutdown block devices of an instance.
6304 This does the shutdown on all nodes of the instance.
  If ignore_primary is false, errors on the primary node are not
  ignored and make the shutdown fail.

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)
  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6315 lu.cfg.SetDiskID(top_disk, node)
6316 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if ((node == instance.primary_node and not ignore_primary) or
            (node != instance.primary_node and not result.offline)):
          all_result = False

  return all_result
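# Illustrative sketch (not part of the original module): pairing the assemble
# and shutdown helpers around a maintenance action; names are hypothetical.
def _ExampleWithDisksAssembled(lu, instance, fn):
  """Runs fn(instance) with the instance's disks assembled, then cleans up."""
  disks_ok, _ = _AssembleInstanceDisks(lu, instance)
  if not disks_ok:
    raise errors.OpExecError("Cannot activate block devices")
  try:
    return fn(instance)
  finally:
    _ShutdownInstanceDisks(lu, instance)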
6327 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6328 """Checks if a node has enough free memory.
  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.
6335 @type lu: C{LogicalUnit}
6336 @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
6339 @type reason: C{str}
6340 @param reason: string to use in the error message
6341 @type requested: C{int}
6342 @param requested: the amount of memory in MiB to check for
6343 @type hypervisor_name: C{str}
6344 @param hypervisor_name: the hypervisor to ask for memory stats
6345 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
6349 nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6350 nodeinfo[node].Raise("Can't get data from node %s" % node,
6351 prereq=True, ecode=errors.ECODE_ENVIRON)
6352 (_, _, (hv_info, )) = nodeinfo[node].payload
6354 free_mem = hv_info.get("memory_free", None)
6355 if not isinstance(free_mem, int):
6356 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6357 " was '%s'" % (node, free_mem),
6358 errors.ECODE_ENVIRON)
6359 if requested > free_mem:
6360 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6361 " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)
6366 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6367 """Checks if nodes have enough free disk space in the all VGs.
  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.
6374 @type lu: C{LogicalUnit}
6375 @param lu: a logical unit from which we get configuration data
6376 @type nodenames: C{list}
6377 @param nodenames: the list of node names to check
6378 @type req_sizes: C{dict}
  @param req_sizes: the hash of vg and corresponding amount of disk in
      MiB to check for
6381 @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
6385 for vg, req_size in req_sizes.items():
6386 _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
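# Illustrative note (hypothetical sizes): req_sizes maps each volume group to
# the total amount of disk, in MiB, that must be free on every node, e.g.
#   _CheckNodesFreeDiskPerVG(self, ["node1", "node2"],
#                            {"xenvg": 10240, "ssdvg": 2048})
# checks for 10 GiB free in "xenvg" and 2 GiB free in "ssdvg" on both nodes.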
6389 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6390 """Checks if nodes have enough free disk space in the specified VG.
  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.
6397 @type lu: C{LogicalUnit}
6398 @param lu: a logical unit from which we get configuration data
6399 @type nodenames: C{list}
6400 @param nodenames: the list of node names to check
  @type vg: C{str}
  @param vg: the volume group to check
6403 @type requested: C{int}
6404 @param requested: the amount of disk in MiB to check for
6405 @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
6409 nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6410 for node in nodenames:
6411 info = nodeinfo[node]
6412 info.Raise("Cannot get current information from node %s" % node,
6413 prereq=True, ecode=errors.ECODE_ENVIRON)
6414 (_, (vg_info, ), _) = info.payload
6415 vg_free = vg_info.get("vg_free", None)
6416 if not isinstance(vg_free, int):
6417 raise errors.OpPrereqError("Can't compute free disk space on node"
6418 " %s for vg %s, result was '%s'" %
6419 (node, vg, vg_free), errors.ECODE_ENVIRON)
6420 if requested > vg_free:
6421 raise errors.OpPrereqError("Not enough disk space on target node %s"
6422 " vg %s: required %d MiB, available %d MiB" %
                                 (node, vg, requested, vg_free),
                                 errors.ECODE_NORES)
6427 def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6428 """Checks if nodes have enough physical CPUs
6430 This function checks if all given nodes have the needed number of
6431 physical CPUs. In case any node has less CPUs or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.
6435 @type lu: C{LogicalUnit}
6436 @param lu: a logical unit from which we get configuration data
6437 @type nodenames: C{list}
6438 @param nodenames: the list of node names to check
6439 @type requested: C{int}
6440 @param requested: the minimum acceptable number of physical CPUs
6441 @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
      or we cannot check the node

  """
6445 nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6446 for node in nodenames:
6447 info = nodeinfo[node]
6448 info.Raise("Cannot get current information from node %s" % node,
6449 prereq=True, ecode=errors.ECODE_ENVIRON)
6450 (_, _, (hv_info, )) = info.payload
6451 num_cpus = hv_info.get("cpu_total", None)
6452 if not isinstance(num_cpus, int):
6453 raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6454 " on node %s, result was '%s'" %
6455 (node, num_cpus), errors.ECODE_ENVIRON)
6456 if requested > num_cpus:
6457 raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6458 "required" % (node, num_cpus, requested),
6462 class LUInstanceStartup(LogicalUnit):
6463 """Starts an instance.
6466 HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False
6470 def CheckArguments(self):
6472 if self.op.beparams:
6473 # fill the beparams dict
6474 objects.UpgradeBeParams(self.op.beparams)
6475 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6477 def ExpandNames(self):
6478 self._ExpandAndLockInstance()
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "FORCE": self.op.force,
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env
6494 def BuildHooksNodes(self):
6495 """Build hooks nodes.
6498 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6501 def CheckPrereq(self):
6502 """Check prerequisites.
    This checks that the instance is in the cluster.

    """
6507 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6508 assert self.instance is not None, \
6509 "Cannot retrieve locked instance %s" % self.op.instance_name
6512 if self.op.hvparams:
6513 # check hypervisor parameter syntax (locally)
6514 cluster = self.cfg.GetClusterInfo()
6515 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6516 filled_hvp = cluster.FillHV(instance)
6517 filled_hvp.update(self.op.hvparams)
6518 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6519 hv_type.CheckParameterSyntax(filled_hvp)
6520 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6522 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6524 self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6526 if self.primary_offline and self.op.ignore_offline_nodes:
6527 self.proc.LogWarning("Ignoring offline primary node")
6529 if self.op.hvparams or self.op.beparams:
6530 self.proc.LogWarning("Overridden parameters are ignored")
    else:
      _CheckNodeOnline(self, instance.primary_node)
6534 bep = self.cfg.GetClusterInfo().FillBE(instance)
6536 # check bridges existence
6537 _CheckInstanceBridgesExist(self, instance)
      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
                                                instance.hypervisor)
6542 remote_info.Raise("Error checking node %s" % instance.primary_node,
6543 prereq=True, ecode=errors.ECODE_ENVIRON)
6544 if not remote_info.payload: # not running already
6545 _CheckNodeFreeMemory(self, instance.primary_node,
6546 "starting instance %s" % instance.name,
6547 bep[constants.BE_MAXMEM], instance.hypervisor)
6549 def Exec(self, feedback_fn):
6550 """Start the instance.
6553 instance = self.instance
6554 force = self.op.force
6556 if not self.op.no_remember:
6557 self.cfg.MarkInstanceUp(instance.name)
6559 if self.primary_offline:
6560 assert self.op.ignore_offline_nodes
6561 self.proc.LogInfo("Primary node offline, marked instance as started")
    else:
      node_current = instance.primary_node

      _StartInstanceDisks(self, instance, force)
      result = \
        self.rpc.call_instance_start(node_current,
                                     (instance, self.op.hvparams,
                                      self.op.beparams),
                                     self.op.startup_paused)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
6575 raise errors.OpExecError("Could not start instance: %s" % msg)
6578 class LUInstanceReboot(LogicalUnit):
6579 """Reboot an instance.
6582 HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False
6586 def ExpandNames(self):
6587 self._ExpandAndLockInstance()
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env
6605 def BuildHooksNodes(self):
6606 """Build hooks nodes.
6609 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6612 def CheckPrereq(self):
6613 """Check prerequisites.
    This checks that the instance is in the cluster.

    """
6618 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6619 assert self.instance is not None, \
6620 "Cannot retrieve locked instance %s" % self.op.instance_name
6621 _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6622 _CheckNodeOnline(self, instance.primary_node)
6624 # check bridges existence
6625 _CheckInstanceBridgesExist(self, instance)
6627 def Exec(self, feedback_fn):
6628 """Reboot the instance.
6631 instance = self.instance
6632 ignore_secondaries = self.op.ignore_secondaries
6633 reboot_type = self.op.reboot_type
    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
6638 remote_info.Raise("Error checking node %s" % instance.primary_node)
6639 instance_running = bool(remote_info.payload)
6641 node_current = instance.primary_node
6643 if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6644 constants.INSTANCE_REBOOT_HARD]:
6645 for disk in instance.disks:
6646 self.cfg.SetDiskID(disk, node_current)
      result = self.rpc.call_instance_reboot(node_current, instance,
                                             reboot_type,
                                             self.op.shutdown_timeout)
      result.Raise("Could not reboot instance")
    else:
      if instance_running:
6653 result = self.rpc.call_instance_shutdown(node_current, instance,
6654 self.op.shutdown_timeout)
6655 result.Raise("Could not shutdown instance for full reboot")
        _ShutdownInstanceDisks(self, instance)
      else:
        self.LogInfo("Instance %s was already stopped, starting now",
                     instance.name)
6660 _StartInstanceDisks(self, instance, ignore_secondaries)
6661 result = self.rpc.call_instance_start(node_current,
6662 (instance, None, None), False)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
6666 raise errors.OpExecError("Could not start instance for"
6667 " full reboot: %s" % msg)
6669 self.cfg.MarkInstanceUp(instance.name)
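# Illustrative note (sketch): the branch above distinguishes the reboot
# types: constants.INSTANCE_REBOOT_SOFT and _HARD are delegated to the
# hypervisor on the primary node, while _FULL is emulated as a shutdown
# followed by a fresh start, e.g.
#   opcodes.OpInstanceReboot(instance_name="inst1.example.com",
#                            reboot_type=constants.INSTANCE_REBOOT_FULL)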
6672 class LUInstanceShutdown(LogicalUnit):
6673 """Shutdown an instance.
6676 HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False
6680 def ExpandNames(self):
6681 self._ExpandAndLockInstance()
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.op.timeout
    return env
6693 def BuildHooksNodes(self):
6694 """Build hooks nodes.
6697 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6700 def CheckPrereq(self):
6701 """Check prerequisites.
    This checks that the instance is in the cluster.

    """
6706 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6707 assert self.instance is not None, \
6708 "Cannot retrieve locked instance %s" % self.op.instance_name
6710 _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6712 self.primary_offline = \
6713 self.cfg.GetNodeInfo(self.instance.primary_node).offline
6715 if self.primary_offline and self.op.ignore_offline_nodes:
6716 self.proc.LogWarning("Ignoring offline primary node")
6718 _CheckNodeOnline(self, self.instance.primary_node)
6720 def Exec(self, feedback_fn):
6721 """Shutdown the instance.
6724 instance = self.instance
6725 node_current = instance.primary_node
6726 timeout = self.op.timeout
6728 if not self.op.no_remember:
6729 self.cfg.MarkInstanceDown(instance.name)
6731 if self.primary_offline:
6732 assert self.op.ignore_offline_nodes
6733 self.proc.LogInfo("Primary node offline, marked instance as stopped")
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
      msg = result.fail_msg
      if msg:
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6740 _ShutdownInstanceDisks(self, instance)
6743 class LUInstanceReinstall(LogicalUnit):
6744 """Reinstall an instance.
6747 HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False
6751 def ExpandNames(self):
6752 self._ExpandAndLockInstance()
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)
6762 def BuildHooksNodes(self):
6763 """Build hooks nodes.
6766 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6769 def CheckPrereq(self):
6770 """Check prerequisites.
    This checks that the instance is in the cluster and is not running.

    """
6775 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6776 assert instance is not None, \
6777 "Cannot retrieve locked instance %s" % self.op.instance_name
6778 _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6779 " offline, cannot reinstall")
6780 for node in instance.secondary_nodes:
6781 _CheckNodeOnline(self, node, "Instance secondary node offline,"
6782 " cannot reinstall")
6784 if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
6790 if self.op.os_type is not None:
6792 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6793 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
      instance_os = self.op.os_type
    else:
      instance_os = instance.os
6798 nodelist = list(instance.all_nodes)
6800 if self.op.osparams:
6801 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6802 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = {}

    self.instance = instance
6809 def Exec(self, feedback_fn):
6810 """Reinstall the instance.
6813 inst = self.instance
6815 if self.op.os_type is not None:
6816 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6817 inst.os = self.op.os_type
6818 # Write to configuration
6819 self.cfg.Update(inst, feedback_fn)
    _StartInstanceDisks(self, inst, None)
    try:
6823 feedback_fn("Running the instance OS create scripts...")
6824 # FIXME: pass debug option from opcode to backend
6825 result = self.rpc.call_instance_os_add(inst.primary_node,
6826 (inst, self.os_inst), True,
6827 self.op.debug_level)
6828 result.Raise("Could not install OS for instance %s on node %s" %
6829 (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)
6834 class LUInstanceRecreateDisks(LogicalUnit):
6835 """Recreate an instance's missing disks.
6838 HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False
6842 def CheckArguments(self):
6843 # normalise the disk list
6844 self.op.disks = sorted(frozenset(self.op.disks))
6846 def ExpandNames(self):
6847 self._ExpandAndLockInstance()
6848 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    if self.op.nodes:
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = []
6855 def DeclareLocks(self, level):
6856 if level == locking.LEVEL_NODE:
6857 # if we replace the nodes, we only need to lock the old primary,
6858 # otherwise we need to lock all nodes for disk re-creation
6859 primary_only = bool(self.op.nodes)
6860 self._LockInstancesNodes(primary_only=primary_only)
6861 elif level == locking.LEVEL_NODE_RES:
6863 self.needed_locks[locking.LEVEL_NODE_RES] = \
6864 self.needed_locks[locking.LEVEL_NODE][:]
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)
6874 def BuildHooksNodes(self):
6875 """Build hooks nodes.
6878 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6881 def CheckPrereq(self):
6882 """Check prerequisites.
    This checks that the instance is in the cluster and is not running.

    """
6887 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6888 assert instance is not None, \
6889 "Cannot retrieve locked instance %s" % self.op.instance_name
    if self.op.nodes:
      if len(self.op.nodes) != len(instance.all_nodes):
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
                                   " %d replacement nodes were specified" %
                                   (instance.name, len(instance.all_nodes),
                                    len(self.op.nodes)),
                                   errors.ECODE_INVAL)
6897 assert instance.disk_template != constants.DT_DRBD8 or \
6898 len(self.op.nodes) == 2
6899 assert instance.disk_template != constants.DT_PLAIN or \
6900 len(self.op.nodes) == 1
      primary_node = self.op.nodes[0]
    else:
      primary_node = instance.primary_node
6904 _CheckNodeOnline(self, primary_node)
6906 if instance.disk_template == constants.DT_DISKLESS:
6907 raise errors.OpPrereqError("Instance '%s' has no disks" %
6908 self.op.instance_name, errors.ECODE_INVAL)
    # if we replace nodes *and* the old primary is offline, we don't
    # check
6911 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
6912 assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
6913 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6914 if not (self.op.nodes and old_pnode.offline):
6915 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
6916 msg="cannot recreate disks")
6918 if not self.op.disks:
6919 self.op.disks = range(len(instance.disks))
6921 for idx in self.op.disks:
6922 if idx >= len(instance.disks):
        raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
                                   errors.ECODE_INVAL)
    if self.op.disks != range(len(instance.disks)) and self.op.nodes:
      raise errors.OpPrereqError("Can't recreate disks partially and"
                                 " change the nodes at the same time",
                                 errors.ECODE_INVAL)
6929 self.instance = instance
6931 def Exec(self, feedback_fn):
6932 """Recreate the disks.
6935 instance = self.instance
6937 assert (self.owned_locks(locking.LEVEL_NODE) ==
6938 self.owned_locks(locking.LEVEL_NODE_RES))
    to_skip = []
    mods = [] # keeps track of needed logical_id changes
6943 for idx, disk in enumerate(instance.disks):
      if idx not in self.op.disks: # disk idx has not been passed in
        to_skip.append(idx)
        continue
6947 # update secondaries for disks, if needed
      if self.op.nodes:
        if disk.dev_type == constants.LD_DRBD8:
6950 # need to update the nodes and minors
6951 assert len(self.op.nodes) == 2
          assert len(disk.logical_id) == 6 # otherwise disk internals
                                           # have changed
6954 (_, _, old_port, _, _, old_secret) = disk.logical_id
6955 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6956 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6957 new_minors[0], new_minors[1], old_secret)
6958 assert len(disk.logical_id) == len(new_id)
6959 mods.append((idx, new_id))
6961 # now that we have passed all asserts above, we can apply the mods
6962 # in a single run (to avoid partial changes)
6963 for idx, new_id in mods:
6964 instance.disks[idx].logical_id = new_id
6966 # change primary node, if needed
    if self.op.nodes:
      instance.primary_node = self.op.nodes[0]
6969 self.LogWarning("Changing the instance's nodes, you will have to"
6970 " remove any disks left on the older nodes manually")
6973 self.cfg.Update(instance, feedback_fn)
6975 _CreateDisks(self, instance, to_skip=to_skip)
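# Illustrative note (sketch, matching the 6-tuple unpacked above): a DRBD8
# disk's logical_id has the layout
#   (node_a, node_b, port, minor_a, minor_b, secret)
# so recreating the disks on new nodes swaps only the node names and the
# freshly allocated minors, while the port and shared secret are kept.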
6978 class LUInstanceRename(LogicalUnit):
6979 """Rename an instance.
6982 HPATH = "instance-rename"
6983 HTYPE = constants.HTYPE_INSTANCE
6985 def CheckArguments(self):
6989 if self.op.ip_check and not self.op.name_check:
6990 # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("IP address check requires a name check",
                                 errors.ECODE_INVAL)
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    return env
7004 def BuildHooksNodes(self):
7005 """Build hooks nodes.
7008 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7011 def CheckPrereq(self):
7012 """Check prerequisites.
    This checks that the instance is in the cluster and is not running.

    """
7017 self.op.instance_name = _ExpandInstanceName(self.cfg,
7018 self.op.instance_name)
7019 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7020 assert instance is not None
7021 _CheckNodeOnline(self, instance.primary_node)
7022 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7023 msg="cannot rename")
7024 self.instance = instance
7026 new_name = self.op.new_name
7027 if self.op.name_check:
7028 hostname = netutils.GetHostname(name=new_name)
7029 if hostname.name != new_name:
        self.LogInfo("Resolved given name '%s' to '%s'", new_name,
                     hostname.name)
7032 if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
7033 raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
7034 " same as given hostname '%s'") %
                                     (hostname.name, self.op.new_name),
                                     errors.ECODE_INVAL)
      new_name = self.op.new_name = hostname.name
7038 if (self.op.ip_check and
7039 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7040 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7041 (hostname.ip, new_name),
7042 errors.ECODE_NOTUNIQUE)
7044 instance_list = self.cfg.GetInstanceList()
7045 if new_name in instance_list and new_name != instance.name:
7046 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7047 new_name, errors.ECODE_EXISTS)
7049 def Exec(self, feedback_fn):
7050 """Rename the instance.
7053 inst = self.instance
7054 old_name = inst.name
7056 rename_file_storage = False
7057 if (inst.disk_template in constants.DTS_FILEBASED and
7058 self.op.new_name != inst.name):
7059 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7060 rename_file_storage = True
7062 self.cfg.RenameInstance(inst.name, self.op.new_name)
7063 # Change the instance lock. This is definitely safe while we hold the BGL.
7064 # Otherwise the new lock would have to be added in acquired mode.
    assert self.REQ_BGL
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7067 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7069 # re-read the instance from the configuration after rename
7070 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7072 if rename_file_storage:
7073 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7074 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7075 old_file_storage_dir,
7076 new_file_storage_dir)
7077 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7078 " (but the instance has been renamed in Ganeti)" %
7079 (inst.primary_node, old_file_storage_dir,
7080 new_file_storage_dir))
7082 _StartInstanceDisks(self, inst, None)
    try:
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                 old_name, self.op.debug_level)
      msg = result.fail_msg
      if msg:
7088 msg = ("Could not run OS rename script for instance %s on node %s"
7089 " (but the instance has been renamed in Ganeti): %s" %
7090 (inst.name, inst.primary_node, msg))
7091 self.proc.LogWarning(msg)
    finally:
      _ShutdownInstanceDisks(self, inst)

    return inst.name
7098 class LUInstanceRemove(LogicalUnit):
7099 """Remove an instance.
7102 HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False
7106 def ExpandNames(self):
7107 self._ExpandAndLockInstance()
7108 self.needed_locks[locking.LEVEL_NODE] = []
7109 self.needed_locks[locking.LEVEL_NODE_RES] = []
7110 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7112 def DeclareLocks(self, level):
7113 if level == locking.LEVEL_NODE:
7114 self._LockInstancesNodes()
7115 elif level == locking.LEVEL_NODE_RES:
7117 self.needed_locks[locking.LEVEL_NODE_RES] = \
7118 self.needed_locks[locking.LEVEL_NODE][:]
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
    return env
7130 def BuildHooksNodes(self):
7131 """Build hooks nodes.
7134 nl = [self.cfg.GetMasterNode()]
7135 nl_post = list(self.instance.all_nodes) + nl
7136 return (nl, nl_post)
7138 def CheckPrereq(self):
7139 """Check prerequisites.
    This checks that the instance is in the cluster.

    """
7144 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7145 assert self.instance is not None, \
7146 "Cannot retrieve locked instance %s" % self.op.instance_name
7148 def Exec(self, feedback_fn):
7149 """Remove the instance.
7152 instance = self.instance
7153 logging.info("Shutting down instance %s on node %s",
7154 instance.name, instance.primary_node)
7156 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7157 self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_failures:
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, instance.primary_node, msg))
7167 assert (self.owned_locks(locking.LEVEL_NODE) ==
7168 self.owned_locks(locking.LEVEL_NODE_RES))
7169 assert not (set(instance.all_nodes) -
7170 self.owned_locks(locking.LEVEL_NODE)), \
7171 "Not owning correct locks"
7173 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
7176 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7177 """Utility function to remove an instance.
7180 logging.info("Removing block devices for instance %s", instance.name)
7182 if not _RemoveDisks(lu, instance):
7183 if not ignore_failures:
7184 raise errors.OpExecError("Can't remove instance's disks")
7185 feedback_fn("Warning: can't remove instance's disks")
7187 logging.info("Removing instance %s out of cluster config", instance.name)
7189 lu.cfg.RemoveInstance(instance.name)
7191 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7192 "Instance lock removal conflict"
7194 # Remove lock for the instance
7195 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
7198 class LUInstanceQuery(NoHooksLU):
7199 """Logical unit for querying instances.
7202 # pylint: disable=W0142
7205 def CheckArguments(self):
7206 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7207 self.op.output_fields, self.op.use_locking)
7209 def ExpandNames(self):
7210 self.iq.ExpandNames(self)
7212 def DeclareLocks(self, level):
7213 self.iq.DeclareLocks(self, level)
7215 def Exec(self, feedback_fn):
7216 return self.iq.OldStyleQuery(self)
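# Illustrative note (sketch): qlang.MakeSimpleFilter, used in CheckArguments
# above, turns a field name and a list of values into an OR-of-equalities
# query filter; with hypothetical names it would produce something like
#   MakeSimpleFilter("name", ["inst1.example.com", "inst2.example.com"])
#   -> [qlang.OP_OR, [qlang.OP_EQUAL, "name", "inst1.example.com"],
#                    [qlang.OP_EQUAL, "name", "inst2.example.com"]]
# (and None when no names are given, meaning "no filtering").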
7219 class LUInstanceFailover(LogicalUnit):
7220 """Failover an instance.
7223 HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False
7227 def CheckArguments(self):
7228 """Check the arguments.
7231 self.iallocator = getattr(self.op, "iallocator", None)
7232 self.target_node = getattr(self.op, "target_node", None)
7234 def ExpandNames(self):
7235 self._ExpandAndLockInstance()
7237 if self.op.target_node is not None:
7238 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7240 self.needed_locks[locking.LEVEL_NODE] = []
7241 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7243 ignore_consistency = self.op.ignore_consistency
7244 shutdown_timeout = self.op.shutdown_timeout
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       cleanup=False,
                                       failover=True,
                                       ignore_consistency=ignore_consistency,
                                       shutdown_timeout=shutdown_timeout,
                                       ignore_ipolicy=self.op.ignore_ipolicy)
7251 self.tasklets = [self._migrater]
7253 def DeclareLocks(self, level):
7254 if level == locking.LEVEL_NODE:
7255 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7256 if instance.disk_template in constants.DTS_EXT_MIRROR:
7257 if self.op.target_node is None:
7258 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        else:
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                                   self.op.target_node]
        del self.recalculate_locks[locking.LEVEL_NODE]
      else:
        self._LockInstancesNodes()
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
7272 instance = self._migrater.instance
7273 source_node = instance.primary_node
7274 target_node = self.op.target_node
7276 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7277 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7278 "OLD_PRIMARY": source_node,
7279 "NEW_PRIMARY": target_node,
7282 if instance.disk_template in constants.DTS_INT_MIRROR:
7283 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7284 env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""

    env.update(_BuildInstanceHookEnvByObject(self, instance))

    return env
7292 def BuildHooksNodes(self):
7293 """Build hooks nodes.
7296 instance = self._migrater.instance
7297 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7298 return (nl, nl + [instance.primary_node])
7301 class LUInstanceMigrate(LogicalUnit):
7302 """Migrate an instance.
7304 This is migration without shutting down, compared to the failover,
  which is done with shutdown.

  """
7308 HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False
7312 def ExpandNames(self):
7313 self._ExpandAndLockInstance()
7315 if self.op.target_node is not None:
7316 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7318 self.needed_locks[locking.LEVEL_NODE] = []
7319 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7321 self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       cleanup=self.op.cleanup,
                                       failover=False,
                                       fallback=self.op.allow_failover,
7325 ignore_ipolicy=self.op.ignore_ipolicy)
7326 self.tasklets = [self._migrater]
7328 def DeclareLocks(self, level):
7329 if level == locking.LEVEL_NODE:
7330 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7331 if instance.disk_template in constants.DTS_EXT_MIRROR:
7332 if self.op.target_node is None:
7333 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        else:
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                                   self.op.target_node]
        del self.recalculate_locks[locking.LEVEL_NODE]
      else:
        self._LockInstancesNodes()
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
7347 instance = self._migrater.instance
7348 source_node = instance.primary_node
7349 target_node = self.op.target_node
7350 env = _BuildInstanceHookEnvByObject(self, instance)
7352 "MIGRATE_LIVE": self._migrater.live,
7353 "MIGRATE_CLEANUP": self.op.cleanup,
7354 "OLD_PRIMARY": source_node,
7355 "NEW_PRIMARY": target_node,
7358 if instance.disk_template in constants.DTS_INT_MIRROR:
7359 env["OLD_SECONDARY"] = target_node
7360 env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None

    return env
7366 def BuildHooksNodes(self):
7367 """Build hooks nodes.
7370 instance = self._migrater.instance
7371 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7372 return (nl, nl + [instance.primary_node])
7375 class LUInstanceMove(LogicalUnit):
7376 """Move an instance by data-copying.
7379 HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False
7383 def ExpandNames(self):
7384 self._ExpandAndLockInstance()
7385 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7386 self.op.target_node = target_node
7387 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7388 self.needed_locks[locking.LEVEL_NODE_RES] = []
7389 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7391 def DeclareLocks(self, level):
7392 if level == locking.LEVEL_NODE:
7393 self._LockInstancesNodes(primary_only=True)
7394 elif level == locking.LEVEL_NODE_RES:
7396 self.needed_locks[locking.LEVEL_NODE_RES] = \
7397 self.needed_locks[locking.LEVEL_NODE][:]
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env
7412 def BuildHooksNodes(self):
7413 """Build hooks nodes.
7417 self.cfg.GetMasterNode(),
7418 self.instance.primary_node,
7419 self.op.target_node,
7423 def CheckPrereq(self):
7424 """Check prerequisites.
    This checks that the instance is in the cluster.

    """
7429 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7430 assert self.instance is not None, \
7431 "Cannot retrieve locked instance %s" % self.op.instance_name
7433 node = self.cfg.GetNodeInfo(self.op.target_node)
7434 assert node is not None, \
7435 "Cannot retrieve locked node %s" % self.op.target_node
7437 self.target_node = target_node = node.name
7439 if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),
                                 errors.ECODE_STATE)
7444 bep = self.cfg.GetClusterInfo().FillBE(instance)
7446 for idx, dsk in enumerate(instance.disks):
7447 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7448 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7449 " cannot copy" % idx, errors.ECODE_STATE)
7451 _CheckNodeOnline(self, target_node)
7452 _CheckNodeNotDrained(self, target_node)
7453 _CheckNodeVmCapable(self, target_node)
7454 ipolicy = _CalculateGroupIPolicy(self.cfg.GetClusterInfo(), node.group)
7455 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
7456 ignore=self.op.ignore_ipolicy)
7458 if instance.admin_state == constants.ADMINST_UP:
7459 # check memory requirements on the secondary node
7460 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7461 instance.name, bep[constants.BE_MAXMEM],
7462 instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")
    # check bridge existence
7468 _CheckInstanceBridgesExist(self, instance, node=target_node)
7470 def Exec(self, feedback_fn):
7471 """Move an instance.
    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    """
7477 instance = self.instance
7479 source_node = instance.primary_node
7480 target_node = self.target_node
7482 self.LogInfo("Shutting down instance %s on source node %s",
7483 instance.name, source_node)
7485 assert (self.owned_locks(locking.LEVEL_NODE) ==
7486 self.owned_locks(locking.LEVEL_NODE_RES))
7488 result = self.rpc.call_instance_shutdown(source_node, instance,
7489 self.op.shutdown_timeout)
7490 msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
7493 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7494 " Proceeding anyway. Please make sure node"
7495 " %s is down. Error details: %s",
7496 instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))
7502 # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
7505 except errors.OpExecError:
7506 self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise
    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
7516 # activate, get path, copy the data over
7517 for idx, disk in enumerate(instance.disks):
7518 self.LogInfo("Copying data for disk %d", idx)
7519 result = self.rpc.call_blockdev_assemble(target_node, disk,
7520 instance.name, True, idx)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        continue
      dev_path = result.payload
7527 result = self.rpc.call_blockdev_export(source_node, disk,
7528 target_node, dev_path,
7531 self.LogWarning("Can't copy data over for disk %d: %s",
7532 idx, result.fail_msg)
7533 errs.append(result.fail_msg)
7537 self.LogWarning("Some disks failed to copy, aborting")
7539 _RemoveDisks(self, instance, target_node=target_node)
7541 self.cfg.ReleaseDRBDMinors(instance.name)
7542 raise errors.OpExecError("Errors during disk copy: %s" %
7545 instance.primary_node = target_node
7546 self.cfg.Update(instance, feedback_fn)
7548 self.LogInfo("Removing the disks on the original node")
7549 _RemoveDisks(self, instance, target_node=source_node)
7551 # Only start the instance if it's marked as up
7552 if instance.admin_state == constants.ADMINST_UP:
7553 self.LogInfo("Starting instance %s on node %s",
7554 instance.name, target_node)
7556 disks_ok, _ = _AssembleInstanceDisks(self, instance,
7557 ignore_secondaries=True)
7558 if not disks_ok:
7559 _ShutdownInstanceDisks(self, instance)
7560 raise errors.OpExecError("Can't activate the instance's disks")
7562 result = self.rpc.call_instance_start(target_node,
7563 (instance, None, None), False)
7564 msg = result.fail_msg
7565 if msg:
7566 _ShutdownInstanceDisks(self, instance)
7567 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7568 (instance.name, target_node, msg))
7571 class LUNodeMigrate(LogicalUnit):
7572 """Migrate all instances from a node.
7575 HPATH = "node-migrate"
7576 HTYPE = constants.HTYPE_NODE
7579 def CheckArguments(self):
7580 pass
7582 def ExpandNames(self):
7583 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7585 self.share_locks = _ShareAll()
7586 self.needed_locks = {
7587 locking.LEVEL_NODE: [self.op.node_name],
7588 }
7590 def BuildHooksEnv(self):
7593 This runs on the master, the primary and all the secondaries.
7597 "NODE_NAME": self.op.node_name,
7600 def BuildHooksNodes(self):
7601 """Build hooks nodes.
7604 nl = [self.cfg.GetMasterNode()]
7605 return (nl, nl)
7607 def CheckPrereq(self):
7608 pass
7610 def Exec(self, feedback_fn):
7611 # Prepare jobs for migration instances
7612 jobs = [
7613 [opcodes.OpInstanceMigrate(instance_name=inst.name,
7614 mode=self.op.mode,
7615 live=self.op.live,
7616 iallocator=self.op.iallocator,
7617 target_node=self.op.target_node,
7618 ignore_ipolicy=self.op.ignore_ipolicy)]
7619 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7620 ]
7622 # TODO: Run iallocator in this opcode and pass correct placement options to
7623 # OpInstanceMigrate. Since other jobs can modify the cluster between
7624 # running the iallocator and the actual migration, a good consistency model
7625 # will have to be found.
7627 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7628 frozenset([self.op.node_name]))
7630 return ResultWithJobs(jobs)
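# Illustrative sketch (not from the original source): each entry in "jobs"
# above is a single-opcode job, so evacuating a node whose primary
# instances are inst1 and inst2 submits two independent jobs, roughly:
#
#   jobs = [
#     [opcodes.OpInstanceMigrate(instance_name="inst1", ...)],
#     [opcodes.OpInstanceMigrate(instance_name="inst2", ...)],
#   ]
#
# mcpu.Processor._ProcessResult then submits these and includes their job
# IDs in the opcode result, as described for ResultWithJobs.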
7633 class TLMigrateInstance(Tasklet):
7634 """Tasklet class for instance migration.
7637 @ivar live: whether the migration will be done live or non-live;
7638 this variable is initialized only after CheckPrereq has run
7639 @type cleanup: boolean
7640 @ivar cleanup: Whether we clean up from a failed migration
7641 @type iallocator: string
7642 @ivar iallocator: The iallocator used to determine target_node
7643 @type target_node: string
7644 @ivar target_node: If given, the target_node to reallocate the instance to
7645 @type failover: boolean
7646 @ivar failover: Whether operation results in failover or migration
7647 @type fallback: boolean
7648 @ivar fallback: Whether fallback to failover is allowed if migration is
7649 not possible
7650 @type ignore_consistency: boolean
7651 @ivar ignore_consistency: Whether we should ignore consistency between the
7652 source and target node
7653 @type shutdown_timeout: int
7654 @ivar shutdown_timeout: In case of failover, the timeout for the instance shutdown
7655 @type ignore_ipolicy: bool
7656 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
7661 _MIGRATION_POLL_INTERVAL = 1 # seconds
7662 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7664 def __init__(self, lu, instance_name, cleanup=False,
7665 failover=False, fallback=False,
7666 ignore_consistency=False,
7667 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
7668 ignore_ipolicy=False):
7669 """Initializes this class.
7672 Tasklet.__init__(self, lu)
7675 self.instance_name = instance_name
7676 self.cleanup = cleanup
7677 self.live = False # will be overridden later
7678 self.failover = failover
7679 self.fallback = fallback
7680 self.ignore_consistency = ignore_consistency
7681 self.shutdown_timeout = shutdown_timeout
7682 self.ignore_ipolicy = ignore_ipolicy
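# Hypothetical usage sketch (parameter values assumed, not from the original
# source): a migration LU would typically create the tasklet during its
# setup phase and let the processor drive the CheckPrereq/Exec cycle:
#
#   self._migrater = TLMigrateInstance(self, self.op.instance_name,
#                                      cleanup=self.op.cleanup,
#                                      failover=False,
#                                      fallback=self.op.allow_failover)
#   self.tasklets = [self._migrater]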
7684 def CheckPrereq(self):
7685 """Check prerequisites.
7687 This checks that the instance is in the cluster.
7690 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7691 instance = self.cfg.GetInstanceInfo(instance_name)
7692 assert instance is not None
7693 self.instance = instance
7694 cluster = self.cfg.GetClusterInfo()
7696 if (not self.cleanup and
7697 instance.admin_state != constants.ADMINST_UP and
7698 not self.failover and self.fallback):
7699 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7700 " switching to failover")
7701 self.failover = True
7703 if instance.disk_template not in constants.DTS_MIRRORED:
7704 if self.failover:
7705 text = "failover"
7706 else:
7707 text = "migration"
7708 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7709 " %s" % (instance.disk_template, text),
7710 errors.ECODE_STATE)
7712 if instance.disk_template in constants.DTS_EXT_MIRROR:
7713 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7715 if self.lu.op.iallocator:
7716 self._RunAllocator()
7717 else:
7718 # No iallocator given: we set self.target_node from the opcode, as
7719 # it is required by the checks below
7720 self.target_node = self.lu.op.target_node
7722 # Check that the target node is correct in terms of instance policy
7723 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
7724 ipolicy = _CalculateGroupIPolicy(cluster, nodeinfo.group)
7725 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
7726 ignore=self.ignore_ipolicy)
7728 # self.target_node is already populated, either directly or by the
7729 # iallocator run
7730 target_node = self.target_node
7731 if self.target_node == instance.primary_node:
7732 raise errors.OpPrereqError("Cannot migrate instance %s"
7733 " to its primary (%s)" %
7734 (instance.name, instance.primary_node))
7736 if len(self.lu.tasklets) == 1:
7737 # It is safe to release locks only when we're the only tasklet
7739 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7740 keep=[instance.primary_node, self.target_node])
7742 else:
7743 secondary_nodes = instance.secondary_nodes
7744 if not secondary_nodes:
7745 raise errors.ConfigurationError("No secondary node but using"
7746 " %s disk template" %
7747 instance.disk_template)
7748 target_node = secondary_nodes[0]
7749 if self.lu.op.iallocator or (self.lu.op.target_node and
7750 self.lu.op.target_node != target_node):
7751 if self.failover:
7752 text = "failed over"
7753 else:
7754 text = "migrated"
7755 raise errors.OpPrereqError("Instances with disk template %s cannot"
7756 " be %s to arbitrary nodes"
7757 " (neither an iallocator nor a target"
7758 " node can be passed)" %
7759 (instance.disk_template, text),
7760 errors.ECODE_INVAL)
7761 nodeinfo = self.cfg.GetNodeInfo(target_node)
7762 ipolicy = _CalculateGroupIPolicy(cluster, nodeinfo.group)
7763 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
7764 ignore=self.ignore_ipolicy)
7766 i_be = cluster.FillBE(instance)
7768 # check memory requirements on the secondary node
7769 if not self.failover or instance.admin_state == constants.ADMINST_UP:
7770 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7771 instance.name, i_be[constants.BE_MAXMEM],
7772 instance.hypervisor)
7773 else:
7774 self.lu.LogInfo("Not checking memory on the secondary node as"
7775 " instance will not be started")
7777 # check if failover must be forced instead of migration
7778 if (not self.cleanup and not self.failover and
7779 i_be[constants.BE_ALWAYS_FAILOVER]):
7780 if self.fallback:
7781 self.lu.LogInfo("Instance configured to always failover; fallback"
7782 " to failover")
7783 self.failover = True
7784 else:
7785 raise errors.OpPrereqError("This instance has been configured to"
7786 " always failover, please allow failover",
7787 errors.ECODE_STATE)
7789 # check bridge existence
7790 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7792 if not self.cleanup:
7793 _CheckNodeNotDrained(self.lu, target_node)
7794 if not self.failover:
7795 result = self.rpc.call_instance_migratable(instance.primary_node,
7796 instance)
7797 if result.fail_msg and self.fallback:
7798 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7799 " failover")
7800 self.failover = True
7801 else:
7802 result.Raise("Can't migrate, please use failover",
7803 prereq=True, ecode=errors.ECODE_STATE)
7805 assert not (self.failover and self.cleanup)
7807 if not self.failover:
7808 if self.lu.op.live is not None and self.lu.op.mode is not None:
7809 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7810 " parameters are accepted",
7812 if self.lu.op.live is not None:
7814 self.lu.op.mode = constants.HT_MIGRATION_LIVE
7816 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7817 # reset the 'live' parameter to None so that repeated
7818 # invocations of CheckPrereq do not raise an exception
7819 self.lu.op.live = None
7820 elif self.lu.op.mode is None:
7821 # read the default value from the hypervisor
7822 i_hv = cluster.FillHV(self.instance, skip_globals=False)
7823 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7825 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7826 else:
7827 # Failover is never live
7828 self.live = False
7830 def _RunAllocator(self):
7831 """Run the allocator based on input opcode.
7834 # FIXME: add a self.ignore_ipolicy option
7835 ial = IAllocator(self.cfg, self.rpc,
7836 mode=constants.IALLOCATOR_MODE_RELOC,
7837 name=self.instance_name,
7838 # TODO See why hail breaks with a single node below
7839 relocate_from=[self.instance.primary_node,
7840 self.instance.primary_node],
7841 )
7843 ial.Run(self.lu.op.iallocator)
7845 if not ial.success:
7846 raise errors.OpPrereqError("Can't compute nodes using"
7847 " iallocator '%s': %s" %
7848 (self.lu.op.iallocator, ial.info),
7849 errors.ECODE_NORES)
7850 if len(ial.result) != ial.required_nodes:
7851 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7852 " of nodes (%s), required %s" %
7853 (self.lu.op.iallocator, len(ial.result),
7854 ial.required_nodes), errors.ECODE_FAULT)
7855 self.target_node = ial.result[0]
7856 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7857 self.instance_name, self.lu.op.iallocator,
7858 utils.CommaJoin(ial.result))
7860 def _WaitUntilSync(self):
7861 """Poll with custom rpc for disk sync.
7863 This uses our own step-based rpc call.
7866 self.feedback_fn("* wait until resync is done")
7870 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7872 self.instance.disks)
7874 for node, nres in result.items():
7875 nres.Raise("Cannot resync disks on node %s" % node)
7876 node_done, node_percent = nres.payload
7877 all_done = all_done and node_done
7878 if node_percent is not None:
7879 min_percent = min(min_percent, node_percent)
7881 if min_percent < 100:
7882 self.feedback_fn(" - progress: %.1f%%" % min_percent)
7885 def _EnsureSecondary(self, node):
7886 """Demote a node to secondary.
7889 self.feedback_fn("* switching node %s to secondary mode" % node)
7891 for dev in self.instance.disks:
7892 self.cfg.SetDiskID(dev, node)
7894 result = self.rpc.call_blockdev_close(node, self.instance.name,
7895 self.instance.disks)
7896 result.Raise("Cannot change disk to secondary on node %s" % node)
7898 def _GoStandalone(self):
7899 """Disconnect from the network.
7902 self.feedback_fn("* changing into standalone mode")
7903 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7904 self.instance.disks)
7905 for node, nres in result.items():
7906 nres.Raise("Cannot disconnect disks node %s" % node)
7908 def _GoReconnect(self, multimaster):
7909 """Reconnect to the network.
7915 msg = "single-master"
7916 self.feedback_fn("* changing disks into %s mode" % msg)
7917 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7918 self.instance.disks,
7919 self.instance.name, multimaster)
7920 for node, nres in result.items():
7921 nres.Raise("Cannot change disks config on node %s" % node)
7923 def _ExecCleanup(self):
7924 """Try to cleanup after a failed migration.
7926 The cleanup is done by:
7927 - check that the instance is running only on one node
7928 (and update the config if needed)
7929 - change disks on its secondary node to secondary
7930 - wait until disks are fully synchronized
7931 - disconnect from the network
7932 - change disks into single-master mode
7933 - wait again until disks are fully synchronized
7936 instance = self.instance
7937 target_node = self.target_node
7938 source_node = self.source_node
7940 # check running on only one node
7941 self.feedback_fn("* checking where the instance actually runs"
7942 " (if this hangs, the hypervisor might be in"
7944 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7945 for node, result in ins_l.items():
7946 result.Raise("Can't contact node %s" % node)
7948 runningon_source = instance.name in ins_l[source_node].payload
7949 runningon_target = instance.name in ins_l[target_node].payload
7951 if runningon_source and runningon_target:
7952 raise errors.OpExecError("Instance seems to be running on two nodes,"
7953 " or the hypervisor is confused; you will have"
7954 " to ensure manually that it runs only on one"
7955 " and restart this operation")
7957 if not (runningon_source or runningon_target):
7958 raise errors.OpExecError("Instance does not seem to be running at all;"
7959 " in this case it's safer to repair by"
7960 " running 'gnt-instance stop' to ensure disk"
7961 " shutdown, and then restarting it")
7963 if runningon_target:
7964 # the migration has actually succeeded, we need to update the config
7965 self.feedback_fn("* instance running on secondary node (%s),"
7966 " updating config" % target_node)
7967 instance.primary_node = target_node
7968 self.cfg.Update(instance, self.feedback_fn)
7969 demoted_node = source_node
7970 else:
7971 self.feedback_fn("* instance confirmed to be running on its"
7972 " primary node (%s)" % source_node)
7973 demoted_node = target_node
7975 if instance.disk_template in constants.DTS_INT_MIRROR:
7976 self._EnsureSecondary(demoted_node)
7977 try:
7978 self._WaitUntilSync()
7979 except errors.OpExecError:
7980 # we ignore errors here, since if the device is standalone, it
7981 # won't be able to sync
7982 pass
7983 self._GoStandalone()
7984 self._GoReconnect(False)
7985 self._WaitUntilSync()
7987 self.feedback_fn("* done")
7989 def _RevertDiskStatus(self):
7990 """Try to revert the disk status after a failed migration.
7993 target_node = self.target_node
7994 if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7995 return
7997 try:
7998 self._EnsureSecondary(target_node)
7999 self._GoStandalone()
8000 self._GoReconnect(False)
8001 self._WaitUntilSync()
8002 except errors.OpExecError, err:
8003 self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
8004 " please try to recover the instance manually;"
8005 " error '%s'" % str(err))
8007 def _AbortMigration(self):
8008 """Call the hypervisor code to abort a started migration.
8011 instance = self.instance
8012 target_node = self.target_node
8013 source_node = self.source_node
8014 migration_info = self.migration_info
8016 abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
8017 instance,
8018 migration_info,
8019 False)
8020 abort_msg = abort_result.fail_msg
8021 if abort_msg:
8022 logging.error("Aborting migration failed on target node %s: %s",
8023 target_node, abort_msg)
8024 # Don't raise an exception here, as we still have to try to revert the
8025 # disk status, even if this step failed.
8027 abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
8028 instance, False, self.live)
8029 abort_msg = abort_result.fail_msg
8030 if abort_msg:
8031 logging.error("Aborting migration failed on source node %s: %s",
8032 source_node, abort_msg)
8034 def _ExecMigration(self):
8035 """Migrate an instance.
8037 The migrate is done by:
8038 - change the disks into dual-master mode
8039 - wait until disks are fully synchronized again
8040 - migrate the instance
8041 - change disks on the new secondary node (the old primary) to secondary
8042 - wait until disks are fully synchronized
8043 - change disks into single-master mode
8046 instance = self.instance
8047 target_node = self.target_node
8048 source_node = self.source_node
8050 # Check for hypervisor version mismatch and warn the user.
8051 nodeinfo = self.rpc.call_node_info([source_node, target_node],
8052 None, [self.instance.hypervisor])
8053 for ninfo in nodeinfo.values():
8054 ninfo.Raise("Unable to retrieve node information from node '%s'" %
8056 (_, _, (src_info, )) = nodeinfo[source_node].payload
8057 (_, _, (dst_info, )) = nodeinfo[target_node].payload
8059 if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
8060 (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
8061 src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
8062 dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
8063 if src_version != dst_version:
8064 self.feedback_fn("* warning: hypervisor version mismatch between"
8065 " source (%s) and target (%s) node" %
8066 (src_version, dst_version))
8068 self.feedback_fn("* checking disk consistency between source and target")
8069 for dev in instance.disks:
8070 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
8071 raise errors.OpExecError("Disk %s is degraded or not fully"
8072 " synchronized on target node,"
8073 " aborting migration" % dev.iv_name)
8075 # First get the migration information from the remote node
8076 result = self.rpc.call_migration_info(source_node, instance)
8077 msg = result.fail_msg
8078 if msg:
8079 log_err = ("Failed fetching source migration information from %s: %s" %
8080 (source_node, msg))
8081 logging.error(log_err)
8082 raise errors.OpExecError(log_err)
8084 self.migration_info = migration_info = result.payload
8086 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8087 # Then switch the disks to master/master mode
8088 self._EnsureSecondary(target_node)
8089 self._GoStandalone()
8090 self._GoReconnect(True)
8091 self._WaitUntilSync()
8093 self.feedback_fn("* preparing %s to accept the instance" % target_node)
8094 result = self.rpc.call_accept_instance(target_node,
8095 instance,
8096 migration_info,
8097 self.nodes_ip[target_node])
8099 msg = result.fail_msg
8100 if msg:
8101 logging.error("Instance pre-migration failed, trying to revert"
8102 " disk status: %s", msg)
8103 self.feedback_fn("Pre-migration failed, aborting")
8104 self._AbortMigration()
8105 self._RevertDiskStatus()
8106 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
8107 (instance.name, msg))
8109 self.feedback_fn("* migrating instance to %s" % target_node)
8110 result = self.rpc.call_instance_migrate(source_node, instance,
8111 self.nodes_ip[target_node],
8112 self.live)
8113 msg = result.fail_msg
8114 if msg:
8115 logging.error("Instance migration failed, trying to revert"
8116 " disk status: %s", msg)
8117 self.feedback_fn("Migration failed, aborting")
8118 self._AbortMigration()
8119 self._RevertDiskStatus()
8120 raise errors.OpExecError("Could not migrate instance %s: %s" %
8121 (instance.name, msg))
8123 self.feedback_fn("* starting memory transfer")
8124 last_feedback = time.time()
8125 while True:
8126 result = self.rpc.call_instance_get_migration_status(source_node,
8127 instance)
8128 msg = result.fail_msg
8129 ms = result.payload # MigrationStatus instance
8130 if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
8131 logging.error("Instance migration failed, trying to revert"
8132 " disk status: %s", msg)
8133 self.feedback_fn("Migration failed, aborting")
8134 self._AbortMigration()
8135 self._RevertDiskStatus()
8136 raise errors.OpExecError("Could not migrate instance %s: %s" %
8137 (instance.name, msg))
8139 if result.payload.status != constants.HV_MIGRATION_ACTIVE:
8140 self.feedback_fn("* memory transfer complete")
8143 if (utils.TimeoutExpired(last_feedback,
8144 self._MIGRATION_FEEDBACK_INTERVAL) and
8145 ms.transferred_ram is not None):
8146 mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
8147 self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
8148 last_feedback = time.time()
8150 time.sleep(self._MIGRATION_POLL_INTERVAL)
8152 result = self.rpc.call_instance_finalize_migration_src(source_node,
8153 instance,
8154 True,
8155 self.live)
8156 msg = result.fail_msg
8157 if msg:
8158 logging.error("Instance migration succeeded, but finalization failed"
8159 " on the source node: %s", msg)
8160 raise errors.OpExecError("Could not finalize instance migration: %s" %
8161 msg)
8163 instance.primary_node = target_node
8165 # distribute new instance config to the other nodes
8166 self.cfg.Update(instance, self.feedback_fn)
8168 result = self.rpc.call_instance_finalize_migration_dst(target_node,
8169 instance,
8170 migration_info,
8171 True)
8172 msg = result.fail_msg
8173 if msg:
8174 logging.error("Instance migration succeeded, but finalization failed"
8175 " on the target node: %s", msg)
8176 raise errors.OpExecError("Could not finalize instance migration: %s" %
8177 msg)
8179 if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
8180 self._EnsureSecondary(source_node)
8181 self._WaitUntilSync()
8182 self._GoStandalone()
8183 self._GoReconnect(False)
8184 self._WaitUntilSync()
8186 self.feedback_fn("* done")
8188 def _ExecFailover(self):
8189 """Failover an instance.
8191 The failover is done by shutting it down on its present node and
8192 starting it on the secondary.
8195 instance = self.instance
8196 primary_node = self.cfg.GetNodeInfo(instance.primary_node)
8198 source_node = instance.primary_node
8199 target_node = self.target_node
8201 if instance.admin_state == constants.ADMINST_UP:
8202 self.feedback_fn("* checking disk consistency between source and target")
8203 for dev in instance.disks:
8204 # for drbd, these are drbd over lvm
8205 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
8206 if primary_node.offline:
8207 self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
8209 (primary_node.name, dev.iv_name, target_node))
8210 elif not self.ignore_consistency:
8211 raise errors.OpExecError("Disk %s is degraded on target node,"
8212 " aborting failover" % dev.iv_name)
8214 self.feedback_fn("* not checking disk consistency as instance is not"
8217 self.feedback_fn("* shutting down instance on source node")
8218 logging.info("Shutting down instance %s on node %s",
8219 instance.name, source_node)
8221 result = self.rpc.call_instance_shutdown(source_node, instance,
8222 self.shutdown_timeout)
8223 msg = result.fail_msg
8224 if msg:
8225 if self.ignore_consistency or primary_node.offline:
8226 self.lu.LogWarning("Could not shutdown instance %s on node %s,"
8227 " proceeding anyway; please make sure node"
8228 " %s is down; error details: %s",
8229 instance.name, source_node, source_node, msg)
8230 else:
8231 raise errors.OpExecError("Could not shutdown instance %s on"
8232 " node %s: %s" %
8233 (instance.name, source_node, msg))
8235 self.feedback_fn("* deactivating the instance's disks on source node")
8236 if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
8237 raise errors.OpExecError("Can't shut down the instance's disks")
8239 instance.primary_node = target_node
8240 # distribute new instance config to the other nodes
8241 self.cfg.Update(instance, self.feedback_fn)
8243 # Only start the instance if it's marked as up
8244 if instance.admin_state == constants.ADMINST_UP:
8245 self.feedback_fn("* activating the instance's disks on target node %s" %
8247 logging.info("Starting instance %s on node %s",
8248 instance.name, target_node)
8250 disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
8251 ignore_secondaries=True)
8252 if not disks_ok:
8253 _ShutdownInstanceDisks(self.lu, instance)
8254 raise errors.OpExecError("Can't activate the instance's disks")
8256 self.feedback_fn("* starting the instance on the target node %s" %
8258 result = self.rpc.call_instance_start(target_node, (instance, None, None),
8260 msg = result.fail_msg
8262 _ShutdownInstanceDisks(self.lu, instance)
8263 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8264 (instance.name, target_node, msg))
8266 def Exec(self, feedback_fn):
8267 """Perform the migration.
8270 self.feedback_fn = feedback_fn
8271 self.source_node = self.instance.primary_node
8273 # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
8274 if self.instance.disk_template in constants.DTS_INT_MIRROR:
8275 self.target_node = self.instance.secondary_nodes[0]
8276 # Otherwise self.target_node has been populated either
8277 # directly, or through an iallocator.
8279 self.all_nodes = [self.source_node, self.target_node]
8280 self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
8281 in self.cfg.GetMultiNodeInfo(self.all_nodes))
8284 feedback_fn("Failover instance %s" % self.instance.name)
8285 self._ExecFailover()
8287 feedback_fn("Migrating instance %s" % self.instance.name)
8290 return self._ExecCleanup()
8292 return self._ExecMigration()
8295 def _CreateBlockDev(lu, node, instance, device, force_create,
8296 info, force_open):
8297 """Create a tree of block devices on a given node.
8299 If this device type has to be created on secondaries, create it and
8302 If not, just recurse to children keeping the same 'force' value.
8304 @param lu: the lu on whose behalf we execute
8305 @param node: the node on which to create the device
8306 @type instance: L{objects.Instance}
8307 @param instance: the instance which owns the device
8308 @type device: L{objects.Disk}
8309 @param device: the device to create
8310 @type force_create: boolean
8311 @param force_create: whether to force creation of this device; this
8312 will be changed to True whenever we find a device which has
8313 CreateOnSecondary() attribute
8314 @param info: the extra 'metadata' we should attach to the device
8315 (this will be represented as a LVM tag)
8316 @type force_open: boolean
8317 @param force_open: this parameter will be passed to the
8318 L{backend.BlockdevCreate} function where it specifies
8319 whether we run on primary or not, and it affects both
8320 the child assembly and the device's own Open() execution
8323 if device.CreateOnSecondary():
8324 force_create = True
8326 if device.children:
8327 for child in device.children:
8328 _CreateBlockDev(lu, node, instance, child, force_create,
8329 info, force_open)
8331 if not force_create:
8332 return
8334 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
8337 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8338 """Create a single block device on a given node.
8340 This will not recurse over children of the device, so they must be
8341 created in advance.
8343 @param lu: the lu on whose behalf we execute
8344 @param node: the node on which to create the device
8345 @type instance: L{objects.Instance}
8346 @param instance: the instance which owns the device
8347 @type device: L{objects.Disk}
8348 @param device: the device to create
8349 @param info: the extra 'metadata' we should attach to the device
8350 (this will be represented as a LVM tag)
8351 @type force_open: boolean
8352 @param force_open: this parameter will be passed to the
8353 L{backend.BlockdevCreate} function where it specifies
8354 whether we run on primary or not, and it affects both
8355 the child assembly and the device's own Open() execution
8358 lu.cfg.SetDiskID(device, node)
8359 result = lu.rpc.call_blockdev_create(node, device, device.size,
8360 instance.name, force_open, info)
8361 result.Raise("Can't create block device %s on"
8362 " node %s for instance %s" % (device, node, instance.name))
8363 if device.physical_id is None:
8364 device.physical_id = result.payload
8367 def _GenerateUniqueNames(lu, exts):
8368 """Generate a suitable LV name.
8370 This will generate a logical volume name for the given instance.
8373 results = []
8374 for val in exts:
8375 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8376 results.append("%s%s" % (new_id, val))
8377 return results
8380 def _ComputeLDParams(disk_template, disk_params):
8381 """Computes Logical Disk parameters from Disk Template parameters.
8383 @type disk_template: string
8384 @param disk_template: disk template, one of L{constants.DISK_TEMPLATES}
8385 @type disk_params: dict
8386 @param disk_params: disk template parameters; dict(template_name -> parameters)
8388 @return: a list of dicts, one for each node of the disk hierarchy. Each dict
8389 contains the LD parameters of the node. The tree is flattened in-order.
8392 if disk_template not in constants.DISK_TEMPLATES:
8393 raise errors.ProgrammerError("Unknown disk template %s" % disk_template)
8395 result = list()
8396 dt_params = disk_params[disk_template]
8397 if disk_template == constants.DT_DRBD8:
8398 drbd_params = {
8399 constants.LDP_RESYNC_RATE: dt_params[constants.DRBD_RESYNC_RATE],
8400 constants.LDP_BARRIERS: dt_params[constants.DRBD_DISK_BARRIERS],
8401 constants.LDP_NO_META_FLUSH: dt_params[constants.DRBD_META_BARRIERS],
8402 constants.LDP_DEFAULT_METAVG: dt_params[constants.DRBD_DEFAULT_METAVG],
8403 constants.LDP_DISK_CUSTOM: dt_params[constants.DRBD_DISK_CUSTOM],
8404 constants.LDP_NET_CUSTOM: dt_params[constants.DRBD_NET_CUSTOM],
8405 constants.LDP_DYNAMIC_RESYNC: dt_params[constants.DRBD_DYNAMIC_RESYNC],
8406 constants.LDP_PLAN_AHEAD: dt_params[constants.DRBD_PLAN_AHEAD],
8407 constants.LDP_FILL_TARGET: dt_params[constants.DRBD_FILL_TARGET],
8408 constants.LDP_DELAY_TARGET: dt_params[constants.DRBD_DELAY_TARGET],
8409 constants.LDP_MAX_RATE: dt_params[constants.DRBD_MAX_RATE],
8410 constants.LDP_MIN_RATE: dt_params[constants.DRBD_MIN_RATE],
8411 }
8413 drbd_params = \
8414 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_DRBD8],
8415 drbd_params)
8417 result.append(drbd_params)
8419 # data LV
8420 data_params = {
8421 constants.LDP_STRIPES: dt_params[constants.DRBD_DATA_STRIPES],
8422 }
8423 data_params = \
8424 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8425 data_params)
8426 result.append(data_params)
8428 # metadata LV
8429 meta_params = {
8430 constants.LDP_STRIPES: dt_params[constants.DRBD_META_STRIPES],
8431 }
8432 meta_params = \
8433 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8434 meta_params)
8435 result.append(meta_params)
8437 elif (disk_template == constants.DT_FILE or
8438 disk_template == constants.DT_SHARED_FILE):
8439 result.append(constants.DISK_LD_DEFAULTS[constants.LD_FILE])
8441 elif disk_template == constants.DT_PLAIN:
8442 params = {
8443 constants.LDP_STRIPES: dt_params[constants.LV_STRIPES],
8444 }
8445 params = \
8446 objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
8447 params)
8448 result.append(params)
8450 elif disk_template == constants.DT_BLOCK:
8451 result.append(constants.DISK_LD_DEFAULTS[constants.LD_BLOCKDEV])
8453 return result
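# Sketch of the returned structure (shapes per the docstring above): for
# DT_DRBD8 the list has three entries, flattened in-order over the disk
# tree, i.e. [<LD_DRBD8 params>, <data LD_LV params>, <meta LD_LV params>],
# while single-level templates such as DT_PLAIN or DT_FILE yield one entry.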
8456 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8457 iv_name, p_minor, s_minor, drbd_params, data_params,
8458 meta_params):
8459 """Generate a drbd8 device complete with its children.
8462 assert len(vgnames) == len(names) == 2
8463 port = lu.cfg.AllocatePort()
8464 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8466 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8467 logical_id=(vgnames[0], names[0]),
8468 params=data_params)
8469 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8470 logical_id=(vgnames[1], names[1]),
8471 params=meta_params)
8472 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8473 logical_id=(primary, secondary, port,
8474 p_minor, s_minor,
8475 shared_secret),
8476 children=[dev_data, dev_meta],
8477 iv_name=iv_name, params=drbd_params)
8478 return drbd_dev
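# Illustrative shape of the returned object (names assumed): for a 1024 MiB
# disk the result is a LD_DRBD8 device with logical_id
# (primary, secondary, port, p_minor, s_minor, shared_secret) and two LD_LV
# children, "<lv>_data" of size 1024 and "<lv>_meta" of size DRBD_META_SIZE.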
8481 def _GenerateDiskTemplate(lu, template_name,
8482 instance_name, primary_node,
8483 secondary_nodes, disk_info,
8484 file_storage_dir, file_driver,
8485 base_index, feedback_fn, disk_params):
8486 """Generate the entire disk layout for a given template type.
8489 #TODO: compute space requirements
8491 vgname = lu.cfg.GetVGName()
8492 disk_count = len(disk_info)
8493 disks = []
8494 ld_params = _ComputeLDParams(template_name, disk_params)
8495 if template_name == constants.DT_DISKLESS:
8496 pass
8497 elif template_name == constants.DT_PLAIN:
8498 if len(secondary_nodes) != 0:
8499 raise errors.ProgrammerError("Wrong template configuration")
8501 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8502 for i in range(disk_count)])
8503 for idx, disk in enumerate(disk_info):
8504 disk_index = idx + base_index
8505 vg = disk.get(constants.IDISK_VG, vgname)
8506 feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
8507 disk_dev = objects.Disk(dev_type=constants.LD_LV,
8508 size=disk[constants.IDISK_SIZE],
8509 logical_id=(vg, names[idx]),
8510 iv_name="disk/%d" % disk_index,
8511 mode=disk[constants.IDISK_MODE],
8512 params=ld_params[0])
8513 disks.append(disk_dev)
8514 elif template_name == constants.DT_DRBD8:
8515 drbd_params, data_params, meta_params = ld_params
8516 if len(secondary_nodes) != 1:
8517 raise errors.ProgrammerError("Wrong template configuration")
8518 remote_node = secondary_nodes[0]
8519 minors = lu.cfg.AllocateDRBDMinor(
8520 [primary_node, remote_node] * len(disk_info), instance_name)
8523 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8524 for i in range(disk_count)]):
8525 names.append(lv_prefix + "_data")
8526 names.append(lv_prefix + "_meta")
8527 for idx, disk in enumerate(disk_info):
8528 disk_index = idx + base_index
8529 drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]
8530 data_vg = disk.get(constants.IDISK_VG, vgname)
8531 meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
8532 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8533 disk[constants.IDISK_SIZE],
8535 names[idx * 2:idx * 2 + 2],
8536 "disk/%d" % disk_index,
8537 minors[idx * 2], minors[idx * 2 + 1],
8538 drbd_params, data_params, meta_params)
8539 disk_dev.mode = disk[constants.IDISK_MODE]
8540 disks.append(disk_dev)
8541 elif template_name == constants.DT_FILE:
8542 if len(secondary_nodes) != 0:
8543 raise errors.ProgrammerError("Wrong template configuration")
8545 opcodes.RequireFileStorage()
8547 for idx, disk in enumerate(disk_info):
8548 disk_index = idx + base_index
8549 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8550 size=disk[constants.IDISK_SIZE],
8551 iv_name="disk/%d" % disk_index,
8552 logical_id=(file_driver,
8553 "%s/disk%d" % (file_storage_dir,
8555 mode=disk[constants.IDISK_MODE],
8556 params=ld_params[0])
8557 disks.append(disk_dev)
8558 elif template_name == constants.DT_SHARED_FILE:
8559 if len(secondary_nodes) != 0:
8560 raise errors.ProgrammerError("Wrong template configuration")
8562 opcodes.RequireSharedFileStorage()
8564 for idx, disk in enumerate(disk_info):
8565 disk_index = idx + base_index
8566 disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8567 size=disk[constants.IDISK_SIZE],
8568 iv_name="disk/%d" % disk_index,
8569 logical_id=(file_driver,
8570 "%s/disk%d" % (file_storage_dir,
8572 mode=disk[constants.IDISK_MODE],
8573 params=ld_params[0])
8574 disks.append(disk_dev)
8575 elif template_name == constants.DT_BLOCK:
8576 if len(secondary_nodes) != 0:
8577 raise errors.ProgrammerError("Wrong template configuration")
8579 for idx, disk in enumerate(disk_info):
8580 disk_index = idx + base_index
8581 disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
8582 size=disk[constants.IDISK_SIZE],
8583 logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
8584 disk[constants.IDISK_ADOPT]),
8585 iv_name="disk/%d" % disk_index,
8586 mode=disk[constants.IDISK_MODE],
8587 params=ld_params[0])
8588 disks.append(disk_dev)
8591 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
8595 def _GetInstanceInfoText(instance):
8596 """Compute that text that should be added to the disk's metadata.
8599 return "originstname+%s" % instance.name
8602 def _CalcEta(time_taken, written, total_size):
8603 """Calculates the ETA based on size written and total size.
8605 @param time_taken: The time taken so far
8606 @param written: amount written so far
8607 @param total_size: The total size of data to be written
8608 @return: The remaining time in seconds
8611 avg_time = time_taken / float(written)
8612 return (total_size - written) * avg_time
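# Worked example (numbers assumed): if 250 units were written in 50 seconds,
# the average cost is 0.2 s/unit, so for a total of 1000 units the remaining
# 750 units give _CalcEta(50.0, 250, 1000) == 150.0 seconds.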
8615 def _WipeDisks(lu, instance):
8616 """Wipes instance disks.
8618 @type lu: L{LogicalUnit}
8619 @param lu: the logical unit on whose behalf we execute
8620 @type instance: L{objects.Instance}
8621 @param instance: the instance whose disks we should wipe
8622 @return: the success of the wipe
8625 node = instance.primary_node
8627 for device in instance.disks:
8628 lu.cfg.SetDiskID(device, node)
8630 logging.info("Pause sync of instance %s disks", instance.name)
8631 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
8633 for idx, success in enumerate(result.payload):
8634 if not success:
8635 logging.warn("pause-sync of instance %s for disk %d failed",
8636 instance.name, idx)
8639 for idx, device in enumerate(instance.disks):
8640 # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
8641 # MAX_WIPE_CHUNK at max
8642 wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8643 constants.MIN_WIPE_CHUNK_PERCENT)
8644 # we _must_ make this an int, otherwise rounding errors will
8645 # occur
8646 wipe_chunk_size = int(wipe_chunk_size)
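# Worked example (assuming the usual constant values, MIN_WIPE_CHUNK_PERCENT
# of 10 and MAX_WIPE_CHUNK of 1024): a 2048 MiB disk is wiped in chunks of
# int(min(1024, 2048 / 100.0 * 10)) = 204 MiB, while any disk of 10 GiB or
# more is capped at the 1024 MiB maximum chunk size.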
8648 lu.LogInfo("* Wiping disk %d", idx)
8649 logging.info("Wiping disk %d for instance %s, node %s using"
8650 " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8652 offset = 0
8653 size = device.size
8654 last_output = 0
8655 start_time = time.time()
8657 while offset < size:
8658 wipe_size = min(wipe_chunk_size, size - offset)
8659 logging.debug("Wiping disk %d, offset %s, chunk %s",
8660 idx, offset, wipe_size)
8661 result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
8662 result.Raise("Could not wipe disk %d at offset %d for size %d" %
8663 (idx, offset, wipe_size))
8664 now = time.time()
8665 offset += wipe_size
8666 if now - last_output >= 60:
8667 eta = _CalcEta(now - start_time, offset, size)
8668 lu.LogInfo(" - done: %.1f%% ETA: %s" %
8669 (offset / float(size) * 100, utils.FormatSeconds(eta)))
8670 last_output = now
8672 logging.info("Resume sync of instance %s disks", instance.name)
8674 result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
8676 for idx, success in enumerate(result.payload):
8677 if not success:
8678 lu.LogWarning("Resume sync of disk %d failed, please have a"
8679 " look at the status and troubleshoot the issue", idx)
8680 logging.warn("resume-sync of instance %s for disk %d failed",
8681 instance.name, idx)
8684 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8685 """Create all disks for an instance.
8687 This abstracts away some work from AddInstance.
8689 @type lu: L{LogicalUnit}
8690 @param lu: the logical unit on whose behalf we execute
8691 @type instance: L{objects.Instance}
8692 @param instance: the instance whose disks we should create
8694 @param to_skip: list of indices to skip
8695 @type target_node: string
8696 @param target_node: if passed, overrides the target node for creation
8698 @return: the success of the creation
8701 info = _GetInstanceInfoText(instance)
8702 if target_node is None:
8703 pnode = instance.primary_node
8704 all_nodes = instance.all_nodes
8705 else:
8706 pnode = target_node
8707 all_nodes = [pnode]
8709 if instance.disk_template in constants.DTS_FILEBASED:
8710 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8711 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8713 result.Raise("Failed to create directory '%s' on"
8714 " node %s" % (file_storage_dir, pnode))
8716 # Note: this needs to be kept in sync with adding of disks in
8717 # LUInstanceSetParams
8718 for idx, device in enumerate(instance.disks):
8719 if to_skip and idx in to_skip:
8720 continue
8721 logging.info("Creating volume %s for instance %s",
8722 device.iv_name, instance.name)
8724 for node in all_nodes:
8725 f_create = node == pnode
8726 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
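# Note on the f_create flag above: devices are force-created only on the
# primary node (or on the overriding target node); on the remaining nodes
# _CreateBlockDev recurses with force_create=False and only creates device
# types for which CreateOnSecondary() is true (e.g. the LVs under DRBD).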
8729 def _RemoveDisks(lu, instance, target_node=None):
8730 """Remove all disks for an instance.
8732 This abstracts away some work from `AddInstance()` and
8733 `RemoveInstance()`. Note that in case some of the devices couldn't
8734 be removed, the removal will continue with the other ones (compare
8735 with `_CreateDisks()`).
8737 @type lu: L{LogicalUnit}
8738 @param lu: the logical unit on whose behalf we execute
8739 @type instance: L{objects.Instance}
8740 @param instance: the instance whose disks we should remove
8741 @type target_node: string
8742 @param target_node: used to override the node on which to remove the disks
8744 @return: the success of the removal
8747 logging.info("Removing block devices for instance %s", instance.name)
8750 for device in instance.disks:
8751 if target_node:
8752 edata = [(target_node, device)]
8753 else:
8754 edata = device.ComputeNodeTree(instance.primary_node)
8755 for node, disk in edata:
8756 lu.cfg.SetDiskID(disk, node)
8757 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
8758 if msg:
8759 lu.LogWarning("Could not remove block device %s on node %s,"
8760 " continuing anyway: %s", device.iv_name, node, msg)
8763 # if this is a DRBD disk, return its port to the pool
8764 if device.dev_type in constants.LDS_DRBD:
8765 tcp_port = device.logical_id[2]
8766 lu.cfg.AddTcpUdpPort(tcp_port)
8768 if instance.disk_template == constants.DT_FILE:
8769 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8770 if target_node:
8771 tgt = target_node
8772 else:
8773 tgt = instance.primary_node
8774 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
8775 if result.fail_msg:
8776 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
8777 file_storage_dir, tgt, result.fail_msg)
8783 def _ComputeDiskSizePerVG(disk_template, disks):
8784 """Compute disk size requirements in the volume group
8787 def _compute(disks, payload):
8788 """Universal algorithm.
8791 vgs = {}
8792 for disk in disks:
8793 vgs[disk[constants.IDISK_VG]] = \
8794 vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + payload
8795 return vgs
8798 # Required free disk space as a function of disk and swap space
8799 req_size_dict = {
8800 constants.DT_DISKLESS: {},
8801 constants.DT_PLAIN: _compute(disks, 0),
8802 # 128 MB are added for drbd metadata for each disk
8803 constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
8804 constants.DT_FILE: {},
8805 constants.DT_SHARED_FILE: {},
8806 }
8808 if disk_template not in req_size_dict:
8809 raise errors.ProgrammerError("Disk template '%s' size requirement"
8810 " is unknown" % disk_template)
8812 return req_size_dict[disk_template]
8815 def _ComputeDiskSize(disk_template, disks):
8816 """Compute disk size requirements in the volume group
8819 # Required free disk space as a function of disk and swap space
8820 req_size_dict = {
8821 constants.DT_DISKLESS: None,
8822 constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
8823 # 128 MB are added for drbd metadata for each disk
8824 constants.DT_DRBD8:
8825 sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
8826 constants.DT_FILE: None,
8827 constants.DT_SHARED_FILE: 0,
8828 constants.DT_BLOCK: 0,
8829 }
8831 if disk_template not in req_size_dict:
8832 raise errors.ProgrammerError("Disk template '%s' size requirement"
8833 " is unknown" % disk_template)
8835 return req_size_dict[disk_template]
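# Worked example: for two 1024 MiB disks, DT_PLAIN needs 2048 MiB in the
# volume group, while DT_DRBD8 needs 2048 + 2 * DRBD_META_SIZE (one 128 MiB
# metadata device per disk, per the comment above) = 2304 MiB; file-based
# and diskless templates report no volume group requirement.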
8838 def _FilterVmNodes(lu, nodenames):
8839 """Filters out non-vm_capable nodes from a list.
8841 @type lu: L{LogicalUnit}
8842 @param lu: the logical unit for which we check
8843 @type nodenames: list
8844 @param nodenames: the list of nodes on which we should check
8846 @return: the list of vm-capable nodes
8849 non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
8850 return [name for name in nodenames if name not in non_vm_nodes]
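# Hypothetical usage sketch: with "node1" marked not vm_capable,
#
#   _FilterVmNodes(lu, ["node1", "node2"])  # -> ["node2"]
#
# so the parameter-validation RPCs below are only sent to nodes that can
# actually run instances.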
8853 def _CheckHVParams(lu, nodenames, hvname, hvparams):
8854 """Hypervisor parameter validation.
8856 This function abstracts the hypervisor parameter validation to be
8857 used in both instance create and instance modify.
8859 @type lu: L{LogicalUnit}
8860 @param lu: the logical unit for which we check
8861 @type nodenames: list
8862 @param nodenames: the list of nodes on which we should check
8863 @type hvname: string
8864 @param hvname: the name of the hypervisor we should use
8865 @type hvparams: dict
8866 @param hvparams: the parameters which we need to check
8867 @raise errors.OpPrereqError: if the parameters are not valid
8870 nodenames = _FilterVmNodes(lu, nodenames)
8872 cluster = lu.cfg.GetClusterInfo()
8873 hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
8875 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
8876 for node in nodenames:
8877 info = hvinfo[node]
8878 if info.offline:
8879 continue
8880 info.Raise("Hypervisor parameter validation failed on node %s" % node)
8883 def _CheckOSParams(lu, required, nodenames, osname, osparams):
8884 """OS parameters validation.
8886 @type lu: L{LogicalUnit}
8887 @param lu: the logical unit for which we check
8888 @type required: boolean
8889 @param required: whether the validation should fail if the OS is not
8890 found
8891 @type nodenames: list
8892 @param nodenames: the list of nodes on which we should check
8893 @type osname: string
8894 @param osname: the name of the OS we should use
8895 @type osparams: dict
8896 @param osparams: the parameters which we need to check
8897 @raise errors.OpPrereqError: if the parameters are not valid
8900 nodenames = _FilterVmNodes(lu, nodenames)
8901 result = lu.rpc.call_os_validate(nodenames, required, osname,
8902 [constants.OS_VALIDATE_PARAMETERS],
8903 osparams)
8904 for node, nres in result.items():
8905 # we don't check for offline cases since this should be run only
8906 # against the master node and/or an instance's nodes
8907 nres.Raise("OS Parameters validation failed on node %s" % node)
8908 if not nres.payload:
8909 lu.LogInfo("OS %s not found on node %s, validation skipped",
8913 class LUInstanceCreate(LogicalUnit):
8914 """Create an instance.
8917 HPATH = "instance-add"
8918 HTYPE = constants.HTYPE_INSTANCE
8921 def CheckArguments(self):
8925 # do not require name_check to ease forward/backward compatibility
8926 # for tools
8927 if self.op.no_install and self.op.start:
8928 self.LogInfo("No-installation mode selected, disabling startup")
8929 self.op.start = False
8930 # validate/normalize the instance name
8931 self.op.instance_name = \
8932 netutils.Hostname.GetNormalizedName(self.op.instance_name)
8934 if self.op.ip_check and not self.op.name_check:
8935 # TODO: make the ip check more flexible and not depend on the name check
8936 raise errors.OpPrereqError("Cannot do IP address check without a name"
8937 " check", errors.ECODE_INVAL)
8939 # check nics' parameter names
8940 for nic in self.op.nics:
8941 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8943 # check disks. parameter names and consistent adopt/no-adopt strategy
8944 has_adopt = has_no_adopt = False
8945 for disk in self.op.disks:
8946 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8947 if constants.IDISK_ADOPT in disk:
8948 has_adopt = True
8949 else:
8950 has_no_adopt = True
8951 if has_adopt and has_no_adopt:
8952 raise errors.OpPrereqError("Either all disks are adopted or none is",
8955 if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8956 raise errors.OpPrereqError("Disk adoption is not supported for the"
8957 " '%s' disk template" %
8958 self.op.disk_template,
8959 errors.ECODE_INVAL)
8960 if self.op.iallocator is not None:
8961 raise errors.OpPrereqError("Disk adoption not allowed with an"
8962 " iallocator script", errors.ECODE_INVAL)
8963 if self.op.mode == constants.INSTANCE_IMPORT:
8964 raise errors.OpPrereqError("Disk adoption not allowed for"
8965 " instance import", errors.ECODE_INVAL)
8967 if self.op.disk_template in constants.DTS_MUST_ADOPT:
8968 raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8969 " but no 'adopt' parameter given" %
8970 self.op.disk_template,
8971 errors.ECODE_INVAL)
8973 self.adopt_disks = has_adopt
8975 # instance name verification
8976 if self.op.name_check:
8977 self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8978 self.op.instance_name = self.hostname1.name
8979 # used in CheckPrereq for ip ping check
8980 self.check_ip = self.hostname1.ip
8982 self.check_ip = None
8984 # file storage checks
8985 if (self.op.file_driver and
8986 self.op.file_driver not in constants.FILE_DRIVER):
8987 raise errors.OpPrereqError("Invalid file driver name '%s'" %
8988 self.op.file_driver, errors.ECODE_INVAL)
8990 if self.op.disk_template == constants.DT_FILE:
8991 opcodes.RequireFileStorage()
8992 elif self.op.disk_template == constants.DT_SHARED_FILE:
8993 opcodes.RequireSharedFileStorage()
8995 ### Node/iallocator related checks
8996 _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8998 if self.op.pnode is not None:
8999 if self.op.disk_template in constants.DTS_INT_MIRROR:
9000 if self.op.snode is None:
9001 raise errors.OpPrereqError("The networked disk templates need"
9002 " a mirror node", errors.ECODE_INVAL)
9004 self.LogWarning("Secondary node will be ignored on non-mirrored disk"
9006 self.op.snode = None
9008 self._cds = _GetClusterDomainSecret()
9010 if self.op.mode == constants.INSTANCE_IMPORT:
9011 # On import force_variant must be True, because if we forced it at
9012 # initial install, our only chance when importing it back is that it
9013 # works again!
9014 self.op.force_variant = True
9016 if self.op.no_install:
9017 self.LogInfo("No-installation mode has no effect during import")
9019 elif self.op.mode == constants.INSTANCE_CREATE:
9020 if self.op.os_type is None:
9021 raise errors.OpPrereqError("No guest OS specified",
9023 if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
9024 raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
9025 " installation" % self.op.os_type,
9027 if self.op.disk_template is None:
9028 raise errors.OpPrereqError("No disk template specified",
9031 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9032 # Check handshake to ensure both clusters have the same domain secret
9033 src_handshake = self.op.source_handshake
9034 if not src_handshake:
9035 raise errors.OpPrereqError("Missing source handshake",
9038 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
9039 src_handshake)
9040 if errmsg:
9041 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
9042 errors.ECODE_INVAL)
9044 # Load and check source CA
9045 self.source_x509_ca_pem = self.op.source_x509_ca
9046 if not self.source_x509_ca_pem:
9047 raise errors.OpPrereqError("Missing source X509 CA",
9051 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
9053 except OpenSSL.crypto.Error, err:
9054 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
9055 (err, ), errors.ECODE_INVAL)
9057 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9058 if errcode is not None:
9059 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
9062 self.source_x509_ca = cert
9064 src_instance_name = self.op.source_instance_name
9065 if not src_instance_name:
9066 raise errors.OpPrereqError("Missing source instance name",
9069 self.source_instance_name = \
9070 netutils.GetHostname(name=src_instance_name).name
9073 raise errors.OpPrereqError("Invalid instance creation mode %r" %
9074 self.op.mode, errors.ECODE_INVAL)
9076 def ExpandNames(self):
9077 """ExpandNames for CreateInstance.
9079 Figure out the right locks for instance creation.
9082 self.needed_locks = {}
9084 instance_name = self.op.instance_name
9085 # this is just a preventive check, but someone might still add this
9086 # instance in the meantime, and creation will fail at lock-add time
9087 if instance_name in self.cfg.GetInstanceList():
9088 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
9089 instance_name, errors.ECODE_EXISTS)
9091 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
9093 if self.op.iallocator:
9094 # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
9095 # specifying a group on instance creation and then selecting nodes from
9097 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9098 self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
9100 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
9101 nodelist = [self.op.pnode]
9102 if self.op.snode is not None:
9103 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
9104 nodelist.append(self.op.snode)
9105 self.needed_locks[locking.LEVEL_NODE] = nodelist
9106 # Lock resources of instance's primary and secondary nodes (copy to
9107 # prevent accidental modification)
9108 self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
9110 # in case of import lock the source node too
9111 if self.op.mode == constants.INSTANCE_IMPORT:
9112 src_node = self.op.src_node
9113 src_path = self.op.src_path
9115 if src_path is None:
9116 self.op.src_path = src_path = self.op.instance_name
9118 if src_node is None:
9119 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9120 self.op.src_node = None
9121 if os.path.isabs(src_path):
9122 raise errors.OpPrereqError("Importing an instance from a path"
9123 " requires a source node option",
9126 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
9127 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
9128 self.needed_locks[locking.LEVEL_NODE].append(src_node)
9129 if not os.path.isabs(src_path):
9130 self.op.src_path = src_path = \
9131 utils.PathJoin(constants.EXPORT_DIR, src_path)
9133 def _RunAllocator(self):
9134 """Run the allocator based on input opcode.
9137 nics = [n.ToDict() for n in self.nics]
9138 ial = IAllocator(self.cfg, self.rpc,
9139 mode=constants.IALLOCATOR_MODE_ALLOC,
9140 name=self.op.instance_name,
9141 disk_template=self.op.disk_template,
9142 tags=self.op.tags,
9143 os=self.op.os_type,
9144 vcpus=self.be_full[constants.BE_VCPUS],
9145 memory=self.be_full[constants.BE_MAXMEM],
9146 disks=self.disks,
9147 nics=nics,
9148 hypervisor=self.op.hypervisor,
9149 )
9151 ial.Run(self.op.iallocator)
9153 if not ial.success:
9154 raise errors.OpPrereqError("Can't compute nodes using"
9155 " iallocator '%s': %s" %
9156 (self.op.iallocator, ial.info),
9157 errors.ECODE_NORES)
9158 if len(ial.result) != ial.required_nodes:
9159 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9160 " of nodes (%s), required %s" %
9161 (self.op.iallocator, len(ial.result),
9162 ial.required_nodes), errors.ECODE_FAULT)
9163 self.op.pnode = ial.result[0]
9164 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
9165 self.op.instance_name, self.op.iallocator,
9166 utils.CommaJoin(ial.result))
9167 if ial.required_nodes == 2:
9168 self.op.snode = ial.result[1]
9170 def BuildHooksEnv(self):
9173 This runs on master, primary and secondary nodes of the instance.
9177 "ADD_MODE": self.op.mode,
9179 if self.op.mode == constants.INSTANCE_IMPORT:
9180 env["SRC_NODE"] = self.op.src_node
9181 env["SRC_PATH"] = self.op.src_path
9182 env["SRC_IMAGES"] = self.src_images
9184 env.update(_BuildInstanceHookEnv(
9185 name=self.op.instance_name,
9186 primary_node=self.op.pnode,
9187 secondary_nodes=self.secondaries,
9188 status=self.op.start,
9189 os_type=self.op.os_type,
9190 minmem=self.be_full[constants.BE_MINMEM],
9191 maxmem=self.be_full[constants.BE_MAXMEM],
9192 vcpus=self.be_full[constants.BE_VCPUS],
9193 nics=_NICListToTuple(self, self.nics),
9194 disk_template=self.op.disk_template,
9195 disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
9196 for d in self.disks],
9197 bep=self.be_full,
9198 hvp=self.hv_full,
9199 hypervisor_name=self.op.hypervisor,
9200 tags=self.op.tags,
9201 ))
9203 return env
9205 def BuildHooksNodes(self):
9206 """Build hooks nodes.
9209 nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
9210 return (nl, nl)
9212 def _ReadExportInfo(self):
9213 """Reads the export information from disk.
9215 It will override the opcode source node and path with the actual
9216 information, if these two were not specified before.
9218 @return: the export information
9221 assert self.op.mode == constants.INSTANCE_IMPORT
9223 src_node = self.op.src_node
9224 src_path = self.op.src_path
9226 if src_node is None:
9227 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
9228 exp_list = self.rpc.call_export_list(locked_nodes)
9229 found = False
9230 for node in exp_list:
9231 if exp_list[node].fail_msg:
9232 continue
9233 if src_path in exp_list[node].payload:
9234 found = True
9235 self.op.src_node = src_node = node
9236 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
9237 src_path)
9238 break
9239 if not found:
9240 raise errors.OpPrereqError("No export found for relative path %s" %
9241 src_path, errors.ECODE_INVAL)
9243 _CheckNodeOnline(self, src_node)
9244 result = self.rpc.call_export_info(src_node, src_path)
9245 result.Raise("No export or invalid export found in dir %s" % src_path)
9247 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
9248 if not export_info.has_section(constants.INISECT_EXP):
9249 raise errors.ProgrammerError("Corrupted export config",
9250 errors.ECODE_ENVIRON)
9252 ei_version = export_info.get(constants.INISECT_EXP, "version")
9253 if (int(ei_version) != constants.EXPORT_VERSION):
9254 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
9255 (ei_version, constants.EXPORT_VERSION),
9256 errors.ECODE_ENVIRON)
9258 return export_info
9259 def _ReadExportParams(self, einfo):
9260 """Use export parameters as defaults.
9262 In case the opcode doesn't specify (as in override) some instance
9263 parameters, then try to use them from the export information, if
9264 they are declared there.
9266 if self.op.os_type is None:
9267 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
9269 if self.op.disk_template is None:
9270 if einfo.has_option(constants.INISECT_INS, "disk_template"):
9271 self.op.disk_template = einfo.get(constants.INISECT_INS,
9272 "disk_template")
9273 if self.op.disk_template not in constants.DISK_TEMPLATES:
9274 raise errors.OpPrereqError("Disk template specified in configuration"
9275 " file is not one of the allowed values:"
9276 " %s" % " ".join(constants.DISK_TEMPLATES),
9277 errors.ECODE_INVAL)
9278 else:
9279 raise errors.OpPrereqError("No disk template specified and the export"
9280 " is missing the disk_template information",
9281 errors.ECODE_INVAL)
9282 if not self.op.disks:
9283 disks = []
9284 # TODO: import the disk iv_name too
9285 for idx in range(constants.MAX_DISKS):
9286 if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
9287 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
9288 disks.append({constants.IDISK_SIZE: disk_sz})
9289 self.op.disks = disks
9290 if not disks and self.op.disk_template != constants.DT_DISKLESS:
9291 raise errors.OpPrereqError("No disk info specified and the export"
9292 " is missing the disk information",
9295 if not self.op.nics:
9296 nics = []
9297 for idx in range(constants.MAX_NICS):
9298 if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
9299 ndict = {}
9300 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
9301 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
9302 ndict[name] = v
9303 nics.append(ndict)
9304 self.op.nics = nics
9308 if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
9309 self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
9311 if (self.op.hypervisor is None and
9312 einfo.has_option(constants.INISECT_INS, "hypervisor")):
9313 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
9315 if einfo.has_section(constants.INISECT_HYP):
9316 # use the export parameters but do not override the ones
9317 # specified by the user
9318 for name, value in einfo.items(constants.INISECT_HYP):
9319 if name not in self.op.hvparams:
9320 self.op.hvparams[name] = value
9322 if einfo.has_section(constants.INISECT_BEP):
9323 # use the parameters, without overriding
9324 for name, value in einfo.items(constants.INISECT_BEP):
9325 if name not in self.op.beparams:
9326 self.op.beparams[name] = value
9327 # Compatibility for the old "memory" be param
9328 if name == constants.BE_MEMORY:
9329 if constants.BE_MAXMEM not in self.op.beparams:
9330 self.op.beparams[constants.BE_MAXMEM] = value
9331 if constants.BE_MINMEM not in self.op.beparams:
9332 self.op.beparams[constants.BE_MINMEM] = value
9333 else:
9334 # try to read the parameters old style, from the main section
9335 for name in constants.BES_PARAMETERS:
9336 if (name not in self.op.beparams and
9337 einfo.has_option(constants.INISECT_INS, name)):
9338 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
9340 if einfo.has_section(constants.INISECT_OSP):
9341 # use the parameters, without overriding
9342 for name, value in einfo.items(constants.INISECT_OSP):
9343 if name not in self.op.osparams:
9344 self.op.osparams[name] = value
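# Illustrative sketch (not part of the original module): the merge rule
# used above for hvparams, beparams and osparams is "export values only
# fill holes; user-supplied values always win". A hypothetical standalone
# helper showing the same pattern:
def _ExampleFillFromExport(user_params, export_items):
  """Merges export entries into user params without overriding them."""
  merged = dict(export_items)
  merged.update(user_params)  # user-specified values take precedence
  return merged

# _ExampleFillFromExport({"kernel_path": "/boot/vmlinuz-new"},
#                        [("kernel_path", "/boot/vmlinuz-old"),
#                         ("root_path", "/dev/vda1")])
# => {"kernel_path": "/boot/vmlinuz-new", "root_path": "/dev/vda1"}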
9346 def _RevertToDefaults(self, cluster):
9347 """Revert the instance parameters to the default values.
9351 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
9352 for name in self.op.hvparams.keys():
9353 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
9354 del self.op.hvparams[name]
9356 be_defs = cluster.SimpleFillBE({})
9357 for name in self.op.beparams.keys():
9358 if name in be_defs and be_defs[name] == self.op.beparams[name]:
9359 del self.op.beparams[name]
9361 nic_defs = cluster.SimpleFillNIC({})
9362 for nic in self.op.nics:
9363 for name in constants.NICS_PARAMETERS:
9364 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
9365 del nic[name]
9367 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
9368 for name in self.op.osparams.keys():
9369 if name in os_defs and os_defs[name] == self.op.osparams[name]:
9370 del self.op.osparams[name]
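# Hypothetical helper sketching what _RevertToDefaults does above: every
# explicitly-given parameter whose value already equals the cluster-level
# default is dropped, so only real overrides end up stored in the
# instance configuration (names and values here are illustrative only):
def _ExampleDropDefaults(params, defaults):
  """Returns a copy of params without entries equal to the defaults."""
  return dict((name, value) for (name, value) in params.items()
              if name not in defaults or defaults[name] != value)

# _ExampleDropDefaults({"vcpus": 1, "maxmem": 4096}, {"vcpus": 1})
# => {"maxmem": 4096}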
9372 def _CalculateFileStorageDir(self):
9373 """Calculate final instance file storage dir.
9376 # file storage dir calculation/check
9377 self.instance_file_storage_dir = None
9378 if self.op.disk_template in constants.DTS_FILEBASED:
9379 # build the full file storage dir path
9380 joinargs = []
9382 if self.op.disk_template == constants.DT_SHARED_FILE:
9383 get_fsd_fn = self.cfg.GetSharedFileStorageDir
9384 else:
9385 get_fsd_fn = self.cfg.GetFileStorageDir
9387 cfg_storagedir = get_fsd_fn()
9388 if not cfg_storagedir:
9389 raise errors.OpPrereqError("Cluster file storage dir not defined")
9390 joinargs.append(cfg_storagedir)
9392 if self.op.file_storage_dir is not None:
9393 joinargs.append(self.op.file_storage_dir)
9395 joinargs.append(self.op.instance_name)
9397 # pylint: disable=W0142
9398 self.instance_file_storage_dir = utils.PathJoin(*joinargs)
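# A minimal sketch of the resulting layout, using os.path.join in place of
# utils.PathJoin and assumed example values: the final directory is rooted
# at the cluster storage dir, with the optional opcode-supplied
# subdirectory in between and the instance name last.
#
#   import os.path
#   os.path.join("/srv/ganeti/file-storage", "mirrors", "web1.example.com")
#   # => "/srv/ganeti/file-storage/mirrors/web1.example.com"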
9400 def CheckPrereq(self): # pylint: disable=R0914
9401 """Check prerequisites.
9404 self._CalculateFileStorageDir()
9406 if self.op.mode == constants.INSTANCE_IMPORT:
9407 export_info = self._ReadExportInfo()
9408 self._ReadExportParams(export_info)
9410 if (not self.cfg.GetVGName() and
9411 self.op.disk_template not in constants.DTS_NOT_LVM):
9412 raise errors.OpPrereqError("Cluster does not support lvm-based"
9413 " instances", errors.ECODE_STATE)
9415 if (self.op.hypervisor is None or
9416 self.op.hypervisor == constants.VALUE_AUTO):
9417 self.op.hypervisor = self.cfg.GetHypervisorType()
9419 cluster = self.cfg.GetClusterInfo()
9420 enabled_hvs = cluster.enabled_hypervisors
9421 if self.op.hypervisor not in enabled_hvs:
9422 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9423 " cluster (%s)" % (self.op.hypervisor,
9424 ",".join(enabled_hvs)),
9427 # Check tag validity
9428 for tag in self.op.tags:
9429 objects.TaggableObject.ValidateTag(tag)
9431 # check hypervisor parameter syntax (locally)
9432 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9433 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9434 self.op.hvparams)
9435 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9436 hv_type.CheckParameterSyntax(filled_hvp)
9437 self.hv_full = filled_hvp
9438 # check that we don't specify global parameters on an instance
9439 _CheckGlobalHvParams(self.op.hvparams)
9441 # fill and remember the beparams dict
9442 default_beparams = cluster.beparams[constants.PP_DEFAULT]
9443 for param, value in self.op.beparams.iteritems():
9444 if value == constants.VALUE_AUTO:
9445 self.op.beparams[param] = default_beparams[param]
9446 objects.UpgradeBeParams(self.op.beparams)
9447 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9448 self.be_full = cluster.SimpleFillBE(self.op.beparams)
9450 # build os parameters
9451 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9453 # now that hvp/bep are in final format, let's reset to defaults,
9454 # if told to do so
9455 if self.op.identify_defaults:
9456 self._RevertToDefaults(cluster)
9458 # NIC buildup
9459 self.nics = []
9460 for idx, nic in enumerate(self.op.nics):
9461 nic_mode_req = nic.get(constants.INIC_MODE, None)
9462 nic_mode = nic_mode_req
9463 if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9464 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9466 # in routed mode, for the first nic, the default ip is 'auto'
9467 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9468 default_ip_mode = constants.VALUE_AUTO
9469 else:
9470 default_ip_mode = constants.VALUE_NONE
9472 # ip validity checks
9473 ip = nic.get(constants.INIC_IP, default_ip_mode)
9474 if ip is None or ip.lower() == constants.VALUE_NONE:
9475 nic_ip = None
9476 elif ip.lower() == constants.VALUE_AUTO:
9477 if not self.op.name_check:
9478 raise errors.OpPrereqError("IP address set to auto but name checks"
9479 " have been skipped",
9481 nic_ip = self.hostname1.ip
9482 else:
9483 if not netutils.IPAddress.IsValid(ip):
9484 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9488 # TODO: check the ip address for uniqueness
9489 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9490 raise errors.OpPrereqError("Routed nic mode requires an ip address",
9493 # MAC address verification
9494 mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9495 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9496 mac = utils.NormalizeAndValidateMac(mac)
9498 try:
9499 self.cfg.ReserveMAC(mac, self.proc.GetECId())
9500 except errors.ReservationError:
9501 raise errors.OpPrereqError("MAC address %s already in use"
9502 " in cluster" % mac,
9503 errors.ECODE_NOTUNIQUE)
9505 # Build nic parameters
9506 link = nic.get(constants.INIC_LINK, None)
9507 if link == constants.VALUE_AUTO:
9508 link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9510 nicparams = {}
9511 nicparams[constants.NIC_MODE] = nic_mode
9513 nicparams[constants.NIC_LINK] = link
9515 check_params = cluster.SimpleFillNIC(nicparams)
9516 objects.NIC.CheckParameterSyntax(check_params)
9517 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
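# Hypothetical sketch of the IP defaulting rule applied in the loop above:
# on the first NIC of a routed-mode instance the IP defaults to "auto"
# (i.e. the address resolved from the instance name), on every other NIC
# it defaults to none.
def _ExampleDefaultNicIp(nic_mode, nic_index, resolved_ip):
  """Returns the default IP for a NIC, mirroring the logic above."""
  if nic_mode == "routed" and nic_index == 0:
    return resolved_ip  # "auto": use the name-resolution result
  return None  # unset for all other NICs

# _ExampleDefaultNicIp("routed", 0, "192.0.2.10")  => "192.0.2.10"
# _ExampleDefaultNicIp("bridged", 0, "192.0.2.10") => None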
9519 # disk checks/pre-build
9520 default_vg = self.cfg.GetVGName()
9521 self.disks = []
9522 for disk in self.op.disks:
9523 mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9524 if mode not in constants.DISK_ACCESS_SET:
9525 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9526 mode, errors.ECODE_INVAL)
9527 size = disk.get(constants.IDISK_SIZE, None)
9528 if size is None:
9529 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9530 try:
9531 size = int(size)
9532 except (TypeError, ValueError):
9533 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9536 data_vg = disk.get(constants.IDISK_VG, default_vg)
9537 new_disk = {
9538 constants.IDISK_SIZE: size,
9539 constants.IDISK_MODE: mode,
9540 constants.IDISK_VG: data_vg,
9541 }
9542 if constants.IDISK_METAVG in disk:
9543 new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
9544 if constants.IDISK_ADOPT in disk:
9545 new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9546 self.disks.append(new_disk)
9548 if self.op.mode == constants.INSTANCE_IMPORT:
9549 disk_images = []
9550 for idx in range(len(self.disks)):
9551 option = "disk%d_dump" % idx
9552 if export_info.has_option(constants.INISECT_INS, option):
9553 # FIXME: are the old OSes, disk sizes, etc. useful?
9554 export_name = export_info.get(constants.INISECT_INS, option)
9555 image = utils.PathJoin(self.op.src_path, export_name)
9556 disk_images.append(image)
9557 else:
9558 disk_images.append(False)
9560 self.src_images = disk_images
9562 old_name = export_info.get(constants.INISECT_INS, "name")
9563 if self.op.instance_name == old_name:
9564 for idx, nic in enumerate(self.nics):
9565 if nic.mac == constants.VALUE_AUTO:
9566 nic_mac_ini = "nic%d_mac" % idx
9567 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9569 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9571 # ip ping checks (we use the same ip that was resolved in ExpandNames)
9572 if self.op.ip_check:
9573 if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9574 raise errors.OpPrereqError("IP %s of instance %s already in use" %
9575 (self.check_ip, self.op.instance_name),
9576 errors.ECODE_NOTUNIQUE)
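# Stdlib-only sketch of a TCP reachability probe in the spirit of
# netutils.TcpPing (the real helper supports more options, e.g. source
# address selection; this simplified version is illustrative only):
import socket

def _ExampleTcpPing(target_ip, port, timeout=2.0):
  """Returns True if a TCP connection to target_ip:port succeeds."""
  sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
  sock.settimeout(timeout)
  try:
    sock.connect((target_ip, port))
    return True  # something answered, so the address is already in use
  except socket.error:
    return False  # no listener reached within the timeout
  finally:
    sock.close()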
9578 #### mac address generation
9579 # By generating here the mac address both the allocator and the hooks get
9580 # the real final mac address rather than the 'auto' or 'generate' value.
9581 # There is a race condition between the generation and the instance object
9582 # creation, which means that we know the mac is valid now, but we're not
9583 # sure it will be when we actually add the instance. If things go bad
9584 # adding the instance will abort because of a duplicate mac, and the
9585 # creation job will fail.
9586 for nic in self.nics:
9587 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9588 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
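# Hypothetical sketch of the generate-and-reserve pattern described in the
# comment above: the address is drawn and immediately reserved under the
# current execution context, narrowing (but not closing) the window in
# which a concurrent job could claim the same MAC.
def _ExampleGenerateMac(reserved, prefix="aa:00:00"):
  """Generates a pseudo-random MAC not present in the reserved set."""
  import random
  while True:
    mac = "%s:%02x:%02x:%02x" % (prefix, random.randint(0, 255),
                                 random.randint(0, 255),
                                 random.randint(0, 255))
    if mac not in reserved:
      reserved.add(mac)  # reserve right away; the instance comes later
      return mac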
9592 if self.op.iallocator is not None:
9593 self._RunAllocator()
9595 # Release all unneeded node locks
9596 _ReleaseLocks(self, locking.LEVEL_NODE,
9597 keep=filter(None, [self.op.pnode, self.op.snode,
9600 #### node related checks
9602 # check primary node
9603 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9604 assert self.pnode is not None, \
9605 "Cannot retrieve locked node %s" % self.op.pnode
9607 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9608 pnode.name, errors.ECODE_STATE)
9610 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9611 pnode.name, errors.ECODE_STATE)
9612 if not pnode.vm_capable:
9613 raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9614 " '%s'" % pnode.name, errors.ECODE_STATE)
9616 self.secondaries = []
9618 # mirror node verification
9619 if self.op.disk_template in constants.DTS_INT_MIRROR:
9620 if self.op.snode == pnode.name:
9621 raise errors.OpPrereqError("The secondary node cannot be the"
9622 " primary node", errors.ECODE_INVAL)
9623 _CheckNodeOnline(self, self.op.snode)
9624 _CheckNodeNotDrained(self, self.op.snode)
9625 _CheckNodeVmCapable(self, self.op.snode)
9626 self.secondaries.append(self.op.snode)
9628 snode = self.cfg.GetNodeInfo(self.op.snode)
9629 if pnode.group != snode.group:
9630 self.LogWarning("The primary and secondary nodes are in two"
9631 " different node groups; the disk parameters"
9632 " from the first disk's node group will be"
9635 nodenames = [pnode.name] + self.secondaries
9637 # Verify instance specs
9638 ispec = {
9639 constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
9640 constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
9641 constants.ISPEC_DISK_COUNT: len(self.disks),
9642 constants.ISPEC_DISK_SIZE: [disk.size for disk in self.disks],
9643 constants.ISPEC_NIC_COUNT: len(self.nics),
9644 }
9646 ipolicy = _CalculateGroupIPolicy(cluster, pnode.group)
9647 res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
9648 if not self.op.ignore_ipolicy and res:
9649 raise errors.OpPrereqError(("Instance allocation to group %s violates"
9650 " policy: %s") % (pnode.group,
9651 utils.CommaJoin(res)),
9654 # disk parameters (not customizable at instance or node level)
9655 # just use the primary node parameters, ignoring the secondary.
9656 self.diskparams = self.cfg.GetNodeGroup(pnode.group).diskparams
9658 if not self.adopt_disks:
9659 # Check lv size requirements, if not adopting
9660 req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9661 _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9663 elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9664 all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9665 disk[constants.IDISK_ADOPT])
9666 for disk in self.disks])
9667 if len(all_lvs) != len(self.disks):
9668 raise errors.OpPrereqError("Duplicate volume names given for adoption",
9670 for lv_name in all_lvs:
9671 try:
9672 # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
9673 # to ReserveLV use the same syntax
9674 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9675 except errors.ReservationError:
9676 raise errors.OpPrereqError("LV named %s used by another instance" %
9677 lv_name, errors.ECODE_NOTUNIQUE)
9679 vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9680 vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9682 node_lvs = self.rpc.call_lv_list([pnode.name],
9683 vg_names.payload.keys())[pnode.name]
9684 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9685 node_lvs = node_lvs.payload
9687 delta = all_lvs.difference(node_lvs.keys())
9689 raise errors.OpPrereqError("Missing logical volume(s): %s" %
9690 utils.CommaJoin(delta),
9692 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9694 raise errors.OpPrereqError("Online logical volumes found, cannot"
9695 " adopt: %s" % utils.CommaJoin(online_lvs),
9697 # update the size of disk based on what is found
9698 for dsk in self.disks:
9699 dsk[constants.IDISK_SIZE] = \
9700 int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9701 dsk[constants.IDISK_ADOPT])][0]))
9703 elif self.op.disk_template == constants.DT_BLOCK:
9704 # Normalize and de-duplicate device paths
9705 all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9706 for disk in self.disks])
9707 if len(all_disks) != len(self.disks):
9708 raise errors.OpPrereqError("Duplicate disk names given for adoption",
9710 baddisks = [d for d in all_disks
9711 if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9713 raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9714 " cannot be adopted" %
9715 (", ".join(baddisks),
9716 constants.ADOPTABLE_BLOCKDEV_ROOT),
9719 node_disks = self.rpc.call_bdev_sizes([pnode.name],
9720 list(all_disks))[pnode.name]
9721 node_disks.Raise("Cannot get block device information from node %s" %
9723 node_disks = node_disks.payload
9724 delta = all_disks.difference(node_disks.keys())
9726 raise errors.OpPrereqError("Missing block device(s): %s" %
9727 utils.CommaJoin(delta),
9729 for dsk in self.disks:
9730 dsk[constants.IDISK_SIZE] = \
9731 int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
9733 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
9735 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
9736 # check OS parameters (remotely)
9737 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
9739 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
9741 # memory check on primary node
9742 #TODO(dynmem): use MINMEM for checking
9743 if self.op.start:
9744 _CheckNodeFreeMemory(self, self.pnode.name,
9745 "creating instance %s" % self.op.instance_name,
9746 self.be_full[constants.BE_MAXMEM],
9749 self.dry_run_result = list(nodenames)
9751 def Exec(self, feedback_fn):
9752 """Create and add the instance to the cluster.
9755 instance = self.op.instance_name
9756 pnode_name = self.pnode.name
9758 assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
9759 self.owned_locks(locking.LEVEL_NODE)), \
9760 "Node locks differ from node resource locks"
9762 ht_kind = self.op.hypervisor
9763 if ht_kind in constants.HTS_REQ_PORT:
9764 network_port = self.cfg.AllocatePort()
9765 else:
9766 network_port = None
9768 disks = _GenerateDiskTemplate(self,
9769 self.op.disk_template,
9770 instance, pnode_name,
9773 self.instance_file_storage_dir,
9774 self.op.file_driver,
9779 iobj = objects.Instance(name=instance, os=self.op.os_type,
9780 primary_node=pnode_name,
9781 nics=self.nics, disks=disks,
9782 disk_template=self.op.disk_template,
9783 admin_state=constants.ADMINST_DOWN,
9784 network_port=network_port,
9785 beparams=self.op.beparams,
9786 hvparams=self.op.hvparams,
9787 hypervisor=self.op.hypervisor,
9788 osparams=self.op.osparams,
9789 )
9792 for tag in self.op.tags:
9793 iobj.AddTag(tag)
9795 if self.adopt_disks:
9796 if self.op.disk_template == constants.DT_PLAIN:
9797 # rename LVs to the newly-generated names; we need to construct
9798 # 'fake' LV disks with the old data, plus the new unique_id
9799 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
9801 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
9802 rename_to.append(t_dsk.logical_id)
9803 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
9804 self.cfg.SetDiskID(t_dsk, pnode_name)
9805 result = self.rpc.call_blockdev_rename(pnode_name,
9806 zip(tmp_disks, rename_to))
9807 result.Raise("Failed to rename adoped LVs")
9809 feedback_fn("* creating instance disks...")
9810 try:
9811 _CreateDisks(self, iobj)
9812 except errors.OpExecError:
9813 self.LogWarning("Device creation failed, reverting...")
9814 try:
9815 _RemoveDisks(self, iobj)
9816 finally:
9817 self.cfg.ReleaseDRBDMinors(instance)
9818 raise
9820 feedback_fn("adding instance %s to cluster config" % instance)
9822 self.cfg.AddInstance(iobj, self.proc.GetECId())
9824 # Declare that we don't want to remove the instance lock anymore, as we've
9825 # added the instance to the config
9826 del self.remove_locks[locking.LEVEL_INSTANCE]
9828 if self.op.mode == constants.INSTANCE_IMPORT:
9829 # Release unused nodes
9830 _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
9831 else:
9832 # Release all nodes
9833 _ReleaseLocks(self, locking.LEVEL_NODE)
9836 if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
9837 feedback_fn("* wiping instance disks...")
9838 try:
9839 _WipeDisks(self, iobj)
9840 except errors.OpExecError, err:
9841 logging.exception("Wiping disks failed")
9842 self.LogWarning("Wiping instance disks failed (%s)", err)
9846 # Something is already wrong with the disks, don't do anything else
9848 elif self.op.wait_for_sync:
9849 disk_abort = not _WaitForSync(self, iobj)
9850 elif iobj.disk_template in constants.DTS_INT_MIRROR:
9851 # make sure the disks are not degraded (still sync-ing is ok)
9852 feedback_fn("* checking mirrors status")
9853 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
9855 if disk_abort:
9858 _RemoveDisks(self, iobj)
9859 self.cfg.RemoveInstance(iobj.name)
9860 # Make sure the instance lock gets removed
9861 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
9862 raise errors.OpExecError("There are some degraded disks for"
9865 # Release all node resource locks
9866 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
9868 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
9869 if self.op.mode == constants.INSTANCE_CREATE:
9870 if not self.op.no_install:
9871 pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
9872 not self.op.wait_for_sync)
9874 feedback_fn("* pausing disk sync to install instance OS")
9875 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9877 for idx, success in enumerate(result.payload):
9879 logging.warn("pause-sync of instance %s for disk %d failed",
9882 feedback_fn("* running the instance OS create scripts...")
9883 # FIXME: pass debug option from opcode to backend
9884 os_add_result = \
9885 self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
9886 self.op.debug_level)
9888 feedback_fn("* resuming disk sync")
9889 result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9891 for idx, success in enumerate(result.payload):
9893 logging.warn("resume-sync of instance %s for disk %d failed",
9896 os_add_result.Raise("Could not add os for instance %s"
9897 " on node %s" % (instance, pnode_name))
9899 elif self.op.mode == constants.INSTANCE_IMPORT:
9900 feedback_fn("* running the instance OS import scripts...")
9902 transfers = []
9904 for idx, image in enumerate(self.src_images):
9908 # FIXME: pass debug option from opcode to backend
9909 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
9910 constants.IEIO_FILE, (image, ),
9911 constants.IEIO_SCRIPT,
9912 (iobj.disks[idx], idx),
9913 None)
9914 transfers.append(dt)
9916 import_result = \
9917 masterd.instance.TransferInstanceData(self, feedback_fn,
9918 self.op.src_node, pnode_name,
9919 self.pnode.secondary_ip,
9920 transfers)
9921 if not compat.all(import_result):
9922 self.LogWarning("Some disks for instance %s on node %s were not"
9923 " imported successfully" % (instance, pnode_name))
9925 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9926 feedback_fn("* preparing remote import...")
9927 # The source cluster will stop the instance before attempting to make a
9928 # connection. In some cases stopping an instance can take a long time,
9929 # hence the shutdown timeout is added to the connection timeout.
9930 connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
9931 self.op.source_shutdown_timeout)
9932 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9934 assert iobj.primary_node == self.pnode.name
9935 disk_results = \
9936 masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
9937 self.source_x509_ca,
9938 self._cds, timeouts)
9939 if not compat.all(disk_results):
9940 # TODO: Should the instance still be started, even if some disks
9941 # failed to import (valid for local imports, too)?
9942 self.LogWarning("Some disks for instance %s on node %s were not"
9943 " imported successfully" % (instance, pnode_name))
9945 # Run rename script on newly imported instance
9946 assert iobj.name == instance
9947 feedback_fn("Running rename script for %s" % instance)
9948 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
9949 self.source_instance_name,
9950 self.op.debug_level)
9952 self.LogWarning("Failed to run rename script for %s on node"
9953 " %s: %s" % (instance, pnode_name, result.fail_msg))
9955 else:
9956 # also checked in the prereq part
9957 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
9960 assert not self.owned_locks(locking.LEVEL_NODE_RES)
9962 if self.op.start:
9963 iobj.admin_state = constants.ADMINST_UP
9964 self.cfg.Update(iobj, feedback_fn)
9965 logging.info("Starting instance %s on node %s", instance, pnode_name)
9966 feedback_fn("* starting instance...")
9967 result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
9968 False)
9969 result.Raise("Could not start instance")
9971 return list(iobj.all_nodes)
9974 class LUInstanceConsole(NoHooksLU):
9975 """Connect to an instance's console.
9977 This is somewhat special in that it returns the command line that
9978 you need to run on the master node in order to connect to the console.
9981 """
9982 REQ_BGL = False
9984 def ExpandNames(self):
9985 self.share_locks = _ShareAll()
9986 self._ExpandAndLockInstance()
9988 def CheckPrereq(self):
9989 """Check prerequisites.
9991 This checks that the instance is in the cluster.
9994 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9995 assert self.instance is not None, \
9996 "Cannot retrieve locked instance %s" % self.op.instance_name
9997 _CheckNodeOnline(self, self.instance.primary_node)
9999 def Exec(self, feedback_fn):
10000 """Connect to the console of an instance
10003 instance = self.instance
10004 node = instance.primary_node
10006 node_insts = self.rpc.call_instance_list([node],
10007 [instance.hypervisor])[node]
10008 node_insts.Raise("Can't get node information from %s" % node)
10010 if instance.name not in node_insts.payload:
10011 if instance.admin_state == constants.ADMINST_UP:
10012 state = constants.INSTST_ERRORDOWN
10013 elif instance.admin_state == constants.ADMINST_DOWN:
10014 state = constants.INSTST_ADMINDOWN
10016 state = constants.INSTST_ADMINOFFLINE
10017 raise errors.OpExecError("Instance %s is not running (state %s)" %
10018 (instance.name, state))
10020 logging.debug("Connecting to console of %s on %s", instance.name, node)
10022 return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
10025 def _GetInstanceConsole(cluster, instance):
10026 """Returns console information for an instance.
10028 @type cluster: L{objects.Cluster}
10029 @type instance: L{objects.Instance}
10033 hyper = hypervisor.GetHypervisor(instance.hypervisor)
10034 # beparams and hvparams are passed separately, to avoid editing the
10035 # instance and then saving the defaults in the instance itself.
10036 hvparams = cluster.FillHV(instance)
10037 beparams = cluster.FillBE(instance)
10038 console = hyper.GetInstanceConsole(instance, hvparams, beparams)
10040 assert console.instance == instance.name
10041 assert console.Validate()
10043 return console.ToDict()
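# Hypothetical usage sketch: the dictionary returned above is what the
# client side consumes to build the actual connection command. Field names
# and values here are purely illustrative, not the exact wire format:
#
#   {"instance": "web1.example.com", "kind": "ssh",
#    "host": "node2.example.com", "user": "root",
#    "command": ["xm", "console", "web1.example.com"]}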
10046 class LUInstanceReplaceDisks(LogicalUnit):
10047 """Replace the disks of an instance.
10050 HPATH = "mirrors-replace"
10051 HTYPE = constants.HTYPE_INSTANCE
10052 REQ_BGL = False
10054 def CheckArguments(self):
10055 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
10056 self.op.iallocator)
10058 def ExpandNames(self):
10059 self._ExpandAndLockInstance()
10061 assert locking.LEVEL_NODE not in self.needed_locks
10062 assert locking.LEVEL_NODE_RES not in self.needed_locks
10063 assert locking.LEVEL_NODEGROUP not in self.needed_locks
10065 assert self.op.iallocator is None or self.op.remote_node is None, \
10066 "Conflicting options"
10068 if self.op.remote_node is not None:
10069 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10071 # Warning: do not remove the locking of the new secondary here
10072 # unless DRBD8.AddChildren is changed to work in parallel;
10073 # currently it doesn't since parallel invocations of
10074 # FindUnusedMinor will conflict
10075 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
10076 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
10077 else:
10078 self.needed_locks[locking.LEVEL_NODE] = []
10079 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10081 if self.op.iallocator is not None:
10082 # iallocator will select a new node in the same group
10083 self.needed_locks[locking.LEVEL_NODEGROUP] = []
10085 self.needed_locks[locking.LEVEL_NODE_RES] = []
10087 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
10088 self.op.iallocator, self.op.remote_node,
10089 self.op.disks, False, self.op.early_release)
10091 self.tasklets = [self.replacer]
10093 def DeclareLocks(self, level):
10094 if level == locking.LEVEL_NODEGROUP:
10095 assert self.op.remote_node is None
10096 assert self.op.iallocator is not None
10097 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
10099 self.share_locks[locking.LEVEL_NODEGROUP] = 1
10100 # Lock all groups used by instance optimistically; this requires going
10101 # via the node before it's locked, requiring verification later on
10102 self.needed_locks[locking.LEVEL_NODEGROUP] = \
10103 self.cfg.GetInstanceNodeGroups(self.op.instance_name)
10105 elif level == locking.LEVEL_NODE:
10106 if self.op.iallocator is not None:
10107 assert self.op.remote_node is None
10108 assert not self.needed_locks[locking.LEVEL_NODE]
10110 # Lock member nodes of all locked groups
10111 self.needed_locks[locking.LEVEL_NODE] = [node_name
10112 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
10113 for node_name in self.cfg.GetNodeGroup(group_uuid).members]
10114 else:
10115 self._LockInstancesNodes()
10116 elif level == locking.LEVEL_NODE_RES:
10118 self.needed_locks[locking.LEVEL_NODE_RES] = \
10119 self.needed_locks[locking.LEVEL_NODE]
10121 def BuildHooksEnv(self):
10122 """Build hooks env.
10124 This runs on the master, the primary and all the secondaries.
10127 instance = self.replacer.instance
10129 "MODE": self.op.mode,
10130 "NEW_SECONDARY": self.op.remote_node,
10131 "OLD_SECONDARY": instance.secondary_nodes[0],
10133 env.update(_BuildInstanceHookEnvByObject(self, instance))
10134 return env
10136 def BuildHooksNodes(self):
10137 """Build hooks nodes.
10140 instance = self.replacer.instance
10141 nl = [
10142 self.cfg.GetMasterNode(),
10143 instance.primary_node,
10144 ]
10145 if self.op.remote_node is not None:
10146 nl.append(self.op.remote_node)
10148 return (nl, nl)
10149 def CheckPrereq(self):
10150 """Check prerequisites.
10153 assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
10154 self.op.iallocator is None)
10156 # Verify if node group locks are still correct
10157 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10159 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
10161 return LogicalUnit.CheckPrereq(self)
10164 class TLReplaceDisks(Tasklet):
10165 """Replaces disks for an instance.
10167 Note: Locking is not within the scope of this class.
10170 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
10171 disks, delay_iallocator, early_release):
10172 """Initializes this class.
10175 Tasklet.__init__(self, lu)
10178 self.instance_name = instance_name
10179 self.mode = mode
10180 self.iallocator_name = iallocator_name
10181 self.remote_node = remote_node
10182 self.disks = disks
10183 self.delay_iallocator = delay_iallocator
10184 self.early_release = early_release
10187 self.instance = None
10188 self.new_node = None
10189 self.target_node = None
10190 self.other_node = None
10191 self.remote_node_info = None
10192 self.node_secondary_ip = None
10194 @staticmethod
10195 def CheckArguments(mode, remote_node, iallocator):
10196 """Helper function for users of this class.
10199 # check for valid parameter combination
10200 if mode == constants.REPLACE_DISK_CHG:
10201 if remote_node is None and iallocator is None:
10202 raise errors.OpPrereqError("When changing the secondary either an"
10203 " iallocator script must be used or the"
10204 " new node given", errors.ECODE_INVAL)
10206 if remote_node is not None and iallocator is not None:
10207 raise errors.OpPrereqError("Give either the iallocator or the new"
10208 " secondary, not both", errors.ECODE_INVAL)
10210 elif remote_node is not None or iallocator is not None:
10211 # Not replacing the secondary
10212 raise errors.OpPrereqError("The iallocator and new node options can"
10213 " only be used when changing the"
10214 " secondary node", errors.ECODE_INVAL)
10216 @staticmethod
10217 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10218 """Compute a new secondary node using an IAllocator.
10221 ial = IAllocator(lu.cfg, lu.rpc,
10222 mode=constants.IALLOCATOR_MODE_RELOC,
10223 name=instance_name,
10224 relocate_from=list(relocate_from))
10226 ial.Run(iallocator_name)
10228 if not ial.success:
10229 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10230 " %s" % (iallocator_name, ial.info),
10231 errors.ECODE_NORES)
10233 if len(ial.result) != ial.required_nodes:
10234 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
10235 " of nodes (%s), required %s" %
10237 len(ial.result), ial.required_nodes),
10238 errors.ECODE_FAULT)
10240 remote_node_name = ial.result[0]
10242 lu.LogInfo("Selected new secondary for instance '%s': %s",
10243 instance_name, remote_node_name)
10245 return remote_node_name
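# Minimal sketch of validating an allocator reply, mirroring the checks
# above; the result object with .success/.info/.result/.required_nodes
# attributes is a stand-in for the real IAllocator instance:
def _ExampleCheckAllocatorReply(result):
  """Returns the proposed node name or raises on a bad reply."""
  if not result.success:
    raise ValueError("allocator failed: %s" % result.info)
  if len(result.result) != result.required_nodes:
    raise ValueError("allocator returned %d node(s), wanted %d" %
                     (len(result.result), result.required_nodes))
  return result.result[0]  # the new secondary node name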
10247 def _FindFaultyDisks(self, node_name):
10248 """Wrapper for L{_FindFaultyInstanceDisks}.
10251 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
10252 node_name, True)
10254 def _CheckDisksActivated(self, instance):
10255 """Checks if the instance disks are activated.
10257 @param instance: The instance to check disks
10258 @return: True if they are activated, False otherwise
10261 nodes = instance.all_nodes
10263 for idx, dev in enumerate(instance.disks):
10265 self.lu.LogInfo("Checking disk/%d on %s", idx, node)
10266 self.cfg.SetDiskID(dev, node)
10268 result = self.rpc.call_blockdev_find(node, dev)
10270 if result.offline:
10271 continue
10272 elif result.fail_msg or not result.payload:
10273 return False
10275 return True
10277 def CheckPrereq(self):
10278 """Check prerequisites.
10280 This checks that the instance is in the cluster.
10283 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10284 assert instance is not None, \
10285 "Cannot retrieve locked instance %s" % self.instance_name
10287 if instance.disk_template != constants.DT_DRBD8:
10288 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10289 " instances", errors.ECODE_INVAL)
10291 if len(instance.secondary_nodes) != 1:
10292 raise errors.OpPrereqError("The instance has a strange layout,"
10293 " expected one secondary but found %d" %
10294 len(instance.secondary_nodes),
10295 errors.ECODE_FAULT)
10297 if not self.delay_iallocator:
10298 self._CheckPrereq2()
10300 def _CheckPrereq2(self):
10301 """Check prerequisites, second part.
10303 This function should always be part of CheckPrereq. It was separated and is
10304 now called from Exec because during node evacuation iallocator was only
10305 called with an unmodified cluster model, not taking planned changes into
10306 account.
10309 instance = self.instance
10310 secondary_node = instance.secondary_nodes[0]
10312 if self.iallocator_name is None:
10313 remote_node = self.remote_node
10315 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
10316 instance.name, instance.secondary_nodes)
10318 if remote_node is None:
10319 self.remote_node_info = None
10320 else:
10321 assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
10322 "Remote node '%s' is not locked" % remote_node
10324 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
10325 assert self.remote_node_info is not None, \
10326 "Cannot retrieve locked node %s" % remote_node
10328 if remote_node == self.instance.primary_node:
10329 raise errors.OpPrereqError("The specified node is the primary node of"
10330 " the instance", errors.ECODE_INVAL)
10332 if remote_node == secondary_node:
10333 raise errors.OpPrereqError("The specified node is already the"
10334 " secondary node of the instance",
10335 errors.ECODE_INVAL)
10337 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
10338 constants.REPLACE_DISK_CHG):
10339 raise errors.OpPrereqError("Cannot specify disks to be replaced",
10340 errors.ECODE_INVAL)
10342 if self.mode == constants.REPLACE_DISK_AUTO:
10343 if not self._CheckDisksActivated(instance):
10344 raise errors.OpPrereqError("Please run activate-disks on instance %s"
10345 " first" % self.instance_name,
10346 errors.ECODE_STATE)
10347 faulty_primary = self._FindFaultyDisks(instance.primary_node)
10348 faulty_secondary = self._FindFaultyDisks(secondary_node)
10350 if faulty_primary and faulty_secondary:
10351 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10352 " one node and can not be repaired"
10353 " automatically" % self.instance_name,
10354 errors.ECODE_STATE)
10356 if faulty_primary:
10357 self.disks = faulty_primary
10358 self.target_node = instance.primary_node
10359 self.other_node = secondary_node
10360 check_nodes = [self.target_node, self.other_node]
10361 elif faulty_secondary:
10362 self.disks = faulty_secondary
10363 self.target_node = secondary_node
10364 self.other_node = instance.primary_node
10365 check_nodes = [self.target_node, self.other_node]
10366 else:
10367 self.disks = []
10368 check_nodes = []
10370 else:
10371 # Non-automatic modes
10372 if self.mode == constants.REPLACE_DISK_PRI:
10373 self.target_node = instance.primary_node
10374 self.other_node = secondary_node
10375 check_nodes = [self.target_node, self.other_node]
10377 elif self.mode == constants.REPLACE_DISK_SEC:
10378 self.target_node = secondary_node
10379 self.other_node = instance.primary_node
10380 check_nodes = [self.target_node, self.other_node]
10382 elif self.mode == constants.REPLACE_DISK_CHG:
10383 self.new_node = remote_node
10384 self.other_node = instance.primary_node
10385 self.target_node = secondary_node
10386 check_nodes = [self.new_node, self.other_node]
10388 _CheckNodeNotDrained(self.lu, remote_node)
10389 _CheckNodeVmCapable(self.lu, remote_node)
10391 old_node_info = self.cfg.GetNodeInfo(secondary_node)
10392 assert old_node_info is not None
10393 if old_node_info.offline and not self.early_release:
10394 # doesn't make sense to delay the release
10395 self.early_release = True
10396 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10397 " early-release mode", secondary_node)
10400 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10403 # If not specified all disks should be replaced
10404 if not self.disks:
10405 self.disks = range(len(self.instance.disks))
10407 # TODO: compute disk parameters
10408 primary_node_info = self.cfg.GetNodeInfo(instance.primary_node)
10409 secondary_node_info = self.cfg.GetNodeInfo(secondary_node)
10410 if primary_node_info.group != secondary_node_info.group:
10411 self.lu.LogInfo("The instance primary and secondary nodes are in two"
10412 " different node groups; the disk parameters of the"
10413 " primary node's group will be applied.")
10415 self.diskparams = self.cfg.GetNodeGroup(primary_node_info.group).diskparams
10417 for node in check_nodes:
10418 _CheckNodeOnline(self.lu, node)
10420 touched_nodes = frozenset(node_name for node_name in [self.new_node,
10421 self.other_node,
10422 self.target_node]
10423 if node_name is not None)
10425 # Release unneeded node and node resource locks
10426 _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10427 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10429 # Release any owned node group
10430 if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10431 _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10433 # Check whether disks are valid
10434 for disk_idx in self.disks:
10435 instance.FindDisk(disk_idx)
10437 # Get secondary node IP addresses
10438 self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10439 in self.cfg.GetMultiNodeInfo(touched_nodes))
10441 def Exec(self, feedback_fn):
10442 """Execute disk replacement.
10444 This dispatches the disk replacement to the appropriate handler.
10447 if self.delay_iallocator:
10448 self._CheckPrereq2()
10451 # Verify owned locks before starting operation
10452 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10453 assert set(owned_nodes) == set(self.node_secondary_ip), \
10454 ("Incorrect node locks, owning %s, expected %s" %
10455 (owned_nodes, self.node_secondary_ip.keys()))
10456 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10457 self.lu.owned_locks(locking.LEVEL_NODE_RES))
10459 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10460 assert list(owned_instances) == [self.instance_name], \
10461 "Instance '%s' not locked" % self.instance_name
10463 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10464 "Should not own any node group lock at this point"
10467 feedback_fn("No disks need replacement")
10470 feedback_fn("Replacing disk(s) %s for %s" %
10471 (utils.CommaJoin(self.disks), self.instance.name))
10473 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10475 # Activate the instance disks if we're replacing them on a down instance
10476 if activate_disks:
10477 _StartInstanceDisks(self.lu, self.instance, True)
10479 try:
10480 # Should we replace the secondary node?
10481 if self.new_node is not None:
10482 fn = self._ExecDrbd8Secondary
10484 fn = self._ExecDrbd8DiskOnly
10486 result = fn(feedback_fn)
10487 finally:
10488 # Deactivate the instance disks if we're replacing them on a
10489 # down instance
10490 if activate_disks:
10491 _SafeShutdownInstanceDisks(self.lu, self.instance)
10493 assert not self.lu.owned_locks(locking.LEVEL_NODE)
10496 # Verify owned locks
10497 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10498 nodes = frozenset(self.node_secondary_ip)
10499 assert ((self.early_release and not owned_nodes) or
10500 (not self.early_release and not (set(owned_nodes) - nodes))), \
10501 ("Not owning the correct locks, early_release=%s, owned=%r,"
10502 " nodes=%r" % (self.early_release, owned_nodes, nodes))
10506 def _CheckVolumeGroup(self, nodes):
10507 self.lu.LogInfo("Checking volume groups")
10509 vgname = self.cfg.GetVGName()
10511 # Make sure volume group exists on all involved nodes
10512 results = self.rpc.call_vg_list(nodes)
10514 raise errors.OpExecError("Can't list volume groups on the nodes")
10516 for node in nodes:
10517 res = results[node]
10518 res.Raise("Error checking node %s" % node)
10519 if vgname not in res.payload:
10520 raise errors.OpExecError("Volume group '%s' not found on node %s" %
10523 def _CheckDisksExistence(self, nodes):
10524 # Check disk existence
10525 for idx, dev in enumerate(self.instance.disks):
10526 if idx not in self.disks:
10530 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10531 self.cfg.SetDiskID(dev, node)
10533 result = self.rpc.call_blockdev_find(node, dev)
10535 msg = result.fail_msg
10536 if msg or not result.payload:
10538 msg = "disk not found"
10539 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10542 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10543 for idx, dev in enumerate(self.instance.disks):
10544 if idx not in self.disks:
10547 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10550 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
10552 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10553 " replace disks for instance %s" %
10554 (node_name, self.instance.name))
10556 def _CreateNewStorage(self, node_name):
10557 """Create new storage on the primary or secondary node.
10559 This is only used for same-node replaces, not for changing the
10560 secondary node, hence we don't want to modify the existing disk.
10562 """
10563 iv_names = {}
10565 for idx, dev in enumerate(self.instance.disks):
10566 if idx not in self.disks:
10569 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10571 self.cfg.SetDiskID(dev, node_name)
10573 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10574 names = _GenerateUniqueNames(self.lu, lv_names)
10576 _, data_p, meta_p = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
10578 vg_data = dev.children[0].logical_id[0]
10579 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10580 logical_id=(vg_data, names[0]), params=data_p)
10581 vg_meta = dev.children[1].logical_id[0]
10582 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10583 logical_id=(vg_meta, names[1]), params=meta_p)
10585 new_lvs = [lv_data, lv_meta]
10586 old_lvs = [child.Copy() for child in dev.children]
10587 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10589 # we pass force_create=True to force the LVM creation
10590 for new_lv in new_lvs:
10591 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
10592 _GetInstanceInfoText(self.instance), False)
10594 return iv_names
10596 def _CheckDevices(self, node_name, iv_names):
10597 for name, (dev, _, _) in iv_names.iteritems():
10598 self.cfg.SetDiskID(dev, node_name)
10600 result = self.rpc.call_blockdev_find(node_name, dev)
10602 msg = result.fail_msg
10603 if msg or not result.payload:
10605 msg = "disk not found"
10606 raise errors.OpExecError("Can't find DRBD device %s: %s" %
10609 if result.payload.is_degraded:
10610 raise errors.OpExecError("DRBD device %s is degraded!" % name)
10612 def _RemoveOldStorage(self, node_name, iv_names):
10613 for name, (_, old_lvs, _) in iv_names.iteritems():
10614 self.lu.LogInfo("Remove logical volumes for %s" % name)
10616 for lv in old_lvs:
10617 self.cfg.SetDiskID(lv, node_name)
10619 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10621 self.lu.LogWarning("Can't remove old LV: %s" % msg,
10622 hint="remove unused LVs manually")
10624 def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10625 """Replace a disk on the primary or secondary for DRBD 8.
10627 The algorithm for replace is quite complicated:
10629 1. for each disk to be replaced:
10631 1. create new LVs on the target node with unique names
10632 1. detach old LVs from the drbd device
10633 1. rename old LVs to name_replaced.<time_t>
10634 1. rename new LVs to old LVs
10635 1. attach the new LVs (with the old names now) to the drbd device
10637 1. wait for sync across all devices
10639 1. for each modified disk:
10641 1. remove old LVs (which have the name name_replaced.<time_t>)
10643 Failures are not very well handled.
10645 """
10646 steps_total = 6
10648 # Step: check device activation
10649 self.lu.LogStep(1, steps_total, "Check device existence")
10650 self._CheckDisksExistence([self.other_node, self.target_node])
10651 self._CheckVolumeGroup([self.target_node, self.other_node])
10653 # Step: check other node consistency
10654 self.lu.LogStep(2, steps_total, "Check peer consistency")
10655 self._CheckDisksConsistency(self.other_node,
10656 self.other_node == self.instance.primary_node,
10659 # Step: create new storage
10660 self.lu.LogStep(3, steps_total, "Allocate new storage")
10661 iv_names = self._CreateNewStorage(self.target_node)
10663 # Step: for each lv, detach+rename*2+attach
10664 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10665 for dev, old_lvs, new_lvs in iv_names.itervalues():
10666 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10668 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10669 old_lvs)
10670 result.Raise("Can't detach drbd from local storage on node"
10671 " %s for device %s" % (self.target_node, dev.iv_name))
10673 #cfg.Update(instance)
10675 # ok, we created the new LVs, so now we know we have the needed
10676 # storage; as such, we proceed on the target node to rename
10677 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
10678 # using the assumption that logical_id == physical_id (which in
10679 # turn is the unique_id on that node)
10681 # FIXME(iustin): use a better name for the replaced LVs
10682 temp_suffix = int(time.time())
10683 ren_fn = lambda d, suff: (d.physical_id[0],
10684 d.physical_id[1] + "_replaced-%s" % suff)
10686 # Build the rename list based on what LVs exist on the node
10687 rename_old_to_new = []
10688 for to_ren in old_lvs:
10689 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
10690 if not result.fail_msg and result.payload:
10692 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
10694 self.lu.LogInfo("Renaming the old LVs on the target node")
10695 result = self.rpc.call_blockdev_rename(self.target_node,
10696 rename_old_to_new)
10697 result.Raise("Can't rename old LVs on node %s" % self.target_node)
10699 # Now we rename the new LVs to the old LVs
10700 self.lu.LogInfo("Renaming the new LVs on the target node")
10701 rename_new_to_old = [(new, old.physical_id)
10702 for old, new in zip(old_lvs, new_lvs)]
10703 result = self.rpc.call_blockdev_rename(self.target_node,
10704 rename_new_to_old)
10705 result.Raise("Can't rename new LVs on node %s" % self.target_node)
10707 # Intermediate steps of in memory modifications
10708 for old, new in zip(old_lvs, new_lvs):
10709 new.logical_id = old.logical_id
10710 self.cfg.SetDiskID(new, self.target_node)
10712 # We need to modify old_lvs so that removal later removes the
10713 # right LVs, not the newly added ones; note that old_lvs is a copy here
10715 for disk in old_lvs:
10716 disk.logical_id = ren_fn(disk, temp_suffix)
10717 self.cfg.SetDiskID(disk, self.target_node)
10719 # Now that the new lvs have the old name, we can add them to the device
10720 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
10721 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
10722 new_lvs)
10723 msg = result.fail_msg
10724 if msg:
10725 for new_lv in new_lvs:
10726 msg2 = self.rpc.call_blockdev_remove(self.target_node,
10727 new_lv).fail_msg
10728 if msg2:
10729 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
10730 hint=("cleanup manually the unused logical"
10732 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
10734 cstep = itertools.count(5)
10736 if self.early_release:
10737 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10738 self._RemoveOldStorage(self.target_node, iv_names)
10739 # TODO: Check if releasing locks early still makes sense
10740 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10742 # Release all resource locks except those used by the instance
10743 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10744 keep=self.node_secondary_ip.keys())
10746 # Release all node locks while waiting for sync
10747 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10749 # TODO: Can the instance lock be downgraded here? Take the optional disk
10750 # shutdown in the caller into consideration.
10753 # This can fail as the old devices are degraded and _WaitForSync
10754 # does a combined result over all disks, so we don't check its return value
10755 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10756 _WaitForSync(self.lu, self.instance)
10758 # Check all devices manually
10759 self._CheckDevices(self.instance.primary_node, iv_names)
10761 # Step: remove old storage
10762 if not self.early_release:
10763 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10764 self._RemoveOldStorage(self.target_node, iv_names)
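# Sketch of the rename "swap" performed in step 4 above (LV names are
# assumed values for illustration): the freshly-created LVs take over the
# names of the LVs they replace, while the old LVs are parked under a
# timestamped name until the removal step deletes them.
#
#   before:   xenvg/disk0_data          (old, being replaced)
#             xenvg/.disk0_data.new     (new, freshly created)
#   rename 1: xenvg/disk0_data       -> xenvg/disk0_data_replaced-1300000000
#   rename 2: xenvg/.disk0_data.new  -> xenvg/disk0_data
#   removal:  xenvg/disk0_data_replaced-1300000000 is deleted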
10766 def _ExecDrbd8Secondary(self, feedback_fn):
10767 """Replace the secondary node for DRBD 8.
10769 The algorithm for replace is quite complicated:
10770 - for all disks of the instance:
10771 - create new LVs on the new node with same names
10772 - shutdown the drbd device on the old secondary
10773 - disconnect the drbd network on the primary
10774 - create the drbd device on the new secondary
10775 - network attach the drbd on the primary, using an artifice:
10776 the drbd code for Attach() will connect to the network if it
10777 finds a device which is connected to the good local disks but
10778 not network enabled
10779 - wait for sync across all devices
10780 - remove all disks from the old secondary
10782 Failures are not very well handled.
10784 """
10785 steps_total = 6
10787 pnode = self.instance.primary_node
10789 # Step: check device activation
10790 self.lu.LogStep(1, steps_total, "Check device existence")
10791 self._CheckDisksExistence([self.instance.primary_node])
10792 self._CheckVolumeGroup([self.instance.primary_node])
10794 # Step: check other node consistency
10795 self.lu.LogStep(2, steps_total, "Check peer consistency")
10796 self._CheckDisksConsistency(self.instance.primary_node, True, True)
10798 # Step: create new storage
10799 self.lu.LogStep(3, steps_total, "Allocate new storage")
10800 for idx, dev in enumerate(self.instance.disks):
10801 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
10802 (self.new_node, idx))
10803 # we pass force_create=True to force LVM creation
10804 for new_lv in dev.children:
10805 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
10806 _GetInstanceInfoText(self.instance), False)
10808 # Step 4: drbd minors and drbd setup changes
10809 # after this, we must manually remove the drbd minors on both the
10810 # error and the success paths
10811 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10812 minors = self.cfg.AllocateDRBDMinor([self.new_node
10813 for dev in self.instance.disks],
10814 self.instance.name)
10815 logging.debug("Allocated minors %r", minors)
10817 iv_names = {}
10818 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
10819 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
10820 (self.new_node, idx))
10821 # create new devices on new_node; note that we create two IDs:
10822 # one without port, so the drbd will be activated without
10823 # networking information on the new node at this stage, and one
10824 # with network, for the latter activation in step 4
10825 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
10826 if self.instance.primary_node == o_node1:
10829 assert self.instance.primary_node == o_node2, "Three-node instance?"
10832 new_alone_id = (self.instance.primary_node, self.new_node, None,
10833 p_minor, new_minor, o_secret)
10834 new_net_id = (self.instance.primary_node, self.new_node, o_port,
10835 p_minor, new_minor, o_secret)
10837 iv_names[idx] = (dev, dev.children, new_net_id)
10838 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
10840 drbd_params, _, _ = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
10841 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
10842 logical_id=new_alone_id,
10843 children=dev.children,
10844 size=dev.size,
10845 params=drbd_params)
10846 try:
10847 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
10848 _GetInstanceInfoText(self.instance), False)
10849 except errors.GenericError:
10850 self.cfg.ReleaseDRBDMinors(self.instance.name)
10851 raise
10853 # We have new devices, shutdown the drbd on the old secondary
10854 for idx, dev in enumerate(self.instance.disks):
10855 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
10856 self.cfg.SetDiskID(dev, self.target_node)
10857 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
10859 self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
10860 "node: %s" % (idx, msg),
10861 hint=("Please cleanup this device manually as"
10862 " soon as possible"))
10864 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
10865 result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
10866 self.instance.disks)[pnode]
10868 msg = result.fail_msg
10869 if msg:
10870 # detaches didn't succeed (unlikely)
10871 self.cfg.ReleaseDRBDMinors(self.instance.name)
10872 raise errors.OpExecError("Can't detach the disks from the network on"
10873 " old node: %s" % (msg,))
10875 # if we managed to detach at least one, we update all the disks of
10876 # the instance to point to the new secondary
10877 self.lu.LogInfo("Updating instance configuration")
10878 for dev, _, new_logical_id in iv_names.itervalues():
10879 dev.logical_id = new_logical_id
10880 self.cfg.SetDiskID(dev, self.instance.primary_node)
10882 self.cfg.Update(self.instance, feedback_fn)
10884 # Release all node locks (the configuration has been updated)
10885 _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10887 # and now perform the drbd attach
10888 self.lu.LogInfo("Attaching primary drbds to new secondary"
10889 " (standalone => connected)")
10890 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
10891 self.new_node],
10892 self.node_secondary_ip,
10893 self.instance.disks,
10894 self.instance.name,
10895 False)
10896 for to_node, to_result in result.items():
10897 msg = to_result.fail_msg
10899 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
10901 hint=("please do a gnt-instance info to see the"
10902 " status of disks"))
10904 cstep = itertools.count(5)
10906 if self.early_release:
10907 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10908 self._RemoveOldStorage(self.target_node, iv_names)
10909 # TODO: Check if releasing locks early still makes sense
10910 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10912 # Release all resource locks except those used by the instance
10913 _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10914 keep=self.node_secondary_ip.keys())
10916 # TODO: Can the instance lock be downgraded here? Take the optional disk
10917 # shutdown in the caller into consideration.
10920 # This can fail as the old devices are degraded and _WaitForSync
10921 # does a combined result over all disks, so we don't check its return value
10922 self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10923 _WaitForSync(self.lu, self.instance)
10925 # Check all devices manually
10926 self._CheckDevices(self.instance.primary_node, iv_names)
10928 # Step: remove old storage
10929 if not self.early_release:
10930 self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10931 self._RemoveOldStorage(self.target_node, iv_names)
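# Sketch of the two logical_id forms built in step 4 above. A DRBD disk id
# is the tuple (nodeA, nodeB, port, minorA, minorB, secret); the "alone"
# id omits the port so the device can be brought up without networking,
# while the "net" id carries it so the later attach connects the peers
# (all values below are illustrative):
#
#   new_alone_id = ("node1.example.com", "node3.example.com", None,
#                   0, 5, "secret")
#   new_net_id   = ("node1.example.com", "node3.example.com", 11000,
#                   0, 5, "secret")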
10934 class LURepairNodeStorage(NoHooksLU):
10935 """Repairs the volume group on a node.
10937 """
10938 REQ_BGL = False
10940 def CheckArguments(self):
10941 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10943 storage_type = self.op.storage_type
10945 if (constants.SO_FIX_CONSISTENCY not in
10946 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
10947 raise errors.OpPrereqError("Storage units of type '%s' can not be"
10948 " repaired" % storage_type,
10949 errors.ECODE_INVAL)
10951 def ExpandNames(self):
10952 self.needed_locks = {
10953 locking.LEVEL_NODE: [self.op.node_name],
10954 }
10956 def _CheckFaultyDisks(self, instance, node_name):
10957 """Ensure faulty disks abort the opcode or at least warn."""
10958 try:
10959 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
10960 node_name, True):
10961 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
10962 " node '%s'" % (instance.name, node_name),
10963 errors.ECODE_STATE)
10964 except errors.OpPrereqError, err:
10965 if self.op.ignore_consistency:
10966 self.proc.LogWarning(str(err.args[0]))
10967 else:
10968 raise
10970 def CheckPrereq(self):
10971 """Check prerequisites.
10974 # Check whether any instance on this node has faulty disks
10975 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
10976 if inst.admin_state != constants.ADMINST_UP:
10978 check_nodes = set(inst.all_nodes)
10979 check_nodes.discard(self.op.node_name)
10980 for inst_node_name in check_nodes:
10981 self._CheckFaultyDisks(inst, inst_node_name)
10983 def Exec(self, feedback_fn):
10984 feedback_fn("Repairing storage unit '%s' on %s ..." %
10985 (self.op.name, self.op.node_name))
10987 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
10988 result = self.rpc.call_storage_execute(self.op.node_name,
10989 self.op.storage_type, st_args,
10991 constants.SO_FIX_CONSISTENCY)
10992 result.Raise("Failed to repair storage unit '%s' on %s" %
10993 (self.op.name, self.op.node_name))
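

# Illustrative sketch (hypothetical node and volume group names): repairing
# an LVM volume group on a node would be requested with an opcode like
#
#   op = opcodes.OpRepairNodeStorage(node_name="node2.example.com",
#                                    storage_type=constants.ST_LVM_VG,
#                                    name="xenvg",
#                                    ignore_consistency=False)
#
# CheckArguments above rejects any storage type whose valid operations do
# not include constants.SO_FIX_CONSISTENCY.
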
class LUNodeEvacuate(NoHooksLU):
  """Evacuates instances off a list of nodes.

  """
  REQ_BGL = False

  _MODE2IALLOCATOR = {
    constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
    constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
    constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
    }
  assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
  assert (frozenset(_MODE2IALLOCATOR.values()) ==
          constants.IALLOCATOR_NEVAC_MODES)

  def CheckArguments(self):
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      assert self.op.remote_node

      if self.op.remote_node == self.op.node_name:
        raise errors.OpPrereqError("Can not use evacuated node as a new"
                                   " secondary node", errors.ECODE_INVAL)

      if self.op.mode != constants.NODE_EVAC_SEC:
        raise errors.OpPrereqError("Without the use of an iallocator only"
                                   " secondary instances can be evacuated",
                                   errors.ECODE_INVAL)

    # Declare locks
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

    # Determine nodes (via group) optimistically, needs verification once locks
    # have been acquired
    self.lock_nodes = self._DetermineNodes()

  def _DetermineNodes(self):
    """Gets the list of nodes to operate on.

    """
    if self.op.remote_node is None:
      # Iallocator will choose any node(s) in the same group
      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
    else:
      group_nodes = frozenset([self.op.remote_node])

    # Determine nodes to be locked
    return set([self.op.node_name]) | group_nodes

  def _DetermineInstances(self):
    """Builds list of instances to operate on.

    """
    assert self.op.mode in constants.NODE_EVAC_MODES

    if self.op.mode == constants.NODE_EVAC_PRI:
      # Primary instances only
      inst_fn = _GetNodePrimaryInstances
      assert self.op.remote_node is None, \
        "Evacuating primary instances requires iallocator"
    elif self.op.mode == constants.NODE_EVAC_SEC:
      # Secondary instances only
      inst_fn = _GetNodeSecondaryInstances
    else:
      # All instances
      assert self.op.mode == constants.NODE_EVAC_ALL
      inst_fn = _GetNodeInstances
      # TODO: In 2.6, change the iallocator interface to take an evacuation mode
      # per instance
      raise errors.OpPrereqError("Due to an issue with the iallocator"
                                 " interface it is not possible to evacuate"
                                 " all instances at once; specify explicitly"
                                 " whether to evacuate primary or secondary"
                                 " instances",
                                 errors.ECODE_INVAL)

    return inst_fn(self.cfg, self.op.node_name)

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        set(i.name for i in self._DetermineInstances())

    elif level == locking.LEVEL_NODEGROUP:
      # Lock node groups for all potential target nodes optimistically, needs
      # verification once nodes have been acquired
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)

    elif level == locking.LEVEL_NODE:
      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes

  def CheckPrereq(self):
    # Verify locks
    owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
    owned_nodes = self.owned_locks(locking.LEVEL_NODE)
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)

    need_nodes = self._DetermineNodes()

    if not owned_nodes.issuperset(need_nodes):
      raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
                                 " locks were acquired, current nodes are"
                                 " '%s', used to be '%s'; retry the"
                                 " operation" %
                                 (self.op.node_name,
                                  utils.CommaJoin(need_nodes),
                                  utils.CommaJoin(owned_nodes)),
                                 errors.ECODE_STATE)

    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
    if owned_groups != wanted_groups:
      raise errors.OpExecError("Node groups changed since locks were acquired,"
                               " current groups are '%s', used to be '%s';"
                               " retry the operation" %
                               (utils.CommaJoin(wanted_groups),
                                utils.CommaJoin(owned_groups)))

    # Determine affected instances
    self.instances = self._DetermineInstances()
    self.instance_names = [i.name for i in self.instances]

    if set(self.instance_names) != owned_instances:
      raise errors.OpExecError("Instances on node '%s' changed since locks"
                               " were acquired, current instances are '%s',"
                               " used to be '%s'; retry the operation" %
                               (self.op.node_name,
                                utils.CommaJoin(self.instance_names),
                                utils.CommaJoin(owned_instances)))

    if self.instance_names:
      self.LogInfo("Evacuating instances from node '%s': %s",
                   self.op.node_name,
                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
    else:
      self.LogInfo("No instances to evacuate from node '%s'",
                   self.op.node_name)

    if self.op.remote_node is not None:
      for i in self.instances:
        if i.primary_node == self.op.remote_node:
          raise errors.OpPrereqError("Node %s is the primary node of"
                                     " instance %s, cannot use it as"
                                     " secondary" %
                                     (self.op.remote_node, i.name),
                                     errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)

    if not self.instance_names:
      # No instances to evacuate
      jobs = []

    elif self.op.iallocator is not None:
      # TODO: Implement relocation to other group
      ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
                       evac_mode=self._MODE2IALLOCATOR[self.op.mode],
                       instances=list(self.instance_names))

      ial.Run(self.op.iallocator)

      if not ial.success:
        raise errors.OpPrereqError("Can't compute node evacuation using"
                                   " iallocator '%s': %s" %
                                   (self.op.iallocator, ial.info),
                                   errors.ECODE_NORES)

      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)

    elif self.op.remote_node is not None:
      assert self.op.mode == constants.NODE_EVAC_SEC
      jobs = [
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
                                        remote_node=self.op.remote_node,
                                        disks=[],
                                        mode=constants.REPLACE_DISK_CHG,
                                        early_release=self.op.early_release)]
        for instance_name in self.instance_names
        ]

    else:
      raise errors.ProgrammerError("No iallocator or remote node")

    return ResultWithJobs(jobs)
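

# Illustrative sketch (hypothetical names): evacuating all secondary
# instances off a node via an iallocator could be requested with
#
#   op = opcodes.OpNodeEvacuate(node_name="node4.example.com",
#                               mode=constants.NODE_EVAC_SEC,
#                               iallocator="hail")
#
# The LU itself moves nothing; it returns a ResultWithJobs whose job lists
# (one list of opcodes per job) are submitted by the master processor.
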
def _SetOpEarlyRelease(early_release, op):
  """Sets C{early_release} flag on opcodes if available.

  """
  try:
    op.early_release = early_release
  except AttributeError:
    assert not isinstance(op, opcodes.OpInstanceReplaceDisks)

  return op


def _NodeEvacDest(use_nodes, group, nodes):
  """Returns group or nodes depending on caller's choice.

  """
  if use_nodes:
    return utils.CommaJoin(nodes)
  else:
    return group


def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
  """Unpacks the result of change-group and node-evacuate iallocator requests.

  Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
  L{constants.IALLOCATOR_MODE_CHG_GROUP}.

  @type lu: L{LogicalUnit}
  @param lu: Logical unit instance
  @type alloc_result: tuple/list
  @param alloc_result: Result from iallocator
  @type early_release: bool
  @param early_release: Whether to release locks early if possible
  @type use_nodes: bool
  @param use_nodes: Whether to display node names instead of groups

  """
  (moved, failed, jobs) = alloc_result

  if failed:
    failreason = utils.CommaJoin("%s (%s)" % (name, reason)
                                 for (name, reason) in failed)
    lu.LogWarning("Unable to evacuate instances %s", failreason)
    raise errors.OpExecError("Unable to evacuate instances %s" % failreason)

  if moved:
    lu.LogInfo("Instances to be moved: %s",
               utils.CommaJoin("%s (to %s)" %
                               (name, _NodeEvacDest(use_nodes, group, nodes))
                               for (name, group, nodes) in moved))

  return [map(compat.partial(_SetOpEarlyRelease, early_release),
              map(opcodes.OpCode.LoadOpCode, ops))
          for ops in jobs]
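

# Illustrative sketch of the alloc_result structure unpacked above, as
# returned by the node-evacuate/change-group iallocator modes (values
# hypothetical; the job entries are serialized opcodes):
#
#   alloc_result = (
#     [("inst1", "target-group-uuid", ["node2", "node3"])],  # moved
#     [("inst2", "some failure reason")],                    # failed
#     [[{"OP_ID": "OP_INSTANCE_REPLACE_DISKS", ...}]],       # jobs
#     )
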
class LUInstanceGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE][:]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "DISK": self.op.disk,
      "AMOUNT": self.op.amount,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    nodenames = list(instance.all_nodes)
    for node in nodenames:
      _CheckNodeOnline(self, node)

    self.instance = instance

    if instance.disk_template not in constants.DTS_GROWABLE:
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing", errors.ECODE_INVAL)

    self.disk = instance.FindDisk(self.op.disk)

    if instance.disk_template not in (constants.DT_FILE,
                                      constants.DT_SHARED_FILE):
      # TODO: check the free disk space for file, when that feature will be
      # supported
      _CheckNodesFreeDiskPerVG(self, nodenames,
                               self.disk.ComputeGrowth(self.op.amount))

  def Exec(self, feedback_fn):
    """Execute disk grow.

    """
    instance = self.instance
    disk = self.disk

    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block device to grow")

    feedback_fn("Growing disk %s of instance '%s' by %s" %
                (self.op.disk, instance.name,
                 utils.FormatUnit(self.op.amount, "h")))

    # First run all grow ops in dry-run mode
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
      result.Raise("Grow request failed to node %s" % node)

    # We know that (as far as we can test) operations across different
    # nodes will succeed, time to run it for real
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
      result.Raise("Grow request failed to node %s" % node)

      # TODO: Rewrite code to work properly
      # DRBD goes into sync mode for a short amount of time after executing the
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
      # calling "resize" in sync mode fails. Sleeping for a short amount of
      # time is a work-around.
      time.sleep(5)

    disk.RecordGrow(self.op.amount)
    self.cfg.Update(instance, feedback_fn)

    # Changes have been recorded, release node lock
    _ReleaseLocks(self, locking.LEVEL_NODE)

    # Downgrade lock while waiting for sync
    self.glm.downgrade(locking.LEVEL_INSTANCE)

    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
      if disk_abort:
        self.proc.LogWarning("Disk sync-ing has not returned a good"
                             " status; please check the instance")
      if instance.admin_state != constants.ADMINST_UP:
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
    elif instance.admin_state != constants.ADMINST_UP:
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
                           " not supposed to be running because no wait for"
                           " sync mode was requested")

    assert self.owned_locks(locking.LEVEL_NODE_RES)
    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
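

# Illustrative sketch (hypothetical names): growing disk 0 of an instance by
# 2 GiB; the amount is given in mebibytes, matching utils.FormatUnit above.
#
#   op = opcodes.OpInstanceGrowDisk(instance_name="inst1.example.com",
#                                   disk=0, amount=2048,
#                                   wait_for_sync=True)
#
# Because the grow is first executed in dry-run mode on all nodes, a failure
# on any node aborts the operation before any disk is actually changed.
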
class LUInstanceQueryData(NoHooksLU):
  """Query runtime instance data.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

    # Use locking if requested or when non-static information is wanted
    if not (self.op.static or self.op.use_locking):
      self.LogWarning("Non-static data requested, locks need to be acquired")
      self.op.use_locking = True

    if self.op.instances or not self.op.use_locking:
      # Expand instance names right here
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
    else:
      # Will use acquired locks
      self.wanted_names = None

    if self.op.use_locking:
      self.share_locks = _ShareAll()

      if self.wanted_names is None:
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
      else:
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names

      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if self.op.use_locking and level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      assert self.op.use_locking, "Locking was not used"
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)

    self.wanted_instances = \
      map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))

  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Returns the status of a block device

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_find(node, dev)
    if result.offline:
      return None

    result.Raise("Can't compute disk status for %s" % instance_name)

    status = result.payload
    if status is None:
      return None

    return (status.dev_path, status.major, status.minor,
            status.sync_percent, status.estimated_time,
            status.is_degraded, status.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = dev.logical_id[0]

    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
                                              instance.name, dev)
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)

    if dev.children:
      dev_children = map(compat.partial(self._ComputeDiskStatus,
                                        instance, snode),
                         dev.children)
    else:
      dev_children = []

    return {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,
      "mode": dev.mode,
      "size": dev.size,
      }

  def Exec(self, feedback_fn):
    """Gather and return data"""
    result = {}

    cluster = self.cfg.GetClusterInfo()

    pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
                                          for i in self.wanted_instances)
    for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
      if self.op.static or pnode.offline:
        remote_state = None
        if pnode.offline:
          self.LogWarning("Primary node %s is marked offline, returning static"
                          " information only for instance %s" %
                          (pnode.name, instance.name))
      else:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        remote_info = remote_info.payload
        if remote_info and "state" in remote_info:
          remote_state = "up"
        else:
          if instance.admin_state == constants.ADMINST_UP:
            remote_state = "down"
          else:
            remote_state = instance.admin_state

      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
                  instance.disks)

      result[instance.name] = {
        "name": instance.name,
        "config_state": instance.admin_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "snodes": instance.secondary_nodes,
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disk_template": instance.disk_template,
        "disks": disks,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "os_instance": instance.osparams,
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

    return result
class LUInstanceSetParams(LogicalUnit):
  """Modifies an instance's parameters.

  """
  HPATH = "instance-modify"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    if not (self.op.nics or self.op.disks or self.op.disk_template or
            self.op.hvparams or self.op.beparams or self.op.os_name or
            self.op.online_inst or self.op.offline_inst):
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)

    if self.op.hvparams:
      _CheckGlobalHvParams(self.op.hvparams)

    # Disk validation
    disk_addremove = 0
    for disk_op, disk_dict in self.op.disks:
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
      if disk_op == constants.DDM_REMOVE:
        disk_addremove += 1
        continue
      elif disk_op == constants.DDM_ADD:
        disk_addremove += 1
      else:
        if not isinstance(disk_op, int):
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
        if not isinstance(disk_dict, dict):
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

      if disk_op == constants.DDM_ADD:
        mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
        if mode not in constants.DISK_ACCESS_SET:
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
                                     errors.ECODE_INVAL)
        size = disk_dict.get(constants.IDISK_SIZE, None)
        if size is None:
          raise errors.OpPrereqError("Required disk parameter size missing",
                                     errors.ECODE_INVAL)
        try:
          size = int(size)
        except (TypeError, ValueError), err:
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
                                     str(err), errors.ECODE_INVAL)
        disk_dict[constants.IDISK_SIZE] = size
      else:
        # modification of disk
        if constants.IDISK_SIZE in disk_dict:
          raise errors.OpPrereqError("Disk size change not possible, use"
                                     " grow-disk", errors.ECODE_INVAL)

    if disk_addremove > 1:
      raise errors.OpPrereqError("Only one disk add or remove operation"
                                 " supported at a time", errors.ECODE_INVAL)

    if self.op.disks and self.op.disk_template is not None:
      raise errors.OpPrereqError("Disk template conversion and other disk"
                                 " changes not supported at the same time",
                                 errors.ECODE_INVAL)

    if (self.op.disk_template and
        self.op.disk_template in constants.DTS_INT_MIRROR and
        self.op.remote_node is None):
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
                                 " one requires specifying a secondary node",
                                 errors.ECODE_INVAL)

    # NIC validation
    nic_addremove = 0
    for nic_op, nic_dict in self.op.nics:
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
      if nic_op == constants.DDM_REMOVE:
        nic_addremove += 1
        continue
      elif nic_op == constants.DDM_ADD:
        nic_addremove += 1
      else:
        if not isinstance(nic_op, int):
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
        if not isinstance(nic_dict, dict):
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

      # nic_dict should be a dict
      nic_ip = nic_dict.get(constants.INIC_IP, None)
      if nic_ip is not None:
        if nic_ip.lower() == constants.VALUE_NONE:
          nic_dict[constants.INIC_IP] = None
        else:
          if not netutils.IPAddress.IsValid(nic_ip):
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
                                       errors.ECODE_INVAL)

      nic_bridge = nic_dict.get("bridge", None)
      nic_link = nic_dict.get(constants.INIC_LINK, None)
      if nic_bridge and nic_link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time", errors.ECODE_INVAL)
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
        nic_dict["bridge"] = None
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
        nic_dict[constants.INIC_LINK] = None

      if nic_op == constants.DDM_ADD:
        nic_mac = nic_dict.get(constants.INIC_MAC, None)
        if nic_mac is None:
          nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO

      if constants.INIC_MAC in nic_dict:
        nic_mac = nic_dict[constants.INIC_MAC]
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)

        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
                                     " modifying an existing nic",
                                     errors.ECODE_INVAL)

    if nic_addremove > 1:
      raise errors.OpPrereqError("Only one NIC add or remove operation"
                                 " supported at a time", errors.ECODE_INVAL)
  def ExpandNames(self):
    self._ExpandAndLockInstance()
    # Can't even acquire node locks in shared mode as upcoming changes in
    # Ganeti 2.6 will start to modify the node object on disk conversion
    self.needed_locks[locking.LEVEL_NODE] = []
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
      if self.op.disk_template and self.op.remote_node:
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
    elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE][:]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, primary and secondaries.

    """
    args = dict()
    if constants.BE_MINMEM in self.be_new:
      args["minmem"] = self.be_new[constants.BE_MINMEM]
    if constants.BE_MAXMEM in self.be_new:
      args["maxmem"] = self.be_new[constants.BE_MAXMEM]
    if constants.BE_VCPUS in self.be_new:
      args["vcpus"] = self.be_new[constants.BE_VCPUS]
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
    # information at all.
    if self.op.nics:
      args["nics"] = []
      nic_override = dict(self.op.nics)
      for idx, nic in enumerate(self.instance.nics):
        if idx in nic_override:
          this_nic_override = nic_override[idx]
        else:
          this_nic_override = {}
        if constants.INIC_IP in this_nic_override:
          ip = this_nic_override[constants.INIC_IP]
        else:
          ip = nic.ip
        if constants.INIC_MAC in this_nic_override:
          mac = this_nic_override[constants.INIC_MAC]
        else:
          mac = nic.mac
        if idx in self.nic_pnew:
          nicparams = self.nic_pnew[idx]
        else:
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args["nics"].append((ip, mac, mode, link))
      if constants.DDM_ADD in nic_override:
        ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
        mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
        nicparams = self.nic_pnew[constants.DDM_ADD]
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args["nics"].append((ip, mac, mode, link))
      elif constants.DDM_REMOVE in nic_override:
        del args["nics"][-1]

    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
    if self.op.disk_template:
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    # checking the new params on the primary/secondary nodes

    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    cluster = self.cluster = self.cfg.GetClusterInfo()
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    pnode = instance.primary_node
    nodelist = list(instance.all_nodes)
    pnode_info = self.cfg.GetNodeInfo(pnode)
    self.diskparams = self.cfg.GetNodeGroup(pnode_info.group).diskparams

    # OS change
    if self.op.os_name and not self.op.force:
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
                      self.op.force_variant)
      instance_os = self.op.os_name
    else:
      instance_os = instance.os

    if self.op.disk_template:
      if instance.disk_template == self.op.disk_template:
        raise errors.OpPrereqError("Instance already has disk template %s" %
                                   instance.disk_template, errors.ECODE_INVAL)

      if (instance.disk_template,
          self.op.disk_template) not in self._DISK_CONVERSIONS:
        raise errors.OpPrereqError("Unsupported disk template conversion from"
                                   " %s to %s" % (instance.disk_template,
                                                  self.op.disk_template),
                                   errors.ECODE_INVAL)
      _CheckInstanceState(self, instance, INSTANCE_DOWN,
                          msg="cannot change disk template")
      if self.op.disk_template in constants.DTS_INT_MIRROR:
        if self.op.remote_node == pnode:
          raise errors.OpPrereqError("Given new secondary node %s is the same"
                                     " as the primary node of the instance" %
                                     self.op.remote_node, errors.ECODE_STATE)
        _CheckNodeOnline(self, self.op.remote_node)
        _CheckNodeNotDrained(self, self.op.remote_node)
        # FIXME: here we assume that the old instance type is DT_PLAIN
        assert instance.disk_template == constants.DT_PLAIN
        disks = [{constants.IDISK_SIZE: d.size,
                  constants.IDISK_VG: d.logical_id[0]}
                 for d in instance.disks]
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)

        snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
        ipolicy = _CalculateGroupIPolicy(cluster, snode_info.group)
        _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
                                ignore=self.op.ignore_ipolicy)
        if pnode_info.group != snode_info.group:
          self.LogWarning("The primary and secondary nodes are in two"
                          " different node groups; the disk parameters"
                          " from the first disk's node group will be"
                          " used")

    # hvparams processing
    if self.op.hvparams:
      hv_type = instance.hypervisor
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)

      # local check
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
      self.hv_proposed = self.hv_new = hv_new # the new actual values
      self.hv_inst = i_hvdict # the new dict (without defaults)
    else:
      self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
                                              instance.hvparams)
      self.hv_new = self.hv_inst = {}

    # beparams processing
    if self.op.beparams:
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
                                   use_none=True)
      objects.UpgradeBeParams(i_bedict)
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
      be_new = cluster.SimpleFillBE(i_bedict)
      self.be_proposed = self.be_new = be_new # the new actual values
      self.be_inst = i_bedict # the new dict (without defaults)
    else:
      self.be_new = self.be_inst = {}
      self.be_proposed = cluster.SimpleFillBE(instance.beparams)
    be_old = cluster.FillBE(instance)

    # CPU param validation -- checking every time a parameter is
    # changed to cover all cases where either CPU mask or vcpus have
    # changed
    if (constants.BE_VCPUS in self.be_proposed and
        constants.HV_CPU_MASK in self.hv_proposed):
      cpu_list = \
        utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
      # Verify mask is consistent with number of vCPUs. Can skip this
      # test if only 1 entry in the CPU mask, which means same mask
      # is applied to all vCPUs.
      if (len(cpu_list) > 1 and
          len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
        raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
                                   " CPU mask [%s]" %
                                   (self.be_proposed[constants.BE_VCPUS],
                                    self.hv_proposed[constants.HV_CPU_MASK]),
                                   errors.ECODE_INVAL)

      # Only perform this test if a new CPU mask is given
      if constants.HV_CPU_MASK in self.hv_new:
        # Calculate the largest CPU number requested
        max_requested_cpu = max(map(max, cpu_list))
        # Check that all of the instance's nodes have enough physical CPUs to
        # satisfy the requested CPU mask
        _CheckNodesPhysicalCPUs(self, instance.all_nodes,
                                max_requested_cpu + 1, instance.hypervisor)

    # osparams processing
    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = {}

    self.warn = []

    #TODO(dynmem): do the appropriate check involving MINMEM
    if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
        be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
      mem_check_list = [pnode]
      if be_new[constants.BE_AUTO_BALANCE]:
        # either we changed auto_balance to yes or it was from before
        mem_check_list.extend(instance.secondary_nodes)
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
                                                  instance.hypervisor)
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
                                         [instance.hypervisor])
      pninfo = nodeinfo[pnode]
      msg = pninfo.fail_msg
      if msg:
        # Assume the primary node is unreachable and go ahead
        self.warn.append("Can't get info from primary node %s: %s" %
                         (pnode, msg))
      else:
        (_, _, (pnhvinfo, )) = pninfo.payload
        if not isinstance(pnhvinfo.get("memory_free", None), int):
          self.warn.append("Node data from primary node %s doesn't contain"
                           " free memory information" % pnode)
        elif instance_info.fail_msg:
          self.warn.append("Can't get instance runtime information: %s" %
                           instance_info.fail_msg)
        else:
          if instance_info.payload:
            current_mem = int(instance_info.payload["memory"])
          else:
            # Assume instance not running
            # (there is a slight race condition here, but it's not very
            # probable, and we have no other way to check)
            # TODO: Describe race condition
            current_mem = 0

          #TODO(dynmem): do the appropriate check involving MINMEM
          miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
                      pnhvinfo["memory_free"])
          if miss_mem > 0:
            raise errors.OpPrereqError("This change will prevent the instance"
                                       " from starting, due to %d MB of memory"
                                       " missing on its primary node" %
                                       miss_mem,
                                       errors.ECODE_NORES)

      if be_new[constants.BE_AUTO_BALANCE]:
        for node, nres in nodeinfo.items():
          if node not in instance.secondary_nodes:
            continue
          nres.Raise("Can't get info from secondary node %s" % node,
                     prereq=True, ecode=errors.ECODE_STATE)
          (_, _, (nhvinfo, )) = nres.payload
          if not isinstance(nhvinfo.get("memory_free", None), int):
            raise errors.OpPrereqError("Secondary node %s didn't return free"
                                       " memory information" % node,
                                       errors.ECODE_STATE)
          #TODO(dynmem): do the appropriate check involving MINMEM
          elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
            raise errors.OpPrereqError("This change will prevent the instance"
                                       " from failover to its secondary node"
                                       " %s, due to not enough memory" % node,
                                       errors.ECODE_STATE)

    # NIC processing
    self.nic_pnew = {}
    self.nic_pinst = {}
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        if not instance.nics:
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
                                     errors.ECODE_INVAL)
        continue
      if nic_op != constants.DDM_ADD:
        # an existing nic
        if not instance.nics:
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
                                     " no NICs" % nic_op,
                                     errors.ECODE_INVAL)
        if nic_op < 0 or nic_op >= len(instance.nics):
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
                                     " are 0 to %d" %
                                     (nic_op, len(instance.nics) - 1),
                                     errors.ECODE_INVAL)
        old_nic_params = instance.nics[nic_op].nicparams
        old_nic_ip = instance.nics[nic_op].ip
      else:
        old_nic_params = {}
        old_nic_ip = None

      update_params_dict = dict([(key, nic_dict[key])
                                 for key in constants.NICS_PARAMETERS
                                 if key in nic_dict])

      if "bridge" in nic_dict:
        update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]

      new_nic_params = _GetUpdatedParams(old_nic_params,
                                         update_params_dict)
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
      self.nic_pinst[nic_op] = new_nic_params
      self.nic_pnew[nic_op] = new_filled_nic_params
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]

      if new_nic_mode == constants.NIC_MODE_BRIDGED:
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
        if msg:
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
          if self.op.force:
            self.warn.append(msg)
          else:
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
      if new_nic_mode == constants.NIC_MODE_ROUTED:
        if constants.INIC_IP in nic_dict:
          nic_ip = nic_dict[constants.INIC_IP]
        else:
          nic_ip = old_nic_ip
        if nic_ip is None:
          raise errors.OpPrereqError("Cannot set the nic ip to None"
                                     " on a routed nic", errors.ECODE_INVAL)
      if constants.INIC_MAC in nic_dict:
        nic_mac = nic_dict[constants.INIC_MAC]
        if nic_mac is None:
          raise errors.OpPrereqError("Cannot set the nic mac to None",
                                     errors.ECODE_INVAL)
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          # otherwise generate the mac
          nic_dict[constants.INIC_MAC] = \
            self.cfg.GenerateMAC(self.proc.GetECId())
        else:
          # or validate/reserve the current one
          try:
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
          except errors.ReservationError:
            raise errors.OpPrereqError("MAC address %s already in use"
                                       " in cluster" % nic_mac,
                                       errors.ECODE_NOTUNIQUE)

    # DISK processing
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Disk operations not supported for"
                                 " diskless instances",
                                 errors.ECODE_INVAL)
    for disk_op, _ in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        if len(instance.disks) == 1:
          raise errors.OpPrereqError("Cannot remove the last disk of"
                                     " an instance", errors.ECODE_INVAL)
        _CheckInstanceState(self, instance, INSTANCE_DOWN,
                            msg="cannot remove disks")

      if (disk_op == constants.DDM_ADD and
          len(instance.disks) >= constants.MAX_DISKS):
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
                                   " add more" % constants.MAX_DISKS,
                                   errors.ECODE_STATE)
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
        # an existing disk
        if disk_op < 0 or disk_op >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
                                     " are 0 to %d" %
                                     (disk_op, len(instance.disks) - 1),
                                     errors.ECODE_INVAL)

    # disabling the instance
    if self.op.offline_inst:
      _CheckInstanceState(self, instance, INSTANCE_DOWN,
                          msg="cannot change instance state to offline")

    # enabling the instance
    if self.op.online_inst:
      _CheckInstanceState(self, instance, INSTANCE_OFFLINE,
                          msg="cannot make instance go online")

  def _ConvertPlainToDrbd(self, feedback_fn):
    """Converts an instance from plain to drbd.

    """
    feedback_fn("Converting template to drbd")
    instance = self.instance
    pnode = instance.primary_node
    snode = self.op.remote_node

    assert instance.disk_template == constants.DT_PLAIN

    # create a fake disk info for _GenerateDiskTemplate
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
                  constants.IDISK_VG: d.logical_id[0]}
                 for d in instance.disks]
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
                                      instance.name, pnode, [snode],
                                      disk_info, None, None, 0, feedback_fn,
                                      self.diskparams)
    info = _GetInstanceInfoText(instance)
    feedback_fn("Creating additional volumes...")
    # first, create the missing data and meta devices
    for disk in new_disks:
      # unfortunately this is... not too nice
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
                            info, True)
      for child in disk.children:
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
    # at this stage, all new LVs have been created, we can rename the
    # old ones
    feedback_fn("Renaming original volumes...")
    rename_list = [(o, n.children[0].logical_id)
                   for (o, n) in zip(instance.disks, new_disks)]
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
    result.Raise("Failed to rename original LVs")

    feedback_fn("Initializing DRBD devices...")
    # all child devices are in place, we can now create the DRBD devices
    for disk in new_disks:
      for node in [pnode, snode]:
        f_create = node == pnode
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)

    # at this point, the instance has been modified
    instance.disk_template = constants.DT_DRBD8
    instance.disks = new_disks
    self.cfg.Update(instance, feedback_fn)

    # Release node locks while waiting for sync
    _ReleaseLocks(self, locking.LEVEL_NODE)

    # disks are created, waiting for sync
    disk_abort = not _WaitForSync(self, instance,
                                  oneshot=not self.op.wait_for_sync)
    if disk_abort:
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance, please cleanup manually")

    # Node resource locks will be released by caller

  def _ConvertDrbdToPlain(self, feedback_fn):
    """Converts an instance from drbd to plain.

    """
    instance = self.instance

    assert len(instance.secondary_nodes) == 1
    assert instance.disk_template == constants.DT_DRBD8

    pnode = instance.primary_node
    snode = instance.secondary_nodes[0]
    feedback_fn("Converting template to plain")

    old_disks = instance.disks
    new_disks = [d.children[0] for d in old_disks]

    # copy over size and mode
    for parent, child in zip(old_disks, new_disks):
      child.size = parent.size
      child.mode = parent.mode

    # update instance structure
    instance.disks = new_disks
    instance.disk_template = constants.DT_PLAIN
    self.cfg.Update(instance, feedback_fn)

    # Release locks in case removing disks takes a while
    _ReleaseLocks(self, locking.LEVEL_NODE)

    feedback_fn("Removing volumes on the secondary node...")
    for disk in old_disks:
      self.cfg.SetDiskID(disk, snode)
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
      if msg:
        self.LogWarning("Could not remove block device %s on node %s,"
                        " continuing anyway: %s", disk.iv_name, snode, msg)

    feedback_fn("Removing unneeded volumes on the primary node...")
    for idx, disk in enumerate(old_disks):
      meta = disk.children[1]
      self.cfg.SetDiskID(meta, pnode)
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
      if msg:
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
                        " continuing anyway: %s", idx, pnode, msg)

    # this is a DRBD disk, return its port to the pool
    for disk in old_disks:
      tcp_port = disk.logical_id[2]
      self.cfg.AddTcpUdpPort(tcp_port)

    # Node resource locks will be released by caller
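
  # Sketch of the disk trees handled by the two converters above (structure
  # as used in this module):
  #
  #   DRBD8 disk                        plain disk after drbd->plain
  #     children[0] -> data LV    =>      the data LV itself
  #     children[1] -> meta LV            (meta LV removed, DRBD TCP port
  #                                        returned to the pool)
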
  def Exec(self, feedback_fn):
    """Modifies an instance.

    All parameters take effect only at the next restart of the instance.

    """
    # Process here the warnings from CheckPrereq, as we don't have a
    # feedback_fn there.
    for warn in self.warn:
      feedback_fn("WARNING: %s" % warn)

    assert ((self.op.disk_template is None) ^
            bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
      "Not owning any node resource locks"

    result = []
    instance = self.instance

    for disk_op, disk_dict in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        # remove the last disk
        device = instance.disks.pop()
        device_idx = len(instance.disks)
        for node, disk in device.ComputeNodeTree(instance.primary_node):
          self.cfg.SetDiskID(disk, node)
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
          if msg:
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
                            " continuing anyway", device_idx, node, msg)
        result.append(("disk/%d" % device_idx, "remove"))

        # if this is a DRBD disk, return its port to the pool
        if device.dev_type in constants.LDS_DRBD:
          tcp_port = device.logical_id[2]
          self.cfg.AddTcpUdpPort(tcp_port)
      elif disk_op == constants.DDM_ADD:
        # add a new disk
        if instance.disk_template in (constants.DT_FILE,
                                      constants.DT_SHARED_FILE):
          file_driver, file_path = instance.disks[0].logical_id
          file_path = os.path.dirname(file_path)
        else:
          file_driver = file_path = None
        disk_idx_base = len(instance.disks)
        new_disk = _GenerateDiskTemplate(self,
                                         instance.disk_template,
                                         instance.name, instance.primary_node,
                                         instance.secondary_nodes,
                                         [disk_dict],
                                         file_path,
                                         file_driver,
                                         disk_idx_base,
                                         feedback_fn,
                                         self.diskparams)[0]
        instance.disks.append(new_disk)
        info = _GetInstanceInfoText(instance)

        logging.info("Creating volume %s for instance %s",
                     new_disk.iv_name, instance.name)
        # Note: this needs to be kept in sync with _CreateDisks
        for node in instance.all_nodes:
          f_create = node == instance.primary_node
          try:
            _CreateBlockDev(self, node, instance, new_disk,
                            f_create, info, f_create)
          except errors.OpExecError, err:
            self.LogWarning("Failed to create volume %s (%s) on"
                            " node %s: %s",
                            new_disk.iv_name, new_disk, node, err)
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
                       (new_disk.size, new_disk.mode)))
      else:
        # change a given disk
        instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
        result.append(("disk.mode/%d" % disk_op,
                       disk_dict[constants.IDISK_MODE]))

    if self.op.disk_template:
      if __debug__:
        check_nodes = set(instance.all_nodes)
        if self.op.remote_node:
          check_nodes.add(self.op.remote_node)
        for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
          owned = self.owned_locks(level)
          assert not (check_nodes - owned), \
            ("Not owning the correct locks, owning %r, expected at least %r" %
             (owned, check_nodes))

      r_shut = _ShutdownInstanceDisks(self, instance)
      if not r_shut:
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
                                 " proceed with disk template conversion")
      mode = (instance.disk_template, self.op.disk_template)
      try:
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
      except:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise
      result.append(("disk_template", self.op.disk_template))

      assert instance.disk_template == self.op.disk_template, \
        ("Expected disk template '%s', found '%s'" %
         (self.op.disk_template, instance.disk_template))

    # Release node and resource locks if there are any (they might already have
    # been released during disk conversion)
    _ReleaseLocks(self, locking.LEVEL_NODE)
    _ReleaseLocks(self, locking.LEVEL_NODE_RES)

    # NIC changes
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        # remove the last nic
        del instance.nics[-1]
        result.append(("nic.%d" % len(instance.nics), "remove"))
      elif nic_op == constants.DDM_ADD:
        # mac and bridge should be set, by now
        mac = nic_dict[constants.INIC_MAC]
        ip = nic_dict.get(constants.INIC_IP, None)
        nicparams = self.nic_pinst[constants.DDM_ADD]
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
        instance.nics.append(new_nic)
        result.append(("nic.%d" % (len(instance.nics) - 1),
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
                       (new_nic.mac, new_nic.ip,
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
                        )))
      else:
        for key in (constants.INIC_MAC, constants.INIC_IP):
          if key in nic_dict:
            setattr(instance.nics[nic_op], key, nic_dict[key])
        if nic_op in self.nic_pinst:
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
        for key, val in nic_dict.iteritems():
          result.append(("nic.%s/%d" % (key, nic_op), val))

    # hvparams changes
    if self.op.hvparams:
      instance.hvparams = self.hv_inst
      for key, val in self.op.hvparams.iteritems():
        result.append(("hv/%s" % key, val))

    # beparams changes
    if self.op.beparams:
      instance.beparams = self.be_inst
      for key, val in self.op.beparams.iteritems():
        result.append(("be/%s" % key, val))

    # OS change
    if self.op.os_name:
      instance.os = self.op.os_name

    # osparams changes
    if self.op.osparams:
      instance.osparams = self.os_inst
      for key, val in self.op.osparams.iteritems():
        result.append(("os/%s" % key, val))

    # online/offline instance
    if self.op.online_inst:
      # "online" means leaving the ADMINST_OFFLINE state; the instance is
      # marked administratively down, but no longer offline
      self.cfg.MarkInstanceDown(instance.name)
      result.append(("admin_state", constants.ADMINST_DOWN))
    if self.op.offline_inst:
      self.cfg.MarkInstanceOffline(instance.name)
      result.append(("admin_state", constants.ADMINST_OFFLINE))

    self.cfg.Update(instance, feedback_fn)

    assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
                self.owned_locks(locking.LEVEL_NODE)), \
      "All node locks should have been released by now"

    return result

  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }
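

# Illustrative note: _DISK_CONVERSIONS above is keyed on (old, new) disk
# template pairs and dispatched in Exec as roughly
#
#   mode = (instance.disk_template, self.op.disk_template)
#   self._DISK_CONVERSIONS[mode](self, feedback_fn)
#
# which is why CheckPrereq rejects any template pair not in the dictionary.
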
class LUInstanceChangeGroup(LogicalUnit):
  HPATH = "instance-change-group"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

    self._ExpandAndLockInstance()

    if self.op.target_groups:
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                                  self.op.target_groups)
    else:
      self.req_target_uuids = None

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if self.req_target_uuids:
        lock_groups = set(self.req_target_uuids)

        # Lock all groups used by instance optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
        lock_groups.update(instance_groups)
      else:
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET

      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups

    elif level == locking.LEVEL_NODE:
      if self.req_target_uuids:
        # Lock all nodes used by instances
        self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
        self._LockInstancesNodes()

        # Lock all nodes in all potential target groups
        lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
                       self.cfg.GetInstanceNodeGroups(self.op.instance_name))
        member_nodes = [node_name
                        for group in lock_groups
                        for node_name in self.cfg.GetNodeGroup(group).members]
        self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
      else:
        # Lock all nodes as all groups are potential targets
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert (self.req_target_uuids is None or
            owned_groups.issuperset(self.req_target_uuids))
    assert owned_instances == set([self.op.instance_name])

    # Get instance information
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)

    # Check if node groups for locked instance are still correct
    assert owned_nodes.issuperset(self.instance.all_nodes), \
      ("Instance %s's nodes changed while we kept the lock" %
       self.op.instance_name)

    inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
                                           owned_groups)

    if self.req_target_uuids:
      # User requested specific target groups
      self.target_uuids = self.req_target_uuids
    else:
      # All groups except those used by the instance are potential targets
      self.target_uuids = owned_groups - inst_groups

    conflicting_groups = self.target_uuids & inst_groups
    if conflicting_groups:
      raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
                                 " used by the instance '%s'" %
                                 (utils.CommaJoin(conflicting_groups),
                                  self.op.instance_name),
                                 errors.ECODE_INVAL)

    if not self.target_uuids:
      raise errors.OpPrereqError("There are no possible target groups",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    assert self.target_uuids

    env = {
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert instances == [self.op.instance_name], "Instance not locked"

    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
                     instances=instances,
                     target_groups=list(self.target_uuids))

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute solution for changing group of"
                                 " instance '%s' using iallocator '%s': %s" %
                                 (self.op.instance_name, self.op.iallocator,
                                  ial.info),
                                 errors.ECODE_NORES)

    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for changing group of"
                 " instance '%s'", len(jobs), self.op.instance_name)

    return ResultWithJobs(jobs)
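

# Illustrative sketch (hypothetical names): moving an instance to another
# node group could be requested with
#
#   op = opcodes.OpInstanceChangeGroup(instance_name="inst1.example.com",
#                                      iallocator="hail",
#                                      target_groups=["group2"],
#                                      early_release=False)
#
# As with node evacuation, the actual moves are returned as follow-up jobs
# via ResultWithJobs rather than executed inside this LU.
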
class LUBackupQuery(NoHooksLU):
  """Query the exports list

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node.

    """
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
    rpcresult = self.rpc.call_export_list(self.nodes)
    result = {}
    for node in rpcresult:
      if rpcresult[node].fail_msg:
        result[node] = False
      else:
        result[node] = rpcresult[node].payload

    return result
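

# Illustrative sketch of the result shape: nodes whose export-list RPC failed
# map to False instead of a list, so callers must handle both (hypothetical
# names):
#
#   {
#     "node1.example.com": ["inst1.example.com", "inst2.example.com"],
#     "node2.example.com": False,  # export list could not be fetched
#     }
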
class LUBackupPrepare(NoHooksLU):
  """Prepares an instance for an export and returns useful information.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self._cds = _GetClusterDomainSecret()

  def Exec(self, feedback_fn):
    """Prepares an instance for an export.

    """
    instance = self.instance

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      salt = utils.GenerateSecret(8)

      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
      result = self.rpc.call_x509_cert_create(instance.primary_node,
                                              constants.RIE_CERT_VALIDITY)
      result.Raise("Can't create X509 key and certificate on %s" % result.node)

      (name, cert_pem) = result.payload

      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                             cert_pem)

      return {
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
                          salt),
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
        }

    return None


class LUBackupExport(LogicalUnit):
  """Export an instance to an image in the cluster.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.x509_key_name = self.op.x509_key_name
    self.dest_x509_ca_pem = self.op.destination_x509_ca

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      if not self.x509_key_name:
        raise errors.OpPrereqError("Missing X509 key name for encryption",
                                   errors.ECODE_INVAL)

      if not self.dest_x509_ca_pem:
        raise errors.OpPrereqError("Missing destination X509 CA",
                                   errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    # Lock all nodes for local exports
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      # FIXME: lock only instance primary and destination node
      #
      # Sad but true, for now we have to lock all nodes, as we don't know
      # where the previous export might be, and in this LU we search for it
      # and remove it from its current node. In the future we could fix this
      # by:
      #  - making a tasklet to search (share-lock all), then create the new
      #    one, then one to remove, after
      #  - removing the removal operation altogether
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Last minute lock declaration."""
    # All nodes are locked anyway, so nothing to do here.

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on the master, primary node and target node.

    """
    env = {
      "EXPORT_MODE": self.op.mode,
      "EXPORT_NODE": self.op.target_node,
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      # TODO: Generic function for boolean env variables
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      nl.append(self.op.target_node)

    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance and node names are valid.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    if (self.op.remove_instance and
        self.instance.admin_state == constants.ADMINST_UP and
        not self.op.shutdown):
      raise errors.OpPrereqError("Can not remove instance without shutting it"
                                 " down before")

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
      assert self.dst_node is not None

      _CheckNodeOnline(self, self.dst_node.name)
      _CheckNodeNotDrained(self, self.dst_node.name)

      self._cds = None
      self.dest_disk_info = None
      self.dest_x509_ca = None

    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
      self.dst_node = None

      if len(self.op.target_node) != len(self.instance.disks):
        raise errors.OpPrereqError(("Received destination information for %s"
                                    " disks, but instance %s has %s disks") %
                                   (len(self.op.target_node), instance_name,
                                    len(self.instance.disks)),
                                   errors.ECODE_INVAL)

      cds = _GetClusterDomainSecret()

      # Check X509 key name
      try:
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)

      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
                                   errors.ECODE_INVAL)

      # Load and verify CA
      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
                                   (msg, ), errors.ECODE_INVAL)

      self.dest_x509_ca = cert

      # Verify target information
      disk_info = []
      for idx, disk_data in enumerate(self.op.target_node):
        try:
          (host, port, magic) = \
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
        except errors.GenericError, err:
          raise errors.OpPrereqError("Target info for disk %s: %s" %
                                     (idx, err), errors.ECODE_INVAL)

        disk_info.append((host, port, magic))

      assert len(disk_info) == len(self.op.target_node)
      self.dest_disk_info = disk_info

    else:
      raise errors.ProgrammerError("Unhandled export mode %r" %
                                   self.op.mode)

    # instance disk type verification
    # TODO: Implement export support for file-based disks
    for disk in self.instance.disks:
      if disk.dev_type == constants.LD_FILE:
        raise errors.OpPrereqError("Export not supported for instances with"
                                   " file-based disks", errors.ECODE_INVAL)

  def _CleanupExports(self, feedback_fn):
    """Removes exports of current instance from all other nodes.

    If an instance in a cluster with nodes A..D was exported to node C, its
    exports will be removed from the nodes A, B and D.

    """
    assert self.op.mode != constants.EXPORT_MODE_REMOTE

    nodelist = self.cfg.GetNodeList()
    nodelist.remove(self.dst_node.name)

    # on one-node clusters nodelist will be empty after the removal
    # if we proceed the backup would be removed because OpBackupQuery
    # substitutes an empty list with the full cluster node list.
    iname = self.instance.name
    if nodelist:
      feedback_fn("Removing old exports for instance %s" % iname)
      exportlist = self.rpc.call_export_list(nodelist)
      for node in exportlist:
        if exportlist[node].fail_msg:
          continue
        if iname in exportlist[node].payload:
          msg = self.rpc.call_export_remove(node, iname).fail_msg
          if msg:
            self.LogWarning("Could not remove older export for instance %s"
                            " on node %s: %s", iname, node, msg)
  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    """
    assert self.op.mode in constants.EXPORT_MODES

    instance = self.instance
    src_node = instance.primary_node

    if self.op.shutdown:
      # shutdown the instance, but not the disks
      feedback_fn("Shutting down instance %s" % instance.name)
      result = self.rpc.call_instance_shutdown(src_node, instance,
                                               self.op.shutdown_timeout)
      # TODO: Maybe ignore failures if ignore_remove_failures is set
      result.Raise("Could not shutdown instance %s on"
                   " node %s" % (instance.name, src_node))

    # set the disks ID correctly since call_instance_start needs the
    # correct drbd minor to create the symlinks
    for disk in instance.disks:
      self.cfg.SetDiskID(disk, src_node)

    activate_disks = (instance.admin_state != constants.ADMINST_UP)

    if activate_disks:
      # Activate the instance disks if we're exporting a stopped instance
      feedback_fn("Activating disks for %s" % instance.name)
      _StartInstanceDisks(self, instance, None)

    try:
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
                                                     instance)

      helper.CreateSnapshots()
      try:
        if (self.op.shutdown and
            instance.admin_state == constants.ADMINST_UP and
            not self.op.remove_instance):
          assert not activate_disks
          feedback_fn("Starting instance %s" % instance.name)
          result = self.rpc.call_instance_start(src_node,
                                                (instance, None, None), False)
          msg = result.fail_msg
          if msg:
            feedback_fn("Failed to start instance: %s" % msg)
            _ShutdownInstanceDisks(self, instance)
            raise errors.OpExecError("Could not start instance: %s" % msg)

        if self.op.mode == constants.EXPORT_MODE_LOCAL:
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

          (key_name, _, _) = self.x509_key_name

          dest_ca_pem = \
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                            self.dest_x509_ca)

          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
                                                     key_name, dest_ca_pem,
                                                     timeouts)
      finally:
        helper.Cleanup()

      # Check for backwards compatibility
      assert len(dresults) == len(instance.disks)
      assert compat.all(isinstance(i, bool) for i in dresults), \
             "Not all results are boolean: %r" % dresults

    finally:
      if activate_disks:
        feedback_fn("Deactivating disks for %s" % instance.name)
        _ShutdownInstanceDisks(self, instance)

    if not (compat.all(dresults) and fin_resu):
      failures = []
      if not fin_resu:
        failures.append("export finalization")
      if not compat.all(dresults):
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
                               if not dsk)
        failures.append("disk export: disk(s) %s" % fdsk)

      raise errors.OpExecError("Export failed, errors in %s" %
                               utils.CommaJoin(failures))

    # At this point, the export was successful, we can cleanup/finish

    # Remove instance if requested
    if self.op.remove_instance:
      feedback_fn("Removing instance %s" % instance.name)
      _RemoveInstance(self, feedback_fn, instance,
                      self.op.ignore_remove_failures)

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self._CleanupExports(feedback_fn)

    return fin_resu, dresults
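# Illustrative outcome (hypothetical values): for a two-disk instance whose
# second disk failed to transfer, Exec() would compute fin_resu=True and
# dresults=[True, False], so instead of returning it raises
# "Export failed, errors in disk export: disk(s) 1".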
class LUBackupRemove(NoHooksLU):
  """Remove exports related to the named instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    # We need all nodes to be locked in order for RemoveExport to work, but
    # we don't need to lock the instance itself, as nothing will happen to it
    # (and we can remove exports also for a removed instance)
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
  def Exec(self, feedback_fn):
    """Remove any export.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = False
    if not instance_name:
      fqdn_warn = True
      instance_name = self.op.instance_name

    locked_nodes = self.owned_locks(locking.LEVEL_NODE)
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node in exportlist:
      msg = exportlist[node].fail_msg
      if msg:
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
        continue
      if instance_name in exportlist[node].payload:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")
class LUGroupAdd(LogicalUnit):
  """Logical unit for creating node groups.

  """
  HPATH = "group-add"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # We need the new group's UUID here so that we can create and acquire the
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
    # that it should not check whether the UUID exists in the configuration.
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
    self.needed_locks = {}
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name is not an existing node group
    already.

    """
    try:
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    except errors.OpPrereqError:
      pass
    else:
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
                                 " node group (UUID: %s)" %
                                 (self.op.group_name, existing_uuid),
                                 errors.ECODE_EXISTS)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

    if self.op.hv_state:
      self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
    else:
      self.new_hv_state = None

    if self.op.disk_state:
      self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
    else:
      self.new_disk_state = None

    if self.op.diskparams:
      for templ in constants.DISK_TEMPLATES:
        if templ not in self.op.diskparams:
          self.op.diskparams[templ] = {}
        utils.ForceDictType(self.op.diskparams[templ], constants.DISK_DT_TYPES)
    else:
      self.op.diskparams = self.cfg.GetClusterInfo().diskparams

    if self.op.ipolicy:
      cluster = self.cfg.GetClusterInfo()
      full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
      objects.InstancePolicy.CheckParameterSyntax(full_ipolicy)
  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])
  def Exec(self, feedback_fn):
    """Add the node group to the cluster.

    """
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
                                  uuid=self.group_uuid,
                                  alloc_policy=self.op.alloc_policy,
                                  ndparams=self.op.ndparams,
                                  diskparams=self.op.diskparams,
                                  ipolicy=self.op.ipolicy,
                                  hv_state_static=self.new_hv_state,
                                  disk_state_static=self.new_disk_state)

    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
    del self.remove_locks[locking.LEVEL_NODEGROUP]
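# Usage sketch (assumed CLI invocation): this LU backs OpGroupAdd, which is
# what e.g. "gnt-group add newgroup" submits; the new group starts with no
# members, and nodes are moved into it later via LUGroupAssignNodes below.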
class LUGroupAssignNodes(NoHooksLU):
  """Logical unit for assigning nodes to groups.

  """
  REQ_BGL = False

  def ExpandNames(self):
    # These raise errors.OpPrereqError on their own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)

    # We want to lock all the affected nodes and groups. We have readily
    # available the list of nodes, and the *destination* group. To gather the
    # list of "source" groups, we need to fetch node information later on.
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: set([self.group_uuid]),
      locking.LEVEL_NODE: self.op.nodes,
      }
  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1

      # Try to get all affected nodes' groups without having the group or node
      # lock yet. Needs verification later in the code flow.
      groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)

      self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
            frozenset(self.op.nodes))

    expected_locks = (set([self.group_uuid]) |
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
    actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
    if actual_locks != expected_locks:
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
                               " current groups are '%s', used to be '%s'" %
                               (utils.CommaJoin(expected_locks),
                                utils.CommaJoin(actual_locks)))

    self.node_data = self.cfg.GetAllNodesInfo()
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    instance_data = self.cfg.GetAllInstancesInfo()

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    (new_splits, previous_splits) = \
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
                                             for node in self.op.nodes],
                                            self.node_data, instance_data)

    if new_splits:
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))

      if not self.op.force:
        raise errors.OpExecError("The following instances get split by this"
                                 " change and --force was not given: %s" %
                                 fmt_new_splits)
      else:
        self.LogWarning("This operation will split the following instances: %s",
                        fmt_new_splits)

        if previous_splits:
          self.LogWarning("In addition, these already-split instances continue"
                          " to be split across groups: %s",
                          utils.CommaJoin(utils.NiceSort(previous_splits)))
  def Exec(self, feedback_fn):
    """Assign nodes to a new group.

    """
    mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]

    self.cfg.AssignGroupNodes(mods)
  @staticmethod
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
    """Check for split instances after a node assignment.

    This method considers a series of node assignments as an atomic operation,
    and returns information about split instances after applying the set of
    changes.

    In particular, it returns information about newly split instances, and
    instances that were already split, and remain so after the change.

    Only instances whose disk template is listed in constants.DTS_INT_MIRROR
    are considered.

    @type changes: list of (node_name, new_group_uuid) pairs.
    @param changes: list of node assignments to consider.
    @param node_data: a dict with data for all nodes
    @param instance_data: a dict with all instances to consider
    @rtype: a two-tuple
    @return: a list of instances that were previously okay and result split as
      a consequence of this change, and a list of instances that were
      previously split and this change does not fix.

    """
    changed_nodes = dict((node, group) for node, group in changes
                         if node_data[node].group != group)

    all_split_instances = set()
    previously_split_instances = set()

    def InstanceNodes(instance):
      return [instance.primary_node] + list(instance.secondary_nodes)

    for inst in instance_data.values():
      if inst.disk_template not in constants.DTS_INT_MIRROR:
        continue

      instance_nodes = InstanceNodes(inst)

      if len(set(node_data[node].group for node in instance_nodes)) > 1:
        previously_split_instances.add(inst.name)

      if len(set(changed_nodes.get(node, node_data[node].group)
                 for node in instance_nodes)) > 1:
        all_split_instances.add(inst.name)

    return (list(all_split_instances - previously_split_instances),
            list(previously_split_instances & all_split_instances))
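# Worked example for the split check above (hypothetical names): with nodes
# n1, n2 in group g1 and n3 in g2, a DRBD instance on [n1, n2] becomes newly
# split by changes=[("n2", g2_uuid)], since its node groups then span
# {g1, g2}; an instance on [n1, n3] was already split, remains so after the
# change, and is therefore returned in the second list instead.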
class _GroupQuery(_QueryBase):
  FIELDS = query.GROUP_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}

    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
    self._cluster = lu.cfg.GetClusterInfo()
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())

    if not self.names:
      self.wanted = [name_to_uuid[name]
                     for name in utils.NiceSort(name_to_uuid.keys())]
    else:
      # Accept names to be either names or UUIDs.
      missing = []
      self.wanted = []
      all_uuid = frozenset(self._all_groups.keys())

      for name in self.names:
        if name in all_uuid:
          self.wanted.append(name)
        elif name in name_to_uuid:
          self.wanted.append(name_to_uuid[name])
        else:
          missing.append(name)

      if missing:
        raise errors.OpPrereqError("Some groups do not exist: %s" %
                                   utils.CommaJoin(missing),
                                   errors.ECODE_NOENT)
  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of node groups and their attributes.

    """
    do_nodes = query.GQ_NODE in self.requested_data
    do_instances = query.GQ_INST in self.requested_data

    group_to_nodes = None
    group_to_instances = None

    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for
    # the latter GetAllInstancesInfo() is not enough, for we have to go through
    # instance->node. Hence, we will need to process nodes even if we only need
    # instance information.
    if do_nodes or do_instances:
      all_nodes = lu.cfg.GetAllNodesInfo()
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
      node_to_group = {}

      for node in all_nodes.values():
        if node.group in group_to_nodes:
          group_to_nodes[node.group].append(node.name)
          node_to_group[node.name] = node.group

      if do_instances:
        all_instances = lu.cfg.GetAllInstancesInfo()
        group_to_instances = dict((uuid, []) for uuid in self.wanted)

        for instance in all_instances.values():
          node = instance.primary_node
          if node in node_to_group:
            group_to_instances[node_to_group[node]].append(instance.name)

        if not do_nodes:
          # Do not pass on node information if it was not requested.
          group_to_nodes = None

    return query.GroupQueryData(self._cluster,
                                [self._all_groups[uuid]
                                 for uuid in self.wanted],
                                group_to_nodes, group_to_instances)
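# Illustrative shapes (hypothetical UUIDs) of the mappings built above:
#   group_to_nodes = {"uuid-g1": ["node1", "node2"], "uuid-g2": ["node3"]}
#   group_to_instances = {"uuid-g1": ["inst1"], "uuid-g2": []}
# both keyed by group UUID and only filled in for the requested query data.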
class LUGroupQuery(NoHooksLU):
  """Logical unit for querying node groups.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
                          self.op.output_fields, False)

  def ExpandNames(self):
    self.gq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.gq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.gq.OldStyleQuery(self)
class LUGroupSetParams(LogicalUnit):
  """Modifies the parameters of a node group.

  """
  HPATH = "group-modify"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def CheckArguments(self):
    all_changes = [
      self.op.ndparams,
      self.op.diskparams,
      self.op.alloc_policy,
      self.op.hv_state,
      self.op.disk_state,
      self.op.ipolicy,
      ]

    if all_changes.count(None) == len(all_changes):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }
  def CheckPrereq(self):
    """Check prerequisites.

    """
    self.group = self.cfg.GetNodeGroup(self.group_uuid)

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

    if self.op.diskparams:
      self.new_diskparams = dict()
      for templ in constants.DISK_TEMPLATES:
        if templ not in self.op.diskparams:
          self.op.diskparams[templ] = {}
        new_templ_params = _GetUpdatedParams(self.group.diskparams[templ],
                                             self.op.diskparams[templ])
        utils.ForceDictType(new_templ_params, constants.DISK_DT_TYPES)
        self.new_diskparams[templ] = new_templ_params

    if self.op.hv_state:
      self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
                                                 self.group.hv_state_static)

    if self.op.disk_state:
      self.new_disk_state = \
        _MergeAndVerifyDiskState(self.op.disk_state,
                                 self.group.disk_state_static)

    if self.op.ipolicy:
      g_ipolicy = {}
      for key, value in self.op.ipolicy.iteritems():
        g_ipolicy[key] = _GetUpdatedParams(self.group.ipolicy.get(key, {}),
                                           value,
                                           use_none=True)
        utils.ForceDictType(g_ipolicy[key], constants.ISPECS_PARAMETER_TYPES)
      self.new_ipolicy = g_ipolicy
      objects.InstancePolicy.CheckParameterSyntax(self.new_ipolicy)
  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])
  def Exec(self, feedback_fn):
    """Modifies the node group.

    """
    result = []

    if self.op.ndparams:
      self.group.ndparams = self.new_ndparams
      result.append(("ndparams", str(self.group.ndparams)))

    if self.op.diskparams:
      self.group.diskparams = self.new_diskparams
      result.append(("diskparams", str(self.group.diskparams)))

    if self.op.alloc_policy:
      self.group.alloc_policy = self.op.alloc_policy

    if self.op.hv_state:
      self.group.hv_state_static = self.new_hv_state

    if self.op.disk_state:
      self.group.disk_state_static = self.new_disk_state

    if self.op.ipolicy:
      self.group.ipolicy = self.new_ipolicy

    self.cfg.Update(self.group, feedback_fn)
    return result
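# Illustrative return value (hypothetical parameter name): modifying only the
# node parameters would make Exec() return something like
# [("ndparams", "{'some_nd_param': 'value'}")], i.e. one (name, new value)
# pair per modified parameter group.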
class LUGroupRemove(LogicalUnit):
  HPATH = "group-remove"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This will raise errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }
  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name exists as a node group, that it is
    empty (i.e., contains no nodes), and that it is not the last group of the
    cluster.

    """
    # Verify that the group is empty.
    group_nodes = [node.name
                   for node in self.cfg.GetAllNodesInfo().values()
                   if node.group == self.group_uuid]

    if group_nodes:
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
                                 " nodes: %s" %
                                 (self.op.group_name,
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
                                 errors.ECODE_STATE)

    # Verify the cluster would not be left group-less.
    if len(self.cfg.GetNodeGroupList()) == 1:
      raise errors.OpPrereqError("Group '%s' is the only group,"
                                 " cannot be removed" %
                                 self.op.group_name,
                                 errors.ECODE_STATE)
  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])
  def Exec(self, feedback_fn):
    """Remove the node group.

    """
    try:
      self.cfg.RemoveNodeGroup(self.group_uuid)
    except errors.ConfigurationError:
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
                               (self.op.group_name, self.group_uuid))

    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
class LUGroupRename(LogicalUnit):
  HPATH = "group-rename"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }
  def CheckPrereq(self):
    """Check prerequisites.

    Ensures requested new name is not yet used.

    """
    try:
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
    except errors.OpPrereqError:
      pass
    else:
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
                                 " node group (UUID: %s)" %
                                 (self.op.new_name, new_name_uuid),
                                 errors.ECODE_EXISTS)
  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OLD_NAME": self.op.group_name,
      "NEW_NAME": self.op.new_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()

    all_nodes = self.cfg.GetAllNodesInfo()
    all_nodes.pop(mn, None)

    run_nodes = [mn]
    run_nodes.extend(node.name for node in all_nodes.values()
                     if node.group == self.group_uuid)

    return (run_nodes, run_nodes)
  def Exec(self, feedback_fn):
    """Rename the node group.

    """
    group = self.cfg.GetNodeGroup(self.group_uuid)

    if group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    group.name = self.op.new_name
    self.cfg.Update(group, feedback_fn)

    return self.op.new_name
class LUGroupEvacuate(LogicalUnit):
  HPATH = "group-evacuate"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    if self.op.target_groups:
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                                  self.op.target_groups)
    else:
      self.req_target_uuids = []

    if self.group_uuid in self.req_target_uuids:
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
                                 " as a target group (targets are %s)" %
                                 (self.group_uuid,
                                  utils.CommaJoin(self.req_target_uuids)),
                                 errors.ECODE_INVAL)

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }
  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if self.req_target_uuids:
        lock_groups = set([self.group_uuid] + self.req_target_uuids)

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lock_groups.update(group_uuid
                           for instance_name in
                             self.owned_locks(locking.LEVEL_INSTANCE)
                           for group_uuid in
                             self.cfg.GetInstanceNodeGroups(instance_name))
      else:
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET

      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be evacuated which
      # contain actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be evacuated and target groups
      owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
      assert self.group_uuid in owned_groups
      member_nodes = [node_name
                      for group in owned_groups
                      for node_name in self.cfg.GetNodeGroup(group).members]
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert owned_groups.issuperset(self.req_target_uuids)
    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))

    # Check if node groups for locked instances are still correct
    for instance_name in owned_instances:
      inst = self.instances[instance_name]
      assert owned_nodes.issuperset(inst.all_nodes), \
        "Instance %s's nodes changed while we kept the lock" % instance_name

      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
                                             owned_groups)

      assert self.group_uuid in inst_groups, \
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)

    if self.req_target_uuids:
      # User requested specific target groups
      self.target_uuids = self.req_target_uuids
    else:
      # All groups except the one to be evacuated are potential targets
      self.target_uuids = [group_uuid for group_uuid in owned_groups
                           if group_uuid != self.group_uuid]

      if not self.target_uuids:
        raise errors.OpPrereqError("There are no possible target groups",
                                   errors.ECODE_INVAL)
  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()

    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)

    run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members

    return (run_nodes, run_nodes)
  def Exec(self, feedback_fn):
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert self.group_uuid not in self.target_uuids

    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
                     instances=instances, target_groups=self.target_uuids)

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute group evacuation using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)

    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
                 len(jobs), self.op.group_name)

    return ResultWithJobs(jobs)
class TagsLU(NoHooksLU): # pylint: disable=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """
  def ExpandNames(self):
    self.group_uuid = None
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)

    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
    # not possible to acquire the BGL based on opcode parameters)
  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)
class LUTagsGet(TagsLU):
  """Returns the tags of a given object.

  """
  REQ_BGL = False

  def ExpandNames(self):
    TagsLU.ExpandNames(self)

    # Share locks as this is only a read operation
    self.share_locks = _ShareAll()

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())
class LUTagsSearch(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err), errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    tgts.extend(("/nodegroup/%s" % n.name, n)
                for n in cfg.GetAllNodeGroupsInfo().values())
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results
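# Example result (illustrative, hypothetical tags): searching for the pattern
# "^web" might return
#   [("/cluster", "web-zone"), ("/instances/inst1.example.com", "webserver")]
# i.e. (path, tag) pairs collected over the cluster, instances, nodes and
# node groups.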
class LUTagsSet(TagsLU):
  """Sets a tag on a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    self.cfg.Update(self.target, feedback_fn)
class LUTagsDel(TagsLU):
  """Delete a list of tags from a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()

    diff_tags = del_tags - cur_tags
    if diff_tags:
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (utils.CommaJoin(diff_names), ),
                                 errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)
class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)
  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()
class LUTestJqueue(NoHooksLU):
  """Utility LU to test some aspects of the job queue.

  """
  REQ_BGL = False

  # Must be lower than default timeout for WaitForJobChange to see whether it
  # notices changed jobs
  _CLIENT_CONNECT_TIMEOUT = 20.0
  _CLIENT_CONFIRM_TIMEOUT = 60.0
  @classmethod
  def _NotifyUsingSocket(cls, cb, errcls):
    """Opens a Unix socket and waits for another program to connect.

    @type cb: callable
    @param cb: Callback to send socket name to client
    @type errcls: class
    @param errcls: Exception class to use for errors

    """
    # Using a temporary directory as there's no easy way to create temporary
    # sockets without writing a custom loop around tempfile.mktemp and
    # socket.bind
    tmpdir = tempfile.mkdtemp()
    try:
      tmpsock = utils.PathJoin(tmpdir, "sock")

      logging.debug("Creating temporary socket at %s", tmpsock)
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
      try:
        sock.bind(tmpsock)
        sock.listen(1)

        # Send details to client
        cb(tmpsock)

        # Wait for client to connect before continuing
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
        try:
          (conn, _) = sock.accept()
        except socket.error, err:
          raise errcls("Client didn't connect in time (%s)" % err)
      finally:
        sock.close()
    finally:
      # Remove as soon as client is connected
      shutil.rmtree(tmpdir)

    # Wait for client to close
    try:
      try:
        # pylint: disable=E1101
        # Instance of '_socketobject' has no ... member
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
        conn.recv(1)
      except socket.error, err:
        raise errcls("Client failed to confirm notification (%s)" % err)
    finally:
      conn.close()
  def _SendNotification(self, test, arg, sockname):
    """Sends a notification to the client.

    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)
    @type sockname: string
    @param sockname: Socket path

    """
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))

  def _Notify(self, prereq, test, arg):
    """Notifies the client of a test.

    @type prereq: bool
    @param prereq: Whether this is a prereq-phase test
    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)

    """
    if prereq:
      errcls = errors.OpPrereqError
    else:
      errcls = errors.OpExecError

    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
                                                  test, arg),
                                   errcls)
  def CheckArguments(self):
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
    self.expandnames_calls = 0

  def ExpandNames(self):
    checkargs_calls = getattr(self, "checkargs_calls", 0)
    if checkargs_calls < 1:
      raise errors.ProgrammerError("CheckArguments was not called")

    self.expandnames_calls += 1

    if self.op.notify_waitlock:
      self._Notify(True, constants.JQT_EXPANDNAMES, None)

    self.LogInfo("Expanding names")

    # Get lock on master node (just to get a lock, not for a particular reason)
    self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
      }
  def Exec(self, feedback_fn):
    if self.expandnames_calls < 1:
      raise errors.ProgrammerError("ExpandNames was not called")

    if self.op.notify_exec:
      self._Notify(False, constants.JQT_EXEC, None)

    self.LogInfo("Executing")

    if self.op.log_messages:
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
        # Report how many test messages have been sent
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)

    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")

    return True
class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has four sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, nodes) for
      easy usage

  """
  # pylint: disable=R0902
  # lots of instance attributes
  def __init__(self, cfg, rpc_runner, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc_runner
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.memory = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.name = None
    self.instances = None
    self.evac_mode = None
    self.target_groups = []
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None

    try:
      (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
    except KeyError:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)

    keyset = [n for (n, _) in keydata]

    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(compat.partial(fn, self), keydata)
  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    ninfo = cfg.GetAllNodesInfo()
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_list = [n.name for n in ninfo.values() if n.vm_capable]

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    else:
      hypervisor_name = cluster_info.primary_hypervisor

    node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
                                        [hypervisor_name])
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)

    data["nodegroups"] = self._ComputeNodeGroupData(cfg)

    config_ndata = self._ComputeBasicNodeData(ninfo)
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
                                                 i_list, config_ndata)
    assert len(data["nodes"]) == len(ninfo), \
        "Incomplete node data computed"

    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)

    self.in_data = data
  @staticmethod
  def _ComputeNodeGroupData(cfg):
    """Compute node groups data.

    """
    ng = dict((guuid, {
      "name": gdata.name,
      "alloc_policy": gdata.alloc_policy,
      })
      for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())

    return ng
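  # Illustrative output (hypothetical UUID):
  #   {"some-group-uuid": {"name": "group1", "alloc_policy": "preferred"}}
  # i.e. a dict keyed by group UUID carrying the name and allocation policy.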
  @staticmethod
  def _ComputeBasicNodeData(node_cfg):
    """Compute global node data.

    @rtype: dict
    @returns: a dict of name: (node dict, node config)

    """
    # fill in static (config-based) values
    node_results = dict((ninfo.name, {
      "tags": list(ninfo.GetTags()),
      "primary_ip": ninfo.primary_ip,
      "secondary_ip": ninfo.secondary_ip,
      "offline": ninfo.offline,
      "drained": ninfo.drained,
      "master_candidate": ninfo.master_candidate,
      "group": ninfo.group,
      "master_capable": ninfo.master_capable,
      "vm_capable": ninfo.vm_capable,
      })
      for ninfo in node_cfg.values())

    return node_results
  @staticmethod
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
                              node_results):
    """Compute global node data.

    @param node_results: the basic node structures as filled from the config

    """
    #TODO(dynmem): compute the right data on MAX and MIN memory
    # make a copy of the current dict
    node_results = dict(node_results)
    for nname, nresult in node_data.items():
      assert nname in node_results, "Missing basic data for node %s" % nname
      ninfo = node_cfg[nname]

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = _MakeLegacyNodeInfo(nresult.payload)

        for attr in ["memory_total", "memory_free", "memory_dom0",
                     "vg_size", "vg_free", "cpu_total"]:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MAXMEM]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
            i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
            remote_info["memory_free"] -= max(0, i_mem_diff)

            if iinfo.admin_state == constants.ADMINST_UP:
              i_p_up_mem += beinfo[constants.BE_MAXMEM]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info["memory_total"],
          "reserved_memory": remote_info["memory_dom0"],
          "free_memory": remote_info["memory_free"],
          "total_disk": remote_info["vg_size"],
          "free_disk": remote_info["vg_free"],
          "total_cpus": remote_info["cpu_total"],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr_dyn.update(node_results[nname])
        node_results[nname] = pnr_dyn

    return node_results
  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.

    """
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {
          "mac": nic.mac,
          "ip": nic.ip,
          "mode": filled_params[constants.NIC_MODE],
          "link": filled_params[constants.NIC_LINK],
          }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_state": iinfo.admin_state,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MAXMEM],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{constants.IDISK_SIZE: dsk.size,
                   constants.IDISK_MODE: dsk.mode}
                  for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data
  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _AllocatorGetClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_INT_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1

    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.memory,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      "hypervisor": self.hypervisor,
      }

    return request
  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _IAllocatorGetClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_MIRRORED:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if instance.disk_template in constants.DTS_INT_MIRROR and \
        len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }

    return request
  def _AddNodeEvacuate(self):
    """Get data for node-evacuate requests.

    """
    return {
      "instances": self.instances,
      "evac_mode": self.evac_mode,
      }

  def _AddChangeGroup(self):
    """Get data for group change requests.

    """
    return {
      "instances": self.instances,
      "target_groups": self.target_groups,
      }
  def _BuildInputData(self, fn, keydata):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    for keyname, keytype in keydata:
      if keyname not in request:
        raise errors.ProgrammerError("Request parameter %s is missing" %
                                     keyname)
      val = request[keyname]
      if not keytype(val):
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
                                     " validation, value %s, expected"
                                     " type %s" % (keyname, val, keytype))
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)
  _STRING_LIST = ht.TListOf(ht.TString)
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
     # pylint: disable=E1101
     # Class '...' has no 'OP_ID' member
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
                          opcodes.OpInstanceMigrate.OP_ID,
                          opcodes.OpInstanceReplaceDisks.OP_ID])
     })))

  _NEVAC_MOVED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TNonEmptyString,
                                  ht.TListOf(ht.TNonEmptyString),
                                 ])))
  _NEVAC_FAILED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TMaybeString,
                                 ])))
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
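  # Illustrative value (hypothetical names) accepted by _NEVAC_RESULT above:
  #   ([("inst1", "target-group", ["node3"])],      # moved
  #    [("inst2", "failure reason")],               # failed
  #    [[{"OP_ID": "OP_INSTANCE_MIGRATE", ...}]])   # jobs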
  _MODE_DATA = {
    constants.IALLOCATOR_MODE_ALLOC:
      (_AddNewInstance,
       [
        ("name", ht.TString),
        ("memory", ht.TInt),
        ("disks", ht.TListOf(ht.TDict)),
        ("disk_template", ht.TString),
        ("os", ht.TString),
        ("tags", _STRING_LIST),
        ("nics", ht.TListOf(ht.TDict)),
        ("vcpus", ht.TInt),
        ("hypervisor", ht.TString),
        ], ht.TList),
    constants.IALLOCATOR_MODE_RELOC:
      (_AddRelocateInstance,
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
       ht.TList),
    constants.IALLOCATOR_MODE_NODE_EVAC:
      (_AddNodeEvacuate, [
        ("instances", _STRING_LIST),
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
        ], _NEVAC_RESULT),
    constants.IALLOCATOR_MODE_CHG_GROUP:
      (_AddChangeGroup, [
        ("instances", _STRING_LIST),
        ("target_groups", _STRING_LIST),
        ], _NEVAC_RESULT),
    }
  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()
  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not self._result_check(self.result):
      raise errors.OpExecError("Iallocator returned invalid result,"
                               " expected %s, got %s" %
                               (self._result_check, self.result),
                               errors.ECODE_INVAL)

    if self.mode == constants.IALLOCATOR_MODE_RELOC:
      assert self.relocate_from is not None
      assert self.required_nodes == 1

      node2group = dict((name, ndata["group"])
                        for (name, ndata) in self.in_data["nodes"].items())

      fn = compat.partial(self._NodesToGroups, node2group,
                          self.in_data["nodegroups"])

      instance = self.cfg.GetInstanceInfo(self.name)
      request_groups = fn(self.relocate_from + [instance.primary_node])
      result_groups = fn(rdict["result"] + [instance.primary_node])

      if self.success and not set(result_groups).issubset(request_groups):
        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
                                 " differ from original groups (%s)" %
                                 (utils.CommaJoin(result_groups),
                                  utils.CommaJoin(request_groups)))

    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES

    self.out_data = rdict
  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
    """Returns a list of unique group names for a list of nodes.

    @type node2group: dict
    @param node2group: Map from node name to group UUID
    @type groups: dict
    @param groups: Group information
    @type nodes: list
    @param nodes: Node names

    """
    result = set()

    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        pass
      else:
        try:
          group = groups[group_uuid]
        except KeyError:
          # Can't find group, let's use UUID
          group_name = group_uuid
        else:
          group_name = group["name"]

        result.add(group_name)

    return sorted(result)
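  # Worked example (hypothetical data): with
  #   node2group = {"node1": "uuid-1", "node2": "uuid-2"}
  #   groups = {"uuid-1": {"name": "group1"}}
  # _NodesToGroups(node2group, groups, ["node1", "node2", "unknown"]) returns
  # ["group1", "uuid-2"]: unknown nodes are skipped and a group missing from
  # the map falls back to its UUID.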
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and test
    mode.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
        list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)
  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result
#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
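# Usage sketch: callers resolve a query implementation by resource name, e.g.
# _GetQueryImplementation(constants.QR_GROUP) returns the _GroupQuery class
# defined above, while an unknown name surfaces as OpPrereqError to the
# caller.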