4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.
22 """Module implementing the master-side code."""
24 # pylint: disable=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
44 from ganeti import ssh
45 from ganeti import utils
46 from ganeti import errors
47 from ganeti import hypervisor
48 from ganeti import locking
49 from ganeti import constants
50 from ganeti import objects
51 from ganeti import ssconf
52 from ganeti import uidpool
53 from ganeti import compat
54 from ganeti import masterd
55 from ganeti import netutils
56 from ganeti import query
57 from ganeti import qlang
58 from ganeti import opcodes
60 from ganeti import rpc
61 from ganeti import runtime
62 from ganeti import pathutils
63 from ganeti import vcluster
64 from ganeti.masterd import iallocator
66 import ganeti.masterd.instance # pylint: disable=W0611
#: Instance admin states in which the instance is considered down
INSTANCE_DOWN = [constants.ADMINST_DOWN]
#: Instance admin states in which the instance may legitimately be online
INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
#: Instance admin states in which the instance is not expected to run
INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]

#: Instance status in which an instance can be marked as offline/online
CAN_CHANGE_INSTANCE_OFFLINE = (frozenset(INSTANCE_DOWN) | frozenset([
  constants.ADMINST_OFFLINE,
  ]))
81 """Data container for LU results with jobs.
83 Instances of this class returned from L{LogicalUnit.Exec} will be recognized
84 by L{mcpu._ProcessResult}. The latter will then submit the jobs
85 contained in the C{jobs} attribute and include the job IDs in the opcode
def __init__(self, jobs, **kwargs):
  """Initializes this class.

  Additional return values can be specified as keyword arguments.

  @type jobs: list of lists of L{opcodes.OpCode}
  @param jobs: A list of lists of opcode objects

  """
  # NOTE(review): body reconstructed from truncated source; the keyword
  # arguments are carried along as extra result values -- confirm upstream
  self.other = kwargs
  self.jobs = jobs
class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  # Class-level defaults referenced by the docstring above; subclasses
  # redefine HPATH/HTYPE, and concurrent LUs override REQ_BGL.
  HPATH = None
  HTYPE = None
  REQ_BGL = True
def __init__(self, processor, op, context, rpc_runner):
  """Constructor for LogicalUnit.

  This needs to be overridden in derived classes in order to check op
  validity.

  """
  self.proc = processor
  self.op = op
  self.cfg = context.cfg
  self.glm = context.glm
  # readability alias
  self.owned_locks = context.glm.list_owned
  self.context = context
  self.rpc = rpc_runner
  # Dicts used to declare locking needs to mcpu
  self.needed_locks = None
  self.share_locks = dict.fromkeys(locking.LEVELS, 0)
  self.add_locks = {}
  self.remove_locks = {}
  # Used to force good behavior when calling helper functions
  self.recalculate_locks = {}
  # logging shortcuts taken from the processor
  self.Log = processor.Log # pylint: disable=C0103
  self.LogWarning = processor.LogWarning # pylint: disable=C0103
  self.LogInfo = processor.LogInfo # pylint: disable=C0103
  self.LogStep = processor.LogStep # pylint: disable=C0103
  # support for dry-run
  self.dry_run_result = None
  # support for generic debug attribute
  if (not hasattr(self.op, "debug_level") or
      not isinstance(self.op.debug_level, int)):
    self.op.debug_level = 0

  # Tasklets (populated by subclasses' ExpandNames when used)
  self.tasklets = None

  # Validate opcode parameters and set defaults
  self.op.Validate(True)

  self.CheckArguments()
def CheckArguments(self):
  """Check syntactic validity for the opcode arguments.

  This method is for doing a simple syntactic check and ensure
  validity of opcode parameters, without any cluster-related
  checks. While the same can be accomplished in ExpandNames and/or
  CheckPrereq, doing these separate is better because:

    - ExpandNames is left as purely a lock-related function
    - CheckPrereq is run after we have acquired locks (and possible
      waited for them)

  The function is allowed to change the self.op attribute so that
  later methods can no longer worry about missing parameters.

  """
def ExpandNames(self):
  """Expand names for this LU.

  This method is called before starting to execute the opcode, and it should
  update all the parameters of the opcode to their canonical form (e.g. a
  short node name must be fully expanded after this method has successfully
  completed). This way locking, hooks, logging, etc. can work correctly.

  LUs which implement this method must also populate the self.needed_locks
  member, as a dict with lock levels as keys, and a list of needed lock names
  as values. Rules:

    - use an empty dict if you don't need any lock
    - if you don't need any lock at a particular level omit that
      level (note that in this case C{DeclareLocks} won't be called
      at all for that level)
    - if you need locks at a level, but you can't calculate it in
      this function, initialise that level with an empty list and do
      further processing in L{LogicalUnit.DeclareLocks} (see that
      function's docstring)
    - don't put anything for the BGL level
    - if you want all locks at a level use L{locking.ALL_SET} as a value

  If you need to share locks (rather than acquire them exclusively) at one
  level you can modify self.share_locks, setting a true value (usually 1) for
  that level. By default locks are not shared.

  This function can also define a list of tasklets, which then will be
  executed in order instead of the usual LU-level CheckPrereq and Exec
  functions, if those are not defined by the LU.

  Examples::

    # Acquire all nodes and one instance
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: ['instance1.example.com'],
    }
    # Acquire just two nodes
    self.needed_locks = {
      locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
    }
    # Acquire no locks
    self.needed_locks = {} # No, you can't leave it to the default value None

  """
  # The implementation of this method is mandatory only if the new LU is
  # concurrent, so that old LUs don't need to be changed all at the same
  # time.
  if self.REQ_BGL:
    self.needed_locks = {} # Exclusive LUs don't need locks.
  else:
    raise NotImplementedError
def DeclareLocks(self, level):
  """Declare LU locking needs for a level

  While most LUs can just declare their locking needs at ExpandNames time,
  sometimes there's the need to calculate some locks after having acquired
  the ones before. This function is called just before acquiring locks at a
  particular level, but after acquiring the ones at lower levels, and permits
  such calculations. It can be used to modify self.needed_locks, and by
  default it does nothing.

  This function is only called if you have something already set in
  self.needed_locks for the level.

  @param level: Locking level which is going to be locked
  @type level: member of L{ganeti.locking.LEVELS}

  """
def CheckPrereq(self):
  """Check prerequisites for this LU.

  This method should check that the prerequisites for the execution
  of this LU are fulfilled. It can do internode communication, but
  it should be idempotent - no cluster or system changes are
  allowed.

  The method should raise errors.OpPrereqError in case something is
  not fulfilled. Its return value is ignored.

  This method should also update all the parameters of the opcode to
  their canonical form if it hasn't been done by ExpandNames before.

  """
  if self.tasklets is not None:
    # Delegate the check to each tasklet in order
    for (idx, tl) in enumerate(self.tasklets):
      logging.debug("Checking prerequisites for tasklet %s/%s",
                    idx + 1, len(self.tasklets))
      tl.CheckPrereq()
  else:
    pass
def Exec(self, feedback_fn):
  """Execute the LU.

  This method should implement the actual work. It should raise
  errors.OpExecError for failures that are somewhat dealt with in
  code, or expected.

  """
  if self.tasklets is not None:
    # Delegate execution to each tasklet in order
    for (idx, tl) in enumerate(self.tasklets):
      logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
      tl.Exec(feedback_fn)
  else:
    raise NotImplementedError
def BuildHooksEnv(self):
  """Build hooks environment for this LU.

  @rtype: dict
  @return: Dictionary containing the environment that will be used for
    running the hooks for this LU. The keys of the dict must not be prefixed
    with "GANETI_"--that'll be added by the hooks runner. The hooks runner
    will extend the environment with additional variables. If no environment
    should be defined, an empty dictionary should be returned (not C{None}).
  @note: If the C{HPATH} attribute of the LU class is C{None}, this function
    will not be called.

  """
  raise NotImplementedError
def BuildHooksNodes(self):
  """Build list of nodes to run LU's hooks.

  @rtype: tuple; (list, list)
  @return: Tuple containing a list of node names on which the hook
    should run before the execution and a list of node names on which the
    hook should run after the execution. No nodes should be returned as an
    empty list (and not None).
  @note: If the C{HPATH} attribute of the LU class is C{None}, this function
    will not be called.

  """
  raise NotImplementedError
def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
  """Notify the LU about the results of its hooks.

  This method is called every time a hooks phase is executed, and notifies
  the Logical Unit about the hooks' result. The LU can then use it to alter
  its result based on the hooks. By default the method does nothing and the
  previous result is passed back unchanged but any LU can define it if it
  wants to use the local cluster hook-scripts somehow.

  @param phase: one of L{constants.HOOKS_PHASE_POST} or
      L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
  @param hook_results: the results of the multi-node hooks rpc call
  @param feedback_fn: function used send feedback back to the caller
  @param lu_result: the previous Exec result this LU had, or None
      in the PRE phase
  @return: the new Exec result, based on the previous result
      and hook results

  """
  # API must be kept, thus we ignore the unused argument and could
  # be a function warnings
  # pylint: disable=W0613,R0201
  return lu_result
def _ExpandAndLockInstance(self):
  """Helper function to expand and lock an instance.

  Many LUs that work on an instance take its name in self.op.instance_name
  and need to expand it and then declare the expanded name for locking. This
  function does it, and then updates self.op.instance_name to the expanded
  name. It also initializes needed_locks as a dict, if this hasn't been done
  before.

  """
  if self.needed_locks is None:
    self.needed_locks = {}
  else:
    assert locking.LEVEL_INSTANCE not in self.needed_locks, \
      "_ExpandAndLockInstance called with instance-level locks set"
  self.op.instance_name = _ExpandInstanceName(self.cfg,
                                              self.op.instance_name)
  self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
def _LockInstancesNodes(self, primary_only=False,
                        level=locking.LEVEL_NODE):
  """Helper function to declare instances' nodes for locking.

  This function should be called after locking one or more instances to lock
  their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
  with all primary or secondary nodes for instances already locked and
  present in self.needed_locks[locking.LEVEL_INSTANCE].

  It should be called from DeclareLocks, and for safety only works if
  self.recalculate_locks[locking.LEVEL_NODE] is set.

  In the future it may grow parameters to just lock some instance's nodes, or
  to just lock primaries or secondary nodes, if needed.

  If should be called in DeclareLocks in a way similar to::

    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  @type primary_only: boolean
  @param primary_only: only lock primary nodes of locked instances
  @param level: Which lock level to use for locking nodes

  """
  assert level in self.recalculate_locks, \
    "_LockInstancesNodes helper function called with no nodes to recalculate"

  # TODO: check if we're really been called with the instance locks held

  # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
  # future we might want to have different behaviors depending on the value
  # of self.recalculate_locks[locking.LEVEL_NODE]
  wanted_nodes = []
  locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
  for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
    wanted_nodes.append(instance.primary_node)
    if not primary_only:
      wanted_nodes.extend(instance.secondary_nodes)

  if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
    self.needed_locks[level] = wanted_nodes
  elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
    self.needed_locks[level].extend(wanted_nodes)
  else:
    raise errors.ProgrammerError("Unknown recalculation mode")

  del self.recalculate_locks[level]
class NoHooksLU(LogicalUnit): # pylint: disable=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  # No hook path/type: the hooks runner skips LUs with HPATH None
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLu.

    This just raises an error.

    """
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")

  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.

    """
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
443 """Tasklet base class.
445 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
446 they can mix legacy code with tasklets. Locking needs to be done in the LU,
447 tasklets know nothing about locks.
449 Subclasses must follow these rules:
450 - Implement CheckPrereq
def __init__(self, lu):
  """Constructor for Tasklet.

  @param lu: the owning logical unit

  """
  # NOTE(review): body reconstructed -- the original stores the owning LU
  # and convenience aliases to its config/rpc; confirm against upstream
  self.lu = lu
  self.cfg = lu.cfg
  self.rpc = lu.rpc
def CheckPrereq(self):
  """Check prerequisites for this tasklet.

  This method should check whether the prerequisites for the execution of
  this tasklet are fulfilled. It can do internode communication, but it
  should be idempotent - no cluster or system changes are allowed.

  The method should raise errors.OpPrereqError in case something is not
  fulfilled. Its return value is ignored.

  This method should also update all parameters to their canonical form if it
  hasn't been done before.

  """
def Exec(self, feedback_fn):
  """Execute the tasklet.

  This method should implement the actual work. It should raise
  errors.OpExecError for failures that are somewhat dealt with in code, or
  expected.

  """
  raise NotImplementedError
489 """Base for query utility classes.
492 #: Attribute holding field definitions
def __init__(self, qfilter, fields, use_locking):
  """Initializes this class.

  """
  self.use_locking = use_locking

  self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
                           namefield=self.SORT_FIELD)
  self.requested_data = self.query.RequestedData()
  self.names = self.query.RequestedNames()

  # Sort only if no names were requested
  self.sort_by_name = not self.names

  self.wanted = None
  self.do_locking = None
def _GetNames(self, lu, all_names, lock_level):
  """Helper function to determine names asked for in the query.

  """
  if self.do_locking:
    names = lu.owned_locks(lock_level)
  else:
    names = all_names

  if self.wanted == locking.ALL_SET:
    assert not self.names
    # caller didn't specify names, so ordering is not important
    return utils.NiceSort(names)

  # caller specified names and we must keep the same order
  assert self.names
  assert not self.do_locking or lu.glm.is_owned(lock_level)

  missing = set(self.wanted).difference(names)
  if missing:
    raise errors.OpExecError("Some items were removed before retrieving"
                             " their data: %s" % missing)

  # Return expanded names
  return self.wanted
def ExpandNames(self, lu):
  """Expand names for this query.

  See L{LogicalUnit.ExpandNames}.

  """
  raise NotImplementedError()
def DeclareLocks(self, lu, level):
  """Declare locks for this query.

  See L{LogicalUnit.DeclareLocks}.

  """
  raise NotImplementedError()
def _GetQueryData(self, lu):
  """Collects all data for this query.

  @return: Query data object

  """
  raise NotImplementedError()
def NewStyleQuery(self, lu):
  """Collect data and execute query.

  """
  return query.GetQueryResponse(self.query, self._GetQueryData(lu),
                                sort_by_name=self.sort_by_name)
def OldStyleQuery(self, lu):
  """Collect data and execute query.

  """
  return self.query.OldStyleQuery(self._GetQueryData(lu),
                                  sort_by_name=self.sort_by_name)
def _ShareAll():
  """Returns a dict declaring all lock levels shared.

  """
  # NOTE(review): the "def" line was lost to truncation; name reconstructed
  # from the helper's use elsewhere in this module -- confirm upstream
  return dict.fromkeys(locking.LEVELS, 1)
def _AnnotateDiskParams(instance, devs, cfg):
  """Little helper wrapper to the rpc annotation method.

  @param instance: The instance object
  @type devs: List of L{objects.Disk}
  @param devs: The root devices (not any of its children!)
  @param cfg: The config object
  @return: The annotated disk copies
  @see L{rpc.AnnotateDiskParams}

  """
  return rpc.AnnotateDiskParams(instance.disk_template, devs,
                                cfg.GetInstanceDiskParams(instance))
def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
                              cur_group_uuid=None):
  """Checks if node groups for locked instances are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration
  @type instances: dict; string as key, L{objects.Instance} as value
  @param instances: Dictionary, instance name as key, instance object as value
  @type owned_groups: iterable of string
  @param owned_groups: List of owned groups
  @type owned_nodes: iterable of string
  @param owned_nodes: List of owned nodes
  @type cur_group_uuid: string or None
  @param cur_group_uuid: Optional group UUID to check against instance's groups

  """
  for (name, inst) in instances.items():
    assert owned_nodes.issuperset(inst.all_nodes), \
      "Instance %s's nodes changed while we kept the lock" % name

    inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)

    assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
      "Instance %s has no node in group %s" % (name, cur_group_uuid)
def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups,
                             primary_only=False):
  """Checks if the owned node groups are still correct for an instance.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type instance_name: string
  @param instance_name: Instance name
  @type owned_groups: set or frozenset
  @param owned_groups: List of currently owned node groups
  @type primary_only: boolean
  @param primary_only: Whether to check node groups for only the primary node

  """
  inst_groups = cfg.GetInstanceNodeGroups(instance_name, primary_only)

  if not owned_groups.issuperset(inst_groups):
    # Duplicated "are are" in the original message fixed here
    raise errors.OpPrereqError("Instance %s's node groups changed since"
                               " locks were acquired, current groups are"
                               " '%s', owning groups '%s'; retry the"
                               " operation" %
                               (instance_name,
                                utils.CommaJoin(inst_groups),
                                utils.CommaJoin(owned_groups)),
                               errors.ECODE_STATE)

  return inst_groups
def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
  """Checks if the instances in a node group are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type group_uuid: string
  @param group_uuid: Node group UUID
  @type owned_instances: set or frozenset
  @param owned_instances: List of currently owned instances

  """
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
  if owned_instances != wanted_instances:
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
                               " locks were acquired, wanted '%s', have '%s';"
                               " retry the operation" %
                               (group_uuid,
                                utils.CommaJoin(wanted_instances),
                                utils.CommaJoin(owned_instances)),
                               errors.ECODE_STATE)

  return wanted_instances
def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise

  """
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
def _CopyLockList(names):
  """Makes a copy of a list of lock names.

  Handles L{locking.ALL_SET} correctly.

  """
  if names == locking.ALL_SET:
    # ALL_SET is a sentinel, not a list; pass it through unchanged
    return locking.ALL_SET
  else:
    return names[:]
def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())
def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted
def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      # Reset-to-default/None marker: drop the key if present
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy
777 def _GetUpdatedIPolicy(old_ipolicy, new_ipolicy, group_policy=False):
778 """Return the new version of a instance policy.
780 @param group_policy: whether this policy applies to a group and thus
781 we should support removal of policy entries
784 use_none = use_default = group_policy
785 ipolicy = copy.deepcopy(old_ipolicy)
786 for key, value in new_ipolicy.items():
787 if key not in constants.IPOLICY_ALL_KEYS:
788 raise errors.OpPrereqError("Invalid key in new ipolicy: %s" % key,
790 if key in constants.IPOLICY_ISPECS:
791 utils.ForceDictType(value, constants.ISPECS_PARAMETER_TYPES)
792 ipolicy[key] = _GetUpdatedParams(old_ipolicy.get(key, {}), value,
794 use_default=use_default)
796 if (not value or value == [constants.VALUE_DEFAULT] or
797 value == constants.VALUE_DEFAULT):
801 raise errors.OpPrereqError("Can't unset ipolicy attribute '%s'"
802 " on the cluster'" % key,
805 if key in constants.IPOLICY_PARAMETERS:
806 # FIXME: we assume all such values are float
808 ipolicy[key] = float(value)
809 except (TypeError, ValueError), err:
810 raise errors.OpPrereqError("Invalid value for attribute"
811 " '%s': '%s', error: %s" %
812 (key, value, err), errors.ECODE_INVAL)
814 # FIXME: we assume all others are lists; this should be redone
816 ipolicy[key] = list(value)
818 objects.InstancePolicy.CheckParameterSyntax(ipolicy, not group_policy)
819 except errors.ConfigurationError, err:
820 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
def _UpdateAndVerifySubDict(base, updates, type_check):
  """Updates and verifies a dict with sub dicts of the same type.

  @param base: The dict with the old data
  @param updates: The dict with the new data
  @param type_check: Dict suitable to ForceDictType to verify correct types
  @return: A new dict with updated and verified values

  """
  def fn(old, value):
    # Merge one sub-dict and verify its value types
    new = _GetUpdatedParams(old, value)
    utils.ForceDictType(new, type_check)
    return new

  ret = copy.deepcopy(base)
  ret.update(dict((key, fn(base.get(key, {}), value))
                  for key, value in updates.items()))
  return ret
def _MergeAndVerifyHvState(op_input, obj_input):
  """Combines the hv state from an opcode with the one of the object

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict

  """
  if op_input:
    invalid_hvs = set(op_input) - constants.HYPER_TYPES
    if invalid_hvs:
      raise errors.OpPrereqError("Invalid hypervisor(s) in hypervisor state:"
                                 " %s" % utils.CommaJoin(invalid_hvs),
                                 errors.ECODE_INVAL)
    if obj_input is None:
      obj_input = {}
    type_check = constants.HVSTS_PARAMETER_TYPES
    return _UpdateAndVerifySubDict(obj_input, op_input, type_check)

  return None
def _MergeAndVerifyDiskState(op_input, obj_input):
  """Combines the disk state from an opcode with the one of the object

  @param op_input: The input dict from the opcode
  @param obj_input: The input dict from the objects
  @return: The verified and updated dict

  """
  if op_input:
    invalid_dst = set(op_input) - constants.DS_VALID_TYPES
    if invalid_dst:
      raise errors.OpPrereqError("Invalid storage type(s) in disk state: %s" %
                                 utils.CommaJoin(invalid_dst),
                                 errors.ECODE_INVAL)
    type_check = constants.DSS_PARAMETER_TYPES
    if obj_input is None:
      obj_input = {}
    return dict((key, _UpdateAndVerifySubDict(obj_input.get(key, {}), value,
                                              type_check))
                for key, value in op_input.items())

  return None
def _ReleaseLocks(lu, level, names=None, keep=None):
  """Releases locks owned by an LU.

  @type lu: L{LogicalUnit}
  @param level: Lock level
  @type names: list or None
  @param names: Names of locks to release
  @type keep: list or None
  @param keep: Names of locks to retain

  """
  assert not (keep is not None and names is not None), \
    "Only one of the 'names' and the 'keep' parameters can be given"

  if names is not None:
    should_release = names.__contains__
  elif keep:
    should_release = lambda name: name not in keep
  else:
    should_release = None

  owned = lu.owned_locks(level)
  if not owned:
    # Not owning any lock at this level, do nothing
    pass

  elif should_release:
    retain = []
    release = []

    # Determine which locks to release
    for name in owned:
      if should_release(name):
        release.append(name)
      else:
        retain.append(name)

    assert len(lu.owned_locks(level)) == (len(retain) + len(release))

    # Release just some locks
    lu.glm.release(level, names=release)

    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
  else:
    # Release everything
    lu.glm.release(level)

    assert not lu.glm.is_owned(level), "No locks should be owned"
def _MapInstanceDisksToNodes(instances):
  """Creates a map from (node, volume) to instance name.

  @type instances: list of L{objects.Instance}
  @rtype: dict; tuple of (node name, volume name) as key, instance name as
    value

  """
  return dict(((node, vol), inst.name)
              for inst in instances
              for (node, vols) in inst.MapLVsByNode().items()
              for vol in vols)
953 def _RunPostHook(lu, node_name):
954 """Runs the post-hook for an opcode on a single node.
957 hm = lu.proc.BuildHooksManager(lu)
959 hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
960 except Exception, err: # pylint: disable=W0703
961 lu.LogWarning("Errors occurred running hooks on %s: %s" % (node_name, err))
def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set
  @param selected: the fields requested by the caller

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)
def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)
def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)
def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)
def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(pathutils.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)
def _CheckInstanceState(lu, instance, req_states, msg=None):
  """Ensure that an instance is in one of the required states.

  @param lu: the LU on behalf of which we make the check
  @param instance: the instance to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the instance is not in the required state

  """
  if msg is None:
    msg = "can't use instance from outside %s states" % ", ".join(req_states)
  if instance.admin_state not in req_states:
    raise errors.OpPrereqError("Instance '%s' is marked to be %s, %s" %
                               (instance.name, instance.admin_state, msg),
                               errors.ECODE_STATE)

  if constants.ADMINST_UP not in req_states:
    # The instance must really be down; double-check with the hypervisor on
    # the (online) primary node
    pnode = instance.primary_node
    if not lu.cfg.GetNodeInfo(pnode).offline:
      ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
      ins_l.Raise("Can't contact node %s for instance information" % pnode,
                  prereq=True, ecode=errors.ECODE_ENVIRON)
      if instance.name in ins_l.payload:
        raise errors.OpPrereqError("Instance %s is running, %s" %
                                   (instance.name, msg), errors.ECODE_STATE)
    else:
      lu.LogWarning("Primary node offline, ignoring check that instance"
                    " is down")
def _ComputeMinMaxSpec(name, qualifier, ipolicy, value):
  """Computes if value is in the desired range.

  @param name: name of the parameter for which we perform the check
  @param qualifier: a qualifier used in the error message (e.g. 'disk/1',
      not just 'disk')
  @param ipolicy: dictionary containing min, max and std values
  @param value: actual value that we want to use
  @return: None or an error string describing how the value violates the policy

  """
  # None/"auto" are resolved elsewhere and never violate the policy
  if value in [None, constants.VALUE_AUTO]:
    return None
  # missing policy entries default to the value itself, i.e. no constraint
  max_v = ipolicy[constants.ISPECS_MAX].get(name, value)
  min_v = ipolicy[constants.ISPECS_MIN].get(name, value)
  if value > max_v or min_v > value:
    if qualifier:
      fqn = "%s/%s" % (name, qualifier)
    else:
      fqn = name
    return ("%s value %s is not in range [%s, %s]" %
            (fqn, value, min_v, max_v))
  return None
def _ComputeIPolicySpecViolation(ipolicy, mem_size, cpu_count, disk_count,
                                 nic_count, disk_sizes, spindle_use,
                                 _compute_fn=_ComputeMinMaxSpec):
  """Verifies ipolicy against provided specs.

  @type ipolicy: dict
  @param ipolicy: The ipolicy
  @type mem_size: int
  @param mem_size: The memory size
  @type cpu_count: int
  @param cpu_count: Used cpu cores
  @type disk_count: int
  @param disk_count: Number of disks used
  @type nic_count: int
  @param nic_count: Number of nics used
  @type disk_sizes: list of ints
  @param disk_sizes: Disk sizes of used disk (len must match C{disk_count})
  @type spindle_use: int
  @param spindle_use: The number of spindles this instance uses
  @param _compute_fn: The compute function (unittest only)
  @return: A list of violations, or an empty list of no violations are found

  """
  assert disk_count == len(disk_sizes)

  # each disk size is checked individually, qualified by its index
  test_settings = [
    (constants.ISPEC_MEM_SIZE, "", mem_size),
    (constants.ISPEC_CPU_COUNT, "", cpu_count),
    (constants.ISPEC_DISK_COUNT, "", disk_count),
    (constants.ISPEC_NIC_COUNT, "", nic_count),
    (constants.ISPEC_SPINDLE_USE, "", spindle_use),
    ] + [(constants.ISPEC_DISK_SIZE, str(idx), d)
         for idx, d in enumerate(disk_sizes)]

  # keep only the non-None results, i.e. the actual violation messages
  return filter(None,
                (_compute_fn(name, qualifier, ipolicy, value)
                 for (name, qualifier, value) in test_settings))
def _ComputeIPolicyInstanceViolation(ipolicy, instance,
                                     _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance meets the specs of ipolicy.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance: L{objects.Instance}
  @param instance: The instance to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  mem_size = instance.beparams.get(constants.BE_MAXMEM, None)
  cpu_count = instance.beparams.get(constants.BE_VCPUS, None)
  spindle_use = instance.beparams.get(constants.BE_SPINDLE_USE, None)
  disk_count = len(instance.disks)
  disk_sizes = [disk.size for disk in instance.disks]
  nic_count = len(instance.nics)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes, spindle_use)
def _ComputeIPolicyInstanceSpecViolation(
  ipolicy, instance_spec, _compute_fn=_ComputeIPolicySpecViolation):
  """Compute if instance specs meets the specs of ipolicy.

  @type ipolicy: dict
  @param ipolicy: The ipolicy to verify against
  @type instance_spec: dict
  @param instance_spec: The instance spec to verify
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  mem_size = instance_spec.get(constants.ISPEC_MEM_SIZE, None)
  cpu_count = instance_spec.get(constants.ISPEC_CPU_COUNT, None)
  disk_count = instance_spec.get(constants.ISPEC_DISK_COUNT, 0)
  disk_sizes = instance_spec.get(constants.ISPEC_DISK_SIZE, [])
  nic_count = instance_spec.get(constants.ISPEC_NIC_COUNT, 0)
  spindle_use = instance_spec.get(constants.ISPEC_SPINDLE_USE, None)

  return _compute_fn(ipolicy, mem_size, cpu_count, disk_count, nic_count,
                     disk_sizes, spindle_use)
def _ComputeIPolicyNodeViolation(ipolicy, instance, current_group,
                                 target_group,
                                 _compute_fn=_ComputeIPolicyInstanceViolation):
  """Compute if instance meets the specs of the new target group.

  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param current_group: The current group of the instance
  @param target_group: The new group of the instance
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}

  """
  # moving within the same group can never introduce a policy violation
  if current_group == target_group:
    return []
  else:
    return _compute_fn(ipolicy, instance)
def _CheckTargetNodeIPolicy(lu, ipolicy, instance, node, ignore=False,
                            _compute_fn=_ComputeIPolicyNodeViolation):
  """Checks that the target node is correct in terms of instance policy.

  @param lu: the LU on behalf of which we make the check
  @param ipolicy: The ipolicy to verify
  @param instance: The instance object to verify
  @param node: The new node to relocate
  @param ignore: Ignore violations of the ipolicy
  @param _compute_fn: The function to verify ipolicy (unittest only)
  @see: L{_ComputeIPolicySpecViolation}
  @raise errors.OpPrereqError: if the policy is violated and C{ignore} is False

  """
  primary_node = lu.cfg.GetNodeInfo(instance.primary_node)
  res = _compute_fn(ipolicy, instance, primary_node.group, node.group)

  if res:
    msg = ("Instance does not meet target node group's (%s) instance"
           " policy: %s") % (node.group, utils.CommaJoin(res))
    if ignore:
      lu.LogWarning(msg)
    else:
      raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
def _ComputeNewInstanceViolations(old_ipolicy, new_ipolicy, instances):
  """Computes a set of any instances that would violate the new ipolicy.

  @param old_ipolicy: The current (still in-place) ipolicy
  @param new_ipolicy: The new (to become) ipolicy
  @param instances: List of instances to verify
  @return: A list of instances which violates the new ipolicy but
      did not before

  """
  # set difference: new offenders only, ignore pre-existing violations
  return (_ComputeViolatingInstances(new_ipolicy, instances) -
          _ComputeViolatingInstances(old_ipolicy, instances))
1290 def _ExpandItemName(fn, name, kind):
1291 """Expand an item name.
1293 @param fn: the function to use for expansion
1294 @param name: requested item name
1295 @param kind: text description ('Node' or 'Instance')
1296 @return: the resolved (full) name
1297 @raise errors.OpPrereqError: if the item is not found
1300 full_name = fn(name)
1301 if full_name is None:
1302 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes.

  @param cfg: the cluster configuration
  @param name: the (possibly partial) node name to expand
  @return: the full node name
  @raise errors.OpPrereqError: if the node is not known

  """
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance.

  @param cfg: the cluster configuration
  @param name: the (possibly partial) instance name to expand
  @return: the full instance name
  @raise errors.OpPrereqError: if the instance is not known

  """
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
1317 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1318 minmem, maxmem, vcpus, nics, disk_template, disks,
1319 bep, hvp, hypervisor_name, tags):
1320 """Builds instance related env variables for hooks
1322 This builds the hook environment from individual variables.
1325 @param name: the name of the instance
1326 @type primary_node: string
1327 @param primary_node: the name of the instance's primary node
1328 @type secondary_nodes: list
1329 @param secondary_nodes: list of secondary nodes as strings
1330 @type os_type: string
1331 @param os_type: the name of the instance's OS
1332 @type status: string
1333 @param status: the desired status of the instance
1334 @type minmem: string
1335 @param minmem: the minimum memory size of the instance
1336 @type maxmem: string
1337 @param maxmem: the maximum memory size of the instance
1339 @param vcpus: the count of VCPUs the instance has
1341 @param nics: list of tuples (ip, mac, mode, link) representing
1342 the NICs the instance has
1343 @type disk_template: string
1344 @param disk_template: the disk template of the instance
1346 @param disks: the list of (size, mode) pairs
1348 @param bep: the backend parameters for the instance
1350 @param hvp: the hypervisor parameters for the instance
1351 @type hypervisor_name: string
1352 @param hypervisor_name: the hypervisor for the instance
1354 @param tags: list of instance tags as strings
1356 @return: the hook environment for this instance
1361 "INSTANCE_NAME": name,
1362 "INSTANCE_PRIMARY": primary_node,
1363 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1364 "INSTANCE_OS_TYPE": os_type,
1365 "INSTANCE_STATUS": status,
1366 "INSTANCE_MINMEM": minmem,
1367 "INSTANCE_MAXMEM": maxmem,
1368 # TODO(2.7) remove deprecated "memory" value
1369 "INSTANCE_MEMORY": maxmem,
1370 "INSTANCE_VCPUS": vcpus,
1371 "INSTANCE_DISK_TEMPLATE": disk_template,
1372 "INSTANCE_HYPERVISOR": hypervisor_name,
1375 nic_count = len(nics)
1376 for idx, (ip, mac, mode, link) in enumerate(nics):
1379 env["INSTANCE_NIC%d_IP" % idx] = ip
1380 env["INSTANCE_NIC%d_MAC" % idx] = mac
1381 env["INSTANCE_NIC%d_MODE" % idx] = mode
1382 env["INSTANCE_NIC%d_LINK" % idx] = link
1383 if mode == constants.NIC_MODE_BRIDGED:
1384 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1388 env["INSTANCE_NIC_COUNT"] = nic_count
1391 disk_count = len(disks)
1392 for idx, (size, mode) in enumerate(disks):
1393 env["INSTANCE_DISK%d_SIZE" % idx] = size
1394 env["INSTANCE_DISK%d_MODE" % idx] = mode
1398 env["INSTANCE_DISK_COUNT"] = disk_count
1403 env["INSTANCE_TAGS"] = " ".join(tags)
1405 for source, kind in [(bep, "BE"), (hvp, "HV")]:
1406 for key, value in source.items():
1407 env["INSTANCE_%s_%s" % (kind, key)] = value
def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples
  @return: list of (ip, mac, mode, link) tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    # mode/link come from the cluster-filled nic parameters
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    "name": instance.name,
    "primary_node": instance.primary_node,
    "secondary_nodes": instance.secondary_nodes,
    "os_type": instance.os,
    "status": instance.admin_state,
    "maxmem": bep[constants.BE_MAXMEM],
    "minmem": bep[constants.BE_MINMEM],
    "vcpus": bep[constants.BE_VCPUS],
    "nics": _NICListToTuple(lu, instance.nics),
    "disk_template": instance.disk_template,
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
    "bep": bep,
    "hvp": hvp,
    "hypervisor_name": instance.hypervisor,
    "tags": instance.tags,
    }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1476 def _AdjustCandidatePool(lu, exceptions):
1477 """Adjust the candidate pool after node operations.
1480 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1482 lu.LogInfo("Promoted nodes to master candidate role: %s",
1483 utils.CommaJoin(node.name for node in mod_list))
1484 for name in mod_list:
1485 lu.context.ReaddNode(name)
1486 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1488 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1492 def _DecideSelfPromotion(lu, exceptions=None):
1493 """Decide whether I should promote myself as a master candidate.
1496 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1497 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1498 # the new node will increase mc_max with one, so:
1499 mc_should = min(mc_should + 1, cp_size)
1500 return mc_now < mc_should
def _ComputeViolatingInstances(ipolicy, instances):
  """Computes a set of instances who violates given ipolicy.

  @param ipolicy: The ipolicy to verify
  @type instances: object.Instance
  @param instances: List of instances to verify
  @return: A frozenset of instance names violating the ipolicy

  """
  return frozenset([inst.name for inst in instances
                    if _ComputeIPolicyInstanceViolation(ipolicy, inst)])
def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  @param lu: the LU on behalf of which we make the check
  @param target_nics: list of NIC objects to check
  @param target_node: the node on which the bridges must exist
  @raise errors.OpPrereqError: if a required bridge is missing

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  @param lu: the LU on behalf of which we make the check
  @param instance: the instance whose NICs are checked
  @param node: the node to check; defaults to the instance's primary node

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)
def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity
  @raise errors.OpPrereqError: if the variant is missing, unknown or
      passed for an OS without variant support

  """
  variant = objects.OS.GetVariant(name)
  if not os_obj.supported_variants:
    if variant:
      # a variant was given but this OS does not support any
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                                 " passed)" % (os_obj.name, variant),
                                 errors.ECODE_INVAL)
    return
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1563 def _GetNodeInstancesInner(cfg, fn):
1564 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  @param cfg: the cluster configuration
  @param node_name: the node whose instances are wanted

  """
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  @param cfg: the cluster configuration
  @param node_name: the node whose primary instances are wanted

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)
def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  @param cfg: the cluster configuration
  @param node_name: the node whose secondary instances are wanted

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)
def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  @param cfg: the cluster configuration
  @param storage_type: one of the C{constants.ST_*} storage types
  @return: list of extra arguments for the storage backend

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

  return []
def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
  """Returns the indices of an instance's faulty disks on a given node.

  @param cfg: the cluster configuration
  @param rpc_runner: RPC runner used to query the node
  @param instance: the instance whose disks are checked
  @param node_name: the node to query for the mirror status
  @param prereq: whether a failure raises a prerequisite or an execute error
  @return: list of disk indices whose local disk status is faulty

  """
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc_runner.call_blockdev_getmirrorstatus(node_name, (instance.disks,
                                                                instance))
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, or the iallocator is L{constants.DEFAULT_IALLOCATOR_SHORTCUT},
  then the LU's opcode's iallocator slot is filled with the cluster-wide
  default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot
  @raise errors.OpPrereqError: if both are given, or neither is given and
      no cluster-wide default exists

  """
  node = getattr(lu.op, node_slot, None)
  ialloc = getattr(lu.op, iallocator_slot, None)

  if node is not None and ialloc is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
                               errors.ECODE_INVAL)
  elif ((node is None and ialloc is None) or
        ialloc == constants.DEFAULT_IALLOCATOR_SHORTCUT):
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found;"
                                 " please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator", errors.ECODE_INVAL)
def _GetDefaultIAllocator(cfg, ialloc):
  """Decides on which iallocator to use.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration object
  @type ialloc: string or None
  @param ialloc: Iallocator specified in opcode
  @rtype: string
  @return: Iallocator name
  @raise errors.OpPrereqError: if no iallocator is given and no cluster-wide
      default is configured

  """
  if not ialloc:
    # Use default iallocator
    ialloc = cfg.GetDefaultIAllocator()

  if not ialloc:
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
                               " opcode nor as a cluster-wide default",
                               errors.ECODE_INVAL)

  return ialloc
def _CheckHostnameSane(lu, name):
  """Ensures that a given hostname resolves to a 'sane' name.

  The given name is required to be a prefix of the resolved hostname,
  to prevent accidental mismatches.

  @param lu: the logical unit on behalf of which we're checking
  @param name: the name we should resolve and check
  @return: the resolved hostname object
  @raise errors.OpPrereqError: if the resolved name does not match the
      requested one

  """
  hostname = netutils.GetHostname(name=name)
  if hostname.name != name:
    lu.LogInfo("Resolved given name '%s' to '%s'", name, hostname.name)
  if not utils.MatchNameComponent(name, [hostname.name]):
    raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
                                " same as given hostname '%s'") %
                               (hostname.name, name), errors.ECODE_INVAL)
  return hostname
class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # hooks run only on the master node
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):
    """Nothing to do; the LU exists only to trigger the hooks.

    """
    return True
class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master_params = self.cfg.GetMasterNetworkParameters()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master_params.name)

    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
    if result.fail_msg:
      # best-effort: the destroy proceeds even if the IP cannot be disabled
      self.LogWarning("Error disabling the master IP address: %s",
                      result.fail_msg)

    return master_params.name
def _VerifyCertificate(filename):
  """Verifies a certificate for L{LUClusterVerifyConfig}.

  @type filename: string
  @param filename: Path to PEM file
  @return: a tuple of (error type or None, message or None)

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception as err: # pylint: disable=W0703
    return (LUClusterVerifyConfig.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1823 def _GetAllHypervisorParameters(cluster, instances):
1824 """Compute the set of all hypervisor parameters.
1826 @type cluster: L{objects.Cluster}
1827 @param cluster: the cluster object
1828 @param instances: list of L{objects.Instance}
1829 @param instances: additional instances from which to obtain parameters
1830 @rtype: list of (origin, hypervisor, parameters)
1831 @return: a list with all parameters found, indicating the hypervisor they
1832 apply to, and the origin (can be "cluster", "os X", or "instance Y")
1837 for hv_name in cluster.enabled_hypervisors:
1838 hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1840 for os_name, os_hvp in cluster.os_hvp.items():
1841 for hv_name, hv_params in os_hvp.items():
1843 full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1844 hvp_data.append(("os %s" % os_name, hv_name, full_params))
1846 # TODO: collapse identical parameter values in a single one
1847 for instance in instances:
1848 if instance.hvparams:
1849 hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1850 cluster.FillHV(instance)))
1855 class _VerifyErrors(object):
1856 """Mix-in for cluster/group verify LUs.
1858 It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1859 self.op and self._feedback_fn to be available.)
1863 ETYPE_FIELD = "code"
1864 ETYPE_ERROR = "ERROR"
1865 ETYPE_WARNING = "WARNING"
1867 def _Error(self, ecode, item, msg, *args, **kwargs):
1868 """Format an error message.
1870 Based on the opcode's error_codes parameter, either format a
1871 parseable error code, or a simpler error string.
1873 This must be called only from Exec and functions called from Exec.
1876 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1877 itype, etxt, _ = ecode
1878 # first complete the msg
1881 # then format the whole message
1882 if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1883 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1889 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1890 # and finally report it via the feedback_fn
1891 self._feedback_fn(" - %s" % msg) # Mix-in. pylint: disable=E1101
1893 def _ErrorIf(self, cond, ecode, *args, **kwargs):
1894 """Log an error message if the passed condition is True.
1898 or self.op.debug_simulate_errors) # pylint: disable=E1101
1900 # If the error code is in the list of ignored errors, demote the error to a
1902 (_, etxt, _) = ecode
1903 if etxt in self.op.ignore_errors: # pylint: disable=E1101
1904 kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1907 self._Error(ecode, *args, **kwargs)
1909 # do not mark the operation as failed for WARN cases only
1910 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1911 self.bad = self.bad or cond
class LUClusterVerify(NoHooksLU):
  """Submits all jobs necessary to verify the cluster.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    jobs = []

    if self.op.group_name:
      # verify a single group only
      groups = [self.op.group_name]
      depends_fn = lambda: None
    else:
      groups = self.cfg.GetNodeGroupList()

      # Verify global configuration
      jobs.append([
        opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors),
        ])

      # Always depend on global verification
      depends_fn = lambda: [(-len(jobs), [])]

    jobs.extend(
      [opcodes.OpClusterVerifyGroup(group_name=group,
                                    ignore_errors=self.op.ignore_errors,
                                    depends=depends_fn())]
      for group in groups)

    # Fix up all parameters
    for op in itertools.chain(*jobs): # pylint: disable=W0142
      op.debug_simulate_errors = self.op.debug_simulate_errors
      op.verbose = self.op.verbose
      op.error_codes = self.op.error_codes
      try:
        op.skip_checks = self.op.skip_checks
      except AttributeError:
        # OpClusterVerifyConfig has no skip_checks slot
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)

    return ResultWithJobs(jobs)
class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
  """Verifies the cluster config.

  """
  REQ_BGL = False

  def _VerifyHVP(self, hvp_data):
    """Verifies locally the syntax of the hypervisor parameters.

    """
    for item, hv_name, hv_params in hvp_data:
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
             (hv_name, item))
      try:
        hv_class = hypervisor.GetHypervisor(hv_name)
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
        hv_class.CheckParameterSyntax(hv_params)
      except errors.GenericError as err:
        self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))

  def ExpandNames(self):
    self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
    self.share_locks = _ShareAll()

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # Retrieve all information
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various test on nodes.

    """
    self.bad = False
    self._feedback_fn = feedback_fn

    feedback_fn("* Verifying cluster config")

    for msg in self.cfg.VerifyConfig():
      self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)

    feedback_fn("* Verifying cluster certificate files")

    for cert_filename in pathutils.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)

    feedback_fn("* Verifying hypervisor parameters")

    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
                                                self.all_inst_info.values()))

    feedback_fn("* Verifying all nodes belong to an existing group")

    # We do this verification here because, should this bogus circumstance
    # occur, it would never be caught by VerifyGroup, which only acts on
    # nodes/instances reachable from existing node groups.

    dangling_nodes = set(node.name for node in self.all_node_info.values()
                         if node.group not in self.all_group_info)

    dangling_instances = {}
    no_node_instances = []

    for inst in self.all_inst_info.values():
      if inst.primary_node in dangling_nodes:
        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
      elif inst.primary_node not in self.all_node_info:
        no_node_instances.append(inst.name)

    pretty_dangling = [
        "%s (%s)" %
        (node.name,
         utils.CommaJoin(dangling_instances.get(node.name,
                                                ["no instances"])))
        for node in dangling_nodes]

    self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
                  None,
                  "the following nodes (and their instances) belong to a non"
                  " existing group: %s", utils.CommaJoin(pretty_dangling))

    self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
                  None,
                  "the following instances have a non-existing primary-node:"
                  " %s", utils.CommaJoin(no_node_instances))

    return not self.bad
2053 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
2054 """Verifies the status of a node group.
2057 HPATH = "cluster-verify"
2058 HTYPE = constants.HTYPE_CLUSTER
2061 _HOOKS_INDENT_RE = re.compile("^", re.M)
2063 class NodeImage(object):
2064 """A class representing the logical and physical status of a node.
2067 @ivar name: the node name to which this object refers
2068 @ivar volumes: a structure as returned from
2069 L{ganeti.backend.GetVolumeList} (runtime)
2070 @ivar instances: a list of running instances (runtime)
2071 @ivar pinst: list of configured primary instances (config)
2072 @ivar sinst: list of configured secondary instances (config)
2073 @ivar sbp: dictionary of {primary-node: list of instances} for all
2074 instances for which this node is secondary (config)
2075 @ivar mfree: free memory, as reported by hypervisor (runtime)
2076 @ivar dfree: free disk, as reported by the node (runtime)
2077 @ivar offline: the offline status (config)
2078 @type rpc_fail: boolean
2079 @ivar rpc_fail: whether the RPC verify call was successfull (overall,
2080 not whether the individual keys were correct) (runtime)
2081 @type lvm_fail: boolean
2082 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
2083 @type hyp_fail: boolean
2084 @ivar hyp_fail: whether the RPC call didn't return the instance list
2085 @type ghost: boolean
2086 @ivar ghost: whether this is a known node or not (config)
2087 @type os_fail: boolean
2088 @ivar os_fail: whether the RPC call didn't return valid OS data
2090 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
2091 @type vm_capable: boolean
2092 @ivar vm_capable: whether the node can host instances
2095 def __init__(self, offline=False, name=None, vm_capable=True):
2104 self.offline = offline
2105 self.vm_capable = vm_capable
2106 self.rpc_fail = False
2107 self.lvm_fail = False
2108 self.hyp_fail = False
2110 self.os_fail = False
2113 def ExpandNames(self):
2114 # This raises errors.OpPrereqError on its own:
2115 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2117 # Get instances in node group; this is unsafe and needs verification later
2119 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2121 self.needed_locks = {
2122 locking.LEVEL_INSTANCE: inst_names,
2123 locking.LEVEL_NODEGROUP: [self.group_uuid],
2124 locking.LEVEL_NODE: [],
2127 self.share_locks = _ShareAll()
2129 def DeclareLocks(self, level):
2130 if level == locking.LEVEL_NODE:
2131 # Get members of node group; this is unsafe and needs verification later
2132 nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
2134 all_inst_info = self.cfg.GetAllInstancesInfo()
2136 # In Exec(), we warn about mirrored instances that have primary and
2137 # secondary living in separate node groups. To fully verify that
2138 # volumes for these instances are healthy, we will need to do an
2139 # extra call to their secondaries. We ensure here those nodes will
2141 for inst in self.owned_locks(locking.LEVEL_INSTANCE):
2142 # Important: access only the instances whose lock is owned
2143 if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
2144 nodes.update(all_inst_info[inst].secondary_nodes)
2146 self.needed_locks[locking.LEVEL_NODE] = nodes
2148 def CheckPrereq(self):
# Re-check, under the locks we now own, that the group membership computed
# at lock-declaration time is still accurate; abort if any node/instance
# joined the group after locking (its lock would be missing).
2149 assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
2150 self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
2152 group_nodes = set(self.group_info.members)
2154 self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
2157 group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2159 unlocked_instances = \
2160 group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
2163 raise errors.OpPrereqError("Missing lock for nodes: %s" %
2164 utils.CommaJoin(unlocked_nodes),
2167 if unlocked_instances:
2168 raise errors.OpPrereqError("Missing lock for instances: %s" %
2169 utils.CommaJoin(unlocked_instances),
# Cache full cluster views; group-local views are derived from them below.
2172 self.all_node_info = self.cfg.GetAllNodesInfo()
2173 self.all_inst_info = self.cfg.GetAllInstancesInfo()
2175 self.my_node_names = utils.NiceSort(group_nodes)
2176 self.my_inst_names = utils.NiceSort(group_instances)
2178 self.my_node_info = dict((name, self.all_node_info[name])
2179 for name in self.my_node_names)
2181 self.my_inst_info = dict((name, self.all_inst_info[name])
2182 for name in self.my_inst_names)
2184 # We detect here the nodes that will need the extra RPC calls for verifying
2185 # split LV volumes; they should be locked.
2186 extra_lv_nodes = set()
2188 for inst in self.my_inst_info.values():
2189 if inst.disk_template in constants.DTS_INT_MIRROR:
2190 for nname in inst.all_nodes:
2191 if self.all_node_info[nname].group != self.group_uuid:
2192 extra_lv_nodes.add(nname)
2194 unlocked_lv_nodes = \
2195 extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
2197 if unlocked_lv_nodes:
2198 raise errors.OpPrereqError("Missing node locks for LV check: %s" %
2199 utils.CommaJoin(unlocked_lv_nodes),
2201 self.extra_lv_nodes = list(extra_lv_nodes)
2203 def _VerifyNode(self, ninfo, nresult):
2204 """Perform some basic validation on data returned from a node.
2206 - check the result data structure is well formed and has all the
2208 - check ganeti version
2210 @type ninfo: L{objects.Node}
2211 @param ninfo: the node to check
2212 @param nresult: the results from the node
2214 @return: whether overall this call was successful (and we can expect
2215 reasonable values in the response)
2219 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2221 # main result, nresult should be a non-empty dict
2222 test = not nresult or not isinstance(nresult, dict)
2223 _ErrorIf(test, constants.CV_ENODERPC, node,
2224 "unable to verify node: no data returned")
2228 # compares ganeti version
2229 local_version = constants.PROTOCOL_VERSION
2230 remote_version = nresult.get("version", None)
2231 test = not (remote_version and
2232 isinstance(remote_version, (list, tuple)) and
2233 len(remote_version) == 2)
2234 _ErrorIf(test, constants.CV_ENODERPC, node,
2235 "connection to node returned invalid data")
# Element 0 is the protocol version (hard requirement), element 1 the
# full package version (mismatch is only a warning, see below).
2239 test = local_version != remote_version[0]
2240 _ErrorIf(test, constants.CV_ENODEVERSION, node,
2241 "incompatible protocol versions: master %s,"
2242 " node %s", local_version, remote_version[0])
2246 # node seems compatible, we can actually try to look into its results
2248 # full package version
2249 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
2250 constants.CV_ENODEVERSION, node,
2251 "software version mismatch: master %s, node %s",
2252 constants.RELEASE_VERSION, remote_version[1],
2253 code=self.ETYPE_WARNING)
# Hypervisor self-verification results: a non-None per-hypervisor value
# is the failure message returned by that hypervisor's verify call.
2255 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
2256 if ninfo.vm_capable and isinstance(hyp_result, dict):
2257 for hv_name, hv_result in hyp_result.iteritems():
2258 test = hv_result is not None
2259 _ErrorIf(test, constants.CV_ENODEHV, node,
2260 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
# Hypervisor parameter validation: each entry present is a failure.
2262 hvp_result = nresult.get(constants.NV_HVPARAMS, None)
2263 if ninfo.vm_capable and isinstance(hvp_result, list):
2264 for item, hv_name, hv_result in hvp_result:
2265 _ErrorIf(True, constants.CV_ENODEHV, node,
2266 "hypervisor %s parameter verify failure (source %s): %s",
2267 hv_name, item, hv_result)
2269 test = nresult.get(constants.NV_NODESETUP,
2270 ["Missing NODESETUP results"])
2271 _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
2276 def _VerifyNodeTime(self, ninfo, nresult,
2277 nvinfo_starttime, nvinfo_endtime):
2278 """Check the node time.
2280 @type ninfo: L{objects.Node}
2281 @param ninfo: the node to check
2282 @param nresult: the remote results for the node
2283 @param nvinfo_starttime: the start time of the RPC call
2284 @param nvinfo_endtime: the end time of the RPC call
2288 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2290 ntime = nresult.get(constants.NV_TIME, None)
2292 ntime_merged = utils.MergeTime(ntime)
2293 except (ValueError, TypeError):
2294 _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
# Compare against the RPC window [start, end] widened by the allowed clock
# skew; node time outside that window diverges by at least the reported
# amount (the true skew cannot be measured exactly over an RPC).
2297 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
2298 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
2299 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
2300 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
2304 _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
2305 "Node time diverges by at least %s from master node time",
2308 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
2309 """Check the node LVM results.
2311 @type ninfo: L{objects.Node}
2312 @param ninfo: the node to check
2313 @param nresult: the remote results for the node
2314 @param vg_name: the configured VG name
2321 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2323 # checks vg existence and size > 20G
2324 vglist = nresult.get(constants.NV_VGLIST, None)
2326 _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
2328 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
2329 constants.MIN_VG_SIZE)
# CheckVolumeGroupSize returns an error string on failure, None when OK.
2330 _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
2333 pvlist = nresult.get(constants.NV_PVLIST, None)
2334 test = pvlist is None
2335 _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
2337 # check that ':' is not present in PV names, since it's a
2338 # special character for lvcreate (denotes the range of PEs to
2340 for _, pvname, owner_vg in pvlist:
2341 test = ":" in pvname
2342 _ErrorIf(test, constants.CV_ENODELVM, node,
2343 "Invalid character ':' in PV '%s' of VG '%s'",
2346 def _VerifyNodeBridges(self, ninfo, nresult, bridges):
2347 """Check the node bridges.
2349 @type ninfo: L{objects.Node}
2350 @param ninfo: the node to check
2351 @param nresult: the remote results for the node
2352 @param bridges: the expected list of bridges
2359 _ErrorIf = self._ErrorIf # pylint: disable=C0103
# The node returns the list of *missing* bridges; anything in it is an
# error, and a non-list reply means the check itself failed.
2361 missing = nresult.get(constants.NV_BRIDGES, None)
2362 test = not isinstance(missing, list)
2363 _ErrorIf(test, constants.CV_ENODENET, node,
2364 "did not return valid bridge information")
2366 _ErrorIf(bool(missing), constants.CV_ENODENET, node,
2367 "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
2369 def _VerifyNodeUserScripts(self, ninfo, nresult):
2370 """Check the results of user scripts presence and executability on the node
2372 @type ninfo: L{objects.Node}
2373 @param ninfo: the node to check
2374 @param nresult: the remote results for the node
2379 test = not constants.NV_USERSCRIPTS in nresult
2380 self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
2381 "did not return user scripts information")
# The reply lists the scripts that are broken; an empty list is success.
2383 broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
2385 self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2386 "user scripts not present or not executable: %s" %
2387 utils.CommaJoin(sorted(broken_scripts)))
2389 def _VerifyNodeNetwork(self, ninfo, nresult):
2390 """Check the node network connectivity results.
2392 @type ninfo: L{objects.Node}
2393 @param ninfo: the node to check
2394 @param nresult: the remote results for the node
2398 _ErrorIf = self._ErrorIf # pylint: disable=C0103
# SSH reachability: the node reports a dict of peer -> error message for
# every peer it could NOT reach; an empty dict means all peers were fine.
2400 test = constants.NV_NODELIST not in nresult
2401 _ErrorIf(test, constants.CV_ENODESSH, node,
2402 "node hasn't returned node ssh connectivity data")
2404 if nresult[constants.NV_NODELIST]:
2405 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2406 _ErrorIf(True, constants.CV_ENODESSH, node,
2407 "ssh communication with node '%s': %s", a_node, a_msg)
# TCP connectivity check (primary/secondary IPs), same failure-map shape.
2409 test = constants.NV_NODENETTEST not in nresult
2410 _ErrorIf(test, constants.CV_ENODENET, node,
2411 "node hasn't returned node tcp connectivity data")
2413 if nresult[constants.NV_NODENETTEST]:
2414 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2416 _ErrorIf(True, constants.CV_ENODENET, node,
2417 "tcp communication with node '%s': %s",
2418 anode, nresult[constants.NV_NODENETTEST][anode])
# Master IP reachability: a falsy result is a failure; the wording differs
# for the master node itself (where the IP should be locally configured).
2420 test = constants.NV_MASTERIP not in nresult
2421 _ErrorIf(test, constants.CV_ENODENET, node,
2422 "node hasn't returned node master IP reachability data")
2424 if not nresult[constants.NV_MASTERIP]:
2425 if node == self.master_node:
2426 msg = "the master node cannot reach the master IP (not configured?)"
2428 msg = "cannot reach the master IP"
2429 _ErrorIf(True, constants.CV_ENODENET, node, msg)
2431 def _VerifyInstance(self, instance, instanceconfig, node_image,
2433 """Verify an instance.
2435 This function checks to see if the required block devices are
2436 available on the instance's node.
2439 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2440 node_current = instanceconfig.primary_node
2442 node_vol_should = {}
2443 instanceconfig.MapLVsByNode(node_vol_should)
# Instance-policy compliance for the group the instance lives in;
# _ComputeIPolicyInstanceViolation returns a list of violation strings.
2445 cluster = self.cfg.GetClusterInfo()
2446 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
2448 err = _ComputeIPolicyInstanceViolation(ipolicy, instanceconfig)
2449 _ErrorIf(err, constants.CV_EINSTANCEPOLICY, instance, utils.CommaJoin(err))
2451 for node in node_vol_should:
2452 n_img = node_image[node]
2453 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2454 # ignore missing volumes on offline or broken nodes
2456 for volume in node_vol_should[node]:
2457 test = volume not in n_img.volumes
2458 _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2459 "volume %s missing on node %s", volume, node)
2461 if instanceconfig.admin_state == constants.ADMINST_UP:
2462 pri_img = node_image[node_current]
2463 test = instance not in pri_img.instances and not pri_img.offline
2464 _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2465 "instance not running on its primary node %s",
# Flatten diskstatus ({node: [(success, status), ...]}) for iteration.
2468 diskdata = [(nname, success, status, idx)
2469 for (nname, disks) in diskstatus.items()
2470 for idx, (success, status) in enumerate(disks)]
2472 for nname, success, bdev_status, idx in diskdata:
2473 # the 'ghost node' construction in Exec() ensures that we have a
2475 snode = node_image[nname]
2476 bad_snode = snode.ghost or snode.offline
2477 _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2478 not success and not bad_snode,
2479 constants.CV_EINSTANCEFAULTYDISK, instance,
2480 "couldn't retrieve status for disk/%s on %s: %s",
2481 idx, nname, bdev_status)
2482 _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2483 success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2484 constants.CV_EINSTANCEFAULTYDISK, instance,
2485 "disk/%s on %s is faulty", idx, nname)
2487 def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2488 """Verify if there are any unknown volumes in the cluster.
2490 The .os, .swap and backup volumes are ignored. All other volumes are
2491 reported as unknown.
2493 @type reserved: L{ganeti.utils.FieldSet}
2494 @param reserved: a FieldSet of reserved volume names
2497 for node, n_img in node_image.items():
2498 if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2499 self.all_node_info[node].group != self.group_uuid):
2500 # skip non-healthy nodes
# A volume is orphan when no instance maps it to this node and it does not
# match the reserved-names FieldSet.
2502 for volume in n_img.volumes:
2503 test = ((node not in node_vol_should or
2504 volume not in node_vol_should[node]) and
2505 not reserved.Matches(volume))
2506 self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2507 "volume %s is unknown", volume)
2509 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2510 """Verify N+1 Memory Resilience.
2512 Check that if one single node dies we can still start all the
2513 instances it was primary for.
2516 cluster_info = self.cfg.GetClusterInfo()
2517 for node, n_img in node_image.items():
2518 # This code checks that every node which is now listed as
2519 # secondary has enough memory to host all instances it is
2520 # supposed to should a single other node in the cluster fail.
2521 # FIXME: not ready for failover to an arbitrary node
2522 # FIXME: does not support file-backed instances
2523 # WARNING: we currently take into account down instances as well
2524 # as up ones, considering that even if they're down someone
2525 # might want to start them even in the event of a node failure.
2526 if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2527 # we're skipping nodes marked offline and nodes in other groups from
2528 # the N+1 warning, since most likely we don't have good memory
2529 # information from them; we already list instances living on such
2530 # nodes, and that's enough warning
2532 #TODO(dynmem): also consider ballooning out other instances
# n_img.sbp maps each primary node to the instances this node secondaries
# for; sum per-primary minimum memory of auto-balanced instances only.
2533 for prinode, instances in n_img.sbp.items():
2535 for instance in instances:
2536 bep = cluster_info.FillBE(instance_cfg[instance])
2537 if bep[constants.BE_AUTO_BALANCE]:
2538 needed_mem += bep[constants.BE_MINMEM]
2539 test = n_img.mfree < needed_mem
# NOTE(review): "accomodate" typo in the user-visible message below; left
# untouched here since changing it alters emitted output.
2540 self._ErrorIf(test, constants.CV_ENODEN1, node,
2541 "not enough memory to accomodate instance failovers"
2542 " should node %s fail (%dMiB needed, %dMiB available)",
2543 prinode, needed_mem, n_img.mfree)
2546 def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2547 (files_all, files_opt, files_mc, files_vm)):
2548 """Verifies file checksums collected from all nodes.
2550 @param errorif: Callback for reporting errors
2551 @param nodeinfo: List of L{objects.Node} objects
2552 @param master_node: Name of master node
2553 @param all_nvinfo: RPC results
2556 # Define functions determining which nodes to consider for a file
2559 (files_mc, lambda node: (node.master_candidate or
2560 node.name == master_node)),
2561 (files_vm, lambda node: node.vm_capable),
2564 # Build mapping from filename to list of nodes which should have the file
2566 for (files, fn) in files2nodefn:
2568 filenodes = nodeinfo
2570 filenodes = filter(fn, nodeinfo)
2571 nodefiles.update((filename,
2572 frozenset(map(operator.attrgetter("name"), filenodes)))
2573 for filename in files)
2575 assert set(nodefiles) == (files_all | files_mc | files_vm)
# fileinfo: filename -> {checksum -> set(node names reporting it)}
2577 fileinfo = dict((filename, {}) for filename in nodefiles)
2578 ignore_nodes = set()
2580 for node in nodeinfo:
2582 ignore_nodes.add(node.name)
2585 nresult = all_nvinfo[node.name]
2587 if nresult.fail_msg or not nresult.payload:
2590 fingerprints = nresult.payload.get(constants.NV_FILELIST, None)
# Node returns virtual paths; localize them before comparing.
2591 node_files = dict((vcluster.LocalizeVirtualPath(key), value)
2592 for (key, value) in fingerprints.items())
2595 test = not (node_files and isinstance(node_files, dict))
2596 errorif(test, constants.CV_ENODEFILECHECK, node.name,
2597 "Node did not return file checksum data")
2599 ignore_nodes.add(node.name)
2602 # Build per-checksum mapping from filename to nodes having it
2603 for (filename, checksum) in node_files.items():
2604 assert filename in nodefiles
2605 fileinfo[filename].setdefault(checksum, set()).add(node.name)
2607 for (filename, checksums) in fileinfo.items():
2608 assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2610 # Nodes having the file
2611 with_file = frozenset(node_name
2612 for nodes in fileinfo[filename].values()
2613 for node_name in nodes) - ignore_nodes
2615 expected_nodes = nodefiles[filename] - ignore_nodes
2617 # Nodes missing file
2618 missing_file = expected_nodes - with_file
2620 if filename in files_opt:
# Optional files must be either everywhere or nowhere (within the
# expected node set); partial presence indicates inconsistency.
2622 errorif(missing_file and missing_file != expected_nodes,
2623 constants.CV_ECLUSTERFILECHECK, None,
2624 "File %s is optional, but it must exist on all or no"
2625 " nodes (not found on %s)",
2626 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2628 errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2629 "File %s is missing from node(s) %s", filename,
2630 utils.CommaJoin(utils.NiceSort(missing_file)))
2632 # Warn if a node has a file it shouldn't
2633 unexpected = with_file - expected_nodes
2635 constants.CV_ECLUSTERFILECHECK, None,
2636 "File %s should not exist on node(s) %s",
2637 filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2639 # See if there are multiple versions of the file
2640 test = len(checksums) > 1
2642 variants = ["variant %s on %s" %
2643 (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2644 for (idx, (checksum, nodes)) in
2645 enumerate(sorted(checksums.items()))]
2649 errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2650 "File %s found with %s different checksums (%s)",
2651 filename, len(checksums), "; ".join(variants))
2653 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2655 """Verifies and the node DRBD status.
2657 @type ninfo: L{objects.Node}
2658 @param ninfo: the node to check
2659 @param nresult: the remote results for the node
2660 @param instanceinfo: the dict of instances
2661 @param drbd_helper: the configured DRBD usermode helper
2662 @param drbd_map: the DRBD map as returned by
2663 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2667 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2670 helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2671 test = (helper_result is None)
2672 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2673 "no drbd usermode helper returned")
2675 status, payload = helper_result
2677 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2678 "drbd usermode helper check unsuccessful: %s", payload)
2679 test = status and (payload != drbd_helper)
2680 _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2681 "wrong drbd usermode helper: %s", payload)
2683 # compute the DRBD minors
# node_drbd: minor -> (instance name, should-be-active flag)
2685 for minor, instance in drbd_map[node].items():
2686 test = instance not in instanceinfo
2687 _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2688 "ghost instance '%s' in temporary DRBD map", instance)
2689 # ghost instance should not be running, but otherwise we
2690 # don't give double warnings (both ghost instance and
2691 # unallocated minor in use)
2693 node_drbd[minor] = (instance, False)
2695 instance = instanceinfo[instance]
2696 node_drbd[minor] = (instance.name,
2697 instance.admin_state == constants.ADMINST_UP)
2699 # and now check them
2700 used_minors = nresult.get(constants.NV_DRBDLIST, [])
2701 test = not isinstance(used_minors, (tuple, list))
2702 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2703 "cannot parse drbd status file: %s", str(used_minors))
2705 # we cannot check drbd status
# Cross-check the expected map against the minors the node reports in use:
# missing active minors and unallocated in-use minors are both errors.
2708 for minor, (iname, must_exist) in node_drbd.items():
2709 test = minor not in used_minors and must_exist
2710 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2711 "drbd minor %d of instance %s is not active", minor, iname)
2712 for minor in used_minors:
2713 test = minor not in node_drbd
2714 _ErrorIf(test, constants.CV_ENODEDRBD, node,
2715 "unallocated drbd minor %d is in use", minor)
2717 def _UpdateNodeOS(self, ninfo, nresult, nimg):
2718 """Builds the node OS structures.
2720 @type ninfo: L{objects.Node}
2721 @param ninfo: the node to check
2722 @param nresult: the remote results for the node
2723 @param nimg: the node image object
2727 _ErrorIf = self._ErrorIf # pylint: disable=C0103
# Each OS entry from the node is a 7-element list; anything else means
# the node returned malformed data.
2729 remote_os = nresult.get(constants.NV_OSLIST, None)
2730 test = (not isinstance(remote_os, list) or
2731 not compat.all(isinstance(v, list) and len(v) == 7
2732 for v in remote_os))
2734 _ErrorIf(test, constants.CV_ENODEOS, node,
2735 "node hasn't returned valid OS data")
2744 for (name, os_path, status, diagnose,
2745 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2747 if name not in os_dict:
2750 # parameters is a list of lists instead of list of tuples due to
2751 # JSON lacking a real tuple type, fix it:
2752 parameters = [tuple(v) for v in parameters]
2753 os_dict[name].append((os_path, status, diagnose,
2754 set(variants), set(parameters), set(api_ver)))
2756 nimg.oslist = os_dict
2758 def _VerifyNodeOS(self, ninfo, nimg, base):
2759 """Verifies the node OS list.
2761 @type ninfo: L{objects.Node}
2762 @param ninfo: the node to check
2763 @param nimg: the node image object
2764 @param base: the 'template' node we match against (e.g. from the master)
2768 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2770 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2772 beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2773 for os_name, os_data in nimg.oslist.items():
2774 assert os_data, "Empty OS status for OS %s?!" % os_name
# Only the first entry per OS name is authoritative; duplicates shadow it.
2775 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2776 _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2777 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2778 _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2779 "OS '%s' has multiple entries (first one shadows the rest): %s",
2780 os_name, utils.CommaJoin([v[0] for v in os_data]))
2781 # comparisons with the 'base' image
2782 test = os_name not in base.oslist
2783 _ErrorIf(test, constants.CV_ENODEOS, node,
2784 "Extra OS %s not present on reference node (%s)",
2788 assert base.oslist[os_name], "Base node has empty OS status?"
2789 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2791 # base OS is invalid, skipping
# Compare API versions, variants and parameters against the reference.
2793 for kind, a, b in [("API version", f_api, b_api),
2794 ("variants list", f_var, b_var),
2795 ("parameters", beautify_params(f_param),
2796 beautify_params(b_param))]:
2797 _ErrorIf(a != b, constants.CV_ENODEOS, node,
2798 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2799 kind, os_name, base.name,
2800 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2802 # check any missing OSes
2803 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2804 _ErrorIf(missing, constants.CV_ENODEOS, node,
2805 "OSes present on reference node %s but missing on this node: %s",
2806 base.name, utils.CommaJoin(missing))
2808 def _VerifyFileStoragePaths(self, ninfo, nresult, is_master):
2809 """Verifies paths in L{pathutils.FILE_STORAGE_PATHS_FILE}.
2811 @type ninfo: L{objects.Node}
2812 @param ninfo: the node to check
2813 @param nresult: the remote results for the node
2814 @type is_master: bool
2815 @param is_master: Whether node is the master node
# Only the master node is asked for forbidden file-storage paths; any
# other node returning that key is itself an error (checked below).
2821 (constants.ENABLE_FILE_STORAGE or
2822 constants.ENABLE_SHARED_FILE_STORAGE)):
2824 fspaths = nresult[constants.NV_FILE_STORAGE_PATHS]
2826 # This should never happen
2827 self._ErrorIf(True, constants.CV_ENODEFILESTORAGEPATHS, node,
2828 "Node did not return forbidden file storage paths")
2830 self._ErrorIf(fspaths, constants.CV_ENODEFILESTORAGEPATHS, node,
2831 "Found forbidden file storage paths: %s",
2832 utils.CommaJoin(fspaths))
2834 self._ErrorIf(constants.NV_FILE_STORAGE_PATHS in nresult,
2835 constants.CV_ENODEFILESTORAGEPATHS, node,
2836 "Node should not have returned forbidden file storage"
2839 def _VerifyOob(self, ninfo, nresult):
2840 """Verifies out of band functionality of a node.
2842 @type ninfo: L{objects.Node}
2843 @param ninfo: the node to check
2844 @param nresult: the remote results for the node
2848 # We just have to verify the paths on master and/or master candidates
2849 # as the oob helper is invoked on the master
2850 if ((ninfo.master_candidate or ninfo.master_capable) and
2851 constants.NV_OOB_PATHS in nresult):
# Each path_result is an error string when the OOB path check failed.
2852 for path_result in nresult[constants.NV_OOB_PATHS]:
2853 self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2855 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2856 """Verifies and updates the node volume data.
2858 This function will update a L{NodeImage}'s internal structures
2859 with data from the remote call.
2861 @type ninfo: L{objects.Node}
2862 @param ninfo: the node to check
2863 @param nresult: the remote results for the node
2864 @param nimg: the node image object
2865 @param vg_name: the configured VG name
2869 _ErrorIf = self._ErrorIf # pylint: disable=C0103
# Pessimistic default; cleared below only when valid LV data arrived.
2871 nimg.lvm_fail = True
2872 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2875 elif isinstance(lvdata, basestring):
2876 _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2877 utils.SafeEncode(lvdata))
2878 elif not isinstance(lvdata, dict):
2879 _ErrorIf(True, constants.CV_ENODELVM, node,
2880 "rpc call to node failed (lvlist)")
2882 nimg.volumes = lvdata
2883 nimg.lvm_fail = False
2885 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2886 """Verifies and updates the node instance list.
2888 If the listing was successful, then updates this node's instance
2889 list. Otherwise, it marks the RPC call as failed for the instance
2892 @type ninfo: L{objects.Node}
2893 @param ninfo: the node to check
2894 @param nresult: the remote results for the node
2895 @param nimg: the node image object
2898 idata = nresult.get(constants.NV_INSTANCELIST, None)
2899 test = not isinstance(idata, list)
2900 self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2901 "rpc call to node failed (instancelist): %s",
2902 utils.SafeEncode(str(idata)))
2904 nimg.hyp_fail = True
2906 nimg.instances = idata
2908 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2909 """Verifies and computes a node information map
2911 @type ninfo: L{objects.Node}
2912 @param ninfo: the node to check
2913 @param nresult: the remote results for the node
2914 @param nimg: the node image object
2915 @param vg_name: the configured VG name
2919 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2921 # try to read free memory (from the hypervisor)
2922 hv_info = nresult.get(constants.NV_HVINFO, None)
2923 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2924 _ErrorIf(test, constants.CV_ENODEHV, node,
2925 "rpc call to node failed (hvinfo)")
# int() may still fail on malformed data, hence the guarded conversion.
2928 nimg.mfree = int(hv_info["memory_free"])
2929 except (ValueError, TypeError):
2930 _ErrorIf(True, constants.CV_ENODERPC, node,
2931 "node returned invalid nodeinfo, check hypervisor")
2933 # FIXME: devise a free space model for file based instances as well
2934 if vg_name is not None:
2935 test = (constants.NV_VGLIST not in nresult or
2936 vg_name not in nresult[constants.NV_VGLIST])
2937 _ErrorIf(test, constants.CV_ENODELVM, node,
2938 "node didn't return data for the volume group '%s'"
2939 " - it is either missing or broken", vg_name)
2942 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2943 except (ValueError, TypeError):
2944 _ErrorIf(True, constants.CV_ENODERPC, node,
2945 "node returned invalid LVM info, check LVM status")
2947 def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2948 """Gets per-disk status information for all instances.
2950 @type nodelist: list of strings
2951 @param nodelist: Node names
2952 @type node_image: dict of (name, L{objects.Node})
2953 @param node_image: Node objects
2954 @type instanceinfo: dict of (name, L{objects.Instance})
2955 @param instanceinfo: Instance objects
2956 @rtype: {instance: {node: [(success, payload)]}}
2957 @return: a dictionary of per-instance dictionaries with nodes as
2958 keys and disk information as values; the disk information is a
2959 list of tuples (success, payload)
2962 _ErrorIf = self._ErrorIf # pylint: disable=C0103
2965 node_disks_devonly = {}
2966 diskless_instances = set()
2967 diskless = constants.DT_DISKLESS
# Build per-node disk lists from primary and secondary instances; track
# diskless instances separately so they still get (empty) result entries.
2969 for nname in nodelist:
2970 node_instances = list(itertools.chain(node_image[nname].pinst,
2971 node_image[nname].sinst))
2972 diskless_instances.update(inst for inst in node_instances
2973 if instanceinfo[inst].disk_template == diskless)
2974 disks = [(inst, disk)
2975 for inst in node_instances
2976 for disk in instanceinfo[inst].disks]
2979 # No need to collect data
2982 node_disks[nname] = disks
2984 # _AnnotateDiskParams makes already copies of the disks
2986 for (inst, dev) in disks:
2987 (anno_disk,) = _AnnotateDiskParams(instanceinfo[inst], [dev], self.cfg)
2988 self.cfg.SetDiskID(anno_disk, nname)
2989 devonly.append(anno_disk)
2991 node_disks_devonly[nname] = devonly
2993 assert len(node_disks) == len(node_disks_devonly)
2995 # Collect data from all nodes with disks
2996 result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2999 assert len(result) == len(node_disks)
3003 for (nname, nres) in result.items():
3004 disks = node_disks[nname]
3007 # No data from this node
3008 data = len(disks) * [(False, "node offline")]
3011 _ErrorIf(msg, constants.CV_ENODERPC, nname,
3012 "while getting disk information: %s", msg)
3014 # No data from this node
3015 data = len(disks) * [(False, msg)]
# Sanitize the payload: each entry must be a (success, status) pair.
3018 for idx, i in enumerate(nres.payload):
3019 if isinstance(i, (tuple, list)) and len(i) == 2:
3022 logging.warning("Invalid result from node %s, entry %d: %s",
3024 data.append((False, "Invalid result from the remote node"))
3026 for ((inst, _), status) in zip(disks, data):
3027 instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
3029 # Add empty entries for diskless instances.
3030 for inst in diskless_instances:
3031 assert inst not in instdisk
3034 assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
3035 len(nnames) <= len(instanceinfo[inst].all_nodes) and
3036 compat.all(isinstance(s, (tuple, list)) and
3037 len(s) == 2 for s in statuses)
3038 for inst, nnames in instdisk.items()
3039 for nname, statuses in nnames.items())
3040 assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
3045 def _SshNodeSelector(group_uuid, all_nodes):
3046 """Create endless iterators for all potential SSH check hosts.
# Only nodes OUTSIDE the given group are candidates; one cycling iterator
# is produced per foreign group (nodes sorted by name for determinism).
3049 nodes = [node for node in all_nodes
3050 if (node.group != group_uuid and
3052 keyfunc = operator.attrgetter("group")
3054 return map(itertools.cycle,
3055 [sorted(map(operator.attrgetter("name"), names))
3056 for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
3060 def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
3061 """Choose which nodes should talk to which other nodes.
3063 We will make nodes contact all nodes in their group, and one node from
3066 @warning: This algorithm has a known issue if one node group is much
3067 smaller than others (e.g. just one node). In such a case all other
3068 nodes will talk to the single node.
3071 online_nodes = sorted(node.name for node in group_nodes if not node.offline)
3072 sel = cls._SshNodeSelector(group_uuid, all_nodes)
# Each online node gets one target from every foreign group, drawn
# round-robin from the cycling iterators returned by _SshNodeSelector.
3074 return (online_nodes,
3075 dict((name, sorted([i.next() for i in sel]))
3076 for name in online_nodes))
3078 def BuildHooksEnv(self):
3081 Cluster-Verify hooks just ran in the post phase and their failure makes
3082 the output be logged in the verify output and the verification to fail.
3086 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
# One NODE_TAGS_<name> variable per node in this group.
3089 env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
3090 for node in self.my_node_info.values())
3094 def BuildHooksNodes(self):
3095 """Build hooks nodes.
# Hooks run post-phase on all nodes of the verified group, none pre-phase.
3098 return ([], self.my_node_names)
  def Exec(self, feedback_fn):
    """Verify integrity of the node group, performing various test on nodes.

    """
    # NOTE(review): this extract appears to have lost scattered lines —
    # several locals (user_scripts, bridges, oob_paths, refos_img,
    # pretty_list, per-branch "continue"/"else:" lines) are referenced
    # without a visible initialisation; restore from VCS before use.
    # This method has too many local variables. pylint: disable=R0914
    feedback_fn("* Verifying group '%s'" % self.group_info.name)
    if not self.my_node_names:
      feedback_fn("* Empty node group, skipping verification")
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn
    vg_name = self.cfg.GetVGName()
    drbd_helper = self.cfg.GetDRBDHelper()
    cluster = self.cfg.GetClusterInfo()
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
    hypervisors = cluster.enabled_hypervisors
    node_data_list = [self.my_node_info[name] for name in self.my_node_names]
    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    i_offline = 0 # Count of offline instances
    n_offline = 0 # Count of offline nodes
    n_drained = 0 # Count of nodes being drained
    node_vol_should = {}
    # FIXME: verify OS list
    filemap = _ComputeAncillaryFiles(cluster, False)
    # do local checksums
    master_node = self.master_node = self.cfg.GetMasterNode()
    master_ip = self.cfg.GetMasterIP()
    feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
    if self.cfg.GetUseExternalMipScript():
      user_scripts.append(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)
    # Parameters describing which checks each node should run; keys are
    # constants.NV_* check names, values are check-specific arguments.
    node_verify_param = {
      constants.NV_FILELIST:
        map(vcluster.MakeVirtualPath,
            utils.UniqueSequence(filename
                                 for files in filemap
                                 for filename in files)),
      constants.NV_NODELIST:
        self._SelectSshCheckNodes(node_data_list, self.group_uuid,
                                  self.all_node_info.values()),
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_HVPARAMS:
        _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
      constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
                                 for node in node_data_list
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      constants.NV_NODESETUP: None,
      constants.NV_TIME: None,
      constants.NV_MASTERIP: (master_node, master_ip),
      constants.NV_OSLIST: None,
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
      constants.NV_USERSCRIPTS: user_scripts,
    # LVM checks only make sense when a volume group is configured
    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_PVLIST] = [vg_name]
      node_verify_param[constants.NV_DRBDLIST] = None
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
    if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
      # Load file storage paths only from master node
      node_verify_param[constants.NV_FILE_STORAGE_PATHS] = master_node
    # FIXME: this needs to be changed per node-group, not cluster-wide
    default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
    if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
      bridges.add(default_nicpp[constants.NIC_LINK])
    # collect every bridge used by any NIC of the group's instances
    for instance in self.my_inst_info.values():
      for nic in instance.nics:
        full_nic = cluster.SimpleFillNIC(nic.nicparams)
        if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          bridges.add(full_nic[constants.NIC_LINK])
      node_verify_param[constants.NV_BRIDGES] = list(bridges)
    # Build our expected cluster state
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
                                                 vm_capable=node.vm_capable))
                      for node in node_data_list)
    for node in self.all_node_info.values():
      path = _SupportsOob(self.cfg, node)
      if path and path not in oob_paths:
        oob_paths.append(path)
    node_verify_param[constants.NV_OOB_PATHS] = oob_paths
    # Fill the expected per-node instance placement (pinst/sinst/sbp)
    for instance in self.my_inst_names:
      inst_config = self.my_inst_info[instance]
      if inst_config.admin_state == constants.ADMINST_OFFLINE:
      # nodes of other groups hosting this instance become "ghost" images
      for nname in inst_config.all_nodes:
        if nname not in node_image:
          gnode = self.NodeImage(name=nname)
          gnode.ghost = (nname not in self.all_node_info)
          node_image[nname] = gnode
      inst_config.MapLVsByNode(node_vol_should)
      pnode = inst_config.primary_node
      node_image[pnode].pinst.append(instance)
      for snode in inst_config.secondary_nodes:
        nimg = node_image[snode]
        nimg.sinst.append(instance)
        if pnode not in nimg.sbp:
          nimg.sbp[pnode] = []
        nimg.sbp[pnode].append(instance)
    # At this point, we have the in-memory data structures complete,
    # except for the runtime information, which we'll gather next
    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
                                           self.cfg.GetClusterName())
    nvinfo_endtime = time.time()
    # Extra LV data for nodes outside this group that hold our volumes
    if self.extra_lv_nodes and vg_name is not None:
        self.rpc.call_node_verify(self.extra_lv_nodes,
                                  {constants.NV_LVLIST: vg_name},
                                  self.cfg.GetClusterName())
      extra_lv_nvinfo = {}
    all_drbd_map = self.cfg.ComputeDRBDMap()
    feedback_fn("* Gathering disk information (%s nodes)" %
                len(self.my_node_names))
    instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
    feedback_fn("* Verifying configuration file consistency")
    # If not all nodes are being checked, we need to make sure the master node
    # and a non-checked vm_capable node are in the list.
    absent_nodes = set(self.all_node_info).difference(self.my_node_info)
      vf_nvinfo = all_nvinfo.copy()
      vf_node_info = list(self.my_node_info.values())
      additional_nodes = []
      if master_node not in self.my_node_info:
        additional_nodes.append(master_node)
        vf_node_info.append(self.all_node_info[master_node])
      # Add the first vm_capable node we find which is not included,
      # excluding the master node (which we already have)
      for node in absent_nodes:
        nodeinfo = self.all_node_info[node]
        if (nodeinfo.vm_capable and not nodeinfo.offline and
            node != master_node):
          additional_nodes.append(node)
          vf_node_info.append(self.all_node_info[node])
      key = constants.NV_FILELIST
      vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
                                                 {key: node_verify_param[key]},
                                                 self.cfg.GetClusterName()))
      vf_nvinfo = all_nvinfo
      vf_node_info = self.my_node_info.values()
    self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
    feedback_fn("* Verifying node status")
    # per-node verification loop
    for node_i in node_data_list:
      nimg = node_image[node]
        feedback_fn("* Skipping offline node %s" % (node,))
      if node == master_node:
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
      msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
        nimg.rpc_fail = True
      nresult = all_nvinfo[node].payload
      nimg.call_ok = self._VerifyNode(node_i, nresult)
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
      self._VerifyNodeNetwork(node_i, nresult)
      self._VerifyNodeUserScripts(node_i, nresult)
      self._VerifyOob(node_i, nresult)
      self._VerifyFileStoragePaths(node_i, nresult,
                                   node == master_node)
        self._VerifyNodeLVM(node_i, nresult, vg_name)
        self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
      self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
      self._UpdateNodeInstances(node_i, nresult, nimg)
      self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
      self._UpdateNodeOS(node_i, nresult, nimg)
      if not nimg.os_fail:
        if refos_img is None:
        self._VerifyNodeOS(node_i, nimg, refos_img)
      self._VerifyNodeBridges(node_i, nresult, bridges)
      # Check whether all running instancies are primary for the node. (This
      # can no longer be done from _VerifyInstance below, since some of the
      # wrong instances could be from other node groups.)
      non_primary_inst = set(nimg.instances).difference(nimg.pinst)
      for inst in non_primary_inst:
        test = inst in self.all_inst_info
        _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
                 "instance should not run on node %s", node_i.name)
        _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
                 "node is running unknown instance %s", inst)
    for node, result in extra_lv_nvinfo.items():
      self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
                              node_image[node], vg_name)
    feedback_fn("* Verifying instance status")
    # per-instance verification loop
    for instance in self.my_inst_names:
        feedback_fn("* Verifying instance %s" % instance)
      inst_config = self.my_inst_info[instance]
      self._VerifyInstance(instance, inst_config, node_image,
      inst_nodes_offline = []
      pnode = inst_config.primary_node
      pnode_img = node_image[pnode]
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
               constants.CV_ENODERPC, pnode, "instance %s, connection to"
               " primary node failed", instance)
      _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
               constants.CV_EINSTANCEBADNODE, instance,
               "instance is marked as running and lives on offline node %s",
               inst_config.primary_node)
      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant.
      if inst_config.disk_template not in constants.DTS_MIRRORED:
        i_non_redundant.append(instance)
      _ErrorIf(len(inst_config.secondary_nodes) > 1,
               constants.CV_EINSTANCELAYOUT,
               instance, "instance has multiple secondary nodes: %s",
               utils.CommaJoin(inst_config.secondary_nodes),
               code=self.ETYPE_WARNING)
      # warn about instances whose nodes span multiple groups
      if inst_config.disk_template in constants.DTS_INT_MIRROR:
        pnode = inst_config.primary_node
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
        instance_groups = {}
        for node in instance_nodes:
          instance_groups.setdefault(self.all_node_info[node].group,
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
          # Sort so that we always list the primary node first.
          for group, nodes in sorted(instance_groups.items(),
                                     key=lambda (_, nodes): pnode in nodes,
        self._ErrorIf(len(instance_groups) > 1,
                      constants.CV_EINSTANCESPLITGROUPS,
                      instance, "instance has primary and secondary nodes in"
                      " different groups: %s", utils.CommaJoin(pretty_list),
                      code=self.ETYPE_WARNING)
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)
      for snode in inst_config.secondary_nodes:
        s_img = node_image[snode]
        _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
                 snode, "instance %s, connection to secondary node failed",
          inst_nodes_offline.append(snode)
      # warn that the instance lives on offline nodes
      _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
               "instance has offline secondary node(s) %s",
               utils.CommaJoin(inst_nodes_offline))
      # ... or ghost/non-vm_capable nodes
      for node in inst_config.all_nodes:
        _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
                 instance, "instance lives on ghost node %s", node)
        _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
                 instance, "instance lives on non-vm_capable node %s", node)
    feedback_fn("* Verifying orphan volumes")
    reserved = utils.FieldSet(*cluster.reserved_lvs)
    # We will get spurious "unknown volume" warnings if any node of this group
    # is secondary for an instance whose primary is in another group. To avoid
    # them, we find these instances and add their volumes to node_vol_should.
    for inst in self.all_inst_info.values():
      for secondary in inst.secondary_nodes:
        if (secondary in self.my_node_info
            and inst.name not in self.my_inst_info):
          inst.MapLVsByNode(node_vol_should)
    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
      feedback_fn("* Verifying N+1 Memory redundancy")
      self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
    # summary notices for the operator
    feedback_fn("* Other Notes")
      feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
                  % len(i_non_redundant))
    if i_non_a_balanced:
      feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
                  % len(i_non_a_balanced))
      feedback_fn(" - NOTICE: %d offline instance(s) found." % i_offline)
      feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
      feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result

    """
    # NOTE(review): some lines appear missing from this extract (e.g. the
    # assignment of "msg" and the final return); restore from VCS.
    # We only really run POST phase hooks, only for non-empty groups,
    # and are only interested in their results
    if not self.my_node_names:
    elif phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      feedback_fn("* Hooks Results")
      assert hooks_results, "invalid result from hooks"
      for node_name in hooks_results:
        res = hooks_results[node_name]
        test = msg and not res.offline
        self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
                      "Communication failure in hooks execution: %s", msg)
        if res.offline or msg:
          # No need to investigate payload if node is offline or gave
        # each payload entry is (script, hook-result, output)
        for script, hkr, output in res.payload:
          test = hkr == constants.HKR_FAIL
          self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
                        "Script %s failed, output:", script)
            output = self._HOOKS_INDENT_RE.sub(" ", output)
            feedback_fn("%s" % output)
class LUClusterVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  def ExpandNames(self):
    # All locks are shared; one node-group lock per group to verify.
    # NOTE(review): the closing brace of this dict appears missing from
    # this extract; restore from VCS.
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
  def Exec(self, feedback_fn):
    group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
    # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
    return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
                           for group in group_names])
class LUGroupVerifyDisks(NoHooksLU):
  """Verifies the status of all disks in a node group.

  """
  def ExpandNames(self):
    # Raises errors.OpPrereqError on its own if group can't be found
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
  def DeclareLocks(self, level):
    # NOTE(review): parts of the NODEGROUP set expression appear missing
    # from this extract; restore from VCS.
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]
      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)
    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        set([self.group_uuid] +
            # Lock all groups used by instances optimistically; this requires
            # going via the node before it's locked, requiring verification
            for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
            for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be verified which contain
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()
      # Lock all nodes in group to be verified
      assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
      member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
    assert self.group_uuid in owned_groups
    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
    # Check if node groups for locked instances are still correct
    _CheckInstancesNodeGroups(self.cfg, self.instances,
                              owned_groups, owned_nodes, self.group_uuid)
  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    # NOTE(review): the initialisation of res_nodes/res_missing and some
    # "continue" lines appear missing from this extract; restore from VCS.
    res_instances = set()
    # map (node, lv-name) -> instance, only for admin-up instances
    nv_dict = _MapInstanceDisksToNodes(
      [inst for inst in self.instances.values()
       if inst.admin_state == constants.ADMINST_UP])
    nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
                           set(self.cfg.GetVmCapableNodeList()))
    node_lvs = self.rpc.call_lv_list(nodes, [])
    for (node, node_res) in node_lvs.items():
      if node_res.offline:
      msg = node_res.fail_msg
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        res_nodes[node] = msg
      # an LV that exists but is not online and belongs to an instance
      # means the instance needs activate-disks
      for lv_name, (_, _, lv_online) in node_res.payload.items():
        inst = nv_dict.pop((node, lv_name), None)
        if not (lv_online or inst is None):
          res_instances.add(inst)
    # any leftover items in nv_dict are missing LVs, let's arrange the data
    for key, inst in nv_dict.iteritems():
      res_missing.setdefault(inst, []).append(list(key))
    return (res_nodes, list(res_instances), res_missing)
class LUClusterRepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  """
  def ExpandNames(self):
    # NOTE(review): the "else:" branch marker and dict closers appear
    # missing from this extract; restore from VCS.
    if self.op.instances:
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
      self.needed_locks = {
        locking.LEVEL_NODE_RES: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
      self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE_RES: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
    self.share_locks = {
      locking.LEVEL_NODE_RES: 1,
      locking.LEVEL_INSTANCE: 0,
  def DeclareLocks(self, level):
    # only the primary nodes are needed, and only when a specific
    # instance list was given
    if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True, level=level)
  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
    self.wanted_instances = \
      map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object

    """
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size
      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    """
    # NOTE(review): initialisations of per_node_disks/changed and several
    # guard lines appear missing from this extract; restore from VCS.
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    for instance in self.wanted_instances:
      pnode = instance.primary_node
      if pnode not in per_node_disks:
        per_node_disks[pnode] = []
      for idx, disk in enumerate(instance.disks):
        per_node_disks[pnode].append((instance, idx, disk))
    assert not (frozenset(per_node_disks.keys()) -
                self.owned_locks(locking.LEVEL_NODE_RES)), \
      "Not owning correct locks"
    assert not self.owned_locks(locking.LEVEL_NODE)
    for node, dskl in per_node_disks.items():
      newl = [v[2].Copy() for v in dskl]
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsize(node, newl)
        self.LogWarning("Failure in blockdev_getsize call to node"
                        " %s, ignoring", node)
      if len(result.payload) != len(dskl):
        logging.warning("Invalid result from node %s: len(dksl)=%d,"
                        " result.payload=%s", node, len(dskl), result.payload)
        self.LogWarning("Invalid result from node %s, ignoring node results",
      # compare reported sizes against the configuration and fix drift
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))
class LUClusterRename(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER
  def BuildHooksEnv(self):
    """Build hooks env.

    """
    # NOTE(review): the "return {" line appears missing from this extract.
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    """
    hostname = netutils.GetHostname(name=self.op.name,
                                    family=self.cfg.GetPrimaryIPFamily())
    new_name = hostname.name
    self.ip = new_ip = hostname.ip
    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    # either the name or the IP must actually change
    if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
    if new_ip != old_ip:
      # the new master IP must not already be in use
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                   " reachable on the network" %
                                   new_ip, errors.ECODE_NOTUNIQUE)
    self.op.name = new_name
  def Exec(self, feedback_fn):
    """Rename the cluster.

    """
    # NOTE(review): the assignment of new_ip and some try/finally lines
    # appear missing from this extract; restore from VCS.
    clustername = self.op.name
    # shutdown the master IP
    master_params = self.cfg.GetMasterNetworkParameters()
    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
    result.Raise("Could not disable the master role")
    cluster = self.cfg.GetClusterInfo()
    cluster.cluster_name = clustername
    cluster.master_ip = new_ip
    self.cfg.Update(cluster, feedback_fn)
    # update the known hosts file
    ssh.WriteKnownHostsFile(self.cfg, pathutils.SSH_KNOWN_HOSTS_FILE)
    node_list = self.cfg.GetOnlineNodeList()
      node_list.remove(master_params.name)
    _UploadHelper(self, node_list, pathutils.SSH_KNOWN_HOSTS_FILE)
    # re-activate the master IP under the new address
    master_params.ip = new_ip
    result = self.rpc.call_node_activate_master_ip(master_params.name,
    msg = result.fail_msg
      self.LogWarning("Could not re-enable the master role on"
                      " the master, please restart manually: %s", msg)
def _ValidateNetmask(cfg, netmask):
  """Checks if a netmask is valid.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @param netmask: the netmask to be verified
  @raise errors.OpPrereqError: if the validation fails

  """
  # NOTE(review): the "try:" line before GetClassFromIpFamily appears
  # missing from this extract; restore from VCS.
  ip_family = cfg.GetPrimaryIPFamily()
    ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
  except errors.ProgrammerError:
    raise errors.OpPrereqError("Invalid primary ip family: %s." %
                               ip_family, errors.ECODE_INVAL)
  if not ipcls.ValidateNetmask(netmask):
    raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
                               (netmask), errors.ECODE_INVAL)
class LUClusterSetParams(LogicalUnit):
  """Change the parameters of the cluster.

  """
  HPATH = "cluster-modify"
  HTYPE = constants.HTYPE_CLUSTER
  def CheckArguments(self):
    """Check parameters

    """
    # validate the three uid-pool related arguments, if given
    if self.op.uid_pool:
      uidpool.CheckUidPool(self.op.uid_pool)
    if self.op.add_uids:
      uidpool.CheckUidPool(self.op.add_uids)
    if self.op.remove_uids:
      uidpool.CheckUidPool(self.op.remove_uids)
    if self.op.master_netmask is not None:
      _ValidateNetmask(self.cfg, self.op.master_netmask)
    if self.op.diskparams:
      for dt_params in self.op.diskparams.values():
        utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
      # NOTE(review): the "try:" line appears missing from this extract.
        utils.VerifyDictOptions(self.op.diskparams, constants.DISK_DT_DEFAULTS)
      except errors.OpPrereqError, err:
        raise errors.OpPrereqError("While verify diskparams options: %s" % err,
  def ExpandNames(self):
    # FIXME: in the future maybe other cluster params won't require checking on
    # all nodes to be modified.
    # NOTE(review): the closing braces of both dicts appear missing from
    # this extract; restore from VCS.
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
    self.share_locks = {
      locking.LEVEL_NODE: 1,
      locking.LEVEL_INSTANCE: 1,
      locking.LEVEL_NODEGROUP: 1,
  def BuildHooksEnv(self):
    """Build hooks env.

    """
    # NOTE(review): the "return {" line appears missing from this extract.
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_VG_NAME": self.op.vg_name,
  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # NOTE(review): the return statement appears missing from this extract.
    mn = self.cfg.GetMasterNode()
  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the given params don't conflict and
    if the given volume group is valid.

    """
    # NOTE(review): several guard/"else:" lines and initialisations
    # (e.g. violations, nic_errors, "if self.op.vg_name:") appear missing
    # from this extract; restore from VCS before running.
    if self.op.vg_name is not None and not self.op.vg_name:
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
                                   " instances exist", errors.ECODE_INVAL)
    if self.op.drbd_helper is not None and not self.op.drbd_helper:
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
        raise errors.OpPrereqError("Cannot disable drbd helper while"
                                   " drbd-based instances exist",
    node_list = self.owned_locks(locking.LEVEL_NODE)
    # if vg_name not None, checks given volume group on all nodes
      vglist = self.rpc.call_vg_list(node_list)
      for node in node_list:
        msg = vglist[node].fail_msg
          # ignoring down node
          self.LogWarning("Error while gathering data on node %s"
                          " (ignoring node): %s", node, msg)
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
                                              constants.MIN_VG_SIZE)
          raise errors.OpPrereqError("Error on node '%s': %s" %
                                     (node, vgstatus), errors.ECODE_ENVIRON)
    if self.op.drbd_helper:
      # checks given drbd helper on all nodes
      helpers = self.rpc.call_drbd_helper(node_list)
      for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
          self.LogInfo("Not checking drbd helper on offline node %s", node)
        msg = helpers[node].fail_msg
          raise errors.OpPrereqError("Error checking drbd helper on node"
                                     " '%s': %s" % (node, msg),
                                     errors.ECODE_ENVIRON)
        node_helper = helpers[node].payload
        if node_helper != self.op.drbd_helper:
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
                                     (node, node_helper), errors.ECODE_ENVIRON)
    self.cluster = cluster = self.cfg.GetClusterInfo()
    # validate params changes
    if self.op.beparams:
      objects.UpgradeBeParams(self.op.beparams)
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
      # TODO: we need a more general way to handle resetting
      # cluster-level parameters to default values
      if self.new_ndparams["oob_program"] == "":
        self.new_ndparams["oob_program"] = \
          constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
    if self.op.hv_state:
      new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
                                            self.cluster.hv_state_static)
      self.new_hv_state = dict((hv, cluster.SimpleFillHvState(values))
                               for hv, values in new_hv_state.items())
    if self.op.disk_state:
      new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state,
                                                self.cluster.disk_state_static)
      self.new_disk_state = \
        dict((storage, dict((name, cluster.SimpleFillDiskState(values))
                            for name, values in svalues.items()))
             for storage, svalues in new_disk_state.items())
      self.new_ipolicy = _GetUpdatedIPolicy(cluster.ipolicy, self.op.ipolicy,
      # warn (don't fail) about instances that would violate the new policy
      all_instances = self.cfg.GetAllInstancesInfo().values()
      for group in self.cfg.GetAllNodeGroupsInfo().values():
        instances = frozenset([inst for inst in all_instances
                               if compat.any(node in group.members
                                             for node in inst.all_nodes)])
        new_ipolicy = objects.FillIPolicy(self.new_ipolicy, group.ipolicy)
        ipol = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group)
        new = _ComputeNewInstanceViolations(ipol,
                                            new_ipolicy, instances)
        violations.update(new)
        self.LogWarning("After the ipolicy change the following instances"
                        " violate them: %s",
                        utils.CommaJoin(utils.NiceSort(violations)))
    if self.op.nicparams:
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
      # check all instances for consistency
      for instance in self.cfg.GetAllInstancesInfo().values():
        for nic_idx, nic in enumerate(instance.nics):
          params_copy = copy.deepcopy(nic.nicparams)
          params_filled = objects.FillDict(self.new_nicparams, params_copy)
          # check parameter syntax
            objects.NIC.CheckParameterSyntax(params_filled)
          except errors.ConfigurationError, err:
            nic_errors.append("Instance %s, nic/%d: %s" %
                              (instance.name, nic_idx, err))
          # if we're moving instances to routed, check that they have an ip
          target_mode = params_filled[constants.NIC_MODE]
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
            nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
                              " address" % (instance.name, nic_idx))
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
                                   "\n".join(nic_errors), errors.ECODE_INVAL)
    # hypervisor list/parameters
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
    if self.op.hvparams:
      for hv_name, hv_dict in self.op.hvparams.items():
        if hv_name not in self.new_hvparams:
          self.new_hvparams[hv_name] = hv_dict
          self.new_hvparams[hv_name].update(hv_dict)
    # disk template parameters
    self.new_diskparams = objects.FillDict(cluster.diskparams, {})
    if self.op.diskparams:
      for dt_name, dt_params in self.op.diskparams.items():
        if dt_name not in self.op.diskparams:
          self.new_diskparams[dt_name] = dt_params
          self.new_diskparams[dt_name].update(dt_params)
    # os hypervisor parameters
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
      for os_name, hvs in self.op.os_hvp.items():
        if os_name not in self.new_os_hvp:
          self.new_os_hvp[os_name] = hvs
          for hv_name, hv_dict in hvs.items():
            if hv_name not in self.new_os_hvp[os_name]:
              self.new_os_hvp[os_name][hv_name] = hv_dict
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
    self.new_osp = objects.FillDict(cluster.osparams, {})
    if self.op.osparams:
      for os_name, osp in self.op.osparams.items():
        if os_name not in self.new_osp:
          self.new_osp[os_name] = {}
        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
        if not self.new_osp[os_name]:
          # we removed all parameters
          del self.new_osp[os_name]
          # check the parameter validity (remote check)
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
                         os_name, self.new_osp[os_name])
    # changes to the hypervisor list
    if self.op.enabled_hypervisors is not None:
      self.hv_list = self.op.enabled_hypervisors
      for hv in self.hv_list:
        # if the hypervisor doesn't already exist in the cluster
        # hvparams, we initialize it to empty, and then (in both
        # cases) we make sure to fill the defaults, as we might not
        # have a complete defaults list if the hypervisor wasn't
        if hv not in new_hvp:
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
      self.hv_list = cluster.enabled_hypervisors
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
      # either the enabled list has changed, or the parameters have, validate
      for hv_name, hv_params in self.new_hvparams.items():
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
            (self.op.enabled_hypervisors and
             hv_name in self.op.enabled_hypervisors)):
          # either this is a new hypervisor, or its parameters have changed
          hv_class = hypervisor.GetHypervisor(hv_name)
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          hv_class.CheckParameterSyntax(hv_params)
          _CheckHVParams(self, node_list, hv_name, hv_params)
      # no need to check any newly-enabled hypervisors, since the
      # defaults have already been checked in the above code-block
      for os_name, os_hvp in self.new_os_hvp.items():
        for hv_name, hv_params in os_hvp.items():
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          # we need to fill in the new os_hvp on top of the actual hv_p
          cluster_defaults = self.new_hvparams.get(hv_name, {})
          new_osp = objects.FillDict(cluster_defaults, hv_params)
          hv_class = hypervisor.GetHypervisor(hv_name)
          hv_class.CheckParameterSyntax(new_osp)
          _CheckHVParams(self, node_list, hv_name, new_osp)
    if self.op.default_iallocator:
      # the default iallocator must resolve to an existing script
      alloc_script = utils.FindFile(self.op.default_iallocator,
                                    constants.IALLOCATOR_SEARCH_PATH,
      if alloc_script is None:
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
                                   " specified" % self.op.default_iallocator,
# NOTE(review): LUClusterSetParams.Exec, pasted as a line-numbered listing.
# The embedded numbers (4193, 4194, ...) and the gaps between them (e.g.
# 4195-4196, 4199-4200) show elided lines; the text is not runnable Python
# as-is, so the code is left byte-identical and only comments are added.
4193 def Exec(self, feedback_fn):
4194 """Change the parameters of the cluster.
# Volume group: only write the config when the name actually changes; the
# elided lines 4199-4200 presumably hold the else: branch -- TODO confirm.
4197 if self.op.vg_name is not None:
4198 new_volume = self.op.vg_name
4201 if new_volume != self.cfg.GetVGName():
4202 self.cfg.SetVGName(new_volume)
4204 feedback_fn("Cluster LVM configuration already in desired"
4205 " state, not changing")
# Same change-only-if-different pattern for the DRBD usermode helper.
4206 if self.op.drbd_helper is not None:
4207 new_helper = self.op.drbd_helper
4210 if new_helper != self.cfg.GetDRBDHelper():
4211 self.cfg.SetDRBDHelper(new_helper)
4213 feedback_fn("Cluster DRBD helper already in desired state,"
# Copy the parameter sets validated in CheckPrereq (self.new_*) onto the
# cluster object; each is guarded by the corresponding opcode field.
4215 if self.op.hvparams:
4216 self.cluster.hvparams = self.new_hvparams
# 4217 is elided; presumably "if self.op.os_hvp:" guards the next line --
# TODO confirm against upstream.
4218 self.cluster.os_hvp = self.new_os_hvp
4219 if self.op.enabled_hypervisors is not None:
4220 self.cluster.hvparams = self.new_hvparams
4221 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
4222 if self.op.beparams:
4223 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
4224 if self.op.nicparams:
4225 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
# 4226 elided; presumably "if self.op.ipolicy:" -- TODO confirm.
4227 self.cluster.ipolicy = self.new_ipolicy
4228 if self.op.osparams:
4229 self.cluster.osparams = self.new_osp
4230 if self.op.ndparams:
4231 self.cluster.ndparams = self.new_ndparams
4232 if self.op.diskparams:
4233 self.cluster.diskparams = self.new_diskparams
4234 if self.op.hv_state:
4235 self.cluster.hv_state_static = self.new_hv_state
4236 if self.op.disk_state:
4237 self.cluster.disk_state_static = self.new_disk_state
4239 if self.op.candidate_pool_size is not None:
4240 self.cluster.candidate_pool_size = self.op.candidate_pool_size
4241 # we need to update the pool size here, otherwise the save will fail
4242 _AdjustCandidatePool(self, [])
4244 if self.op.maintain_node_health is not None:
4245 if self.op.maintain_node_health and not constants.ENABLE_CONFD:
4246 feedback_fn("Note: CONFD was disabled at build time, node health"
4247 " maintenance is not useful (still enabling it)")
4248 self.cluster.maintain_node_health = self.op.maintain_node_health
4250 if self.op.prealloc_wipe_disks is not None:
4251 self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
# UID-pool bookkeeping: add/remove ranges, or replace the pool wholesale.
4253 if self.op.add_uids is not None:
4254 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
4256 if self.op.remove_uids is not None:
4257 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
4259 if self.op.uid_pool is not None:
4260 self.cluster.uid_pool = self.op.uid_pool
4262 if self.op.default_iallocator is not None:
4263 self.cluster.default_iallocator = self.op.default_iallocator
4265 if self.op.reserved_lvs is not None:
4266 self.cluster.reserved_lvs = self.op.reserved_lvs
4268 if self.op.use_external_mip_script is not None:
4269 self.cluster.use_external_mip_script = self.op.use_external_mip_script
# Local helper applying DDM_ADD/DDM_REMOVE modifications to one of the
# cluster OS lists (hidden_os / blacklisted_os); the elided lines around
# 4276-4285 presumably hold the membership tests and list mutation --
# TODO confirm.
4271 def helper_os(aname, mods, desc):
4273 lst = getattr(self.cluster, aname)
4274 for key, val in mods:
4275 if key == constants.DDM_ADD:
4277 feedback_fn("OS %s already in %s, ignoring" % (val, desc))
4280 elif key == constants.DDM_REMOVE:
4284 feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
4286 raise errors.ProgrammerError("Invalid modification '%s'" % key)
4288 if self.op.hidden_os:
4289 helper_os("hidden_os", self.op.hidden_os, "hidden")
4291 if self.op.blacklisted_os:
4292 helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
# Changing the master netdev: take the master IP down on the old device
# first; it is brought back up after cfg.Update (see 4322 below).
4294 if self.op.master_netdev:
4295 master_params = self.cfg.GetMasterNetworkParameters()
4296 ems = self.cfg.GetUseExternalMipScript()
4297 feedback_fn("Shutting down master ip on the current netdev (%s)" %
4298 self.cluster.master_netdev)
4299 result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4301 result.Raise("Could not disable the master ip")
4302 feedback_fn("Changing master_netdev from %s to %s" %
4303 (master_params.netdev, self.op.master_netdev))
4304 self.cluster.master_netdev = self.op.master_netdev
4306 if self.op.master_netmask:
4307 master_params = self.cfg.GetMasterNetworkParameters()
4308 feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
4309 result = self.rpc.call_node_change_master_netmask(master_params.name,
4310 master_params.netmask,
4311 self.op.master_netmask,
4313 master_params.netdev)
4315 msg = "Could not change the master IP netmask: %s" % result.fail_msg
4318 self.cluster.master_netmask = self.op.master_netmask
# Persist all accumulated cluster changes in one config write.
4320 self.cfg.Update(self.cluster, feedback_fn)
4322 if self.op.master_netdev:
4323 master_params = self.cfg.GetMasterNetworkParameters()
4324 feedback_fn("Starting the master ip on the new master netdev (%s)" %
4325 self.op.master_netdev)
4326 ems = self.cfg.GetUseExternalMipScript()
4327 result = self.rpc.call_node_activate_master_ip(master_params.name,
# Reactivation failure is only a warning: the config change is already
# committed, so the admin is asked to restart the IP manually.
4330 self.LogWarning("Could not re-enable the master ip on"
4331 " the master, please restart manually: %s",
4335 def _UploadHelper(lu, nodes, fname):
4336 """Helper for uploading a file and showing warnings.
4339 if os.path.exists(fname):
4340 result = lu.rpc.call_upload_file(nodes, fname)
4341 for to_node, to_result in result.items():
4342 msg = to_result.fail_msg
4344 msg = ("Copy of file %s to node %s failed: %s" %
4345 (fname, to_node, msg))
4346 lu.proc.LogWarning(msg)
def _ComputeAncillaryFiles(cluster, redist):
  """Compute files external to Ganeti which need to be consistent.

  @type redist: boolean
  @param redist: Whether to include files which need to be redistributed

  """
  # NOTE(review): reconstructed from an elided, line-numbered listing;
  # elided lines (set literals, else-branches) restored from context.
  # Compute files for all nodes
  files_all = set([
    pathutils.SSH_KNOWN_HOSTS_FILE,
    pathutils.CONFD_HMAC_KEY,
    pathutils.CLUSTER_DOMAIN_SECRET_FILE,
    pathutils.SPICE_CERT_FILE,
    pathutils.SPICE_CACERT_FILE,
    pathutils.RAPI_USERS_FILE,
    ])

  if redist:
    # we need to ship at least the RAPI certificate
    files_all.add(pathutils.RAPI_CERT_FILE)
  else:
    files_all.update(pathutils.ALL_CERT_FILES)
    files_all.update(ssconf.SimpleStore().GetFileList())

  if cluster.modify_etc_hosts:
    files_all.add(pathutils.ETC_HOSTS)

  if cluster.use_external_mip_script:
    files_all.add(pathutils.EXTERNAL_MASTER_SETUP_SCRIPT)

  # Files which are optional, these must:
  # - be present in one other category as well
  # - either exist or not exist on all nodes of that category (mc, vm all)
  files_opt = set([
    pathutils.RAPI_USERS_FILE,
    ])

  # Files which should only be on master candidates
  files_mc = set()

  if not redist:
    files_mc.add(pathutils.CLUSTER_CONF_FILE)

  # File storage
  if (not redist and
      (constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE)):
    files_all.add(pathutils.FILE_STORAGE_PATHS_FILE)
    files_opt.add(pathutils.FILE_STORAGE_PATHS_FILE)

  # Files which should only be on VM-capable nodes
  files_vm = set(
    filename
    for hv_name in cluster.enabled_hypervisors
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])

  files_opt |= set(
    filename
    for hv_name in cluster.enabled_hypervisors
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])

  # Filenames in each category must be unique
  all_files_set = files_all | files_mc | files_vm
  assert (len(all_files_set) ==
          sum(map(len, [files_all, files_mc, files_vm]))), \
    "Found file listed in more than one file list"

  # Optional files must be present in one other category
  assert all_files_set.issuperset(files_opt), \
    "Optional file not in a different required list"

  # This one file should never ever be re-distributed via RPC
  assert not (redist and
              pathutils.FILE_STORAGE_PATHS_FILE in all_files_set)

  return (files_all, files_opt, files_mc, files_vm)
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to
  @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
  # NOTE(review): reconstructed from an elided, line-numbered listing;
  # elided lines ("if additional_vm:", the filemap literal) restored.
  # Gather target nodes
  cluster = lu.cfg.GetClusterInfo()
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())

  online_nodes = lu.cfg.GetOnlineNodeList()
  online_set = frozenset(online_nodes)
  vm_nodes = list(online_set.intersection(lu.cfg.GetVmCapableNodeList()))

  if additional_nodes is not None:
    online_nodes.extend(additional_nodes)
    if additional_vm:
      vm_nodes.extend(additional_nodes)

  # Never distribute to master node
  for nodelist in [online_nodes, vm_nodes]:
    if master_info.name in nodelist:
      nodelist.remove(master_info.name)

  # Gather file lists
  (files_all, _, files_mc, files_vm) = \
    _ComputeAncillaryFiles(cluster, True)

  # Never re-distribute configuration file from here
  assert not (pathutils.CLUSTER_CONF_FILE in files_all or
              pathutils.CLUSTER_CONF_FILE in files_vm)
  assert not files_mc, "Master candidates not handled in this function"

  filemap = [
    (online_nodes, files_all),
    (vm_nodes, files_vm),
    ]

  # Upload the files
  for (node_list, files) in filemap:
    for fname in files:
      _UploadHelper(lu, node_list, fname)
class LUClusterRedistConf(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    # All nodes, shared, so redistribution can run concurrently with
    # other read-only operations.
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    # Writing the config triggers distribution of config/ssconf; the
    # helper then pushes the remaining ancillary files.
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)
class LUClusterActivateMasterIp(NoHooksLU):
  """Activate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Activate the master IP.

    """
    # NOTE(review): the elided RPC argument line restored as
    # "master_params, ems" -- verify against upstream history.
    master_params = self.cfg.GetMasterNetworkParameters()
    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_activate_master_ip(master_params.name,
                                                   master_params, ems)
    result.Raise("Could not activate the master IP")
class LUClusterDeactivateMasterIp(NoHooksLU):
  """Deactivate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Deactivate the master IP.

    """
    # NOTE(review): the elided RPC argument line restored as
    # "master_params, ems" -- verify against upstream history.
    master_params = self.cfg.GetMasterNetworkParameters()
    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
    result.Raise("Could not deactivate the master IP")
def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  Polls the primary node's mirror status until all requested disks
  report done (or, with C{oneshot}, after one pass), retrying briefly
  on RPC failures and on transient degraded states.

  @param lu: calling LU (rpc runner, config, logging)
  @param instance: instance whose disks to wait for
  @param disks: optional subset of disks (default: all)
  @param oneshot: poll only once instead of until synced
  @return: True if the disks ended up non-degraded

  """
  # NOTE(review): reconstructed from an elided, line-numbered listing;
  # the loop scaffolding ("while True:", retries bookkeeping, continue/
  # break lines) restored from context -- verify against upstream.
  if not instance.disks or disks is not None and not disks:
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10  # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, (disks, instance))
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                      node, disks[i].iv_name)
        continue

      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (disks[i].iv_name, mstat.sync_percent, rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)

  return not cumul_degraded
def _BlockdevFind(lu, node, dev, instance):
  """Wrapper around call_blockdev_find to annotate diskparams.

  @param lu: A reference to the lu object
  @param node: The node to call out
  @param dev: The device to find
  @param instance: The instance object the device belongs to
  @returns The result of the rpc call

  """
  # Annotate the single disk with its group's disk parameters before
  # shipping it over RPC.
  (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
  return lu.rpc.call_blockdev_find(node, disk)
def _CheckDiskConsistency(lu, instance, dev, node, on_primary, ldisk=False):
  """Wrapper around L{_CheckDiskConsistencyInner}.

  Annotates the disk with its disk parameters before delegating to the
  inner check.

  """
  (disk,) = _AnnotateDiskParams(instance, [dev], lu.cfg)
  return _CheckDiskConsistencyInner(lu, instance, disk, node, on_primary,
                                    ldisk=ldisk)
def _CheckDiskConsistencyInner(lu, instance, dev, node, on_primary,
                               ldisk=False):
  """Check that mirrors are not degraded.

  @attention: The device has to be annotated already.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
  # NOTE(review): reconstructed from an elided, line-numbered listing;
  # the "result = True" init, if/else scaffolding and recursion args
  # restored from context -- verify against upstream.
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistencyInner(lu, instance, child, node,
                                                     on_primary)

  return result
# NOTE(review): LUOobCommand from a line-numbered, elided listing (gaps in
# the embedded numbering, e.g. 4667-4669, 4674-4675). Not runnable Python
# as-is; code left byte-identical, comments only.
4665 class LUOobCommand(NoHooksLU):
4666 """Logical unit for OOB handling.
# Commands for which the master node must be excluded from the target list.
4670 _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
# Lock only the named nodes, or everything when no names were given.
4672 def ExpandNames(self):
4673 """Gather locks we need.
4676 if self.op.node_names:
4677 self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4678 lock_names = self.op.node_names
4680 lock_names = locking.ALL_SET
4682 self.needed_locks = {
4683 locking.LEVEL_NODE: lock_names,
4686 def CheckPrereq(self):
4687 """Check prerequisites.
4690 - the node exists in the configuration
4693 Any errors are signaled by raising errors.OpPrereqError.
4697 self.master_node = self.cfg.GetMasterNode()
4699 assert self.op.power_delay >= 0.0
# Refuse power-off/cycle of the master; the error text tells the admin how
# to run the master's OOB handler directly if it has one.
4701 if self.op.node_names:
4702 if (self.op.command in self._SKIP_MASTER and
4703 self.master_node in self.op.node_names):
4704 master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4705 master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4707 if master_oob_handler:
4708 additional_text = ("run '%s %s %s' if you want to operate on the"
4709 " master regardless") % (master_oob_handler,
4713 additional_text = "it does not support out-of-band operations"
4715 raise errors.OpPrereqError(("Operating on the master node %s is not"
4716 " allowed for %s; %s") %
4717 (self.master_node, self.op.command,
4718 additional_text), errors.ECODE_INVAL)
# No names given: operate on all nodes, silently dropping the master for
# the _SKIP_MASTER commands.
4720 self.op.node_names = self.cfg.GetNodeList()
4721 if self.op.command in self._SKIP_MASTER:
4722 self.op.node_names.remove(self.master_node)
4724 if self.op.command in self._SKIP_MASTER:
4725 assert self.master_node not in self.op.node_names
4727 for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4729 raise errors.OpPrereqError("Node %s not found" % node_name,
4732 self.nodes.append(node)
# Powering off a node that is not marked offline is refused unless
# ignore_status was given.
4734 if (not self.op.ignore_status and
4735 (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4736 raise errors.OpPrereqError(("Cannot power off node %s because it is"
4737 " not marked offline") % node_name,
4740 def Exec(self, feedback_fn):
4741 """Execute OOB and return result if we expect any.
4744 master_node = self.master_node
# One result entry per node, in stable name order.
4747 for idx, node in enumerate(utils.NiceSort(self.nodes,
4748 key=lambda node: node.name)):
4749 node_entry = [(constants.RS_NORMAL, node.name)]
4750 ret.append(node_entry)
4752 oob_program = _SupportsOob(self.cfg, node)
4755 node_entry.append((constants.RS_UNAVAIL, None))
4758 logging.info("Executing out-of-band command '%s' using '%s' on %s",
4759 self.op.command, oob_program, node.name)
# The OOB helper is always executed on the master node, targeting `node`.
4760 result = self.rpc.call_run_oob(master_node, oob_program,
4761 self.op.command, node.name,
4765 self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4766 node.name, result.fail_msg)
4767 node_entry.append((constants.RS_NODATA, None))
4770 self._CheckPayload(result)
# Python 2 except syntax (this file is Python 2 era code).
4771 except errors.OpExecError, err:
4772 self.LogWarning("Payload returned by node '%s' is not valid: %s",
4774 node_entry.append((constants.RS_NODATA, None))
4776 if self.op.command == constants.OOB_HEALTH:
4777 # For health we should log important events
4778 for item, status in result.payload:
4779 if status in [constants.OOB_STATUS_WARNING,
4780 constants.OOB_STATUS_CRITICAL]:
4781 self.LogWarning("Item '%s' on node '%s' has status '%s'",
4782 item, node.name, status)
# Track the recorded power state; elided line 4785 presumably sets
# node.powered = True -- TODO confirm.
4784 if self.op.command == constants.OOB_POWER_ON:
4786 elif self.op.command == constants.OOB_POWER_OFF:
4787 node.powered = False
4788 elif self.op.command == constants.OOB_POWER_STATUS:
4789 powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4790 if powered != node.powered:
4791 logging.warning(("Recorded power state (%s) of node '%s' does not"
4792 " match actual power state (%s)"), node.powered,
4795 # For configuration changing commands we should update the node
4796 if self.op.command in (constants.OOB_POWER_ON,
4797 constants.OOB_POWER_OFF):
4798 self.cfg.Update(node, feedback_fn)
4800 node_entry.append((constants.RS_NORMAL, result.payload))
# Stagger power-on of successive nodes by the configured delay.
4802 if (self.op.command == constants.OOB_POWER_ON and
4803 idx < len(self.nodes) - 1):
4804 time.sleep(self.op.power_delay)
4808 def _CheckPayload(self, result):
4809 """Checks if the payload is valid.
4811 @param result: RPC result
4812 @raises errors.OpExecError: If payload is not valid
# Per-command payload shape validation; collected errors raised together.
4816 if self.op.command == constants.OOB_HEALTH:
4817 if not isinstance(result.payload, list):
4818 errs.append("command 'health' is expected to return a list but got %s" %
4819 type(result.payload))
4821 for item, status in result.payload:
4822 if status not in constants.OOB_STATUSES:
4823 errs.append("health item '%s' has invalid status '%s'" %
4826 if self.op.command == constants.OOB_POWER_STATUS:
4827 if not isinstance(result.payload, dict):
4828 errs.append("power-status is expected to return a dict but got %s" %
4829 type(result.payload))
4831 if self.op.command in [
4832 constants.OOB_POWER_ON,
4833 constants.OOB_POWER_OFF,
4834 constants.OOB_POWER_CYCLE,
4836 if result.payload is not None:
4837 errs.append("%s is expected to not return payload but got '%s'" %
4838 (self.op.command, result.payload))
4841 raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4842 utils.CommaJoin(errs))
class _OsQuery(_QueryBase):
  # NOTE(review): reconstructed from an elided, line-numbered listing;
  # scaffolding lines (if/else, @staticmethod, dict inits, final filter)
  # restored from context -- verify against upstream history.
  FIELDS = query.OS_FIELDS

  def ExpandNames(self, lu):
    # Lock all nodes in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    lu.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    # The following variables interact with _QueryBase._GetNames
    if self.names:
      self.wanted = self.names
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = self.use_locking

  def DeclareLocks(self, lu, level):
    pass

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into an a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and tuples of (path, status, diagnose,
        variants, parameters, api_versions) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "", [], [])]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for (name, path, status, diagnose, variants,
           params, api_versions) in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        # convert params from [name, help] to (name, help)
        params = [tuple(v) for v in params]
        all_os[name][node_name].append((path, status, diagnose,
                                        variants, params, api_versions))
    return all_os

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    # Locking is not used
    assert not (compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) or
                self.do_locking or self.use_locking)

    valid_nodes = [node.name
                   for node in lu.cfg.GetAllNodesInfo().values()
                   if not node.offline and node.vm_capable]
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
    cluster = lu.cfg.GetClusterInfo()

    data = {}

    for (os_name, os_data) in pol.items():
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
                          hidden=(os_name in cluster.hidden_os),
                          blacklisted=(os_name in cluster.blacklisted_os))

      variants = set()
      parameters = set()
      api_versions = set()

      for idx, osl in enumerate(os_data.values()):
        info.valid = bool(info.valid and osl and osl[0][1])
        if not info.valid:
          break

        (node_variants, node_params, node_api) = osl[0][3:6]
        if idx == 0:
          # First entry
          variants.update(node_variants)
          parameters.update(node_params)
          api_versions.update(node_api)
        else:
          # Filter out inconsistent values
          variants.intersection_update(node_variants)
          parameters.intersection_update(node_params)
          api_versions.intersection_update(node_api)

      info.variants = list(variants)
      info.parameters = list(parameters)
      info.api_versions = list(api_versions)

      data[os_name] = info

    # Prepare data in requested order
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
            if name in data]
class LUOsDiagnose(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  REQ_BGL = False

  @staticmethod
  def _BuildFilter(fields, names):
    """Builds a filter for querying OSes.

    """
    name_filter = qlang.MakeSimpleFilter("name", names)

    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
    # respective field is not requested
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
                     for fname in ["hidden", "blacklisted"]
                     if fname not in fields]
    if "valid" not in fields:
      status_filter.append([qlang.OP_TRUE, "valid"])

    if status_filter:
      status_filter.insert(0, qlang.OP_AND)
    else:
      status_filter = None

    if name_filter and status_filter:
      return [qlang.OP_AND, name_filter, status_filter]
    elif name_filter:
      return name_filter
    else:
      return status_filter

  def CheckArguments(self):
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
                       self.op.output_fields, False)

  def ExpandNames(self):
    self.oq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.oq.OldStyleQuery(self)
class LUNodeRemove(LogicalUnit):
  """Logical unit for removing a node.

  """
  # NOTE(review): reconstructed from an elided, line-numbered listing;
  # the try/except in BuildHooksNodes and the "self.node = node" handoff
  # restored from context -- verify against upstream history.
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      pass
    return (all_nodes, all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
    - the node exists in the configuration
    - it does not have primary or secondary instances
    - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node, failover to another"
                                 " node is required", errors.ECODE_INVAL)

    for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first" % instance_name,
                                   errors.ECODE_INVAL)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
      "Not owning BGL"

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    _RunPostHook(self, node.name)

    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)

    # Remove node from our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_REMOVE,
                                              node.name, None)
      result.Raise("Can't update hosts file with new host data")
      _RedistributeAncillaryFiles(self)
class _NodeQuery(_QueryBase):
  # NOTE(review): reconstructed from an elided, line-numbered listing;
  # the if/else scaffolding and the None/{} fallbacks restored from
  # context -- verify against upstream history.
  FIELDS = query.NODE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedNodes(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.NQ_LIVE in self.requested_data)

    if self.do_locking:
      # If any non-static field is requested we need to lock the nodes
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    all_info = lu.cfg.GetAllNodesInfo()

    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)

    # Gather data as requested
    if query.NQ_LIVE in self.requested_data:
      # filter out non-vm_capable nodes
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]

      node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
                                        [lu.cfg.GetHypervisorType()])
      live_data = dict((name, rpc.MakeLegacyNodeInfo(nresult.payload))
                       for (name, nresult) in node_data.items()
                       if not nresult.fail_msg and nresult.payload)
    else:
      live_data = None

    if query.NQ_INST in self.requested_data:
      node_to_primary = dict([(name, set()) for name in nodenames])
      node_to_secondary = dict([(name, set()) for name in nodenames])

      inst_data = lu.cfg.GetAllInstancesInfo()

      for inst in inst_data.values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)
    else:
      node_to_primary = None
      node_to_secondary = None

    if query.NQ_OOB in self.requested_data:
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
                         for name, node in all_info.iteritems())
    else:
      oob_support = None

    if query.NQ_GROUP in self.requested_data:
      groups = lu.cfg.GetAllNodeGroupsInfo()
    else:
      groups = {}

    return query.NodeQueryData([all_info[name] for name in nodenames],
                               live_data, lu.cfg.GetMasterNode(),
                               node_to_primary, node_to_secondary, groups,
                               oob_support, lu.cfg.GetClusterInfo())
class LUNodeQuery(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    # All real work is delegated to the _NodeQuery helper.
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
                         self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.nq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.nq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.nq.OldStyleQuery(self)
class LUNodeQueryvols(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  # NOTE(review): reconstructed from an elided, line-numbered listing;
  # the offline-node skip, "node"/"vg"/"name" field branches and the
  # final return restored from context -- verify against upstream.
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {}

    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    nodenames = self.owned_locks(locking.LEVEL_NODE)
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = self.cfg.GetAllInstancesInfo()
    vol2inst = _MapInstanceDisksToNodes(ilist.values())

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = sorted(nresult.payload,
                         key=operator.itemgetter("dev"))

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol["dev"]
          elif field == "vg":
            val = vol["vg"]
          elif field == "name":
            val = vol["name"]
          elif field == "size":
            val = int(float(vol["size"]))
          elif field == "instance":
            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output
class LUNodeQueryStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  # NOTE(review): reconstructed from an elided, line-numbered listing;
  # the if/else scaffolding and the result accumulation restored from
  # context -- verify against upstream history.
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  REQ_BGL = False

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {}

    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    self.nodes = self.owned_locks(locking.LEVEL_NODE)

    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields

    # Never ask for node or type as it's only known to the LU
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
      while extra in fields:
        fields.remove(extra)

    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]

        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(val)

        result.append(out)

    return result
class _InstanceQuery(_QueryBase):
  FIELDS = query.INSTANCE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedInstances(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    # Live data requires holding the instance locks
    self.do_locking = (self.use_locking and
                       query.IQ_LIVE in self.requested_data)
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      lu.needed_locks[locking.LEVEL_NODEGROUP] = []
      lu.needed_locks[locking.LEVEL_NODE] = []
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.do_grouplocks = (self.do_locking and
                          query.IQ_NODES in self.requested_data)

  def DeclareLocks(self, lu, level):
    if self.do_locking:
      if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
        assert not lu.needed_locks[locking.LEVEL_NODEGROUP]

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lu.needed_locks[locking.LEVEL_NODEGROUP] = \
          set(group_uuid
              for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
              for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
      elif level == locking.LEVEL_NODE:
        lu._LockInstancesNodes() # pylint: disable=W0212

  @staticmethod
  def _CheckGroupLocks(lu):
    owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))

    # Check if node groups for locked instances are still correct
    for instance_name in owned_instances:
      _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)

  def _GetQueryData(self, lu):
    """Computes the list of instances and their attributes.

    """
    if self.do_grouplocks:
      self._CheckGroupLocks(lu)

    cluster = lu.cfg.GetClusterInfo()
    all_info = lu.cfg.GetAllInstancesInfo()

    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)

    instance_list = [all_info[name] for name in instance_names]
    nodes = frozenset(itertools.chain(*(inst.all_nodes
                                        for inst in instance_list)))
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
    bad_nodes = []
    offline_nodes = []
    wrongnode_inst = set()

    # Gather data as requested
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
      live_data = {}
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          assert result.fail_msg
          offline_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        elif result.payload:
          for inst in result.payload:
            if inst in all_info:
              if all_info[inst].primary_node == name:
                live_data.update(result.payload)
              else:
                wrongnode_inst.add(inst)
            else:
              # orphan instance; we don't list it here as we don't
              # handle this case yet in the output of instance listing
              logging.warning("Orphan instance '%s' found on node %s",
                              inst, name)
        # else no instance is alive
    else:
      live_data = {}

    if query.IQ_DISKUSAGE in self.requested_data:
      gmi = ganeti.masterd.instance
      disk_usage = dict((inst.name,
                         gmi.ComputeDiskSize(inst.disk_template,
                                             [{constants.IDISK_SIZE: disk.size}
                                              for disk in inst.disks]))
                        for inst in instance_list)
    else:
      disk_usage = None

    if query.IQ_CONSOLE in self.requested_data:
      consinfo = {}
      for inst in instance_list:
        if inst.name in live_data:
          # Instance is running
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
        else:
          consinfo[inst.name] = None
      assert set(consinfo.keys()) == set(instance_names)
    else:
      consinfo = None

    if query.IQ_NODES in self.requested_data:
      node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
                                            instance_list)))
      nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
      groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
                    for uuid in set(map(operator.attrgetter("group"),
                                        nodes.values())))
    else:
      nodes = None
      groups = None

    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
                                   disk_usage, offline_nodes, bad_nodes,
                                   live_data, wrongnode_inst, consinfo,
                                   nodes, groups)
class LUQuery(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    qcls = _GetQueryImplementation(self.op.what)

    self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)

  def ExpandNames(self):
    self.impl.ExpandNames(self)

  def DeclareLocks(self, level):
    self.impl.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.impl.NewStyleQuery(self)
class LUQueryFields(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.qcls = _GetQueryImplementation(self.op.what)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)
class LUNodeModifyStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(self.op.node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))
class LUNodeAdd(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _NFLAGS = ["master_capable", "vm_capable"]

  def CheckArguments(self):
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
    # validate/normalize the node name
    self.hostname = netutils.GetHostname(name=self.op.node_name,
                                         family=self.primary_ip_family)
    self.op.node_name = self.hostname.name

    if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
      raise errors.OpPrereqError("Cannot readd the master node",
                                 errors.ECODE_STATE)

    if self.op.readd and self.op.group:
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
                                 " being readded", errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # Exclude added node
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
    post_nodes = pre_nodes + [self.op.node_name, ]

    return (pre_nodes, post_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
    - the new node is not already in the config
    - it is resolvable
    - its parameters (single/dual homed) matches the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    cfg = self.cfg
    hostname = self.hostname
    node = hostname.name
    primary_ip = self.op.primary_ip = hostname.ip
    if self.op.secondary_ip is None:
      if self.primary_ip_family == netutils.IP6Address.family:
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
                                   " IPv4 address must be given as secondary",
                                   errors.ECODE_INVAL)
      self.op.secondary_ip = primary_ip

    secondary_ip = self.op.secondary_ip
    if not netutils.IP4Address.IsValid(secondary_ip):
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                 " address" % secondary_ip, errors.ECODE_INVAL)

    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)

    self.changed_primary_ip = False

    for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
      if self.op.readd and node == existing_node_name:
        if existing_node.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # After this 'if' block, None is no longer a valid value for the
    # _capable op attributes
    if self.op.readd:
      old_node = self.cfg.GetNodeInfo(node)
      assert old_node is not None, "Can't retrieve locked node %s" % node
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, getattr(old_node, attr))
    else:
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, True)

    if self.op.readd and not self.op.vm_capable:
      pri, sec = cfg.GetNodeInstances(node)
      if pri or sec:
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
                                   " flag set to false, but it already holds"
                                   " instances" % node,
                                   errors.ECODE_STATE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no secondary ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a secondary ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)

    # checks reachability
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                              source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to node daemon port",
                                   errors.ECODE_ENVIRON)

    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    if self.op.master_capable:
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
    else:
      self.master_candidate = False

    if self.op.readd:
      self.new_node = old_node
    else:
      node_group = cfg.LookupNodeGroup(self.op.group)
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False,
                                   group=node_group, ndparams={})

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

    if self.op.hv_state:
      self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)

    if self.op.disk_state:
      self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)

    # TODO: If we need to have multiple DnsOnlyRunner we probably should make
    # it a property on the base class.
    result = rpc.DnsOnlyRunner().call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpPrereqError("Version mismatch master version %s,"
                                 " node version %s" %
                                 (constants.PROTOCOL_VERSION, result.payload),
                                 errors.ECODE_ENVIRON)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    new_node = self.new_node
    node = new_node.name

    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
      "Not owning BGL"

    # We adding a new node so we assume it's powered
    new_node.powered = True

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # copy the master/vm_capable flags
    for attr in self._NFLAGS:
      setattr(new_node, attr, getattr(self.op, attr))

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    if self.op.ndparams:
      new_node.ndparams = self.op.ndparams
    else:
      new_node.ndparams = {}

    if self.op.hv_state:
      new_node.hv_state_static = self.new_hv_state

    if self.op.disk_state:
      new_node.disk_state_static = self.new_disk_state

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_ADD,
                                              self.hostname.name,
                                              self.hostname.ip)
      result.Raise("Can't update hosts file with new host data")

    if new_node.secondary_ip != new_node.primary_ip:
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
                               False)

    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: ([node], {}),
      # TODO: do a node-net-test as well?
      }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
                                  additional_vm=self.op.vm_capable)
      self.context.AddNode(new_node, self.proc.GetECId())
class LUNodeSetParams(LogicalUnit):
  """Modifies the parameters of a node.

  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
      to the node role (as _ROLE_*)
  @cvar _R2F: a dictionary from node role to tuples of flags
  @cvar _FLAGS: a list of attribute names corresponding to the flags

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
  _F2R = {
    (True, False, False): _ROLE_CANDIDATE,
    (False, True, False): _ROLE_DRAINED,
    (False, False, True): _ROLE_OFFLINE,
    (False, False, False): _ROLE_REGULAR,
    }
  _R2F = dict((v, k) for k, v in _F2R.items())
  _FLAGS = ["master_candidate", "drained", "offline"]

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
                self.op.master_capable, self.op.vm_capable,
                self.op.secondary_ip, self.op.ndparams, self.op.hv_state,
                self.op.disk_state]
    if all_mods.count(None) == len(all_mods):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we might be demoting from MC
    self.might_demote = (self.op.master_candidate is False or
                         self.op.offline is True or
                         self.op.drained is True or
                         self.op.master_capable is False)

    if self.op.secondary_ip:
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                   " address" % self.op.secondary_ip,
                                   errors.ECODE_INVAL)

    self.lock_all = self.op.auto_promote and self.might_demote
    self.lock_instances = self.op.secondary_ip is not None

  def _InstanceFilter(self, instance):
    """Filter for getting affected instances.

    """
    return (instance.disk_template in constants.DTS_INT_MIRROR and
            self.op.node_name in instance.all_nodes)

  def ExpandNames(self):
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

    # Since modifying a node can have severe effects on currently running
    # operations the resource lock is at least acquired in shared mode
    self.needed_locks[locking.LEVEL_NODE_RES] = \
      self.needed_locks[locking.LEVEL_NODE]

    # Get node resource and instance locks in shared mode; they are not used
    # for anything but read-only access
    self.share_locks[locking.LEVEL_NODE_RES] = 1
    self.share_locks[locking.LEVEL_INSTANCE] = 1

    if self.lock_instances:
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if self.lock_instances:
      affected_instances = \
        self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)

      # Verify instance locks
      owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
      wanted_instances = frozenset(affected_instances.keys())
      if wanted_instances - owned_instances:
        raise errors.OpPrereqError("Instances affected by changing node %s's"
                                   " secondary IP address have changed since"
                                   " locks were acquired, wanted '%s', have"
                                   " '%s'; retry the operation" %
                                   (self.op.node_name,
                                    utils.CommaJoin(wanted_instances),
                                    utils.CommaJoin(owned_instances)),
                                   errors.ECODE_STATE)
    else:
      affected_instances = None

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via master-failover",
                                   errors.ECODE_INVAL)

    if self.op.master_candidate and not node.master_capable:
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
                                 " it a master candidate" % node.name,
                                 errors.ECODE_STATE)

    if self.op.vm_capable is False:
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
      if ipri or isec:
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
                                   " the vm_capable flag" % node.name,
                                   errors.ECODE_STATE)

    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto promote option to allow"
                                   " promotion (--auto-promote or RAPI"
                                   " auto_promote=True)", errors.ECODE_STATE)

    self.old_flags = old_flags = (node.master_candidate,
                                  node.drained, node.offline)
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
    self.old_role = old_role = self._F2R[old_flags]

    # Check for ineffective changes
    for attr in self._FLAGS:
      if (getattr(self.op, attr) is False and getattr(node, attr) is False):
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
        setattr(self.op, attr, None)

    # Past this point, any flag change to False means a transition
    # away from the respective state, as only real changes are kept

    # TODO: We might query the real power state if it supports OOB
    if _SupportsOob(self.cfg, node):
      if self.op.offline is False and not (node.powered or
                                           self.op.powered is True):
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
                                    " offline status can be reset") %
                                   self.op.node_name, errors.ECODE_STATE)
    elif self.op.powered is not None:
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
                                  " as it does not support out-of-band"
                                  " handling") % self.op.node_name,
                                 errors.ECODE_STATE)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.op.drained is False or self.op.offline is False or
        (self.op.master_capable and not node.master_capable)):
      if _DecideSelfPromotion(self):
        self.op.master_candidate = True
        self.LogInfo("Auto-promoting node to master candidate")

    # If we're no longer master capable, we'll demote ourselves from MC
    if self.op.master_capable is False and node.master_candidate:
      self.LogInfo("Demoting from master candidate")
      self.op.master_candidate = False

    # Compute new role
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
    if self.op.master_candidate:
      new_role = self._ROLE_CANDIDATE
    elif self.op.drained:
      new_role = self._ROLE_DRAINED
    elif self.op.offline:
      new_role = self._ROLE_OFFLINE
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
      # False is still in new flags, which means we're un-setting (the
      # only) True flag
      new_role = self._ROLE_REGULAR
    else: # no new flags, nothing, keep old role
      new_role = old_role

    self.new_role = new_role

    if old_role == self._ROLE_OFFLINE and new_role != old_role:
      # Trying to transition out of offline status
      result = self.rpc.call_version([node.name])[node.name]
      if result.fail_msg:
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
                                   " to report its version: %s" %
                                   (node.name, result.fail_msg),
                                   errors.ECODE_STATE)
      else:
        self.LogWarning("Transitioning node from offline to online state"
                        " without using re-add. Please make sure the node"
                        " is healthy!")

    # When changing the secondary ip, verify if this is a single-homed to
    # multi-homed transition or vice versa, and apply the relevant
    # restrictions.
    if self.op.secondary_ip:
      # Ok even without locking, because this can't be changed by any LU
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
      master_singlehomed = master.secondary_ip == master.primary_ip
      if master_singlehomed and self.op.secondary_ip != node.primary_ip:
        if self.op.force and node.name == master.name:
          self.LogWarning("Transitioning from single-homed to multi-homed"
                          " cluster. All nodes will require a secondary ip.")
        else:
          raise errors.OpPrereqError("Changing the secondary ip on a"
                                     " single-homed cluster requires the"
                                     " --force option to be passed, and the"
                                     " target node to be the master",
                                     errors.ECODE_INVAL)
      elif not master_singlehomed and self.op.secondary_ip == node.primary_ip:
        if self.op.force and node.name == master.name:
          self.LogWarning("Transitioning from multi-homed to single-homed"
                          " cluster. Secondary IPs will have to be removed.")
        else:
          raise errors.OpPrereqError("Cannot set the secondary IP to be the"
                                     " same as the primary IP on a multi-homed"
                                     " cluster, unless the --force option is"
                                     " passed, and the target node is the"
                                     " master", errors.ECODE_INVAL)

      assert not (frozenset(affected_instances) -
                  self.owned_locks(locking.LEVEL_INSTANCE))

      if node.offline:
        if affected_instances:
          msg = ("Cannot change secondary IP address: offline node has"
                 " instances (%s) configured to use it" %
                 utils.CommaJoin(affected_instances.keys()))
          raise errors.OpPrereqError(msg, errors.ECODE_STATE)
      else:
        # On online nodes, check that no instances are running, and that
        # the node has the new ip and we can reach it.
        for instance in affected_instances.values():
          _CheckInstanceState(self, instance, INSTANCE_DOWN,
                              msg="cannot change secondary ip")

        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
        if master.name != node.name:
          # check reachability from master secondary ip to new secondary ip
          if not netutils.TcpPing(self.op.secondary_ip,
                                  constants.DEFAULT_NODED_PORT,
                                  source=master.secondary_ip):
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                       " based ping to node daemon port",
                                       errors.ECODE_ENVIRON)

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

    if self.op.hv_state:
      self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
                                                 self.node.hv_state_static)

    if self.op.disk_state:
      self.new_disk_state = \
        _MergeAndVerifyDiskState(self.op.disk_state,
                                 self.node.disk_state_static)

  def Exec(self, feedback_fn):
    """Modifies a node.

    """
    node = self.node
    old_role = self.old_role
    new_role = self.new_role

    result = []

    if self.op.ndparams:
      node.ndparams = self.new_ndparams

    if self.op.powered is not None:
      node.powered = self.op.powered

    if self.op.hv_state:
      node.hv_state_static = self.new_hv_state

    if self.op.disk_state:
      node.disk_state_static = self.new_disk_state

    for attr in ["master_capable", "vm_capable"]:
      val = getattr(self.op, attr)
      if val is not None:
        setattr(node, attr, val)
        result.append((attr, str(val)))

    if new_role != old_role:
      # Tell the node to demote itself, if no longer MC and not offline
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s", msg)

      new_flags = self._R2F[new_role]
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
        if of != nf:
          result.append((desc, str(nf)))
      (node.master_candidate, node.drained, node.offline) = new_flags

      # we locked all nodes, we adjust the CP before updating this node
      if self.lock_all:
        _AdjustCandidatePool(self, [node.name])

    if self.op.secondary_ip:
      node.secondary_ip = self.op.secondary_ip
      result.append(("secondary_ip", self.op.secondary_ip))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup if the mc
    # flag changed
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
      self.context.ReaddNode(node)

    return result
class LUNodePowercycle(NoHooksLU):
  """Powercycles a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Reboots a node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload
class LUClusterQuery(NoHooksLU):
  """Query cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    os_hvp = {}

    # Filter just for enabled hypervisors
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = {}
      for hv_name, hv_params in hv_dict.items():
        if hv_name in cluster.enabled_hypervisors:
          os_hvp[os_name][hv_name] = hv_params

    # Convert ip_family to ip_version
    primary_ip_version = constants.IP4_VERSION
    if cluster.primary_ip_family == netutils.IP6Address.family:
      primary_ip_version = constants.IP6_VERSION

    result = {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": runtime.GetArchInfo(),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.primary_hypervisor,
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "ipolicy": cluster.ipolicy,
      "nicparams": cluster.nicparams,
      "ndparams": cluster.ndparams,
      "diskparams": cluster.diskparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "master_netmask": cluster.master_netmask,
      "use_external_mip_script": cluster.use_external_mip_script,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      "reserved_lvs": cluster.reserved_lvs,
      "primary_ip_version": primary_ip_version,
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
      "hidden_os": cluster.hidden_os,
      "blacklisted_os": cluster.blacklisted_os,
      }

    return result
class LUClusterConfigQuery(NoHooksLU):
  """Return configuration values.

  """
  REQ_BGL = False

  def CheckArguments(self):
    # All the work is delegated to the generic cluster query object
    self.cq = _ClusterQuery(None, self.op.output_fields, False)

  def ExpandNames(self):
    self.cq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.cq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    result = self.cq.OldStyleQuery(self)

    # A cluster query describes exactly one object: the cluster itself
    assert len(result) == 1

    return result[0]
class _ClusterQuery(_QueryBase):
  FIELDS = query.CLUSTER_FIELDS

  #: Do not sort (there is only one item)
  SORT_FIELD = None

  def ExpandNames(self, lu):
    lu.needed_locks = {}

    # The following variables interact with _QueryBase._GetNames
    self.wanted = locking.ALL_SET
    self.do_locking = self.use_locking

    if self.do_locking:
      raise errors.OpPrereqError("Can not use locking for cluster queries",
                                 errors.ECODE_INVAL)

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    # Locking is not used
    assert not (compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) or
                self.do_locking or self.use_locking)

    # Each piece of data is fetched only if it was actually requested;
    # NotImplemented marks "not requested" for the query layer
    if query.CQ_CONFIG in self.requested_data:
      cluster = lu.cfg.GetClusterInfo()
    else:
      cluster = NotImplemented

    if query.CQ_QUEUE_DRAINED in self.requested_data:
      drain_flag = os.path.exists(pathutils.JOB_QUEUE_DRAIN_FILE)
    else:
      drain_flag = NotImplemented

    if query.CQ_WATCHER_PAUSE in self.requested_data:
      watcher_pause = utils.ReadWatcherPauseFile(pathutils.WATCHER_PAUSEFILE)
    else:
      watcher_pause = NotImplemented

    return query.ClusterQueryData(cluster, drain_flag, watcher_pause)
class LUInstanceActivateDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
      _AssembleInstanceDisks(self, self.instance,
                             ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    if self.op.wait_for_sync:
      if not _WaitForSync(self, self.instance):
        raise errors.OpExecError("Some disks of the instance are degraded!")

    return disks_info
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: False if the operation failed, otherwise a list of
      (host, instance_visible_name, node_visible_name)
      with the mapping from node devices to instance devices

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)

  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occured, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for idx, inst_disk in enumerate(disks):
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
                                             False, idx)
      msg = result.fail_msg
      if msg:
        # an offline secondary is tolerable: the disk will be re-assembled
        # when the node comes back
        is_offline_secondary = (node in instance.secondary_nodes and
                                result.offline)
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not (ignore_secondaries or is_offline_secondary):
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for idx, inst_disk in enumerate(disks):
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, (node_disk, instance), iname,
                                             True, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  Thin wrapper over L{_AssembleInstanceDisks} that turns an assembly
  failure into an exception (after tearing down whatever was brought up).

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                       ignore_secondaries=force)
  if not disks_ok:
    # clean up anything partially assembled before failing
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")
class LUInstanceDeactivateDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks

    """
    instance = self.instance
    if self.op.force:
      # forced shutdown skips the "instance is down" safety check
      _ShutdownInstanceDisks(self, instance)
    else:
      _SafeShutdownInstanceDisks(self, instance)
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance.

  This function checks if an instance is running, before calling
  _ShutdownInstanceDisks.

  """
  # refuse to touch the disks of an instance that is not down
  _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)
6583 def _ExpandCheckDisks(instance, disks):
6584 """Return the instance disks selected by the disks list
6586 @type disks: list of L{objects.Disk} or None
6587 @param disks: selected disks
6588 @rtype: list of L{objects.Disk}
6589 @return: selected instance disks to act on
6593 return instance.disks
6595 if not set(disks).issubset(instance.disks):
6596 raise errors.ProgrammerError("Can only act on disks belonging to the"
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If the ignore_primary is false, errors on the primary node are
  ignored.

  @rtype: boolean
  @return: True if all devices were shut down cleanly (or their errors
      could be ignored), False otherwise

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, (top_disk, instance))
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        # a failure counts unless it is on a tolerated node: the primary
        # with ignore_primary set, or an offline secondary
        if ((node == instance.primary_node and not ignore_primary) or
            (node != instance.primary_node and not result.offline)):
          all_result = False
  return all_result
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function check if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raise an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @rtype: integer
  @return: node current free memory
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  (_, _, (hv_info, )) = nodeinfo[node].payload

  free_mem = hv_info.get("memory_free", None)
  # a missing or non-integer value means the hypervisor could not report
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)
  return free_mem
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
  """Checks if nodes have enough free disk space in the all VGs.

  This function check if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raise an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type req_sizes: C{dict}
  @param req_sizes: the hash of vg and corresponding amount of disk in
      MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  # delegate the per-VG check to the single-VG helper
  for vg, req_size in req_sizes.items():
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
  """Checks if nodes have enough free disk space in the specified VG.

  This function check if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raise an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type vg: C{string}
  @param vg: the volume group to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    (_, (vg_info, ), _) = info.payload
    vg_free = vg_info.get("vg_free", None)
    # a missing or non-integer value means the VG could not be queried
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node"
                                 " %s for vg %s, result was '%s'" %
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
    if requested > vg_free:
      raise errors.OpPrereqError("Not enough disk space on target node %s"
                                 " vg %s: required %d MiB, available %d MiB" %
                                 (node, vg, requested, vg_free),
                                 errors.ECODE_NORES)
def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
  """Checks if nodes have enough physical CPUs

  This function checks if all given nodes have the needed number of
  physical CPUs. In case any node has less CPUs or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type requested: C{int}
  @param requested: the minimum acceptable number of physical CPUs
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for CPU stats
  @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    (_, _, (hv_info, )) = info.payload
    num_cpus = hv_info.get("cpu_total", None)
    # a missing or non-integer value means the hypervisor could not report
    if not isinstance(num_cpus, int):
      raise errors.OpPrereqError("Can't compute the number of physical CPUs"
                                 " on node %s, result was '%s'" %
                                 (node, num_cpus), errors.ECODE_ENVIRON)
    if requested > num_cpus:
      raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
                                 "required" % (node, num_cpus, requested),
                                 errors.ECODE_NORES)
class LUInstanceStartup(LogicalUnit):
  """Starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    # extra beparams
    if self.op.beparams:
      # fill the beparams dict
      objects.UpgradeBeParams(self.op.beparams)
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE_RES:
      self._LockInstancesNodes(primary_only=True, level=locking.LEVEL_NODE_RES)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "FORCE": self.op.force,
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra hvparams
    if self.op.hvparams:
      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
      filled_hvp = cluster.FillHV(instance)
      filled_hvp.update(self.op.hvparams)
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)

    _CheckInstanceState(self, instance, INSTANCE_ONLINE)

    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")

      if self.op.hvparams or self.op.beparams:
        self.proc.LogWarning("Overridden parameters are ignored")
    else:
      _CheckNodeOnline(self, instance.primary_node)

      bep = self.cfg.GetClusterInfo().FillBE(instance)
      bep.update(self.op.beparams)

      # check bridges existence
      _CheckInstanceBridgesExist(self, instance)

      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
                                                instance.hypervisor)
      remote_info.Raise("Error checking node %s" % instance.primary_node,
                        prereq=True, ecode=errors.ECODE_ENVIRON)
      if not remote_info.payload: # not running already
        _CheckNodeFreeMemory(self, instance.primary_node,
                             "starting instance %s" % instance.name,
                             bep[constants.BE_MINMEM], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    """
    instance = self.instance
    force = self.op.force

    if not self.op.no_remember:
      self.cfg.MarkInstanceUp(instance.name)

    if self.primary_offline:
      # nothing to do on an offline primary beyond recording the state
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as started")
    else:
      node_current = instance.primary_node

      _StartInstanceDisks(self, instance, force)

      result = \
        self.rpc.call_instance_start(node_current,
                                     (instance, self.op.hvparams,
                                      self.op.beparams),
                                     self.op.startup_paused)
      msg = result.fail_msg
      if msg:
        # roll back the disk assembly on startup failure
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance: %s" % msg)
class LUInstanceReboot(LogicalUnit):
  """Reboot an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckInstanceState(self, instance, INSTANCE_ONLINE)
    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type

    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node)
    instance_running = bool(remote_info.payload)

    node_current = instance.primary_node

    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                                            constants.INSTANCE_REBOOT_HARD]:
      # soft/hard reboots are handled by the node daemon directly
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, node_current)
      result = self.rpc.call_instance_reboot(node_current, instance,
                                             reboot_type,
                                             self.op.shutdown_timeout)
      result.Raise("Could not reboot instance")
    else:
      # full reboot: shut the instance down (if running) and start it again
      if instance_running:
        result = self.rpc.call_instance_shutdown(node_current, instance,
                                                 self.op.shutdown_timeout)
        result.Raise("Could not shutdown instance for full reboot")
        _ShutdownInstanceDisks(self, instance)
      else:
        self.LogInfo("Instance %s was already stopped, starting now",
                     instance.name)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(node_current,
                                            (instance, None, None), False)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)

    self.cfg.MarkInstanceUp(instance.name)
class LUInstanceShutdown(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.op.timeout
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)

    self.primary_offline = \
      self.cfg.GetNodeInfo(self.instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")
    else:
      _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    """
    instance = self.instance
    node_current = instance.primary_node
    timeout = self.op.timeout

    if not self.op.no_remember:
      self.cfg.MarkInstanceDown(instance.name)

    if self.primary_offline:
      # nothing to do on an offline primary beyond recording the state
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
      msg = result.fail_msg
      if msg:
        # best-effort: log the failure but still deactivate the disks
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)

      _ShutdownInstanceDisks(self, instance)
class LUInstanceReinstall(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
                     " offline, cannot reinstall")

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")

    if self.op.os_type is not None:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
      instance_os = self.op.os_type
    else:
      instance_os = instance.os

    nodelist = list(instance.all_nodes)

    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = {}

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      # Write to configuration
      self.cfg.Update(inst, feedback_fn)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      result = self.rpc.call_instance_os_add(inst.primary_node,
                                             (inst, self.os_inst), True,
                                             self.op.debug_level)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      # always deactivate the disks, even if the OS install failed
      _ShutdownInstanceDisks(self, inst)
7140 class LUInstanceRecreateDisks(LogicalUnit):
7141 """Recreate an instance's missing disks.
7144 HPATH = "instance-recreate-disks"
7145 HTYPE = constants.HTYPE_INSTANCE
7148 _MODIFYABLE = frozenset([
7149 constants.IDISK_SIZE,
7150 constants.IDISK_MODE,
7153 # New or changed disk parameters may have different semantics
7154 assert constants.IDISK_PARAMS == (_MODIFYABLE | frozenset([
7155 constants.IDISK_ADOPT,
7157 # TODO: Implement support changing VG while recreating
7159 constants.IDISK_METAVG,
7162 def _RunAllocator(self):
7163 """Run the allocator based on input opcode.
7166 be_full = self.cfg.GetClusterInfo().FillBE(self.instance)
7169 # The allocator should actually run in "relocate" mode, but current
7170 # allocators don't support relocating all the nodes of an instance at
7171 # the same time. As a workaround we use "allocate" mode, but this is
7172 # suboptimal for two reasons:
7173 # - The instance name passed to the allocator is present in the list of
7174 # existing instances, so there could be a conflict within the
7175 # internal structures of the allocator. This doesn't happen with the
7176 # current allocators, but it's a liability.
7177 # - The allocator counts the resources used by the instance twice: once
7178 # because the instance exists already, and once because it tries to
7179 # allocate a new instance.
7180 # The allocator could choose some of the nodes on which the instance is
7181 # running, but that's not a problem. If the instance nodes are broken,
7182 # they should be already be marked as drained or offline, and hence
7183 # skipped by the allocator. If instance disks have been lost for other
7184 # reasons, then recreating the disks on the same nodes should be fine.
7185 disk_template = self.instance.disk_template
7186 spindle_use = be_full[constants.BE_SPINDLE_USE]
7187 req = iallocator.IAReqInstanceAlloc(name=self.op.instance_name,
7188 disk_template=disk_template,
7189 tags=list(self.instance.GetTags()),
7190 os=self.instance.os,
7192 vcpus=be_full[constants.BE_VCPUS],
7193 memory=be_full[constants.BE_MAXMEM],
7194 spindle_use=spindle_use,
7195 disks=[{constants.IDISK_SIZE: d.size,
7196 constants.IDISK_MODE: d.mode}
7197 for d in self.instance.disks],
7198 hypervisor=self.instance.hypervisor)
7199 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
7201 ial.Run(self.op.iallocator)
7203 assert req.RequiredNodes() == len(self.instance.all_nodes)
7206 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7207 " %s" % (self.op.iallocator, ial.info),
7210 self.op.nodes = ial.result
7211 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7212 self.op.instance_name, self.op.iallocator,
7213 utils.CommaJoin(ial.result))
7215 def CheckArguments(self):
7216 if self.op.disks and ht.TPositiveInt(self.op.disks[0]):
7217 # Normalize and convert deprecated list of disk indices
7218 self.op.disks = [(idx, {}) for idx in sorted(frozenset(self.op.disks))]
7220 duplicates = utils.FindDuplicates(map(compat.fst, self.op.disks))
7222 raise errors.OpPrereqError("Some disks have been specified more than"
7223 " once: %s" % utils.CommaJoin(duplicates),
7226 # We don't want _CheckIAllocatorOrNode selecting the default iallocator
7227 # when neither iallocator nor nodes are specified
7228 if self.op.iallocator or self.op.nodes:
7229 _CheckIAllocatorOrNode(self, "iallocator", "nodes")
7231 for (idx, params) in self.op.disks:
7232 utils.ForceDictType(params, constants.IDISK_PARAMS_TYPES)
7233 unsupported = frozenset(params.keys()) - self._MODIFYABLE
7235 raise errors.OpPrereqError("Parameters for disk %s try to change"
7236 " unmodifyable parameter(s): %s" %
7237 (idx, utils.CommaJoin(unsupported)),
7240 def ExpandNames(self):
7241 self._ExpandAndLockInstance()
7242 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7244 self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
7245 self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
7247 self.needed_locks[locking.LEVEL_NODE] = []
7248 if self.op.iallocator:
7249 # iallocator will select a new node in the same group
7250 self.needed_locks[locking.LEVEL_NODEGROUP] = []
7251 self.needed_locks[locking.LEVEL_NODE_RES] = []
7253 def DeclareLocks(self, level):
7254 if level == locking.LEVEL_NODEGROUP:
7255 assert self.op.iallocator is not None
7256 assert not self.op.nodes
7257 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
7258 self.share_locks[locking.LEVEL_NODEGROUP] = 1
7259 # Lock the primary group used by the instance optimistically; this
7260 # requires going via the node before it's locked, requiring
7261 # verification later on
7262 self.needed_locks[locking.LEVEL_NODEGROUP] = \
7263 self.cfg.GetInstanceNodeGroups(self.op.instance_name, primary_only=True)
7265 elif level == locking.LEVEL_NODE:
7266 # If an allocator is used, then we lock all the nodes in the current
7267 # instance group, as we don't know yet which ones will be selected;
7268 # if we replace the nodes without using an allocator, locks are
7269 # already declared in ExpandNames; otherwise, we need to lock all the
7270 # instance nodes for disk re-creation
7271 if self.op.iallocator:
7272 assert not self.op.nodes
7273 assert not self.needed_locks[locking.LEVEL_NODE]
7274 assert len(self.owned_locks(locking.LEVEL_NODEGROUP)) == 1
7276 # Lock member nodes of the group of the primary node
7277 for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP):
7278 self.needed_locks[locking.LEVEL_NODE].extend(
7279 self.cfg.GetNodeGroup(group_uuid).members)
7280 elif not self.op.nodes:
7281 self._LockInstancesNodes(primary_only=False)
7282 elif level == locking.LEVEL_NODE_RES:
7284 self.needed_locks[locking.LEVEL_NODE_RES] = \
7285 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7287 def BuildHooksEnv(self):
7290 This runs on master, primary and secondary nodes of the instance.
7293 return _BuildInstanceHookEnvByObject(self, self.instance)
7295 def BuildHooksNodes(self):
7296 """Build hooks nodes.
7299 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
# Prerequisite checks for recreating an instance's disks: the instance must
# exist, replacement node counts must match, the instance must have disks and
# (unless the old primary is offline and nodes are being replaced) must not be
# running. NOTE(review): several original lines are missing from this extract
# (visible gaps in the embedded numbering); conditionals such as the
# if/else around primary_node selection are incomplete here.
7302 def CheckPrereq(self):
7303 """Check prerequisites.
7305 This checks that the instance is in the cluster and is not running.
7308 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7309 assert instance is not None, \
7310 "Cannot retrieve locked instance %s" % self.op.instance_name
# When nodes are given, their count must match the instance's node count.
7312 if len(self.op.nodes) != len(instance.all_nodes):
7313 raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
7314 " %d replacement nodes were specified" %
7315 (instance.name, len(instance.all_nodes),
7316 len(self.op.nodes)),
# DRBD requires exactly two nodes, plain (LVM) exactly one.
7318 assert instance.disk_template != constants.DT_DRBD8 or \
7319 len(self.op.nodes) == 2
7320 assert instance.disk_template != constants.DT_PLAIN or \
7321 len(self.op.nodes) == 1
7322 primary_node = self.op.nodes[0]
7324 primary_node = instance.primary_node
# Online check is skipped when the allocator will pick nodes.
7325 if not self.op.iallocator:
7326 _CheckNodeOnline(self, primary_node)
7328 if instance.disk_template == constants.DT_DISKLESS:
7329 raise errors.OpPrereqError("Instance '%s' has no disks" %
7330 self.op.instance_name, errors.ECODE_INVAL)
7332 # Verify if node group locks are still correct
7333 owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
7335 # Node group locks are acquired only for the primary node (and only
7336 # when the allocator is used)
7337 _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups,
7340 # if we replace nodes *and* the old primary is offline, we don't
7341 # check the instance state
7342 old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
7343 if not ((self.op.iallocator or self.op.nodes) and old_pnode.offline):
7344 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7345 msg="cannot recreate disks")
# self.disks maps disk index -> dict of requested changes; when no disks
# were requested, all indices are selected with empty change dicts.
7348 self.disks = dict(self.op.disks)
7350 self.disks = dict((idx, {}) for idx in range(len(instance.disks)))
7352 maxidx = max(self.disks.keys())
7353 if maxidx >= len(instance.disks):
7354 raise errors.OpPrereqError("Invalid disk index '%s'" % maxidx,
# Partial disk recreation and node replacement are mutually exclusive.
7357 if ((self.op.nodes or self.op.iallocator) and
7358 sorted(self.disks.keys()) != range(len(instance.disks))):
7359 raise errors.OpPrereqError("Can't recreate disks partially and"
7360 " change the nodes at the same time",
7363 self.instance = instance
7365 if self.op.iallocator:
7366 self._RunAllocator()
7367 # Release unneeded node and node resource locks
7368 _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.op.nodes)
7369 _ReleaseLocks(self, locking.LEVEL_NODE_RES, keep=self.op.nodes)
# Recreate the instance's disks: first collect all needed modifications
# (validating via asserts), then apply them in one pass so a failure cannot
# leave partially-applied changes. NOTE(review): this extract has gaps in the
# embedded line numbering — e.g. the computation of to_skip and the
# conditionals guarding some branches are missing here.
7371 def Exec(self, feedback_fn):
7372 """Recreate the disks.
7375 instance = self.instance
# Node locks and node-resource locks must cover the same node set.
7377 assert (self.owned_locks(locking.LEVEL_NODE) ==
7378 self.owned_locks(locking.LEVEL_NODE_RES))
7381 mods = [] # keeps track of needed changes
7383 for idx, disk in enumerate(instance.disks):
7385 changes = self.disks[idx]
7387 # Disk should not be recreated
7391 # update secondaries for disks, if needed
7392 if self.op.nodes and disk.dev_type == constants.LD_DRBD8:
7393 # need to update the nodes and minors
7394 assert len(self.op.nodes) == 2
7395 assert len(disk.logical_id) == 6 # otherwise disk internals
# DRBD logical_id: (nodeA, nodeB, port, minorA, minorB, secret).
# Port and secret are reused; nodes and minors are replaced.
7397 (_, _, old_port, _, _, old_secret) = disk.logical_id
7398 new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
7399 new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
7400 new_minors[0], new_minors[1], old_secret)
7401 assert len(disk.logical_id) == len(new_id)
7405 mods.append((idx, new_id, changes))
7407 # now that we have passed all asserts above, we can apply the mods
7408 # in a single run (to avoid partial changes)
7409 for idx, new_id, changes in mods:
7410 disk = instance.disks[idx]
7411 if new_id is not None:
7412 assert disk.dev_type == constants.LD_DRBD8
7413 disk.logical_id = new_id
# Apply any requested size/mode overrides to the disk object.
7415 disk.Update(size=changes.get(constants.IDISK_SIZE, None),
7416 mode=changes.get(constants.IDISK_MODE, None))
7418 # change primary node, if needed
7420 instance.primary_node = self.op.nodes[0]
7421 self.LogWarning("Changing the instance's nodes, you will have to"
7422 " remove any disks left on the older nodes manually")
7425 self.cfg.Update(instance, feedback_fn)
7427 # All touched nodes must be locked
7428 mylocks = self.owned_locks(locking.LEVEL_NODE)
7429 assert mylocks.issuperset(frozenset(instance.all_nodes))
7430 _CreateDisks(self, instance, to_skip=to_skip)
# Logical unit renaming an existing (stopped) instance, including its
# file-storage directory and OS-level rename script where applicable.
# NOTE(review): lossy extract — gaps in the embedded numbering indicate
# missing lines (docstring closers, some conditionals such as the guard
# before warning on setinfo failures).
7433 class LUInstanceRename(LogicalUnit):
7434 """Rename an instance.
7437 HPATH = "instance-rename"
7438 HTYPE = constants.HTYPE_INSTANCE
7440 def CheckArguments(self):
# The IP-in-use check resolves the new name, so it needs name checking on.
7444 if self.op.ip_check and not self.op.name_check:
7445 # TODO: make the ip check more flexible and not depend on the name check
7446 raise errors.OpPrereqError("IP address check requires a name check",
7449 def BuildHooksEnv(self):
7452 This runs on master, primary and secondary nodes of the instance.
7455 env = _BuildInstanceHookEnvByObject(self, self.instance)
7456 env["INSTANCE_NEW_NAME"] = self.op.new_name
7459 def BuildHooksNodes(self):
7460 """Build hooks nodes.
7463 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7466 def CheckPrereq(self):
7467 """Check prerequisites.
7469 This checks that the instance is in the cluster and is not running.
7472 self.op.instance_name = _ExpandInstanceName(self.cfg,
7473 self.op.instance_name)
7474 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7475 assert instance is not None
7476 _CheckNodeOnline(self, instance.primary_node)
7477 _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
7478 msg="cannot rename")
7479 self.instance = instance
7481 new_name = self.op.new_name
7482 if self.op.name_check:
7483 hostname = _CheckHostnameSane(self, new_name)
7484 new_name = self.op.new_name = hostname.name
# Refuse the rename if the new name's IP already answers on the noded port.
7485 if (self.op.ip_check and
7486 netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
7487 raise errors.OpPrereqError("IP %s of instance %s already in use" %
7488 (hostname.ip, new_name),
7489 errors.ECODE_NOTUNIQUE)
7491 instance_list = self.cfg.GetInstanceList()
7492 if new_name in instance_list and new_name != instance.name:
7493 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7494 new_name, errors.ECODE_EXISTS)
7496 def Exec(self, feedback_fn):
7497 """Rename the instance.
7500 inst = self.instance
7501 old_name = inst.name
# File-based disks embed the instance name in their storage path, so the
# directory must be renamed on the node as well.
7503 rename_file_storage = False
7504 if (inst.disk_template in constants.DTS_FILEBASED and
7505 self.op.new_name != inst.name):
7506 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7507 rename_file_storage = True
7509 self.cfg.RenameInstance(inst.name, self.op.new_name)
7510 # Change the instance lock. This is definitely safe while we hold the BGL.
7511 # Otherwise the new lock would have to be added in acquired mode.
7513 self.glm.remove(locking.LEVEL_INSTANCE, old_name)
7514 self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
7516 # re-read the instance from the configuration after rename
7517 inst = self.cfg.GetInstanceInfo(self.op.new_name)
7519 if rename_file_storage:
7520 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
7521 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
7522 old_file_storage_dir,
7523 new_file_storage_dir)
7524 result.Raise("Could not rename on node %s directory '%s' to '%s'"
7525 " (but the instance has been renamed in Ganeti)" %
7526 (inst.primary_node, old_file_storage_dir,
7527 new_file_storage_dir))
# Disks are started so their metadata ("info" text) can be refreshed with
# the new instance name, then shut down again below.
7529 _StartInstanceDisks(self, inst, None)
7530 # update info on disks
7531 info = _GetInstanceInfoText(inst)
7532 for (idx, disk) in enumerate(inst.disks):
7533 for node in inst.all_nodes:
7534 self.cfg.SetDiskID(disk, node)
7535 result = self.rpc.call_blockdev_setinfo(node, disk, info)
7537 self.LogWarning("Error setting info on node %s for disk %s: %s",
7538 node, idx, result.fail_msg)
7540 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
7541 old_name, self.op.debug_level)
7542 msg = result.fail_msg
# A failing OS rename script is reported as a warning only: the rename has
# already been committed in the Ganeti configuration.
7544 msg = ("Could not run OS rename script for instance %s on node %s"
7545 " (but the instance has been renamed in Ganeti): %s" %
7546 (inst.name, inst.primary_node, msg))
7547 self.proc.LogWarning(msg)
7549 _ShutdownInstanceDisks(self, inst)
# Logical unit removing an instance: shut it down on the primary node, then
# delete disks and configuration entry (via _RemoveInstance).
# NOTE(review): lossy extract — gaps in the embedded numbering indicate
# missing lines (e.g. the branch structure around shutdown failure handling).
7554 class LUInstanceRemove(LogicalUnit):
7555 """Remove an instance.
7558 HPATH = "instance-remove"
7559 HTYPE = constants.HTYPE_INSTANCE
7562 def ExpandNames(self):
7563 self._ExpandAndLockInstance()
# Node locks are computed later (DeclareLocks) from the instance's nodes.
7564 self.needed_locks[locking.LEVEL_NODE] = []
7565 self.needed_locks[locking.LEVEL_NODE_RES] = []
7566 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7568 def DeclareLocks(self, level):
7569 if level == locking.LEVEL_NODE:
7570 self._LockInstancesNodes()
7571 elif level == locking.LEVEL_NODE_RES:
# Node-resource locks mirror the node locks acquired above.
7573 self.needed_locks[locking.LEVEL_NODE_RES] = \
7574 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7576 def BuildHooksEnv(self):
7579 This runs on master, primary and secondary nodes of the instance.
7582 env = _BuildInstanceHookEnvByObject(self, self.instance)
7583 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
7586 def BuildHooksNodes(self):
7587 """Build hooks nodes.
# Pre-hooks run on the master only; post-hooks also on the (now removed)
# instance's nodes.
7590 nl = [self.cfg.GetMasterNode()]
7591 nl_post = list(self.instance.all_nodes) + nl
7592 return (nl, nl_post)
7594 def CheckPrereq(self):
7595 """Check prerequisites.
7597 This checks that the instance is in the cluster.
7600 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7601 assert self.instance is not None, \
7602 "Cannot retrieve locked instance %s" % self.op.instance_name
7604 def Exec(self, feedback_fn):
7605 """Remove the instance.
7608 instance = self.instance
7609 logging.info("Shutting down instance %s on node %s",
7610 instance.name, instance.primary_node)
7612 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
7613 self.op.shutdown_timeout)
7614 msg = result.fail_msg
# Shutdown failure is fatal unless ignore_failures was requested.
7616 if self.op.ignore_failures:
7617 feedback_fn("Warning: can't shutdown instance: %s" % msg)
7619 raise errors.OpExecError("Could not shutdown instance %s on"
7621 (instance.name, instance.primary_node, msg))
7623 assert (self.owned_locks(locking.LEVEL_NODE) ==
7624 self.owned_locks(locking.LEVEL_NODE_RES))
7625 assert not (set(instance.all_nodes) -
7626 self.owned_locks(locking.LEVEL_NODE)), \
7627 "Not owning correct locks"
7629 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
# Shared helper: remove an instance's disks and its configuration entry,
# then schedule removal of its lock. Used by instance removal (and,
# presumably, other LUs — confirm against the full file).
7632 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
7633 """Utility function to remove an instance.
7636 logging.info("Removing block devices for instance %s", instance.name)
7638 if not _RemoveDisks(lu, instance, ignore_failures=ignore_failures):
7639 if not ignore_failures:
7640 raise errors.OpExecError("Can't remove instance's disks")
7641 feedback_fn("Warning: can't remove instance's disks")
7643 logging.info("Removing instance %s out of cluster config", instance.name)
7645 lu.cfg.RemoveInstance(instance.name)
7647 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
7648 "Instance lock removal conflict"
7650 # Remove lock for the instance
7651 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
# Thin logical unit delegating instance queries to an _InstanceQuery helper.
7654 class LUInstanceQuery(NoHooksLU):
7655 """Logical unit for querying instances.
7658 # pylint: disable=W0142
7661 def CheckArguments(self):
# The query object encapsulates filtering, field selection and locking.
7662 self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
7663 self.op.output_fields, self.op.use_locking)
7665 def ExpandNames(self):
7666 self.iq.ExpandNames(self)
7668 def DeclareLocks(self, level):
7669 self.iq.DeclareLocks(self, level)
7671 def Exec(self, feedback_fn):
7672 return self.iq.OldStyleQuery(self)
# Logical unit failing over an instance (shutdown + restart on another node);
# the heavy lifting is delegated to a TLMigrateInstance tasklet created with
# failover semantics. NOTE(review): lossy extract — gaps in the numbering
# indicate missing lines (e.g. the env dict opener in BuildHooksEnv and some
# keyword arguments in the tasklet construction).
7675 class LUInstanceFailover(LogicalUnit):
7676 """Failover an instance.
7679 HPATH = "instance-failover"
7680 HTYPE = constants.HTYPE_INSTANCE
7683 def CheckArguments(self):
7684 """Check the arguments.
# getattr with default: these attributes may be absent on older opcodes.
7687 self.iallocator = getattr(self.op, "iallocator", None)
7688 self.target_node = getattr(self.op, "target_node", None)
7690 def ExpandNames(self):
7691 self._ExpandAndLockInstance()
7693 if self.op.target_node is not None:
7694 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7696 self.needed_locks[locking.LEVEL_NODE] = []
7697 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7699 self.needed_locks[locking.LEVEL_NODE_RES] = []
7700 self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
7702 ignore_consistency = self.op.ignore_consistency
7703 shutdown_timeout = self.op.shutdown_timeout
7704 self._migrater = TLMigrateInstance(self, self.op.instance_name,
7707 ignore_consistency=ignore_consistency,
7708 shutdown_timeout=shutdown_timeout,
7709 ignore_ipolicy=self.op.ignore_ipolicy)
7710 self.tasklets = [self._migrater]
7712 def DeclareLocks(self, level):
7713 if level == locking.LEVEL_NODE:
7714 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
# Externally-mirrored templates can move anywhere: lock all nodes when no
# target was given, otherwise just primary + target.
7715 if instance.disk_template in constants.DTS_EXT_MIRROR:
7716 if self.op.target_node is None:
7717 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7719 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7720 self.op.target_node]
7721 del self.recalculate_locks[locking.LEVEL_NODE]
7723 self._LockInstancesNodes()
7724 elif level == locking.LEVEL_NODE_RES:
7726 self.needed_locks[locking.LEVEL_NODE_RES] = \
7727 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7729 def BuildHooksEnv(self):
7732 This runs on master, primary and secondary nodes of the instance.
7735 instance = self._migrater.instance
7736 source_node = instance.primary_node
7737 target_node = self.op.target_node
7739 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
7740 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7741 "OLD_PRIMARY": source_node,
7742 "NEW_PRIMARY": target_node,
# Internally-mirrored (DRBD) instances swap primary/secondary roles.
7745 if instance.disk_template in constants.DTS_INT_MIRROR:
7746 env["OLD_SECONDARY"] = instance.secondary_nodes[0]
7747 env["NEW_SECONDARY"] = source_node
7749 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
7751 env.update(_BuildInstanceHookEnvByObject(self, instance))
7755 def BuildHooksNodes(self):
7756 """Build hooks nodes.
7759 instance = self._migrater.instance
7760 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7761 return (nl, nl + [instance.primary_node])
# Logical unit live-migrating an instance (no shutdown, unlike failover);
# delegates to a TLMigrateInstance tasklet. NOTE(review): lossy extract —
# gaps in the embedded numbering indicate missing lines (e.g. the assignment
# opener for self._migrater and the env dict opener).
7764 class LUInstanceMigrate(LogicalUnit):
7765 """Migrate an instance.
7767 This is migration without shutting down, compared to the failover,
7768 which is done with shutdown.
7771 HPATH = "instance-migrate"
7772 HTYPE = constants.HTYPE_INSTANCE
7775 def ExpandNames(self):
7776 self._ExpandAndLockInstance()
7778 if self.op.target_node is not None:
7779 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7781 self.needed_locks[locking.LEVEL_NODE] = []
7782 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7784 self.needed_locks[locking.LEVEL_NODE] = []
7785 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7788 TLMigrateInstance(self, self.op.instance_name,
7789 cleanup=self.op.cleanup,
7791 fallback=self.op.allow_failover,
7792 allow_runtime_changes=self.op.allow_runtime_changes,
7793 ignore_ipolicy=self.op.ignore_ipolicy)
7794 self.tasklets = [self._migrater]
7796 def DeclareLocks(self, level):
7797 if level == locking.LEVEL_NODE:
7798 instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
# Same locking strategy as failover: all nodes for external mirrors
# without a target, otherwise primary + target only.
7799 if instance.disk_template in constants.DTS_EXT_MIRROR:
7800 if self.op.target_node is None:
7801 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7803 self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7804 self.op.target_node]
7805 del self.recalculate_locks[locking.LEVEL_NODE]
7807 self._LockInstancesNodes()
7808 elif level == locking.LEVEL_NODE_RES:
7810 self.needed_locks[locking.LEVEL_NODE_RES] = \
7811 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7813 def BuildHooksEnv(self):
7816 This runs on master, primary and secondary nodes of the instance.
7819 instance = self._migrater.instance
7820 source_node = instance.primary_node
7821 target_node = self.op.target_node
7822 env = _BuildInstanceHookEnvByObject(self, instance)
7824 "MIGRATE_LIVE": self._migrater.live,
7825 "MIGRATE_CLEANUP": self.op.cleanup,
7826 "OLD_PRIMARY": source_node,
7827 "NEW_PRIMARY": target_node,
7828 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
7831 if instance.disk_template in constants.DTS_INT_MIRROR:
7832 env["OLD_SECONDARY"] = target_node
7833 env["NEW_SECONDARY"] = source_node
7835 env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7839 def BuildHooksNodes(self):
7840 """Build hooks nodes.
7843 instance = self._migrater.instance
7844 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7845 return (nl, nl + [instance.primary_node])
# Logical unit moving an instance by copying its disk data to another node:
# shut down on the source, create disks on the target, copy each disk,
# update config, remove source disks, optionally restart.
# NOTE(review): lossy extract — gaps in the embedded numbering indicate
# missing lines (docstring closers, some guards like "if result.fail_msg:",
# try/except framing around disk creation, and the errs list initializer).
7848 class LUInstanceMove(LogicalUnit):
7849 """Move an instance by data-copying.
7852 HPATH = "instance-move"
7853 HTYPE = constants.HTYPE_INSTANCE
7856 def ExpandNames(self):
7857 self._ExpandAndLockInstance()
7858 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7859 self.op.target_node = target_node
7860 self.needed_locks[locking.LEVEL_NODE] = [target_node]
7861 self.needed_locks[locking.LEVEL_NODE_RES] = []
7862 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7864 def DeclareLocks(self, level):
7865 if level == locking.LEVEL_NODE:
# Only the primary node is needed besides the already-locked target.
7866 self._LockInstancesNodes(primary_only=True)
7867 elif level == locking.LEVEL_NODE_RES:
7869 self.needed_locks[locking.LEVEL_NODE_RES] = \
7870 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
7872 def BuildHooksEnv(self):
7875 This runs on master, primary and secondary nodes of the instance.
7879 "TARGET_NODE": self.op.target_node,
7880 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7882 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7885 def BuildHooksNodes(self):
7886 """Build hooks nodes.
7890 self.cfg.GetMasterNode(),
7891 self.instance.primary_node,
7892 self.op.target_node,
7896 def CheckPrereq(self):
7897 """Check prerequisites.
7899 This checks that the instance is in the cluster.
7902 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7903 assert self.instance is not None, \
7904 "Cannot retrieve locked instance %s" % self.op.instance_name
7906 node = self.cfg.GetNodeInfo(self.op.target_node)
7907 assert node is not None, \
7908 "Cannot retrieve locked node %s" % self.op.target_node
7910 self.target_node = target_node = node.name
7912 if target_node == instance.primary_node:
7913 raise errors.OpPrereqError("Instance %s is already on the node %s" %
7914 (instance.name, target_node),
7917 bep = self.cfg.GetClusterInfo().FillBE(instance)
# Moving only works for simple disk layouts (LVM or file-based).
7919 for idx, dsk in enumerate(instance.disks):
7920 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7921 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7922 " cannot copy" % idx, errors.ECODE_STATE)
7924 _CheckNodeOnline(self, target_node)
7925 _CheckNodeNotDrained(self, target_node)
7926 _CheckNodeVmCapable(self, target_node)
7927 cluster = self.cfg.GetClusterInfo()
7928 group_info = self.cfg.GetNodeGroup(node.group)
7929 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
7930 _CheckTargetNodeIPolicy(self, ipolicy, instance, node,
7931 ignore=self.op.ignore_ipolicy)
7933 if instance.admin_state == constants.ADMINST_UP:
7934 # check memory requirements on the secondary node
7935 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7936 instance.name, bep[constants.BE_MAXMEM],
7937 instance.hypervisor)
7939 self.LogInfo("Not checking memory on the secondary node as"
7940 " instance will not be started")
7942 # check bridge existance
7943 _CheckInstanceBridgesExist(self, instance, node=target_node)
7945 def Exec(self, feedback_fn):
7946 """Move an instance.
7948 The move is done by shutting it down on its present node, copying
7949 the data over (slow) and starting it on the new node.
7952 instance = self.instance
7954 source_node = instance.primary_node
7955 target_node = self.target_node
7957 self.LogInfo("Shutting down instance %s on source node %s",
7958 instance.name, source_node)
7960 assert (self.owned_locks(locking.LEVEL_NODE) ==
7961 self.owned_locks(locking.LEVEL_NODE_RES))
7963 result = self.rpc.call_instance_shutdown(source_node, instance,
7964 self.op.shutdown_timeout)
7965 msg = result.fail_msg
7967 if self.op.ignore_consistency:
7968 self.proc.LogWarning("Could not shutdown instance %s on node %s."
7969 " Proceeding anyway. Please make sure node"
7970 " %s is down. Error details: %s",
7971 instance.name, source_node, source_node, msg)
7973 raise errors.OpExecError("Could not shutdown instance %s on"
7975 (instance.name, source_node, msg))
7977 # create the target disks
7979 _CreateDisks(self, instance, target_node=target_node)
# On creation failure, roll back: delete target disks and release any
# DRBD minors reserved for this instance.
7980 except errors.OpExecError:
7981 self.LogWarning("Device creation failed, reverting...")
7983 _RemoveDisks(self, instance, target_node=target_node)
7985 self.cfg.ReleaseDRBDMinors(instance.name)
7988 cluster_name = self.cfg.GetClusterInfo().cluster_name
7991 # activate, get path, copy the data over
7992 for idx, disk in enumerate(instance.disks):
7993 self.LogInfo("Copying data for disk %d", idx)
7994 result = self.rpc.call_blockdev_assemble(target_node, (disk, instance),
7995 instance.name, True, idx)
7997 self.LogWarning("Can't assemble newly created disk %d: %s",
7998 idx, result.fail_msg)
7999 errs.append(result.fail_msg)
8001 dev_path = result.payload
8002 result = self.rpc.call_blockdev_export(source_node, (disk, instance),
8003 target_node, dev_path,
8006 self.LogWarning("Can't copy data over for disk %d: %s",
8007 idx, result.fail_msg)
8008 errs.append(result.fail_msg)
# Any copy error aborts the move: target disks are removed and the DRBD
# minor reservations released before raising.
8012 self.LogWarning("Some disks failed to copy, aborting")
8014 _RemoveDisks(self, instance, target_node=target_node)
8016 self.cfg.ReleaseDRBDMinors(instance.name)
8017 raise errors.OpExecError("Errors during disk copy: %s" %
8020 instance.primary_node = target_node
8021 self.cfg.Update(instance, feedback_fn)
8023 self.LogInfo("Removing the disks on the original node")
8024 _RemoveDisks(self, instance, target_node=source_node)
8026 # Only start the instance if it's marked as up
8027 if instance.admin_state == constants.ADMINST_UP:
8028 self.LogInfo("Starting instance %s on node %s",
8029 instance.name, target_node)
8031 disks_ok, _ = _AssembleInstanceDisks(self, instance,
8032 ignore_secondaries=True)
8034 _ShutdownInstanceDisks(self, instance)
8035 raise errors.OpExecError("Can't activate the instance's disks")
8037 result = self.rpc.call_instance_start(target_node,
8038 (instance, None, None), False)
8039 msg = result.fail_msg
8041 _ShutdownInstanceDisks(self, instance)
8042 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
8043 (instance.name, target_node, msg))
# Logical unit migrating all primary instances off a node by submitting one
# OpInstanceMigrate job per instance; returns the submitted jobs.
# NOTE(review): lossy extract — dict/list openers and some method bodies
# (CheckArguments, CheckPrereq) are missing here per the numbering gaps.
8046 class LUNodeMigrate(LogicalUnit):
8047 """Migrate all instances from a node.
8050 HPATH = "node-migrate"
8051 HTYPE = constants.HTYPE_NODE
8054 def CheckArguments(self):
8057 def ExpandNames(self):
8058 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
# All locks are shared: the real work happens in the per-instance jobs.
8060 self.share_locks = _ShareAll()
8061 self.needed_locks = {
8062 locking.LEVEL_NODE: [self.op.node_name],
8065 def BuildHooksEnv(self):
8068 This runs on the master, the primary and all the secondaries.
8072 "NODE_NAME": self.op.node_name,
8073 "ALLOW_RUNTIME_CHANGES": self.op.allow_runtime_changes,
8076 def BuildHooksNodes(self):
8077 """Build hooks nodes.
8080 nl = [self.cfg.GetMasterNode()]
8083 def CheckPrereq(self):
8086 def Exec(self, feedback_fn):
8087 # Prepare jobs for migration instances
8088 allow_runtime_changes = self.op.allow_runtime_changes
# One single-opcode job per primary instance on the node.
8090 [opcodes.OpInstanceMigrate(instance_name=inst.name,
8093 iallocator=self.op.iallocator,
8094 target_node=self.op.target_node,
8095 allow_runtime_changes=allow_runtime_changes,
8096 ignore_ipolicy=self.op.ignore_ipolicy)]
8097 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
8100 # TODO: Run iallocator in this opcode and pass correct placement options to
8101 # OpInstanceMigrate. Since other jobs can modify the cluster between
8102 # running the iallocator and the actual migration, a good consistency model
8103 # will have to be found.
8105 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
8106 frozenset([self.op.node_name]))
8108 return ResultWithJobs(jobs)
8111 class TLMigrateInstance(Tasklet):
8112 """Tasklet class for instance migration.
8115 @ivar live: whether the migration will be done live or non-live;
8116 this variable is initalized only after CheckPrereq has run
8117 @type cleanup: boolean
8118 @ivar cleanup: Wheater we cleanup from a failed migration
8119 @type iallocator: string
8120 @ivar iallocator: The iallocator used to determine target_node
8121 @type target_node: string
8122 @ivar target_node: If given, the target_node to reallocate the instance to
8123 @type failover: boolean
8124 @ivar failover: Whether operation results in failover or migration
8125 @type fallback: boolean
8126 @ivar fallback: Whether fallback to failover is allowed if migration not
8128 @type ignore_consistency: boolean
8129 @ivar ignore_consistency: Wheter we should ignore consistency between source
8131 @type shutdown_timeout: int
8132 @ivar shutdown_timeout: In case of failover timeout of the shutdown
8133 @type ignore_ipolicy: bool
8134 @ivar ignore_ipolicy: If true, we can ignore instance policy when migrating
8139 _MIGRATION_POLL_INTERVAL = 1 # seconds
8140 _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
# Store the migration/failover parameters on the tasklet; no validation is
# done here (that happens in CheckPrereq).
8142 def __init__(self, lu, instance_name, cleanup=False,
8143 failover=False, fallback=False,
8144 ignore_consistency=False,
8145 allow_runtime_changes=True,
8146 shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
8147 ignore_ipolicy=False):
8148 """Initializes this class.
8151 Tasklet.__init__(self, lu)
8154 self.instance_name = instance_name
8155 self.cleanup = cleanup
8156 self.live = False # will be overridden later
8157 self.failover = failover
8158 self.fallback = fallback
8159 self.ignore_consistency = ignore_consistency
8160 self.shutdown_timeout = shutdown_timeout
8161 self.ignore_ipolicy = ignore_ipolicy
8162 self.allow_runtime_changes = allow_runtime_changes
# Prerequisite checks for migration/failover: resolve target node (directly,
# via iallocator, or from the DRBD secondary), enforce instance policy,
# memory and drain checks, and decide live vs non-live mode and whether to
# fall back from migration to failover.
# NOTE(review): lossy extract — if/else framing around several branches
# (e.g. DTS_EXT_MIRROR vs internal-mirror paths, text selection for error
# messages) is missing per the numbering gaps.
8164 def CheckPrereq(self):
8165 """Check prerequisites.
8167 This checks that the instance is in the cluster.
8170 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
8171 instance = self.cfg.GetInstanceInfo(instance_name)
8172 assert instance is not None
8173 self.instance = instance
8174 cluster = self.cfg.GetClusterInfo()
# A down instance cannot be live-migrated; switch to failover if allowed.
8176 if (not self.cleanup and
8177 not instance.admin_state == constants.ADMINST_UP and
8178 not self.failover and self.fallback):
8179 self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
8180 " switching to failover")
8181 self.failover = True
8183 if instance.disk_template not in constants.DTS_MIRRORED:
8188 raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
8189 " %s" % (instance.disk_template, text),
# Externally-mirrored templates: target comes from op or iallocator.
8192 if instance.disk_template in constants.DTS_EXT_MIRROR:
8193 _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
8195 if self.lu.op.iallocator:
8196 self._RunAllocator()
8198 # We set set self.target_node as it is required by
8200 self.target_node = self.lu.op.target_node
8202 # Check that the target node is correct in terms of instance policy
8203 nodeinfo = self.cfg.GetNodeInfo(self.target_node)
8204 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8205 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8207 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8208 ignore=self.ignore_ipolicy)
8210 # self.target_node is already populated, either directly or by the
8212 target_node = self.target_node
8213 if self.target_node == instance.primary_node:
8214 raise errors.OpPrereqError("Cannot migrate instance %s"
8215 " to its primary (%s)" %
8216 (instance.name, instance.primary_node),
8219 if len(self.lu.tasklets) == 1:
8220 # It is safe to release locks only when we're the only tasklet
8222 _ReleaseLocks(self.lu, locking.LEVEL_NODE,
8223 keep=[instance.primary_node, self.target_node])
# Internally-mirrored (DRBD) path: the target is the fixed secondary.
8226 secondary_nodes = instance.secondary_nodes
8227 if not secondary_nodes:
8228 raise errors.ConfigurationError("No secondary node but using"
8229 " %s disk template" %
8230 instance.disk_template)
8231 target_node = secondary_nodes[0]
8232 if self.lu.op.iallocator or (self.lu.op.target_node and
8233 self.lu.op.target_node != target_node):
8235 text = "failed over"
8238 raise errors.OpPrereqError("Instances with disk template %s cannot"
8239 " be %s to arbitrary nodes"
8240 " (neither an iallocator nor a target"
8241 " node can be passed)" %
8242 (instance.disk_template, text),
8244 nodeinfo = self.cfg.GetNodeInfo(target_node)
8245 group_info = self.cfg.GetNodeGroup(nodeinfo.group)
8246 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
8248 _CheckTargetNodeIPolicy(self.lu, ipolicy, instance, nodeinfo,
8249 ignore=self.ignore_ipolicy)
8251 i_be = cluster.FillBE(instance)
8253 # check memory requirements on the secondary node
8254 if (not self.cleanup and
8255 (not self.failover or instance.admin_state == constants.ADMINST_UP)):
8256 self.tgt_free_mem = _CheckNodeFreeMemory(self.lu, target_node,
8257 "migrating instance %s" %
8259 i_be[constants.BE_MINMEM],
8260 instance.hypervisor)
8262 self.lu.LogInfo("Not checking memory on the secondary node as"
8263 " instance will not be started")
8265 # check if failover must be forced instead of migration
8266 if (not self.cleanup and not self.failover and
8267 i_be[constants.BE_ALWAYS_FAILOVER]):
8268 self.lu.LogInfo("Instance configured to always failover; fallback"
8270 self.failover = True
8272 # check bridge existance
8273 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
8275 if not self.cleanup:
8276 _CheckNodeNotDrained(self.lu, target_node)
8277 if not self.failover:
8278 result = self.rpc.call_instance_migratable(instance.primary_node,
# If the hypervisor says migration is impossible, either fall back to
# failover (when allowed) or fail the prerequisite check.
8280 if result.fail_msg and self.fallback:
8281 self.lu.LogInfo("Can't migrate, instance offline, fallback to"
8283 self.failover = True
8285 result.Raise("Can't migrate, please use failover",
8286 prereq=True, ecode=errors.ECODE_STATE)
8288 assert not (self.failover and self.cleanup)
# Resolve the migration mode: 'live' and 'mode' are mutually exclusive;
# with neither given, the hypervisor's default migration mode is used.
8290 if not self.failover:
8291 if self.lu.op.live is not None and self.lu.op.mode is not None:
8292 raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
8293 " parameters are accepted",
8295 if self.lu.op.live is not None:
8297 self.lu.op.mode = constants.HT_MIGRATION_LIVE
8299 self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
8300 # reset the 'live' parameter to None so that repeated
8301 # invocations of CheckPrereq do not raise an exception
8302 self.lu.op.live = None
8303 elif self.lu.op.mode is None:
8304 # read the default value from the hypervisor
8305 i_hv = cluster.FillHV(self.instance, skip_globals=False)
8306 self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
8308 self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
8310 # Failover is never live
8313 if not (self.failover or self.cleanup):
8314 remote_info = self.rpc.call_instance_info(instance.primary_node,
8316 instance.hypervisor)
8317 remote_info.Raise("Error checking instance on node %s" %
8318 instance.primary_node)
8319 instance_running = bool(remote_info.payload)
8320 if instance_running:
8321 self.current_mem = int(remote_info.payload["memory"])
# Ask the configured iallocator to pick a target node for relocating the
# instance away from its current primary; stores the first result node.
8323 def _RunAllocator(self):
8324 """Run the allocator based on input opcode.
8327 # FIXME: add a self.ignore_ipolicy option
8328 req = iallocator.IAReqRelocate(name=self.instance_name,
8329 relocate_from=[self.instance.primary_node])
8330 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
8332 ial.Run(self.lu.op.iallocator)
# NOTE(review): the success-flag check line is missing from this extract;
# presumably "if not ial.success:" guards the raise below — confirm.
8335 raise errors.OpPrereqError("Can't compute nodes using"
8336 " iallocator '%s': %s" %
8337 (self.lu.op.iallocator, ial.info),
8339 self.target_node = ial.result[0]
8340 self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8341 self.instance_name, self.lu.op.iallocator,
8342 utils.CommaJoin(ial.result))
# Poll all involved nodes until DRBD resync is complete, reporting the
# minimum progress percentage across nodes as feedback.
# NOTE(review): the surrounding loop construct and the all_done/min_percent
# initializers are missing from this extract per the numbering gaps.
8344 def _WaitUntilSync(self):
8345 """Poll with custom rpc for disk sync.
8347 This uses our own step-based rpc call.
8350 self.feedback_fn("* wait until resync is done")
8354 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
8356 (self.instance.disks,
8359 for node, nres in result.items():
8360 nres.Raise("Cannot resync disks on node %s" % node)
8361 node_done, node_percent = nres.payload
8362 all_done = all_done and node_done
8363 if node_percent is not None:
8364 min_percent = min(min_percent, node_percent)
8366 if min_percent < 100:
8367 self.feedback_fn(" - progress: %.1f%%" % min_percent)
# Demote the given node to DRBD secondary by closing all of the instance's
# block devices on it.
8370 def _EnsureSecondary(self, node):
8371 """Demote a node to secondary.
8374 self.feedback_fn("* switching node %s to secondary mode" % node)
# SetDiskID fills in the node-specific physical id before the RPC.
8376 for dev in self.instance.disks:
8377 self.cfg.SetDiskID(dev, node)
8379 result = self.rpc.call_blockdev_close(node, self.instance.name,
8380 self.instance.disks)
8381 result.Raise("Cannot change disk to secondary on node %s" % node)
# Disconnect the instance's DRBD devices from the network on all involved
# nodes (standalone mode), raising on any per-node failure.
8383 def _GoStandalone(self):
8384 """Disconnect from the network.
8387 self.feedback_fn("* changing into standalone mode")
8388 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
8389 self.instance.disks)
8390 for node, nres in result.items():
8391 nres.Raise("Cannot disconnect disks node %s" % node)
# Reconnect the instance's DRBD devices to the network on all involved
# nodes, in multi-master (dual-primary, used during live migration) or
# single-master mode. NOTE(review): the branch assigning msg for the
# multi-master case is missing from this extract per the numbering gap.
8393 def _GoReconnect(self, multimaster):
8394 """Reconnect to the network.
8400 msg = "single-master"
8401 self.feedback_fn("* changing disks into %s mode" % msg)
8402 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
8403 (self.instance.disks, self.instance),
8404 self.instance.name, multimaster)
8405 for node, nres in result.items():
8406 nres.Raise("Cannot change disks config on node %s" % node)
def _ExecCleanup(self):
  """Try to cleanup after a failed migration.

  The cleanup is done by:
    - check that the instance is running only on one node
      (and update the config if needed)
    - change disks on its secondary node to secondary
    - wait until disks are fully synchronized
    - disconnect from the network
    - change disks into single-master mode
    - wait again until disks are fully synchronized

  """
  instance = self.instance
  target_node = self.target_node
  source_node = self.source_node

  # check running on only one node
  self.feedback_fn("* checking where the instance actually runs"
                   " (if this hangs, the hypervisor might be in"
  ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
  for node, result in ins_l.items():
    result.Raise("Can't contact node %s" % node)

  runningon_source = instance.name in ins_l[source_node].payload
  runningon_target = instance.name in ins_l[target_node].payload

  if runningon_source and runningon_target:
    raise errors.OpExecError("Instance seems to be running on two nodes,"
                             " or the hypervisor is confused; you will have"
                             " to ensure manually that it runs only on one"
                             " and restart this operation")

  if not (runningon_source or runningon_target):
    raise errors.OpExecError("Instance does not seem to be running at all;"
                             " in this case it's safer to repair by"
                             " running 'gnt-instance stop' to ensure disk"
                             " shutdown, and then restarting it")

  if runningon_target:
    # the migration has actually succeeded, we need to update the config
    self.feedback_fn("* instance running on secondary node (%s),"
                     " updating config" % target_node)
    instance.primary_node = target_node
    self.cfg.Update(instance, self.feedback_fn)
    demoted_node = source_node
    # NOTE(review): the "else:" introducing this alternative branch is
    # elided from this excerpt.
    self.feedback_fn("* instance confirmed to be running on its"
                     " primary node (%s)" % source_node)
    demoted_node = target_node

  if instance.disk_template in constants.DTS_INT_MIRROR:
    self._EnsureSecondary(demoted_node)
    # NOTE(review): the "try:" that wraps this call is elided here.
    self._WaitUntilSync()
  except errors.OpExecError:
    # we ignore here errors, since if the device is standalone, it
    # won't be able to sync
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

  self.feedback_fn("* done")
def _RevertDiskStatus(self):
  """Try to revert the disk status after a failed migration.

  """
  target_node = self.target_node
  if self.instance.disk_template in constants.DTS_EXT_MIRROR:
    # NOTE(review): the body of this branch (an early return for
    # externally-mirrored templates) and the "try:" matching the
    # handler below are elided from this excerpt.

    self._EnsureSecondary(target_node)
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()
  except errors.OpExecError, err:
    # best-effort: recovery failed, leave manual instructions for the admin
    self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
                       " please try to recover the instance manually;"
                       " error '%s'" % str(err))
def _AbortMigration(self):
  """Call the hypervisor code to abort a started migration.

  """
  # NOTE(review): the "if abort_msg:" guards and the trailing arguments
  # of the first finalize call are elided from this excerpt.
  instance = self.instance
  target_node = self.target_node
  source_node = self.source_node
  migration_info = self.migration_info

  # first, tell the target node to abort/finalize its side
  abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
  abort_msg = abort_result.fail_msg
    logging.error("Aborting migration failed on target node %s: %s",
                  target_node, abort_msg)
    # Don't raise an exception here, as we still have to try to revert the
    # disk status, even if this step failed.

  # then finalize on the source side (success=False means aborted)
  abort_result = self.rpc.call_instance_finalize_migration_src(
    source_node, instance, False, self.live)
  abort_msg = abort_result.fail_msg
    logging.error("Aborting migration failed on source node %s: %s",
                  source_node, abort_msg)
def _ExecMigration(self):
  """Migrate an instance.

  The migrate is done by:
    - change the disks into dual-master mode
    - wait until disks are fully synchronized again
    - migrate the instance
    - change disks on the new secondary node (the old primary) to secondary
    - wait until disks are fully synchronized
    - change disks into single-master mode

  """
  # NOTE(review): several guard lines (typically "if msg:"), the memory
  # transfer polling loop header and a number of call-argument
  # continuation lines are elided from this excerpt; the indentation
  # below follows the apparent structure.
  instance = self.instance
  target_node = self.target_node
  source_node = self.source_node

  # Check for hypervisor version mismatch and warn the user.
  nodeinfo = self.rpc.call_node_info([source_node, target_node],
                                     None, [self.instance.hypervisor])
  for ninfo in nodeinfo.values():
    ninfo.Raise("Unable to retrieve node information from node '%s'" %
  (_, _, (src_info, )) = nodeinfo[source_node].payload
  (_, _, (dst_info, )) = nodeinfo[target_node].payload

  if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
      (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
    src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
    dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
    if src_version != dst_version:
      self.feedback_fn("* warning: hypervisor version mismatch between"
                       " source (%s) and target (%s) node" %
                       (src_version, dst_version))

  self.feedback_fn("* checking disk consistency between source and target")
  for (idx, dev) in enumerate(instance.disks):
    if not _CheckDiskConsistency(self.lu, instance, dev, target_node, False):
      raise errors.OpExecError("Disk %s is degraded or not fully"
                               " synchronized on target node,"
                               " aborting migration" % idx)

  if self.current_mem > self.tgt_free_mem:
    if not self.allow_runtime_changes:
      raise errors.OpExecError("Memory ballooning not allowed and not enough"
                               " free memory to fit instance %s on target"
                               " node %s (have %dMB, need %dMB)" %
                               (instance.name, target_node,
                                self.tgt_free_mem, self.current_mem))
    # shrink the instance's memory so it fits on the target node
    self.feedback_fn("* setting instance memory to %s" % self.tgt_free_mem)
    rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
    rpcres.Raise("Cannot modify instance runtime memory")

  # First get the migration information from the remote node
  result = self.rpc.call_migration_info(source_node, instance)
  msg = result.fail_msg
    log_err = ("Failed fetching source migration information from %s: %s" %
    logging.error(log_err)
    raise errors.OpExecError(log_err)

  self.migration_info = migration_info = result.payload

  if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
    # Then switch the disks to master/master mode
    self._EnsureSecondary(target_node)
    self._GoStandalone()
    self._GoReconnect(True)
    self._WaitUntilSync()

  self.feedback_fn("* preparing %s to accept the instance" % target_node)
  result = self.rpc.call_accept_instance(target_node,
                                         self.nodes_ip[target_node])
  msg = result.fail_msg
    logging.error("Instance pre-migration failed, trying to revert"
                  " disk status: %s", msg)
    self.feedback_fn("Pre-migration failed, aborting")
    self._AbortMigration()
    self._RevertDiskStatus()
    raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                             (instance.name, msg))

  self.feedback_fn("* migrating instance to %s" % target_node)
  result = self.rpc.call_instance_migrate(source_node, instance,
                                          self.nodes_ip[target_node],
  msg = result.fail_msg
    logging.error("Instance migration failed, trying to revert"
                  " disk status: %s", msg)
    self.feedback_fn("Migration failed, aborting")
    self._AbortMigration()
    self._RevertDiskStatus()
    raise errors.OpExecError("Could not migrate instance %s: %s" %
                             (instance.name, msg))

  self.feedback_fn("* starting memory transfer")
  last_feedback = time.time()
    # polling loop: query migration status until it leaves ACTIVE state
    result = self.rpc.call_instance_get_migration_status(source_node,
    msg = result.fail_msg
    ms = result.payload # MigrationStatus instance
    if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (instance.name, msg))

    if result.payload.status != constants.HV_MIGRATION_ACTIVE:
      self.feedback_fn("* memory transfer complete")

    # rate-limited progress feedback
    if (utils.TimeoutExpired(last_feedback,
                             self._MIGRATION_FEEDBACK_INTERVAL) and
        ms.transferred_ram is not None):
      mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
      self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
      last_feedback = time.time()

    time.sleep(self._MIGRATION_POLL_INTERVAL)

  result = self.rpc.call_instance_finalize_migration_src(source_node,
  msg = result.fail_msg
    logging.error("Instance migration succeeded, but finalization failed"
                  " on the source node: %s", msg)
    raise errors.OpExecError("Could not finalize instance migration: %s" %

  instance.primary_node = target_node

  # distribute new instance config to the other nodes
  self.cfg.Update(instance, self.feedback_fn)

  result = self.rpc.call_instance_finalize_migration_dst(target_node,
  msg = result.fail_msg
    logging.error("Instance migration succeeded, but finalization failed"
                  " on the target node: %s", msg)
    raise errors.OpExecError("Could not finalize instance migration: %s" %

  if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
    # demote the old primary and go back to single-master mode
    self._EnsureSecondary(source_node)
    self._WaitUntilSync()
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

  # If the instance's disk template is `rbd' and there was a successful
  # migration, unmap the device from the source node.
  if self.instance.disk_template == constants.DT_RBD:
    disks = _ExpandCheckDisks(instance, instance.disks)
    self.feedback_fn("* unmapping instance's disks from %s" % source_node)
      result = self.rpc.call_blockdev_shutdown(source_node, (disk, instance))
      msg = result.fail_msg
        # unmapping is best-effort after a successful migration
        logging.error("Migration was successful, but couldn't unmap the"
                      " block device %s on source node %s: %s",
                      disk.iv_name, source_node, msg)
        logging.error("You need to unmap the device %s manually on %s",
                      disk.iv_name, source_node)

  self.feedback_fn("* done")
def _ExecFailover(self):
  """Failover an instance.

  The failover is done by shutting it down on its present node and
  starting it on the secondary.

  """
  # NOTE(review): guard lines ("if msg:"), "else:" branches and a few
  # string/argument continuation lines are elided from this excerpt.
  instance = self.instance
  primary_node = self.cfg.GetNodeInfo(instance.primary_node)

  source_node = instance.primary_node
  target_node = self.target_node

  if instance.admin_state == constants.ADMINST_UP:
    self.feedback_fn("* checking disk consistency between source and target")
    for (idx, dev) in enumerate(instance.disks):
      # for drbd, these are drbd over lvm
      if not _CheckDiskConsistency(self.lu, instance, dev, target_node,
        if primary_node.offline:
          # an offline primary cannot report consistency; proceed anyway
          self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
                           (primary_node.name, idx, target_node))
        elif not self.ignore_consistency:
          raise errors.OpExecError("Disk %s is degraded on target node,"
                                   " aborting failover" % idx)
    self.feedback_fn("* not checking disk consistency as instance is not"

  self.feedback_fn("* shutting down instance on source node")
  logging.info("Shutting down instance %s on node %s",
               instance.name, source_node)

  result = self.rpc.call_instance_shutdown(source_node, instance,
                                           self.shutdown_timeout)
  msg = result.fail_msg
    if self.ignore_consistency or primary_node.offline:
      self.lu.LogWarning("Could not shutdown instance %s on node %s,"
                         " proceeding anyway; please make sure node"
                         " %s is down; error details: %s",
                         instance.name, source_node, source_node, msg)
      raise errors.OpExecError("Could not shutdown instance %s on"
                               (instance.name, source_node, msg))

  self.feedback_fn("* deactivating the instance's disks on source node")
  if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
    raise errors.OpExecError("Can't shut down the instance's disks")

  instance.primary_node = target_node
  # distribute new instance config to the other nodes
  self.cfg.Update(instance, self.feedback_fn)

  # Only start the instance if it's marked as up
  if instance.admin_state == constants.ADMINST_UP:
    self.feedback_fn("* activating the instance's disks on target node %s" %
    logging.info("Starting instance %s on node %s",
                 instance.name, target_node)

    disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
                                         ignore_secondaries=True)
      _ShutdownInstanceDisks(self.lu, instance)
      raise errors.OpExecError("Can't activate the instance's disks")

    self.feedback_fn("* starting the instance on the target node %s" %
    result = self.rpc.call_instance_start(target_node, (instance, None, None),
    msg = result.fail_msg
      _ShutdownInstanceDisks(self.lu, instance)
      raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                               (instance.name, target_node, msg))
def Exec(self, feedback_fn):
  """Perform the migration.

  @type feedback_fn: callable
  @param feedback_fn: function used to report progress back to the caller

  """
  self.feedback_fn = feedback_fn
  self.source_node = self.instance.primary_node

  # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
  if self.instance.disk_template in constants.DTS_INT_MIRROR:
    self.target_node = self.instance.secondary_nodes[0]
    # Otherwise self.target_node has been populated either
    # directly, or through an iallocator.

  self.all_nodes = [self.source_node, self.target_node]
  self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
                       in self.cfg.GetMultiNodeInfo(self.all_nodes))

  # NOTE(review): the conditionals dispatching between failover, cleanup
  # and migration are elided from this excerpt; only the branch bodies
  # remain visible below.
  feedback_fn("Failover instance %s" % self.instance.name)
  self._ExecFailover()
  feedback_fn("Migrating instance %s" % self.instance.name)
  return self._ExecCleanup()
  return self._ExecMigration()
# NOTE(review): the remainder of this signature and of the forwarded
# argument list is elided from this excerpt.
def _CreateBlockDev(lu, node, instance, device, force_create, info,
  """Wrapper around L{_CreateBlockDevInner}.

  This method annotates the root device first.

  """
  (disk,) = _AnnotateDiskParams(instance, [device], lu.cfg)
  return _CreateBlockDevInner(lu, node, instance, disk, force_create, info,
def _CreateBlockDevInner(lu, node, instance, device, force_create,
  """Create a tree of block devices on a given node.

  If this device type has to be created on secondaries, create it and
  all its children.

  If not, just recurse to children keeping the same 'force' value.

  @attention: The device has to be annotated already.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device which has
      CreateOnSecondary() attribute
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device own Open() execution

  """
  # NOTE(review): the branch bodies (force_create promotion, early
  # return) and the recursive call's continuation line are elided from
  # this excerpt.
  if device.CreateOnSecondary():
  for child in device.children:
    _CreateBlockDevInner(lu, node, instance, child, force_create,
  if not force_create:
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create a single block device on a given node.

  This will not recurse over children of the device, so they must be
  created in advance.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device own Open() execution

  """
  lu.cfg.SetDiskID(device, node)
  result = lu.rpc.call_blockdev_create(node, device, device.size,
                                       instance.name, force_open, info)
  result.Raise("Can't create block device %s on"
               " node %s for instance %s" % (device, node, instance.name))
  if device.physical_id is None:
    # remember the backend-assigned unique id for later addressing
    device.physical_id = result.payload
def _GenerateUniqueNames(lu, exts):
  """Generate a suitable LV name.

  This will generate a logical volume name for the given instance.

  """
  # NOTE(review): the "results" list initialization, the loop over exts
  # binding "val", and the final return of the results list are elided
  # from this excerpt.
  new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
  results.append("%s%s" % (new_id, val))
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
                         iv_name, p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  """
  # NOTE(review): several argument-continuation lines of the Disk
  # constructor calls and the final return of drbd_dev are elided from
  # this excerpt.
  assert len(vgnames) == len(names) == 2
  port = lu.cfg.AllocatePort()
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())

  # data child volume
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgnames[0], names[0]),
  # metadata child volume (fixed DRBD metadata size)
  dev_meta = objects.Disk(dev_type=constants.LD_LV,
                          size=constants.DRBD_META_SIZE,
                          logical_id=(vgnames[1], names[1]),
  # the DRBD device itself, with the two LVs as children
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                          children=[dev_data, dev_meta],
                          iv_name=iv_name, params={})
#: Map of disk template -> name prefix used when generating unique names
#: for the template's backing devices (the closing brace and any further
#: entries are elided in this excerpt)
_DISK_TEMPLATE_NAME_PREFIX = {
  constants.DT_PLAIN: "",
  constants.DT_RBD: ".rbd",

#: Map of disk template -> logical device type used for its disks
_DISK_TEMPLATE_DEVICE_TYPE = {
  constants.DT_PLAIN: constants.LD_LV,
  constants.DT_FILE: constants.LD_FILE,
  constants.DT_SHARED_FILE: constants.LD_FILE,
  constants.DT_BLOCK: constants.LD_BLOCKDEV,
  constants.DT_RBD: constants.LD_RBD,
def _GenerateDiskTemplate(
  lu, template_name, instance_name, primary_node, secondary_nodes,
  disk_info, file_storage_dir, file_driver, base_index,
  feedback_fn, full_disk_params, _req_file_storage=opcodes.RequireFileStorage,
  _req_shr_file_storage=opcodes.RequireSharedFileStorage):
  """Generate the entire disk layout for a given template type.

  """
  # NOTE(review): this excerpt elides several lines: the "disks"/"names"
  # list initializations, some branch bodies ("pass", the storage
  # requirement calls), a few "else:" lines, assignment targets of the
  # logical_id_fn lambdas, call-argument continuations, and the final
  # "return disks".
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  if template_name == constants.DT_DISKLESS:
  elif template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")

    remote_node = secondary_nodes[0]
    # one (primary, secondary) minor pair per disk
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    (drbd_params, _, _) = objects.Disk.ComputeLDParams(template_name,
    drbd_default_metavg = drbd_params[constants.LDP_DEFAULT_METAVG]

    # each disk gets a data and a meta LV name
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      data_vg = disk.get(constants.IDISK_VG, vgname)
      meta_vg = disk.get(constants.IDISK_METAVG, drbd_default_metavg)
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk[constants.IDISK_SIZE],
                                      names[idx * 2:idx * 2 + 2],
                                      "disk/%d" % disk_index,
                                      minors[idx * 2], minors[idx * 2 + 1])
      disk_dev.mode = disk[constants.IDISK_MODE]
      disks.append(disk_dev)
      raise errors.ProgrammerError("Wrong template configuration")

    if template_name == constants.DT_FILE:
    elif template_name == constants.DT_SHARED_FILE:
      _req_shr_file_storage()

    name_prefix = _DISK_TEMPLATE_NAME_PREFIX.get(template_name, None)
    if name_prefix is None:
      names = _GenerateUniqueNames(lu, ["%s.disk%s" %
                                        (name_prefix, base_index + i)
                                        for i in range(disk_count)])

    # per-template function computing the logical id of each disk
    if template_name == constants.DT_PLAIN:
      def logical_id_fn(idx, _, disk):
        vg = disk.get(constants.IDISK_VG, vgname)
        return (vg, names[idx])
    elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
      lambda _, disk_index, disk: (file_driver,
                                   "%s/disk%d" % (file_storage_dir,
    elif template_name == constants.DT_BLOCK:
      lambda idx, disk_index, disk: (constants.BLOCKDEV_DRIVER_MANUAL,
                                     disk[constants.IDISK_ADOPT])
    elif template_name == constants.DT_RBD:
      logical_id_fn = lambda idx, _, disk: ("rbd", names[idx])
      raise errors.ProgrammerError("Unknown disk template '%s'" % template_name)

    dev_type = _DISK_TEMPLATE_DEVICE_TYPE[template_name]

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      size = disk[constants.IDISK_SIZE]
      feedback_fn("* disk %s, size %s" %
                  (disk_index, utils.FormatUnit(size, "h")))
      disks.append(objects.Disk(dev_type=dev_type, size=size,
                                logical_id=logical_id_fn(idx, disk_index, disk),
                                iv_name="disk/%d" % disk_index,
                                mode=disk[constants.IDISK_MODE],
def _GetInstanceInfoText(instance):
  """Compute that text that should be added to the disk's metadata.

  @type instance: L{objects.Instance}
  @param instance: the instance owning the disks
  @rtype: string
  @return: the tag text, encoding the owning instance's name

  """
  return "originstname+%s" % instance.name
def _CalcEta(time_taken, written, total_size):
  """Calculates the ETA based on size written and total size.

  @type time_taken: number
  @param time_taken: the time taken so far
  @type written: number
  @param written: amount written so far; must be non-zero, as it is
      used as a divisor (callers report progress only after some data
      has been transferred)
  @type total_size: number
  @param total_size: the total size of data to be written
  @return: the remaining time in seconds

  """
  # extrapolate the average time per unit over what is left to write
  avg_time = time_taken / float(written)
  return (total_size - written) * avg_time
def _WipeDisks(lu, instance, disks=None):
  """Wipes instance disks.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @return: the success of the wipe

  """
  # NOTE(review): guard lines ("if disks is None:", "if not success:",
  # "if result.fail_msg:"), the try/finally around the wipe loop, the
  # offset/size bookkeeping lines and several call-argument
  # continuations are elided from this excerpt.
  node = instance.primary_node

    # default: wipe all disks from offset 0
    disks = [(idx, disk, 0)
             for (idx, disk) in enumerate(instance.disks)]

  for (_, device, _) in disks:
    lu.cfg.SetDiskID(device, node)

  logging.info("Pausing synchronization of disks of instance '%s'",
  result = lu.rpc.call_blockdev_pause_resume_sync(node,
                                                  (map(compat.snd, disks),
  result.Raise("Failed to pause disk synchronization on node '%s'" % node)

  for idx, success in enumerate(result.payload):
      logging.warn("Pausing synchronization of disk %s of instance '%s'"
                   " failed", idx, instance.name)

    for (idx, device, offset) in disks:
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
      # MAX_WIPE_CHUNK at max. Truncating to integer to avoid rounding errors.
        int(min(constants.MAX_WIPE_CHUNK,
                device.size / 100.0 * constants.MIN_WIPE_CHUNK_PERCENT))

      start_time = time.time()

        info_text = (" (from %s to %s)" %
                     (utils.FormatUnit(offset, "h"),
                      utils.FormatUnit(size, "h")))

      lu.LogInfo("* Wiping disk %s%s", idx, info_text)

      logging.info("Wiping disk %d for instance %s on node %s using"
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)

      while offset < size:
        wipe_size = min(wipe_chunk_size, size - offset)

        logging.debug("Wiping disk %d, offset %s, chunk %s",
                      idx, offset, wipe_size)

        result = lu.rpc.call_blockdev_wipe(node, (device, instance), offset,
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
                     (idx, offset, wipe_size))

        # rate-limited progress/ETA feedback
        if now - last_output >= 60:
          eta = _CalcEta(now - start_time, offset, size)
          lu.LogInfo(" - done: %.1f%% ETA: %s",
                     offset / float(size) * 100, utils.FormatSeconds(eta))

    logging.info("Resuming synchronization of disks for instance '%s'",

    result = lu.rpc.call_blockdev_pause_resume_sync(node,
                                                    (map(compat.snd, disks),

      # resume failure is non-fatal; only warn the operator
      lu.LogWarning("Failed to resume disk synchronization on node '%s': %s",
                    node, result.fail_msg)
      for idx, success in enumerate(result.payload):
          lu.LogWarning("Resuming synchronization of disk %s of instance '%s'"
                        " failed", idx, instance.name)
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation

  @return: the success of the creation

  """
  info = _GetInstanceInfoText(instance)
  if target_node is None:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes
    # NOTE(review): the "else:" branch handling an explicit target_node
    # is elided from this excerpt.

  if instance.disk_template in constants.DTS_FILEBASED:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    result.Raise("Failed to create directory '%s' on"
                 " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUInstanceSetParams
  for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
      # NOTE(review): the loop "continue" statement is elided here.
    logging.info("Creating disk %s for instance '%s'", idx, instance.name)

    for node in all_nodes:
      # force creation/opening only on the primary node
      f_create = node == pnode
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
def _RemoveDisks(lu, instance, target_node=None, ignore_failures=False):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks

  @return: the success of the removal

  """
  # NOTE(review): the all_result accumulator initialization/updates and
  # a few "if"/"else:" guard lines are elided from this excerpt.
  logging.info("Removing block devices for instance %s", instance.name)

  ports_to_release = set()
  anno_disks = _AnnotateDiskParams(instance, instance.disks, lu.cfg)
  for (idx, device) in enumerate(anno_disks):
      # explicit target: only remove on that node
      edata = [(target_node, device)]
      # otherwise remove on every node in the device's tree
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      result = lu.rpc.call_blockdev_remove(node, disk)
        lu.LogWarning("Could not remove disk %s on node %s,"
                      " continuing anyway: %s", idx, node, result.fail_msg)
        if not (result.offline and node != instance.primary_node):

    # if this is a DRBD disk, return its port to the pool
    if device.dev_type in constants.LDS_DRBD:
      ports_to_release.add(device.logical_id[2])

  if all_result or ignore_failures:
    for port in ports_to_release:
      lu.cfg.AddTcpUdpPort(port)

  if instance.disk_template in constants.DTS_FILEBASED:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, instance.primary_node, result.fail_msg)
def _ComputeDiskSizePerVG(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  def _compute(disks, payload):
    """Universal algorithm.

    """
    # NOTE(review): the "vgs = {}" initialization, the "for disk in
    # disks:" loop header and the final "return vgs" are elided here.
    # NOTE(review): the lookup key looks suspicious - the read uses
    # vgs.get(constants.IDISK_VG, 0) while the assignment key is
    # disk[constants.IDISK_VG]; presumably both should use the per-disk
    # VG name. TODO: confirm against upstream.
      vgs[disk[constants.IDISK_VG]] = \
        vgs.get(constants.IDISK_VG, 0) + disk[constants.IDISK_SIZE] + payload

  # Required free disk space as a function of disk and swap space
    constants.DT_DISKLESS: {},
    constants.DT_PLAIN: _compute(disks, 0),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: _compute(disks, constants.DRBD_META_SIZE),
    constants.DT_FILE: {},
    constants.DT_SHARED_FILE: {},

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
def _FilterVmNodes(lu, nodenames):
  """Filters out non-vm_capable nodes from a list.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @rtype: list
  @return: the list of vm-capable nodes

  """
  # build the exclusion set once, so membership tests are O(1)
  vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
  return [name for name in nodenames if name not in vm_nodes]
def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstract the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)

  cluster = lu.cfg.GetClusterInfo()
  # merge cluster-level hypervisor defaults with the supplied overrides
  hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)

  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
  for node in nodenames:
    # NOTE(review): the per-node result lookup binding "info" (and its
    # offline-node handling) is elided from this excerpt.
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """OS parameters validation.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the OS we should use
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)
  result = lu.rpc.call_os_validate(nodenames, required, osname,
                                   [constants.OS_VALIDATE_PARAMETERS],
  for node, nres in result.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    nres.Raise("OS Parameters validation failed on node %s" % node)
    if not nres.payload:
      # a false payload means the OS was not found on that node
      lu.LogInfo("OS %s not found on node %s, validation skipped",
def _CreateInstanceAllocRequest(op, disks, nics, beparams):
  """Wrapper around IAReqInstanceAlloc.

  @param op: The instance opcode
  @param disks: The computed disks
  @param nics: The computed nics
  @param beparams: The full filled beparams

  @returns: A filled L{iallocator.IAReqInstanceAlloc}

  """
  spindle_use = beparams[constants.BE_SPINDLE_USE]
  # NOTE(review): some keyword arguments of this constructor call (and
  # their continuation lines) are elided from this excerpt.
  return iallocator.IAReqInstanceAlloc(name=op.instance_name,
                                       disk_template=op.disk_template,
                                       vcpus=beparams[constants.BE_VCPUS],
                                       memory=beparams[constants.BE_MAXMEM],
                                       spindle_use=spindle_use,
                                       nics=[n.ToDict() for n in nics],
                                       hypervisor=op.hypervisor)
def _ComputeNics(op, cluster, default_ip, cfg, proc):
  """Computes the nics.

  Validates and normalizes every NIC specification from the opcode
  (mode, IP, MAC, link) against the cluster defaults and builds the
  corresponding L{objects.NIC} instances.

  @param op: The instance opcode
  @param cluster: Cluster configuration object
  @param default_ip: The default ip to assign
  @param cfg: An instance of the configuration object
  @param proc: The executer instance

  @returns: The build up nics

  """
  for idx, nic in enumerate(op.nics):
    nic_mode_req = nic.get(constants.INIC_MODE, None)
    nic_mode = nic_mode_req
    # 'auto' or unspecified mode falls back to the cluster default
    if nic_mode is None or nic_mode == constants.VALUE_AUTO:
      nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]

    # in routed mode, for the first nic, the default ip is 'auto'
    if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
      default_ip_mode = constants.VALUE_AUTO
      default_ip_mode = constants.VALUE_NONE

    # ip validity checks
    ip = nic.get(constants.INIC_IP, default_ip_mode)
    if ip is None or ip.lower() == constants.VALUE_NONE:
    elif ip.lower() == constants.VALUE_AUTO:
      # 'auto' uses the IP resolved during the name check, hence the
      # name check must not have been skipped
      if not op.name_check:
        raise errors.OpPrereqError("IP address set to auto but name checks"
                                   " have been skipped",
      if not netutils.IPAddress.IsValid(ip):
        raise errors.OpPrereqError("Invalid IP address '%s'" % ip,

    # TODO: check the ip address for uniqueness
    if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
      raise errors.OpPrereqError("Routed nic mode requires an ip address",

    # MAC address verification
    mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
    if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
      mac = utils.NormalizeAndValidateMac(mac)

        # TODO: We need to factor this out
        cfg.ReserveMAC(mac, proc.GetECId())
      except errors.ReservationError:
        raise errors.OpPrereqError("MAC address %s already in use"
                                   " in cluster" % mac,
                                   errors.ECODE_NOTUNIQUE)

    # Build nic parameters
    link = nic.get(constants.INIC_LINK, None)
    if link == constants.VALUE_AUTO:
      link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
      nicparams[constants.NIC_MODE] = nic_mode
      nicparams[constants.NIC_LINK] = link

    # syntax-check the filled parameters, but store only the overrides
    check_params = cluster.SimpleFillNIC(nicparams)
    objects.NIC.CheckParameterSyntax(check_params)
    nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
def _ComputeDisks(op, default_vg):
  """Computes the instance disks.

  Validates each disk specification from the opcode (access mode, size,
  volume group) and builds normalized disk dicts.

  @param op: The instance opcode
  @param default_vg: The default_vg to assume

  @return: The computed disks

  """
  for disk in op.disks:
    mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
    if mode not in constants.DISK_ACCESS_SET:
      raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                 mode, errors.ECODE_INVAL)
    size = disk.get(constants.IDISK_SIZE, None)
      raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
    except (TypeError, ValueError):
      raise errors.OpPrereqError("Invalid disk size '%s'" % size,
    data_vg = disk.get(constants.IDISK_VG, default_vg)
      constants.IDISK_SIZE: size,
      constants.IDISK_MODE: mode,
      constants.IDISK_VG: data_vg,
    # metavg/adopt are optional; copy them only when given
    if constants.IDISK_METAVG in disk:
      new_disk[constants.IDISK_METAVG] = disk[constants.IDISK_METAVG]
    if constants.IDISK_ADOPT in disk:
      new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
    disks.append(new_disk)
def _ComputeFullBeParams(op, cluster):
  """Computes the full beparams.

  Resolves C{constants.VALUE_AUTO} placeholders in the opcode's backend
  parameters against the cluster defaults (mutating C{op.beparams} in
  place), passes them through L{objects.UpgradeBeParams}, type-checks
  them, and returns the cluster-filled result.

  @param op: The instance opcode
  @param cluster: The cluster config object

  @return: The fully filled beparams

  """
  cluster_defaults = cluster.beparams[constants.PP_DEFAULT]
  # Collect first, assign second: keys whose value is the 'auto'
  # placeholder get replaced by the cluster default for that key
  auto_params = [name for (name, value) in op.beparams.items()
                 if value == constants.VALUE_AUTO]
  for name in auto_params:
    op.beparams[name] = cluster_defaults[name]
  objects.UpgradeBeParams(op.beparams)
  utils.ForceDictType(op.beparams, constants.BES_PARAMETER_TYPES)
  return cluster.SimpleFillBE(op.beparams)
class LUInstanceCreate(LogicalUnit):
  """Create an instance.

  Logical unit handling instance creation in its three modes (see
  C{CheckArguments}): plain creation, import from a local export, and
  remote import.

  """
  # directory name used when running the LU's hooks
  HPATH = "instance-add"
  # type of the object the hooks act upon
  HTYPE = constants.HTYPE_INSTANCE
  def CheckArguments(self):
    """Check arguments.

    Performs all the validation that can be done without cluster-wide
    information: opcode-internal consistency (adopt/no-adopt, mirror
    node requirements, per-mode mandatory fields) and instance-name
    normalization.

    """
    # do not require name_check to ease forward/backward compatibility
    if self.op.no_install and self.op.start:
      self.LogInfo("No-installation mode selected, disabling startup")
      self.op.start = False
    # validate/normalize the instance name
    self.op.instance_name = \
      netutils.Hostname.GetNormalizedName(self.op.instance_name)

    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do IP address check without a name"
                                 " check", errors.ECODE_INVAL)

    # check nics' parameter names
    for nic in self.op.nics:
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)

    # check disks. parameter names and consistent adopt/no-adopt strategy
    has_adopt = has_no_adopt = False
    for disk in self.op.disks:
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
      if constants.IDISK_ADOPT in disk:
    if has_adopt and has_no_adopt:
      raise errors.OpPrereqError("Either all disks are adopted or none is",
      # adoption is incompatible with templates that create their own
      # storage, with iallocator placement and with import
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
        raise errors.OpPrereqError("Disk adoption is not supported for the"
                                   " '%s' disk template" %
                                   self.op.disk_template,
      if self.op.iallocator is not None:
        raise errors.OpPrereqError("Disk adoption not allowed with an"
                                   " iallocator script", errors.ECODE_INVAL)
      if self.op.mode == constants.INSTANCE_IMPORT:
        raise errors.OpPrereqError("Disk adoption not allowed for"
                                   " instance import", errors.ECODE_INVAL)

    if self.op.disk_template in constants.DTS_MUST_ADOPT:
      raise errors.OpPrereqError("Disk template %s requires disk adoption,"
                                 " but no 'adopt' parameter given" %
                                 self.op.disk_template,

    self.adopt_disks = has_adopt

    # instance name verification
    if self.op.name_check:
      self.hostname1 = _CheckHostnameSane(self, self.op.instance_name)
      self.op.instance_name = self.hostname1.name
      # used in CheckPrereq for ip ping check
      self.check_ip = self.hostname1.ip
      self.check_ip = None

    # file storage checks
    if (self.op.file_driver and
        not self.op.file_driver in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver, errors.ECODE_INVAL)

    if self.op.disk_template == constants.DT_FILE:
      opcodes.RequireFileStorage()
    elif self.op.disk_template == constants.DT_SHARED_FILE:
      opcodes.RequireSharedFileStorage()

    ### Node/iallocator related checks
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")

    if self.op.pnode is not None:
      if self.op.disk_template in constants.DTS_INT_MIRROR:
        if self.op.snode is None:
          raise errors.OpPrereqError("The networked disk templates need"
                                     " a mirror node", errors.ECODE_INVAL)
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
        self.op.snode = None

    self._cds = _GetClusterDomainSecret()

    if self.op.mode == constants.INSTANCE_IMPORT:
      # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      self.op.force_variant = True

      if self.op.no_install:
        self.LogInfo("No-installation mode has no effect during import")

    elif self.op.mode == constants.INSTANCE_CREATE:
      if self.op.os_type is None:
        raise errors.OpPrereqError("No guest OS specified",
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
                                   " installation" % self.op.os_type,
      if self.op.disk_template is None:
        raise errors.OpPrereqError("No disk template specified",

    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
      # Check handshake to ensure both clusters have the same domain secret
      src_handshake = self.op.source_handshake
      if not src_handshake:
        raise errors.OpPrereqError("Missing source handshake",

      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,

      # Load and check source CA
      self.source_x509_ca_pem = self.op.source_x509_ca
      if not self.source_x509_ca_pem:
        raise errors.OpPrereqError("Missing source X509 CA",

        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),

      self.source_x509_ca = cert

      src_instance_name = self.op.source_instance_name
      if not src_instance_name:
        raise errors.OpPrereqError("Missing source instance name",

      self.source_instance_name = \
        netutils.GetHostname(name=src_instance_name).name
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
                                 self.op.mode, errors.ECODE_INVAL)
  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation.

    """
    self.needed_locks = {}

    instance_name = self.op.instance_name
    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name, errors.ECODE_EXISTS)

    self.add_locks[locking.LEVEL_INSTANCE] = instance_name

    if self.op.iallocator:
      # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
      # specifying a group on instance creation and then selecting nodes from
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
      self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
      # explicit node placement: lock only the involved nodes
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist
      # Lock resources of instance's primary and secondary nodes (copy to
      # prevent accidential modification)
      self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)

    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = self.op.src_node
      src_path = self.op.src_path

      if src_path is None:
        self.op.src_path = src_path = self.op.instance_name

      if src_node is None:
        # unknown source node: all nodes must be searched for the export
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from a path"
                                     " requires a source node option",
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        if not os.path.isabs(src_path):
          # relative export paths live under the cluster's export directory
          self.op.src_path = src_path = \
            utils.PathJoin(pathutils.EXPORT_DIR, src_path)
  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    Fills C{self.op.pnode} (and C{self.op.snode} for two-node
    placements) from the iallocator's result.

    """
    req = _CreateInstanceAllocRequest(self.op, self.disks,
                                      self.nics, self.be_full)
    ial = iallocator.IAllocator(self.cfg, self.rpc, req)

    ial.Run(self.op.iallocator)

      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
    self.op.pnode = ial.result[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(ial.result))

    assert req.RequiredNodes() in (1, 2), "Wrong node count from iallocator"

    if req.RequiredNodes() == 2:
      self.op.snode = ial.result[1]
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
      "ADD_MODE": self.op.mode,
    if self.op.mode == constants.INSTANCE_IMPORT:
      # source export location, only meaningful for imports
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      minmem=self.be_full[constants.BE_MINMEM],
      maxmem=self.be_full[constants.BE_MAXMEM],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
             for d in self.disks],
      hypervisor_name=self.op.hypervisor,
  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # hooks run on the master plus the instance's primary and secondaries
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
  def _ReadExportInfo(self):
    """Reads the export information from disk.

    It will override the opcode source node and path with the actual
    information, if these two were not specified before.

    @return: the export information

    """
    assert self.op.mode == constants.INSTANCE_IMPORT

    src_node = self.op.src_node
    src_path = self.op.src_path

    if src_node is None:
      # no source node given: search every locked node for the export
      locked_nodes = self.owned_locks(locking.LEVEL_NODE)
      exp_list = self.rpc.call_export_list(locked_nodes)
      for node in exp_list:
        if exp_list[node].fail_msg:
        if src_path in exp_list[node].payload:
          self.op.src_node = src_node = node
          self.op.src_path = src_path = utils.PathJoin(pathutils.EXPORT_DIR,
        raise errors.OpPrereqError("No export found for relative path %s" %
                                   src_path, errors.ECODE_INVAL)

    _CheckNodeOnline(self, src_node)
    result = self.rpc.call_export_info(src_node, src_path)
    result.Raise("No export or invalid export found in dir %s" % src_path)

    # the export config is shipped as a serialized ConfigParser payload
    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
    if not export_info.has_section(constants.INISECT_EXP):
      raise errors.ProgrammerError("Corrupted export config",
                                   errors.ECODE_ENVIRON)

    ei_version = export_info.get(constants.INISECT_EXP, "version")
    if (int(ei_version) != constants.EXPORT_VERSION):
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                 (ei_version, constants.EXPORT_VERSION),
                                 errors.ECODE_ENVIRON)
  def _ReadExportParams(self, einfo):
    """Use export parameters as defaults.

    In case the opcode doesn't specify (as in override) some instance
    parameters, then try to use them from the export information, if
    that declares them.

    @param einfo: the export information (a parsed ConfigParser-style
        object, as returned by L{_ReadExportInfo})

    """
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")

    if self.op.disk_template is None:
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
        self.op.disk_template = einfo.get(constants.INISECT_INS,
        if self.op.disk_template not in constants.DISK_TEMPLATES:
          raise errors.OpPrereqError("Disk template specified in configuration"
                                     " file is not one of the allowed values:"
                                     " ".join(constants.DISK_TEMPLATES),
        raise errors.OpPrereqError("No disk template specified and the export"
                                   " is missing the disk_template information",

    if not self.op.disks:
      # TODO: import the disk iv_name too
      for idx in range(constants.MAX_DISKS):
        if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
          disks.append({constants.IDISK_SIZE: disk_sz})
      self.op.disks = disks
      if not disks and self.op.disk_template != constants.DT_DISKLESS:
        raise errors.OpPrereqError("No disk info specified and the export"
                                   " is missing the disk information",

    if not self.op.nics:
      for idx in range(constants.MAX_NICS):
        if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
          # the presence of the mac option marks an exported nic slot
          for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
            v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))

    if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
      self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()

    if (self.op.hypervisor is None and
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")

    if einfo.has_section(constants.INISECT_HYP):
      # use the export parameters but do not override the ones
      # specified by the user
      for name, value in einfo.items(constants.INISECT_HYP):
        if name not in self.op.hvparams:
          self.op.hvparams[name] = value

    if einfo.has_section(constants.INISECT_BEP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_BEP):
        if name not in self.op.beparams:
          self.op.beparams[name] = value
        # Compatibility for the old "memory" be param
        if name == constants.BE_MEMORY:
          if constants.BE_MAXMEM not in self.op.beparams:
            self.op.beparams[constants.BE_MAXMEM] = value
          if constants.BE_MINMEM not in self.op.beparams:
            self.op.beparams[constants.BE_MINMEM] = value
      # try to read the parameters old style, from the main section
      for name in constants.BES_PARAMETERS:
        if (name not in self.op.beparams and
            einfo.has_option(constants.INISECT_INS, name)):
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)

    if einfo.has_section(constants.INISECT_OSP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_OSP):
        if name not in self.op.osparams:
          self.op.osparams[name] = value
  def _RevertToDefaults(self, cluster):
    """Revert the instance parameters to the default values.

    Removes from the opcode every hv/be/nic/os parameter whose value is
    identical to the cluster default, so that only real overrides are
    stored on the instance.

    @param cluster: the cluster configuration object

    """
    # hvparams
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
    for name in self.op.hvparams.keys():
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
        del self.op.hvparams[name]
    # beparams
    be_defs = cluster.SimpleFillBE({})
    for name in self.op.beparams.keys():
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
        del self.op.beparams[name]
    # nic params
    nic_defs = cluster.SimpleFillNIC({})
    for nic in self.op.nics:
      for name in constants.NICS_PARAMETERS:
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
    # osparams
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
    for name in self.op.osparams.keys():
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
        del self.op.osparams[name]
  def _CalculateFileStorageDir(self):
    """Calculate final instance file storage dir.

    Sets C{self.instance_file_storage_dir}; it stays C{None} for
    non-file-based disk templates.

    """
    # file storage dir calculation/check
    self.instance_file_storage_dir = None
    if self.op.disk_template in constants.DTS_FILEBASED:
      # build the full file storage dir path
      if self.op.disk_template == constants.DT_SHARED_FILE:
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
        get_fsd_fn = self.cfg.GetFileStorageDir

      cfg_storagedir = get_fsd_fn()
      if not cfg_storagedir:
        raise errors.OpPrereqError("Cluster file storage dir not defined",
      joinargs.append(cfg_storagedir)

      if self.op.file_storage_dir is not None:
        joinargs.append(self.op.file_storage_dir)

      # the instance name is always the last path component
      joinargs.append(self.op.instance_name)

      # pylint: disable=W0142
      self.instance_file_storage_dir = utils.PathJoin(*joinargs)
  def CheckPrereq(self): # pylint: disable=R0914
    """Check prerequisites.

    Validates everything that needs cluster information: hypervisor and
    parameter checks, nic/disk computation, optional iallocator run,
    node checks, adoption data verification and instance policy checks.

    """
    self._CalculateFileStorageDir()

    if self.op.mode == constants.INSTANCE_IMPORT:
      export_info = self._ReadExportInfo()
      self._ReadExportParams(export_info)
      self._old_instance_name = export_info.get(constants.INISECT_INS, "name")
      self._old_instance_name = None

    if (not self.cfg.GetVGName() and
        self.op.disk_template not in constants.DTS_NOT_LVM):
      raise errors.OpPrereqError("Cluster does not support lvm-based"
                                 " instances", errors.ECODE_STATE)

    if (self.op.hypervisor is None or
        self.op.hypervisor == constants.VALUE_AUTO):
      self.op.hypervisor = self.cfg.GetHypervisorType()

    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors
    if self.op.hypervisor not in enabled_hvs:
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
                                 (self.op.hypervisor, ",".join(enabled_hvs)),
                                 errors.ECODE_STATE)

    # Check tag validity
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

    # check hypervisor parameter syntax (locally)
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
    hv_type.CheckParameterSyntax(filled_hvp)
    self.hv_full = filled_hvp
    # check that we don't specify global parameters on an instance
    _CheckGlobalHvParams(self.op.hvparams)

    # fill and remember the beparams dict
    self.be_full = _ComputeFullBeParams(self.op, cluster)

    # build os parameters
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)

    # now that hvp/bep are in final format, let's reset to defaults,
    if self.op.identify_defaults:
      self._RevertToDefaults(cluster)

    self.nics = _ComputeNics(self.op, cluster, self.hostname1.ip, self.cfg,

    # disk checks/pre-build
    default_vg = self.cfg.GetVGName()
    self.disks = _ComputeDisks(self.op, default_vg)

    if self.op.mode == constants.INSTANCE_IMPORT:
      # collect the dump file for each disk, or False when missing
      for idx in range(len(self.disks)):
        option = "disk%d_dump" % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = utils.PathJoin(self.op.src_path, export_name)
          disk_images.append(image)
          disk_images.append(False)

      self.src_images = disk_images

      if self.op.instance_name == self._old_instance_name:
        # same name as the exported instance: reuse its MAC addresses
        for idx, nic in enumerate(self.nics):
          if nic.mac == constants.VALUE_AUTO:
            nic_mac_ini = "nic%d_mac" % idx
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)

    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT

    # ip ping checks (we use the same ip that was resolved in ExpandNames)
    if self.op.ip_check:
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (self.check_ip, self.op.instance_name),
                                   errors.ECODE_NOTUNIQUE)

    #### mac address generation
    # By generating here the mac address both the allocator and the hooks get
    # the real final mac address rather than the 'auto' or 'generate' value.
    # There is a race condition between the generation and the instance object
    # creation, which means that we know the mac is valid now, but we're not
    # sure it will be when we actually add the instance. If things go bad
    # adding the instance will abort because of a duplicate mac, and the
    # creation job will fail.
    for nic in self.nics:
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())

    if self.op.iallocator is not None:
      self._RunAllocator()

    # Release all unneeded node locks
    _ReleaseLocks(self, locking.LEVEL_NODE,
                  keep=filter(None, [self.op.pnode, self.op.snode,
                                     self.op.src_node]))
    _ReleaseLocks(self, locking.LEVEL_NODE_RES,
                  keep=filter(None, [self.op.pnode, self.op.snode,
                                     self.op.src_node]))

    #### node related checks

    # check primary node
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
    assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if not pnode.vm_capable:
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
                                 " '%s'" % pnode.name, errors.ECODE_STATE)

    self.secondaries = []

    # mirror node verification
    if self.op.disk_template in constants.DTS_INT_MIRROR:
      if self.op.snode == pnode.name:
        raise errors.OpPrereqError("The secondary node cannot be the"
                                   " primary node", errors.ECODE_INVAL)
      _CheckNodeOnline(self, self.op.snode)
      _CheckNodeNotDrained(self, self.op.snode)
      _CheckNodeVmCapable(self, self.op.snode)
      self.secondaries.append(self.op.snode)

      snode = self.cfg.GetNodeInfo(self.op.snode)
      if pnode.group != snode.group:
        self.LogWarning("The primary and secondary nodes are in two"
                        " different node groups; the disk parameters"
                        " from the first disk's node group will be"

    nodenames = [pnode.name] + self.secondaries

    # Verify instance specs
    # NOTE(review): this spec/ipolicy verification also appears further
    # below after the adoption handling; here the disk sizes are read
    # with the literal key "size" instead of constants.IDISK_SIZE —
    # confirm this duplication is intended
    spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
      constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
      constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
      constants.ISPEC_DISK_COUNT: len(self.disks),
      constants.ISPEC_DISK_SIZE: [disk["size"] for disk in self.disks],
      constants.ISPEC_NIC_COUNT: len(self.nics),
      constants.ISPEC_SPINDLE_USE: spindle_use,

    group_info = self.cfg.GetNodeGroup(pnode.group)
    ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
    res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
    if not self.op.ignore_ipolicy and res:
      msg = ("Instance allocation to group %s (%s) violates policy: %s" %
             (pnode.group, group_info.name, utils.CommaJoin(res)))
      raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

    if not self.adopt_disks:
      if self.op.disk_template == constants.DT_RBD:
        # _CheckRADOSFreeSpace() is just a placeholder.
        # Any function that checks prerequisites can be placed here.
        # Check if there is enough space on the RADOS cluster.
        _CheckRADOSFreeSpace()

        # Check lv size requirements, if not adopting
        req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
        _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)

    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
                                disk[constants.IDISK_ADOPT])
                     for disk in self.disks])
      if len(all_lvs) != len(self.disks):
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
                                   errors.ECODE_INVAL)
      for lv_name in all_lvs:
          # FIXME: lv_name here is "vg/lv" need to ensure that other calls
          # to ReserveLV uses the same syntax
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("LV named %s used by another instance" %
                                     lv_name, errors.ECODE_NOTUNIQUE)

      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)

      node_lvs = self.rpc.call_lv_list([pnode.name],
                                       vg_names.payload.keys())[pnode.name]
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
      node_lvs = node_lvs.payload

      delta = all_lvs.difference(node_lvs.keys())
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
        raise errors.OpPrereqError("Online logical volumes found, cannot"
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
                                   errors.ECODE_STATE)
      # update the size of disk based on what is found
      for dsk in self.disks:
        dsk[constants.IDISK_SIZE] = \
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
                                        dsk[constants.IDISK_ADOPT])][0]))

    elif self.op.disk_template == constants.DT_BLOCK:
      # Normalize and de-duplicate device paths
      all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
                       for disk in self.disks])
      if len(all_disks) != len(self.disks):
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
                                   errors.ECODE_INVAL)
      baddisks = [d for d in all_disks
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
                                   " cannot be adopted" %
                                   (", ".join(baddisks),
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
                                   errors.ECODE_INVAL)

      node_disks = self.rpc.call_bdev_sizes([pnode.name],
                                            list(all_disks))[pnode.name]
      node_disks.Raise("Cannot get block device information from node %s" %
      node_disks = node_disks.payload
      delta = all_disks.difference(node_disks.keys())
        raise errors.OpPrereqError("Missing block device(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      # adopt the sizes reported by the node
      for dsk in self.disks:
        dsk[constants.IDISK_SIZE] = \
          int(float(node_disks[dsk[constants.IDISK_ADOPT]]))

    # Verify instance specs
    # (second verification pass: disk sizes may have been updated by the
    # adoption code above; see NOTE(review) at the first pass)
    spindle_use = self.be_full.get(constants.BE_SPINDLE_USE, None)
      constants.ISPEC_MEM_SIZE: self.be_full.get(constants.BE_MAXMEM, None),
      constants.ISPEC_CPU_COUNT: self.be_full.get(constants.BE_VCPUS, None),
      constants.ISPEC_DISK_COUNT: len(self.disks),
      constants.ISPEC_DISK_SIZE: [disk[constants.IDISK_SIZE]
                                  for disk in self.disks],
      constants.ISPEC_NIC_COUNT: len(self.nics),
      constants.ISPEC_SPINDLE_USE: spindle_use,

    group_info = self.cfg.GetNodeGroup(pnode.group)
    ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster, group_info)
    res = _ComputeIPolicyInstanceSpecViolation(ipolicy, ispec)
    if not self.op.ignore_ipolicy and res:
      raise errors.OpPrereqError(("Instance allocation to group %s violates"
                                  " policy: %s") % (pnode.group,
                                                    utils.CommaJoin(res)),
                                 errors.ECODE_INVAL)

    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)

    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
    # check OS parameters (remotely)
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)

    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)

    # memory check on primary node
    #TODO(dynmem): use MINMEM for checking
    _CheckNodeFreeMemory(self, self.pnode.name,
                         "creating instance %s" % self.op.instance_name,
                         self.be_full[constants.BE_MAXMEM],
                         self.op.hypervisor)

    self.dry_run_result = list(nodenames)
  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    Generates the disk set, registers the instance in the cluster
    configuration, creates (or adopts) the disks, optionally wipes
    them, runs the OS creation or import scripts, and finally starts
    the instance when requested.

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    # Node resource locks must be a subset of the plain node locks
    assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
                self.owned_locks(locking.LEVEL_NODE)), \
      "Node locks differ from node resource locks"

    # Allocate a network port (e.g. for a VNC console) only for
    # hypervisors that require one
    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    # NOTE(review): this unconditionally overwrites the port allocated
    # above; looks like it belongs under an "else:" branch -- confirm
    network_port = None

    # This is ugly but we got a chicken-egg problem here
    # We can only take the group disk parameters, as the instance
    # has no disks yet (we are generating them right here).
    node = self.cfg.GetNodeInfo(pnode_name)
    nodegroup = self.cfg.GetNodeGroup(node.group)
    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.instance_file_storage_dir,
                                  self.op.file_driver,
                                  self.cfg.GetGroupDiskParams(nodegroup))

    # The instance object is registered administratively down; it is
    # only marked up (and started) at the very end of this method
    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_state=constants.ADMINST_DOWN,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            osparams=self.op.osparams,

    # Copy the requested tags onto the new instance object
    for tag in self.op.tags:

    if self.adopt_disks:
      if self.op.disk_template == constants.DT_PLAIN:
        # rename LVs to the newly-generated names; we need to construct
        # 'fake' LV disks with the old data, plus the new unique_id
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
          rename_to.append(t_dsk.logical_id)
          t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
          self.cfg.SetDiskID(t_dsk, pnode_name)
        result = self.rpc.call_blockdev_rename(pnode_name,
                                               zip(tmp_disks, rename_to))
        # NOTE(review): "adoped" is a typo for "adopted" in this message
        result.Raise("Failed to rename adoped LVs")

    feedback_fn("* creating instance disks...")
      _CreateDisks(self, iobj)
    except errors.OpExecError:
      # Creation failed: best-effort removal of whatever was created,
      # and release of the DRBD minors reserved for the new disks
      self.LogWarning("Device creation failed, reverting...")
      _RemoveDisks(self, iobj)
      self.cfg.ReleaseDRBDMinors(instance)

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj, self.proc.GetECId())

    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]

    if self.op.mode == constants.INSTANCE_IMPORT:
      # Release unused nodes
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
      # Release all nodes
      _ReleaseLocks(self, locking.LEVEL_NODE)

    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
      # Wiping is best-effort: a failure is logged but does not abort
      # the creation
      feedback_fn("* wiping instance disks...")
        _WipeDisks(self, iobj)
      except errors.OpExecError, err:
        logging.exception("Wiping disks failed")
        self.LogWarning("Wiping instance disks failed (%s)", err)

        # Something is already wrong with the disks, don't do anything else
      elif self.op.wait_for_sync:
        disk_abort = not _WaitForSync(self, iobj)
      elif iobj.disk_template in constants.DTS_INT_MIRROR:
        # make sure the disks are not degraded (still sync-ing is ok)
        feedback_fn("* checking mirrors status")
        disk_abort = not _WaitForSync(self, iobj, oneshot=True)

        # Degraded disks: remove the instance and its disks again, and
        # re-register the instance lock for removal before failing
        _RemoveDisks(self, iobj)
        self.cfg.RemoveInstance(iobj.name)
        # Make sure the instance lock gets removed
        self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
        raise errors.OpExecError("There are some degraded disks for"

    # Release all node resource locks
    _ReleaseLocks(self, locking.LEVEL_NODE_RES)

    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
      # we need to set the disks ID to the primary node, since the
      # preceding code might or might have not done it, depending on
      # disk template and other options
      for disk in iobj.disks:
        self.cfg.SetDiskID(disk, pnode_name)

    if self.op.mode == constants.INSTANCE_CREATE:
      if not self.op.no_install:
        # For internally mirrored templates, pause the sync while the
        # OS is installed, unless the caller asked to wait for sync
        pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
                      not self.op.wait_for_sync)
          feedback_fn("* pausing disk sync to install instance OS")
          result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
          # pause failures are logged per disk, not fatal
          for idx, success in enumerate(result.payload):
              logging.warn("pause-sync of instance %s for disk %d failed",

        feedback_fn("* running the instance OS create scripts...")
        # FIXME: pass debug option from opcode to backend
          self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
                                        self.op.debug_level)

          feedback_fn("* resuming disk sync")
          result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
          # resume failures are logged per disk, not fatal
          for idx, success in enumerate(result.payload):
              logging.warn("resume-sync of instance %s for disk %d failed",

        # OS creation errors, however, are fatal
        os_add_result.Raise("Could not add os for instance %s"
                            " on node %s" % (instance, pnode_name))

    if self.op.mode == constants.INSTANCE_IMPORT:
      feedback_fn("* running the instance OS import scripts...")

      # Build one file-to-import-script transfer per source image
      for idx, image in enumerate(self.src_images):
        # FIXME: pass debug option from opcode to backend
        dt = masterd.instance.DiskTransfer("disk/%s" % idx,
                                           constants.IEIO_FILE, (image, ),
                                           constants.IEIO_SCRIPT,
                                           (iobj.disks[idx], idx),
        transfers.append(dt)

        masterd.instance.TransferInstanceData(self, feedback_fn,
                                              self.op.src_node, pnode_name,
                                              self.pnode.secondary_ip,
      # Partial import failures are reported but do not abort creation
      if not compat.all(import_result):
        self.LogWarning("Some disks for instance %s on node %s were not"
                        " imported successfully" % (instance, pnode_name))

      rename_from = self._old_instance_name

    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
      feedback_fn("* preparing remote import...")
      # The source cluster will stop the instance before attempting to make
      # a connection. In some cases stopping an instance can take a long
      # time, hence the shutdown timeout is added to the connection
      connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
                         self.op.source_shutdown_timeout)
      timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

      assert iobj.primary_node == self.pnode.name
        masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
                                      self.source_x509_ca,
                                      self._cds, timeouts)
      if not compat.all(disk_results):
        # TODO: Should the instance still be started, even if some disks
        # failed to import (valid for local imports, too)?
        self.LogWarning("Some disks for instance %s on node %s were not"
                        " imported successfully" % (instance, pnode_name))

      rename_from = self.source_instance_name

      # also checked in the prereq part
      raise errors.ProgrammerError("Unknown OS initialization mode '%s'"

    # Run rename script on newly imported instance
    assert iobj.name == instance
    feedback_fn("Running rename script for %s" % instance)
    result = self.rpc.call_instance_run_rename(pnode_name, iobj,
                                               self.op.debug_level)
    # A failing rename script is only a warning, not fatal
    if result.fail_msg:
      self.LogWarning("Failed to run rename script for %s on node"
                      " %s: %s" % (instance, pnode_name, result.fail_msg))

    assert not self.owned_locks(locking.LEVEL_NODE_RES)

      # Persist the "up" administrative state before actually starting
      # the instance
      iobj.admin_state = constants.ADMINST_UP
      self.cfg.Update(iobj, feedback_fn)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
      result.Raise("Could not start instance")

    return list(iobj.all_nodes)
class LUInstanceMultiAlloc(NoHooksLU):
  """Allocates multiple instances at the same time.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Check arguments.

    Validates that either all or none of the instances carry explicit
    node assignments, and that an iallocator is available when needed.

    """
    nodes = []
    for inst in self.op.instances:
      if inst.iallocator is not None:
        raise errors.OpPrereqError("iallocator are not allowed to be set on"
                                   " instance objects", errors.ECODE_INVAL)
      nodes.append(bool(inst.pnode))
      if inst.disk_template in constants.DTS_INT_MIRROR:
        # mirrored templates also need a secondary node
        nodes.append(bool(inst.snode))

    has_nodes = compat.any(nodes)
    # XOR: fails when only some of the instances provide nodes
    if compat.all(nodes) ^ has_nodes:
      raise errors.OpPrereqError("There are instance objects providing"
                                 " pnode/snode while others do not",
                                 errors.ECODE_INVAL)

    if self.op.iallocator is None:
      default_iallocator = self.cfg.GetDefaultIAllocator()
      if default_iallocator and has_nodes:
        self.op.iallocator = default_iallocator
      else:
        raise errors.OpPrereqError("No iallocator or nodes on the instances"
                                   " given and no cluster-wide default"
                                   " iallocator found; please specify either"
                                   " an iallocator or nodes on the instances"
                                   " or set a cluster-wide default iallocator",
                                   errors.ECODE_INVAL)

    dups = utils.FindDuplicates([op.instance_name for op in self.op.instances])
    if dups:
      raise errors.OpPrereqError("There are duplicate instance names: %s" %
                                 utils.CommaJoin(dups), errors.ECODE_INVAL)

  def ExpandNames(self):
    """Calculate the locks.

    With an iallocator all nodes are locked; otherwise only the nodes
    explicitly named on the instance objects.

    """
    self.share_locks = _ShareAll()
    self.needed_locks = {}

    if self.op.iallocator:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
      self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
    else:
      nodeslist = []
      for inst in self.op.instances:
        inst.pnode = _ExpandNodeName(self.cfg, inst.pnode)
        nodeslist.append(inst.pnode)
        if inst.snode is not None:
          inst.snode = _ExpandNodeName(self.cfg, inst.snode)
          nodeslist.append(inst.snode)

      self.needed_locks[locking.LEVEL_NODE] = nodeslist
      # Lock resources of instance's primary and secondary nodes (copy to
      # prevent accidential modification)
      self.needed_locks[locking.LEVEL_NODE_RES] = list(nodeslist)

  def CheckPrereq(self):
    """Check prerequisite.

    Runs the iallocator over all requested instances and stores its
    result for Exec (or, in dry-run mode, for the caller).

    """
    cluster = self.cfg.GetClusterInfo()
    default_vg = self.cfg.GetVGName()
    insts = [_CreateInstanceAllocRequest(op, _ComputeDisks(op, default_vg),
                                         _ComputeNics(op, cluster, None,
                                                      self.cfg, self.proc),
                                         _ComputeFullBeParams(op, cluster))
             for op in self.op.instances]

    req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
    ial = iallocator.IAllocator(self.cfg, self.rpc, req)

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)

    self.ia_result = ial.result

    if self.op.dry_run:
      # FIX: this was stored as "dry_run_rsult" (typo); the job
      # processor reads "dry_run_result", so dry-run output was lost
      self.dry_run_result = objects.FillDict(self._ConstructPartialResult(), {
        constants.JOB_IDS_KEY: [],
        })

  def _ConstructPartialResult(self):
    """Constructs the partial result.

    @return: dict mapping the opcode's result keys to the allocatable
      and failed instance names as reported by the iallocator

    """
    (allocatable, failed) = self.ia_result
    return {
      opcodes.OpInstanceMultiAlloc.ALLOCATABLE_KEY:
        map(compat.fst, allocatable),
      opcodes.OpInstanceMultiAlloc.FAILED_KEY: failed,
      }

  def Exec(self, feedback_fn):
    """Executes the opcode.

    Submits one creation job per allocatable instance, with the nodes
    chosen by the iallocator filled in.

    """
    op2inst = dict((op.instance_name, op) for op in self.op.instances)
    (allocatable, failed) = self.ia_result

    jobs = []
    for (name, nodes) in allocatable:
      op = op2inst.pop(name)

      if len(nodes) > 1:
        (op.pnode, op.snode) = nodes
      else:
        (op.pnode,) = nodes

      jobs.append([op])

    # Every instance not allocated must have been reported as failed
    missing = set(op2inst.keys()) - set(failed)
    assert not missing, \
      "Iallocator did return incomplete result: %s" % utils.CommaJoin(missing)

    return ResultWithJobs(jobs, **self._ConstructPartialResult())
10667 def _CheckRADOSFreeSpace():
10668 """Compute disk size requirements inside the RADOS cluster.
10671 # For the RADOS cluster we assume there is always enough space.
class LUInstanceConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """

  def ExpandNames(self):
    # All locks are shared: the console request is read-only
    self.share_locks = _ShareAll()
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance

    @return: console description dict, via L{_GetInstanceConsole}

    """
    instance = self.instance
    node = instance.primary_node

    # Ask the primary node which instances it is actually running
    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)

    if instance.name not in node_insts.payload:
      # Map the administrative state to a user-visible run state
      if instance.admin_state == constants.ADMINST_UP:
        state = constants.INSTST_ERRORDOWN
      elif instance.admin_state == constants.ADMINST_DOWN:
        state = constants.INSTST_ADMINDOWN
        state = constants.INSTST_ADMINOFFLINE
      raise errors.OpExecError("Instance %s is not running (state %s)" %
                               (instance.name, state))

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
def _GetInstanceConsole(cluster, instance):
  """Returns console information for an instance.

  @type cluster: L{objects.Cluster}
  @type instance: L{objects.Instance}
  @rtype: dict

  """
  # Fill the hypervisor/backend parameters from the cluster defaults
  # separately, so the instance object itself is never modified and
  # the defaults are never saved back into it.
  filled_hvparams = cluster.FillHV(instance)
  filled_beparams = cluster.FillBE(instance)

  hyp = hypervisor.GetHypervisor(instance.hypervisor)
  console = hyp.GetInstanceConsole(instance, filled_hvparams,
                                   filled_beparams)

  assert console.instance == instance.name
  assert console.Validate()

  return console.ToDict()
class LUInstanceReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE

  def CheckArguments(self):
    """Check arguments.

    The iallocator/remote-node options are only meaningful when the
    secondary node is being changed.

    """
    remote_node = self.op.remote_node
    ialloc = self.op.iallocator
    if self.op.mode == constants.REPLACE_DISK_CHG:
      # Changing the secondary needs some way to pick the new node
      if remote_node is None and ialloc is None:
        raise errors.OpPrereqError("When changing the secondary either an"
                                   " iallocator script must be used or the"
                                   " new node given", errors.ECODE_INVAL)

      _CheckIAllocatorOrNode(self, "iallocator", "remote_node")

    elif remote_node is not None or ialloc is not None:
      # Not replacing the secondary
      raise errors.OpPrereqError("The iallocator and new node options can"
                                 " only be used when changing the"
                                 " secondary node", errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    assert locking.LEVEL_NODE not in self.needed_locks
    assert locking.LEVEL_NODE_RES not in self.needed_locks
    assert locking.LEVEL_NODEGROUP not in self.needed_locks

    assert self.op.iallocator is None or self.op.remote_node is None, \
      "Conflicting options"

    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    # NOTE(review): the next two lines overwrite the remote-node lock
    # setup above; they look like they belong under an "else:" -- confirm
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    if self.op.iallocator is not None:
      # iallocator will select a new node in the same group
      self.needed_locks[locking.LEVEL_NODEGROUP] = []

    self.needed_locks[locking.LEVEL_NODE_RES] = []

    # The actual work is delegated to the TLReplaceDisks tasklet
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks, False, self.op.early_release,
                                   self.op.ignore_ipolicy)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert self.op.remote_node is None
      assert self.op.iallocator is not None
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      self.share_locks[locking.LEVEL_NODEGROUP] = 1
      # Lock all groups used by instance optimistically; this requires going
      # via the node before it's locked, requiring verification later on
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)

    elif level == locking.LEVEL_NODE:
      if self.op.iallocator is not None:
        assert self.op.remote_node is None
        assert not self.needed_locks[locking.LEVEL_NODE]

        # Lock member nodes of all locked groups
        self.needed_locks[locking.LEVEL_NODE] = \
          for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
          for node_name in self.cfg.GetNodeGroup(group_uuid).members]
      self._LockInstancesNodes()
    elif level == locking.LEVEL_NODE_RES:
      # Resource locks mirror the plain node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    instance = self.replacer.instance
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": instance.secondary_nodes[0],
    env.update(_BuildInstanceHookEnvByObject(self, instance))

  def BuildHooksNodes(self):
    """Build hooks nodes.

    Master, primary node and (if given) the new secondary.

    """
    instance = self.replacer.instance
      self.cfg.GetMasterNode(),
      instance.primary_node,
    if self.op.remote_node is not None:
      nl.append(self.op.remote_node)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # When using an iallocator, the node group locks must have been taken
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
            self.op.iallocator is None)

    # Verify if node group locks are still correct
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)

    # Delegate the rest (tasklet checks) to the base class
    return LogicalUnit.CheckPrereq(self)
10883 class TLReplaceDisks(Tasklet):
10884 """Replaces disks for an instance.
10886 Note: Locking is not within the scope of this class.
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
               disks, delay_iallocator, early_release, ignore_ipolicy):
    """Initializes this class.

    Stores the replace-disks parameters and pre-declares the runtime
    attributes that CheckPrereq/_CheckPrereq2 fill in later.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.iallocator_name = iallocator_name
    self.remote_node = remote_node
    self.delay_iallocator = delay_iallocator
    self.early_release = early_release
    self.ignore_ipolicy = ignore_ipolicy
    # NOTE(review): the "mode" and "disks" arguments are accepted but
    # not stored here, yet self.mode/self.disks are read later in this
    # class -- confirm the corresponding assignments are not missing

    # Runtime data (populated during the prereq checks)
    self.instance = None
    self.new_node = None
    self.target_node = None
    self.other_node = None
    self.remote_node_info = None
    self.node_secondary_ip = None
10915 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
10916 """Compute a new secondary node using an IAllocator.
10919 req = iallocator.IAReqRelocate(name=instance_name,
10920 relocate_from=list(relocate_from))
10921 ial = iallocator.IAllocator(lu.cfg, lu.rpc, req)
10923 ial.Run(iallocator_name)
10925 if not ial.success:
10926 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
10927 " %s" % (iallocator_name, ial.info),
10928 errors.ECODE_NORES)
10930 remote_node_name = ial.result[0]
10932 lu.LogInfo("Selected new secondary for instance '%s': %s",
10933 instance_name, remote_node_name)
10935 return remote_node_name
  def _FindFaultyDisks(self, node_name):
    """Wrapper for L{_FindFaultyInstanceDisks}.

    Checks this tasklet's instance on the given node.

    """
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
  def _CheckDisksActivated(self, instance):
    """Checks if the instance disks are activated.

    @param instance: The instance to check disks
    @return: True if they are activated, False otherwise

    """
    nodes = instance.all_nodes

    # Probe every disk on every involved node via blockdev_find
    for idx, dev in enumerate(instance.disks):
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
        self.cfg.SetDiskID(dev, node)

        result = _BlockdevFind(self, node, dev, instance)

        # An RPC failure or a missing device means "not activated"
        elif result.fail_msg or not result.payload:
10967 def CheckPrereq(self):
10968 """Check prerequisites.
10970 This checks that the instance is in the cluster.
10973 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
10974 assert instance is not None, \
10975 "Cannot retrieve locked instance %s" % self.instance_name
10977 if instance.disk_template != constants.DT_DRBD8:
10978 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
10979 " instances", errors.ECODE_INVAL)
10981 if len(instance.secondary_nodes) != 1:
10982 raise errors.OpPrereqError("The instance has a strange layout,"
10983 " expected one secondary but found %d" %
10984 len(instance.secondary_nodes),
10985 errors.ECODE_FAULT)
10987 if not self.delay_iallocator:
10988 self._CheckPrereq2()
  def _CheckPrereq2(self):
    """Check prerequisites, second part.

    This function should always be part of CheckPrereq. It was separated and is
    now called from Exec because during node evacuation iallocator was only
    called with an unmodified cluster model, not taking planned changes into
    account.

    """
    instance = self.instance
    secondary_node = instance.secondary_nodes[0]

    # Determine the candidate new secondary: either explicitly given,
    # or chosen by the iallocator
    if self.iallocator_name is None:
      remote_node = self.remote_node
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)

    if remote_node is None:
      self.remote_node_info = None
      assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
        "Remote node '%s' is not locked" % remote_node

      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node

    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance", errors.ECODE_INVAL)

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance",
                                 errors.ECODE_INVAL)

    # Explicit disk lists are only valid for same-node replacement modes
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
                                 errors.ECODE_INVAL)

    if self.mode == constants.REPLACE_DISK_AUTO:
      # Automatic mode: find the faulty side and repair it in place
      if not self._CheckDisksActivated(instance):
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
                                   " first" % self.instance_name,
                                   errors.ECODE_STATE)

      faulty_primary = self._FindFaultyDisks(instance.primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name,
                                   errors.ECODE_STATE)

        self.disks = faulty_primary
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        # Replace on the primary, check consistency against the secondary
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        # Replace on the secondary, check consistency against the primary
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        # Move the secondary to remote_node
        self.new_node = remote_node
        self.other_node = instance.primary_node
        self.target_node = secondary_node
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, remote_node)
        _CheckNodeVmCapable(self.lu, remote_node)

        old_node_info = self.cfg.GetNodeInfo(secondary_node)
        assert old_node_info is not None
        if old_node_info.offline and not self.early_release:
          # doesn't make sense to delay the release
          self.early_release = True
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
                          " early-release mode", secondary_node)

        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %

    # If not specified all disks should be replaced
      self.disks = range(len(self.instance.disks))

    # TODO: This is ugly, but right now we can't distinguish between internal
    # submitted opcode and external one. We should fix that.
    if self.remote_node_info:
      # We change the node, lets verify it still meets instance policy
      new_group_info = self.cfg.GetNodeGroup(self.remote_node_info.group)
      cluster = self.cfg.GetClusterInfo()
      ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
      _CheckTargetNodeIPolicy(self, ipolicy, instance, self.remote_node_info,
                              ignore=self.ignore_ipolicy)

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    # All nodes this operation actually touches
    touched_nodes = frozenset(node_name for node_name in [self.new_node,
      if node_name is not None)

    # Release unneeded node and node resource locks
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
    _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)

    # Release any owned node group
    if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
      _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)

    # Check whether disks are valid
    for disk_idx in self.disks:
      instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
                                  in self.cfg.GetMultiNodeInfo(touched_nodes))
11132 def Exec(self, feedback_fn):
11133 """Execute disk replacement.
11135 This dispatches the disk replacement to the appropriate handler.
11138 if self.delay_iallocator:
11139 self._CheckPrereq2()
11142 # Verify owned locks before starting operation
11143 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
11144 assert set(owned_nodes) == set(self.node_secondary_ip), \
11145 ("Incorrect node locks, owning %s, expected %s" %
11146 (owned_nodes, self.node_secondary_ip.keys()))
11147 assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
11148 self.lu.owned_locks(locking.LEVEL_NODE_RES))
11150 owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
11151 assert list(owned_instances) == [self.instance_name], \
11152 "Instance '%s' not locked" % self.instance_name
11154 assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
11155 "Should not own any node group lock at this point"
11158 feedback_fn("No disks need replacement for instance '%s'" %
11159 self.instance.name)
11162 feedback_fn("Replacing disk(s) %s for instance '%s'" %
11163 (utils.CommaJoin(self.disks), self.instance.name))
11164 feedback_fn("Current primary node: %s", self.instance.primary_node)
11165 feedback_fn("Current seconary node: %s",
11166 utils.CommaJoin(self.instance.secondary_nodes))
11168 activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
11170 # Activate the instance disks if we're replacing them on a down instance
11172 _StartInstanceDisks(self.lu, self.instance, True)
11175 # Should we replace the secondary node?
11176 if self.new_node is not None:
11177 fn = self._ExecDrbd8Secondary
11179 fn = self._ExecDrbd8DiskOnly
11181 result = fn(feedback_fn)
11183 # Deactivate the instance disks if we're replacing them on a
11186 _SafeShutdownInstanceDisks(self.lu, self.instance)
11188 assert not self.lu.owned_locks(locking.LEVEL_NODE)
11191 # Verify owned locks
11192 owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
11193 nodes = frozenset(self.node_secondary_ip)
11194 assert ((self.early_release and not owned_nodes) or
11195 (not self.early_release and not (set(owned_nodes) - nodes))), \
11196 ("Not owning the correct locks, early_release=%s, owned=%r,"
11197 " nodes=%r" % (self.early_release, owned_nodes, nodes))
  def _CheckVolumeGroup(self, nodes):
    """Verify the configured volume group exists on all given nodes."""
    self.lu.LogInfo("Checking volume groups")

    vgname = self.cfg.GetVGName()

    # Make sure volume group exists on all involved nodes
    results = self.rpc.call_vg_list(nodes)
      raise errors.OpExecError("Can't list volume groups on the nodes")

      res = results[node]
      res.Raise("Error checking node %s" % node)
      if vgname not in res.payload:
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
  def _CheckDisksExistence(self, nodes):
    # Check disk existence
    # Only the disks selected for replacement (self.disks) are probed
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:

        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
        self.cfg.SetDiskID(dev, node)

        result = _BlockdevFind(self, node, dev, self.instance)

        # Treat both RPC failure and a missing device as an error
        msg = result.fail_msg
        if msg or not result.payload:
            msg = "disk not found"
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
    # Verify that the selected disks are consistent on node_name before
    # touching any storage; abort the whole replace otherwise
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:

      self.lu.LogInfo("Checking disk/%d consistency on node %s" %

      if not _CheckDiskConsistency(self.lu, self.instance, dev, node_name,
                                   on_primary, ldisk=ldisk):
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
                                 " replace disks for instance %s" %
                                 (node_name, self.instance.name))
  def _CreateNewStorage(self, node_name):
    """Create new storage on the primary or secondary node.

    This is only used for same-node replaces, not for changing the
    secondary node, hence we don't want to modify the existing disk.

    """
    disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
    for idx, dev in enumerate(disks):
      if idx not in self.disks:

      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))

      self.cfg.SetDiskID(dev, node_name)

      # New unique names for the replacement data/meta LV pair
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
      names = _GenerateUniqueNames(self.lu, lv_names)

      # Build the replacement LVs in the same volume groups as the
      # existing children of the DRBD device
      (data_disk, meta_disk) = dev.children
      vg_data = data_disk.logical_id[0]
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
                             logical_id=(vg_data, names[0]),
                             params=data_disk.params)
      vg_meta = meta_disk.logical_id[0]
      lv_meta = objects.Disk(dev_type=constants.LD_LV,
                             size=constants.DRBD_META_SIZE,
                             logical_id=(vg_meta, names[1]),
                             params=meta_disk.params)

      # Remember old/new LV pairs per DRBD device for the later
      # detach/rename/attach and cleanup steps
      new_lvs = [lv_data, lv_meta]
      old_lvs = [child.Copy() for child in dev.children]
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)

      # we pass force_create=True to force the LVM creation
      for new_lv in new_lvs:
        _CreateBlockDevInner(self.lu, node_name, self.instance, new_lv, True,
                             _GetInstanceInfoText(self.instance), False)
  def _CheckDevices(self, node_name, iv_names):
    # Verify each reassembled DRBD device exists and is not degraded
    for name, (dev, _, _) in iv_names.iteritems():
      self.cfg.SetDiskID(dev, node_name)

      result = _BlockdevFind(self, node_name, dev, self.instance)

      msg = result.fail_msg
      if msg or not result.payload:
          msg = "disk not found"
        raise errors.OpExecError("Can't find DRBD device %s: %s" %

      if result.payload.is_degraded:
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
  def _RemoveOldStorage(self, node_name, iv_names):
    # Best-effort removal of the replaced LVs; failures only warn so a
    # successful replace is not aborted at the cleanup stage
    for name, (_, old_lvs, _) in iv_names.iteritems():
      self.lu.LogInfo("Remove logical volumes for %s" % name)

        self.cfg.SetDiskID(lv, node_name)

        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
                             hint="remove unused LVs manually")
  def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
    """Replace a disk on the primary or secondary for DRBD 8.

    The algorithm for replace is quite complicated:

      1. for each disk to be replaced:

        1. create new LVs on the target node with unique names
        1. detach old LVs from the drbd device
        1. rename old LVs to name_replaced.<time_t>
        1. rename new LVs to old LVs
        1. attach the new LVs (with the old names now) to the drbd device

      1. wait for sync across all devices

      1. for each modified disk:

        1. remove old LVs (which have the name name_replaces.<time_t>)

    Failures are not very well handled.

    """
    # NOTE(review): this copy of the file appears to have lost several
    # physical lines (e.g. the "steps_total = 6" assignment, some call
    # continuation lines and a few "if"/"else" headers); only the visible
    # code is reproduced below -- confirm against the upstream file.

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.other_node, self.target_node])
    self._CheckVolumeGroup([self.target_node, self.other_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.other_node,
                                self.other_node == self.instance.primary_node,

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    iv_names = self._CreateNewStorage(self.target_node)

    # Step: for each lv, detach+rename*2+attach
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    for dev, old_lvs, new_lvs in iv_names.itervalues():
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)

      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
      result.Raise("Can't detach drbd from local storage on node"
                   " %s for device %s" % (self.target_node, dev.iv_name))
      #cfg.Update(instance)

      # ok, we created the new LVs, so now we know we have the needed
      # storage; as such, we proceed on the target node to rename
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
      # using the assumption that logical_id == physical_id (which in
      # turn is the unique_id on that node)

      # FIXME(iustin): use a better name for the replaced LVs
      temp_suffix = int(time.time())
      ren_fn = lambda d, suff: (d.physical_id[0],
                                d.physical_id[1] + "_replaced-%s" % suff)

      # Build the rename list based on what LVs exist on the node
      rename_old_to_new = []
      for to_ren in old_lvs:
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
        if not result.fail_msg and result.payload:
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))

      self.lu.LogInfo("Renaming the old LVs on the target node")
      result = self.rpc.call_blockdev_rename(self.target_node,
      result.Raise("Can't rename old LVs on node %s" % self.target_node)

      # Now we rename the new LVs to the old LVs
      self.lu.LogInfo("Renaming the new LVs on the target node")
      rename_new_to_old = [(new, old.physical_id)
                           for old, new in zip(old_lvs, new_lvs)]
      result = self.rpc.call_blockdev_rename(self.target_node,
      result.Raise("Can't rename new LVs on node %s" % self.target_node)

      # Intermediate steps of in memory modifications
      for old, new in zip(old_lvs, new_lvs):
        new.logical_id = old.logical_id
        self.cfg.SetDiskID(new, self.target_node)

      # We need to modify old_lvs so that removal later removes the
      # right LVs, not the newly added ones; note that old_lvs is a
      for disk in old_lvs:
        disk.logical_id = ren_fn(disk, temp_suffix)
        self.cfg.SetDiskID(disk, self.target_node)

      # Now that the new lvs have the old name, we can add them to the device
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
      result = self.rpc.call_blockdev_addchildren(self.target_node,
                                                  (dev, self.instance), new_lvs)
      msg = result.fail_msg
        # Attach failed: roll back by removing the freshly created LVs.
        for new_lv in new_lvs:
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("cleanup manually the unused logical"
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)

    cstep = itertools.count(5)

    if self.early_release:
      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)
      # TODO: Check if releasing locks early still makes sense
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
      # Release all resource locks except those used by the instance
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
                    keep=self.node_secondary_ip.keys())

    # Release all node locks while waiting for sync
    _ReleaseLocks(self.lu, locking.LEVEL_NODE)

    # TODO: Can the instance lock be downgraded here? Take the optional disk
    # shutdown in the caller into consideration.

    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)
  def _ExecDrbd8Secondary(self, feedback_fn):
    """Replace the secondary node for DRBD 8.

    The algorithm for replace is quite complicated:
      - for all disks of the instance:
        - create new LVs on the new node with same names
        - shutdown the drbd device on the old secondary
        - disconnect the drbd network on the primary
        - create the drbd device on the new secondary
        - network attach the drbd on the primary, using an artifice:
          the drbd code for Attach() will connect to the network if it
          finds a device which is connected to the good local disks but
          not network enabled
      - wait for sync across all devices
      - remove all disks from the old secondary

    Failures are not very well handled.

    """
    # NOTE(review): this copy of the file appears to have lost several
    # physical lines ("steps_total = 6", "iv_names = {}", the
    # p_minor assignments, various "if"/"else"/"try" headers and call
    # continuation lines); only the visible code is reproduced below --
    # confirm against the upstream file.
    pnode = self.instance.primary_node

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.instance.primary_node])
    self._CheckVolumeGroup([self.instance.primary_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.instance.primary_node, True, True)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    disks = _AnnotateDiskParams(self.instance, self.instance.disks, self.cfg)
    for idx, dev in enumerate(disks):
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
                      (self.new_node, idx))
      # we pass force_create=True to force LVM creation
      for new_lv in dev.children:
        _CreateBlockDevInner(self.lu, self.new_node, self.instance, new_lv,
                             True, _GetInstanceInfoText(self.instance), False)

    # Step 4: dbrd minors and drbd setups changes
    # after this, we must manually remove the drbd minors on both the
    # error and the success paths
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    minors = self.cfg.AllocateDRBDMinor([self.new_node
                                         for dev in self.instance.disks],
                                        self.instance.name)
    logging.debug("Allocated minors %r", minors)

    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
                      (self.new_node, idx))
      # create new devices on new_node; note that we create two IDs:
      # one without port, so the drbd will be activated without
      # networking information on the new node at this stage, and one
      # with network, for the latter activation in step 4
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
      if self.instance.primary_node == o_node1:
        # the instance is not on a three-node setup by construction
        assert self.instance.primary_node == o_node2, "Three-node instance?"

      new_alone_id = (self.instance.primary_node, self.new_node, None,
                      p_minor, new_minor, o_secret)
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
                    p_minor, new_minor, o_secret)

      iv_names[idx] = (dev, dev.children, new_net_id)
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                              logical_id=new_alone_id,
                              children=dev.children,
      (anno_new_drbd,) = _AnnotateDiskParams(self.instance, [new_drbd],
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance,
                              _GetInstanceInfoText(self.instance), False)
      except errors.GenericError:
        # Creation failed: give the allocated minors back before bailing
        self.cfg.ReleaseDRBDMinors(self.instance.name)

    # We have new devices, shutdown the drbd on the old secondary
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
      self.cfg.SetDiskID(dev, self.target_node)
      msg = self.rpc.call_blockdev_shutdown(self.target_node,
                                            (dev, self.instance)).fail_msg
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           "node: %s" % (idx, msg),
                           hint=("Please cleanup this device manually as"
                                 " soon as possible"))

    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
    result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
                                               self.instance.disks)[pnode]

    msg = result.fail_msg
      # detaches didn't succeed (unlikely)
      self.cfg.ReleaseDRBDMinors(self.instance.name)
      raise errors.OpExecError("Can't detach the disks from the network on"
                               " old node: %s" % (msg,))

    # if we managed to detach at least one, we update all the disks of
    # the instance to point to the new secondary
    self.lu.LogInfo("Updating instance configuration")
    for dev, _, new_logical_id in iv_names.itervalues():
      dev.logical_id = new_logical_id
      self.cfg.SetDiskID(dev, self.instance.primary_node)

    self.cfg.Update(self.instance, feedback_fn)

    # Release all node locks (the configuration has been updated)
    _ReleaseLocks(self.lu, locking.LEVEL_NODE)

    # and now perform the drbd attach
    self.lu.LogInfo("Attaching primary drbds to new secondary"
                    " (standalone => connected)")
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
                                           self.node_secondary_ip,
                                           (self.instance.disks, self.instance),
                                           self.instance.name,
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
        # Attach failures are non-fatal; sync status can be inspected later
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
                           hint=("please do a gnt-instance info to see the"
                                 " status of disks"))

    cstep = itertools.count(5)

    if self.early_release:
      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)
      # TODO: Check if releasing locks early still makes sense
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
      # Release all resource locks except those used by the instance
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
                    keep=self.node_secondary_ip.keys())

    # TODO: Can the instance lock be downgraded here? Take the optional disk
    # shutdown in the caller into consideration.

    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)
class LURepairNodeStorage(NoHooksLU):
  """Repairs the volume group on a node.

  NOTE(review): several physical lines appear to be missing from this
  copy (e.g. "REQ_BGL = False", the "try:" header in _CheckFaultyDisks
  and a few closing brackets); only the visible code is reproduced.

  """

  def CheckArguments(self):
    # Expand the node name and verify the storage type supports the
    # "fix consistency" repair operation.
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    if (constants.SO_FIX_CONSISTENCY not in
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " repaired" % storage_type,
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    # Only the node being repaired needs to be locked.
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],

  def _CheckFaultyDisks(self, instance, node_name):
    """Ensure faulty disks abort the opcode or at least warn."""
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
                                   " node '%s'" % (instance.name, node_name),
                                   errors.ECODE_STATE)
    except errors.OpPrereqError, err:
      # With ignore_consistency the failure is downgraded to a warning.
      if self.op.ignore_consistency:
        self.proc.LogWarning(str(err.args[0]))

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # Check whether any instance on this node has faulty disks
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
      if inst.admin_state != constants.ADMINST_UP:
      check_nodes = set(inst.all_nodes)
      check_nodes.discard(self.op.node_name)
      for inst_node_name in check_nodes:
        self._CheckFaultyDisks(inst, inst_node_name)

  def Exec(self, feedback_fn):
    # Run the actual repair (SO_FIX_CONSISTENCY) via RPC on the node.
    feedback_fn("Repairing storage unit '%s' on %s ..." %
                (self.op.name, self.op.node_name))

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_execute(self.op.node_name,
                                           self.op.storage_type, st_args,
                                           constants.SO_FIX_CONSISTENCY)
    result.Raise("Failed to repair storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))
class LUNodeEvacuate(NoHooksLU):
  """Evacuates instances off a list of nodes.

  NOTE(review): this copy of the file appears to have lost several
  physical lines ("REQ_BGL = False", various "else:" headers, closing
  brackets and call continuation lines); only the visible code is
  reproduced -- confirm against the upstream file.

  """

  # Maps opcode evacuation modes to the iallocator's evacuation modes
  _MODE2IALLOCATOR = {
    constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
    constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
    constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
  assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
  assert (frozenset(_MODE2IALLOCATOR.values()) ==
          constants.IALLOCATOR_NEVAC_MODES)

  def CheckArguments(self):
    # Exactly one of iallocator/remote_node must be given.
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      assert self.op.remote_node

      if self.op.remote_node == self.op.node_name:
        raise errors.OpPrereqError("Can not use evacuated node as a new"
                                   " secondary node", errors.ECODE_INVAL)

      if self.op.mode != constants.NODE_EVAC_SEC:
        raise errors.OpPrereqError("Without the use of an iallocator only"
                                   " secondary instances can be evacuated",
                                   errors.ECODE_INVAL)

    # All locks are shared; the actual work happens in sub-jobs.
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],

    # Determine nodes (via group) optimistically, needs verification once locks
    # have been acquired
    self.lock_nodes = self._DetermineNodes()

  def _DetermineNodes(self):
    """Gets the list of nodes to operate on.

    """
    if self.op.remote_node is None:
      # Iallocator will choose any node(s) in the same group
      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
      group_nodes = frozenset([self.op.remote_node])

    # Determine nodes to be locked
    return set([self.op.node_name]) | group_nodes

  def _DetermineInstances(self):
    """Builds list of instances to operate on.

    """
    assert self.op.mode in constants.NODE_EVAC_MODES

    if self.op.mode == constants.NODE_EVAC_PRI:
      # Primary instances only
      inst_fn = _GetNodePrimaryInstances
      assert self.op.remote_node is None, \
        "Evacuating primary instances requires iallocator"
    elif self.op.mode == constants.NODE_EVAC_SEC:
      # Secondary instances only
      inst_fn = _GetNodeSecondaryInstances
      assert self.op.mode == constants.NODE_EVAC_ALL
      inst_fn = _GetNodeInstances
      # TODO: In 2.6, change the iallocator interface to take an evacuation mode
        raise errors.OpPrereqError("Due to an issue with the iallocator"
                                   " interface it is not possible to evacuate"
                                   " all instances at once; specify explicitly"
                                   " whether to evacuate primary or secondary"
                                   errors.ECODE_INVAL)

    return inst_fn(self.cfg, self.op.node_name)

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        set(i.name for i in self._DetermineInstances())

    elif level == locking.LEVEL_NODEGROUP:
      # Lock node groups for all potential target nodes optimistically, needs
      # verification once nodes have been acquired
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)

    elif level == locking.LEVEL_NODE:
      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes

  def CheckPrereq(self):
    # Verify that the optimistically-acquired locks still match reality.
    owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
    owned_nodes = self.owned_locks(locking.LEVEL_NODE)
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)

    need_nodes = self._DetermineNodes()

    if not owned_nodes.issuperset(need_nodes):
      raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
                                 " locks were acquired, current nodes are"
                                 " are '%s', used to be '%s'; retry the"
                                 (self.op.node_name,
                                  utils.CommaJoin(need_nodes),
                                  utils.CommaJoin(owned_nodes)),
                                 errors.ECODE_STATE)

    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
    if owned_groups != wanted_groups:
      raise errors.OpExecError("Node groups changed since locks were acquired,"
                               " current groups are '%s', used to be '%s';"
                               " retry the operation" %
                               (utils.CommaJoin(wanted_groups),
                                utils.CommaJoin(owned_groups)))

    # Determine affected instances
    self.instances = self._DetermineInstances()
    self.instance_names = [i.name for i in self.instances]

    if set(self.instance_names) != owned_instances:
      raise errors.OpExecError("Instances on node '%s' changed since locks"
                               " were acquired, current instances are '%s',"
                               " used to be '%s'; retry the operation" %
                               (self.op.node_name,
                                utils.CommaJoin(self.instance_names),
                                utils.CommaJoin(owned_instances)))

    if self.instance_names:
      self.LogInfo("Evacuating instances from node '%s': %s",
                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
      self.LogInfo("No instances to evacuate from node '%s'",

    if self.op.remote_node is not None:
      for i in self.instances:
        if i.primary_node == self.op.remote_node:
          raise errors.OpPrereqError("Node %s is the primary node of"
                                     " instance %s, cannot use it as"
                                     (self.op.remote_node, i.name),
                                     errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)

    if not self.instance_names:
      # No instances to evacuate

    elif self.op.iallocator is not None:
      # TODO: Implement relocation to other group
      evac_mode = self._MODE2IALLOCATOR[self.op.mode]
      req = iallocator.IAReqNodeEvac(evac_mode=evac_mode,
                                     instances=list(self.instance_names))
      ial = iallocator.IAllocator(self.cfg, self.rpc, req)

      ial.Run(self.op.iallocator)

      if not ial.success:
        raise errors.OpPrereqError("Can't compute node evacuation using"
                                   " iallocator '%s': %s" %
                                   (self.op.iallocator, ial.info),
                                   errors.ECODE_NORES)

      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)

    elif self.op.remote_node is not None:
      assert self.op.mode == constants.NODE_EVAC_SEC
      # One replace-disks job per instance, moving the secondary
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
                                        remote_node=self.op.remote_node,
                                        mode=constants.REPLACE_DISK_CHG,
                                        early_release=self.op.early_release)]
        for instance_name in self.instance_names
      raise errors.ProgrammerError("No iallocator or remote node")

    return ResultWithJobs(jobs)
def _SetOpEarlyRelease(early_release, op):
  """Sets C{early_release} flag on opcodes if available.

  @type early_release: bool
  @param early_release: Value to set on the opcode, if it supports it
  @param op: Opcode to modify
  @return: The (possibly modified) opcode

  """
  try:
    op.early_release = early_release
  except AttributeError:
    # Only opcodes without the attribute may legitimately lack it;
    # OpInstanceReplaceDisks is expected to always support the flag
    assert not isinstance(op, opcodes.OpInstanceReplaceDisks)

  return op
def _NodeEvacDest(use_nodes, group, nodes):
  """Returns group or nodes depending on caller's choice.

  @type use_nodes: bool
  @param use_nodes: Whether to return the node names instead of the group
  @param group: Node group name
  @param nodes: List of node names
  @return: Comma-joined node names if C{use_nodes} is true, otherwise the
    group name

  """
  if use_nodes:
    return utils.CommaJoin(nodes)
  else:
    return group
def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
  """Unpacks the result of change-group and node-evacuate iallocator requests.

  Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
  L{constants.IALLOCATOR_MODE_CHG_GROUP}.

  @type lu: L{LogicalUnit}
  @param lu: Logical unit instance
  @type alloc_result: tuple/list
  @param alloc_result: Result from iallocator
  @type early_release: bool
  @param early_release: Whether to release locks early if possible
  @type use_nodes: bool
  @param use_nodes: Whether to display node names instead of groups

  """
  (moved, failed, jobs) = alloc_result

  # NOTE(review): the "if failed:" / "if moved:" guards and the trailing
  # "for ops in jobs" of the returned list comprehension appear to be
  # missing from this copy -- confirm against the upstream file.
    failreason = utils.CommaJoin("%s (%s)" % (name, reason)
                                 for (name, reason) in failed)
    lu.LogWarning("Unable to evacuate instances %s", failreason)
    raise errors.OpExecError("Unable to evacuate instances %s" % failreason)

    lu.LogInfo("Instances to be moved: %s",
               utils.CommaJoin("%s (to %s)" %
                               (name, _NodeEvacDest(use_nodes, group, nodes))
                               for (name, group, nodes) in moved))

  return [map(compat.partial(_SetOpEarlyRelease, early_release),
              map(opcodes.OpCode.LoadOpCode, ops))
def _DiskSizeInBytesToMebibytes(lu, size):
  """Converts a disk size in bytes to mebibytes.

  Warns and rounds up if the size isn't an even multiple of 1 MiB.

  @type lu: L{LogicalUnit}
  @param lu: Logical unit used for emitting the warning
  @type size: int
  @param size: Disk size in bytes
  @rtype: int
  @return: Size in MiB, rounded up to the next full mebibyte

  """
  (mib, remainder) = divmod(size, 1024 * 1024)

  if remainder != 0:
    # Round up so that a later wipe of the "new" area can never clobber
    # existing data in the partial trailing mebibyte
    lu.LogWarning("Disk size is not an even multiple of 1 MiB; rounding up"
                  " to not overwrite existing data (%s bytes will not be"
                  " wiped)", (1024 * 1024) - remainder)
    mib += 1

  return mib
class LUInstanceGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  NOTE(review): this copy of the file appears to have lost several
  physical lines ("REQ_BGL = False", "env = {", "return env",
  "return (nl, nl)", "disk = self.disk", several "if"/"else" headers
  and call continuation lines); only the visible code is reproduced --
  confirm against the upstream file.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks to the resource level
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        _CopyLockList(self.needed_locks[locking.LEVEL_NODE])

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
      "DISK": self.op.disk,
      "AMOUNT": self.op.amount,
      "ABSOLUTE": self.op.absolute,
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    nodenames = list(instance.all_nodes)
    for node in nodenames:
      _CheckNodeOnline(self, node)

    self.instance = instance

    if instance.disk_template not in constants.DTS_GROWABLE:
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing", errors.ECODE_INVAL)

    self.disk = instance.FindDisk(self.op.disk)

    if self.op.absolute:
      # Absolute mode: op.amount is the new total size
      self.target = self.op.amount
      self.delta = self.target - self.disk.size
        raise errors.OpPrereqError("Requested size (%s) is smaller than "
                                   "current disk size (%s)" %
                                   (utils.FormatUnit(self.target, "h"),
                                    utils.FormatUnit(self.disk.size, "h")),
                                   errors.ECODE_STATE)
      # Relative mode: op.amount is the increment
      self.delta = self.op.amount
      self.target = self.disk.size + self.delta
        raise errors.OpPrereqError("Requested increment (%s) is negative" %
                                   utils.FormatUnit(self.delta, "h"),
                                   errors.ECODE_INVAL)

    if instance.disk_template not in (constants.DT_FILE,
                                      constants.DT_SHARED_FILE,
      # TODO: check the free disk space for file, when that feature will be
      _CheckNodesFreeDiskPerVG(self, nodenames,
                               self.disk.ComputeGrowth(self.delta))

  def Exec(self, feedback_fn):
    """Execute disk grow.

    """
    instance = self.instance

    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    wipe_disks = self.cfg.GetClusterInfo().prealloc_wipe_disks

    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
      raise errors.OpExecError("Cannot activate block device to grow")

    feedback_fn("Growing disk %s of instance '%s' by %s to %s" %
                (self.op.disk, instance.name,
                 utils.FormatUnit(self.delta, "h"),
                 utils.FormatUnit(self.target, "h")))

    # First run all grow ops in dry-run mode
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
      result.Raise("Dry-run grow request failed to node %s" % node)

      # Get disk size from primary node for wiping
      result = self.rpc.call_blockdev_getsize(instance.primary_node, [disk])
      result.Raise("Failed to retrieve disk size from node '%s'" %
                   instance.primary_node)

      (disk_size_in_bytes, ) = result.payload

      if disk_size_in_bytes is None:
        raise errors.OpExecError("Failed to retrieve disk size from primary"
                                 " node '%s'" % instance.primary_node)

      old_disk_size = _DiskSizeInBytesToMebibytes(self, disk_size_in_bytes)

      assert old_disk_size >= disk.size, \
        ("Retrieved disk size too small (got %s, should be at least %s)" %
         (old_disk_size, disk.size))
      old_disk_size = None

    # We know that (as far as we can test) operations across different
    # nodes will succeed, time to run it for real on the backing storage
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
      result.Raise("Grow request failed to node %s" % node)

    # And now execute it for logical storage, on the primary node
    node = instance.primary_node
    self.cfg.SetDiskID(disk, node)
    result = self.rpc.call_blockdev_grow(node, (disk, instance), self.delta,
    result.Raise("Grow request failed to node %s" % node)

    disk.RecordGrow(self.delta)
    self.cfg.Update(instance, feedback_fn)

    # Changes have been recorded, release node lock
    _ReleaseLocks(self, locking.LEVEL_NODE)

    # Downgrade lock while waiting for sync
    self.glm.downgrade(locking.LEVEL_INSTANCE)

    assert wipe_disks ^ (old_disk_size is None)

      assert instance.disks[self.op.disk] == disk

      # Wipe newly added disk space
      _WipeDisks(self, instance,
                 disks=[(self.op.disk, disk, old_disk_size)])

    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
        self.proc.LogWarning("Disk sync-ing has not returned a good"
                             " status; please check the instance")
      if instance.admin_state != constants.ADMINST_UP:
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
    elif instance.admin_state != constants.ADMINST_UP:
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
                           " not supposed to be running because no wait for"
                           " sync mode was requested")

    assert self.owned_locks(locking.LEVEL_NODE_RES)
    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
12160 class LUInstanceQueryData(NoHooksLU):
12161 """Query runtime instance data.
  def ExpandNames(self):
    """Expands the names and, if requested, declares the needed locks."""
    self.needed_locks = {}

    # Use locking if requested or when non-static information is wanted
    if not (self.op.static or self.op.use_locking):
      self.LogWarning("Non-static data requested, locks need to be acquired")
      self.op.use_locking = True

    if self.op.instances or not self.op.use_locking:
      # Expand instance names right here
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
      # Will use acquired locks
      self.wanted_names = None

    if self.op.use_locking:
      self.share_locks = _ShareAll()

      if self.wanted_names is None:
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names

      # NOTE(review): "else:" headers appear to be missing above in this
      # copy of the file -- confirm against upstream.
      self.needed_locks[locking.LEVEL_NODEGROUP] = []
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
  def DeclareLocks(self, level):
    """Declares group/node locks once instance locks are owned."""
    if self.op.use_locking:
      if level == locking.LEVEL_NODEGROUP:
        owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        # NOTE(review): the inner "for group_uuid in" line of this
        # comprehension appears to be missing from this copy.
        self.needed_locks[locking.LEVEL_NODEGROUP] = \
          frozenset(group_uuid
                    for instance_name in owned_instances
                      self.cfg.GetInstanceNodeGroups(instance_name))

      elif level == locking.LEVEL_NODE:
        self._LockInstancesNodes()
  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    if self.wanted_names is None:
      assert self.op.use_locking, "Locking was not used"
      self.wanted_names = owned_instances

    instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))

    if self.op.use_locking:
      # NOTE(review): the continuation of this call and the following
      # "else:" header appear to be missing from this copy.
      _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
      assert not (owned_instances or owned_groups or owned_nodes)

    self.wanted_instances = instances.values()
  def _ComputeBlockdevStatus(self, node, instance, dev):
    """Returns the status of a block device

    NOTE(review): several early-return lines of this method appear to be
    missing from this copy of the file -- confirm against upstream.
    """
    if self.op.static or not node:

    self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_find(node, dev)

    result.Raise("Can't compute disk status for %s" % instance.name)

    status = result.payload

    return (status.dev_path, status.major, status.minor,
            status.sync_percent, status.estimated_time,
            status.is_degraded, status.ldisk_status)
  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    Annotates the disk with its parameters first, then delegates to
    L{_ComputeDiskStatusInner}.
    """
    (anno_dev,) = _AnnotateDiskParams(instance, [dev], self.cfg)

    return self._ComputeDiskStatusInner(instance, snode, anno_dev)
  def _ComputeDiskStatusInner(self, instance, snode, dev):
    """Compute block device status.

    @attention: The device has to be annotated already.

    NOTE(review): this copy of the file appears truncated (the "else:"
    headers, part of the children computation and the "return {" opener
    of the result dictionary are missing) -- confirm against upstream.
    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
        snode = dev.logical_id[0]

    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance, dev)

      # Recurse into child devices for a full status tree
      dev_children = map(compat.partial(self._ComputeDiskStatusInner,
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,
# Collect the full per-instance data dict for every wanted instance:
# config data, group names, live run state and per-disk status.
# NOTE(review): the final "return result" is elided in this view.
12300 def Exec(self, feedback_fn):
12301 """Gather and return data"""
12304 cluster = self.cfg.GetClusterInfo()
# Resolve every node touched by any wanted instance in one config call.
12306 node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
12307 nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
12309 groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
12310 for node in nodes.values()))
# Helper to translate a group UUID to its human-readable name.
12312 group2name_fn = lambda uuid: groups[uuid].name
12314 for instance in self.wanted_instances:
12315 pnode = nodes[instance.primary_node]
# Static query or offline primary: no live state can be fetched.
12317 if self.op.static or pnode.offline:
12318 remote_state = None
12320 self.LogWarning("Primary node %s is marked offline, returning static"
12321 " information only for instance %s" %
12322 (pnode.name, instance.name))
# Otherwise ask the primary node whether the instance is running.
12324 remote_info = self.rpc.call_instance_info(instance.primary_node,
12326 instance.hypervisor)
12327 remote_info.Raise("Error checking node %s" % instance.primary_node)
12328 remote_info = remote_info.payload
12329 if remote_info and "state" in remote_info:
12330 remote_state = "up"
12332 if instance.admin_state == constants.ADMINST_UP:
12333 remote_state = "down"
12335 remote_state = instance.admin_state
# Per-disk status dicts (secondary node derived per disk).
12337 disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
12340 snodes_group_uuids = [nodes[snode_name].group
12341 for snode_name in instance.secondary_nodes]
# Assemble the result entry; keys mirror what clients/hooks expect.
12343 result[instance.name] = {
12344 "name": instance.name,
12345 "config_state": instance.admin_state,
12346 "run_state": remote_state,
12347 "pnode": instance.primary_node,
12348 "pnode_group_uuid": pnode.group,
12349 "pnode_group_name": group2name_fn(pnode.group),
12350 "snodes": instance.secondary_nodes,
12351 "snodes_group_uuids": snodes_group_uuids,
12352 "snodes_group_names": map(group2name_fn, snodes_group_uuids),
12354 # this happens to be the same format used for hooks
12355 "nics": _NICListToTuple(self, instance.nics),
12356 "disk_template": instance.disk_template,
12358 "hypervisor": instance.hypervisor,
12359 "network_port": instance.network_port,
12360 "hv_instance": instance.hvparams,
12361 "hv_actual": cluster.FillHV(instance, skip_globals=True),
12362 "be_instance": instance.beparams,
12363 "be_actual": cluster.FillBE(instance),
12364 "os_instance": instance.osparams,
12365 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
12366 "serial_no": instance.serial_no,
12367 "mtime": instance.mtime,
12368 "ctime": instance.ctime,
12369 "uuid": instance.uuid,
# Attach a per-modification private-data object (built by private_fn) to
# each (op, idx, params) tuple, yielding 4-tuples for ApplyContainerMods.
# NOTE(review): the branch body for private_fn being None is elided here;
# presumably it substitutes a no-op/None private value.
12375 def PrepareContainerMods(mods, private_fn):
12376 """Prepares a list of container modifications by adding a private data field.
12378 @type mods: list of tuples; (operation, index, parameters)
12379 @param mods: List of modifications
12380 @type private_fn: callable or None
12381 @param private_fn: Callable for constructing a private data field for a
12386 if private_fn is None:
# One fresh private object per modification.
12391 return [(op, idx, params, fn()) for (op, idx, params) in mods]
#: Type description for changes as returned by L{ApplyContainerMods}'s
#: callbacks: either None or a list of (name, value) 2-tuples with a
#: non-empty string as the name.
12396 _TApplyContModsCbChanges = \
12397 ht.TMaybeListOf(ht.TAnd(ht.TIsLength(2), ht.TItems([
12398 ht.TNonEmptyString,
# Generic driver for add/modify/remove operations on a list container
# (disks or NICs), invoking the supplied callbacks and recording changes.
# NOTE(review): several elided lines (index normalization for idx == -1,
# the ADD insert-position branch, and IndexError guards) are not visible.
12403 def ApplyContainerMods(kind, container, chgdesc, mods,
12404 create_fn, modify_fn, remove_fn):
12405 """Applies descriptions in C{mods} to C{container}.
12408 @param kind: One-word item description
12409 @type container: list
12410 @param container: Container to modify
12411 @type chgdesc: None or list
12412 @param chgdesc: List of applied changes
12414 @param mods: Modifications as returned by L{PrepareContainerMods}
12415 @type create_fn: callable
12416 @param create_fn: Callback for creating a new item (L{constants.DDM_ADD});
12417 receives absolute item index, parameters and private data object as added
12418 by L{PrepareContainerMods}, returns tuple containing new item and changes
12420 @type modify_fn: callable
12421 @param modify_fn: Callback for modifying an existing item
12422 (L{constants.DDM_MODIFY}); receives absolute item index, item, parameters
12423 and private data object as added by L{PrepareContainerMods}, returns
12425 @type remove_fn: callable
12426 @param remove_fn: Callback on removing item; receives absolute item index,
12427 item and private data object as added by L{PrepareContainerMods}
12430 for (op, idx, params, private) in mods:
# Only -1 is accepted as a relative index (meaning "last item").
12433 absidx = len(container) - 1
12435 raise IndexError("Not accepting negative indices other than -1")
12436 elif idx > len(container):
12437 raise IndexError("Got %s index %s, but there are only %s" %
12438 (kind, idx, len(container)))
12444 if op == constants.DDM_ADD:
12445 # Calculate where item will be added
12447 addidx = len(container)
# A None create_fn means additions are not supported for this caller.
12451 if create_fn is None:
12454 (item, changes) = create_fn(addidx, params, private)
12457 container.append(item)
12460 assert idx <= len(container)
12461 # list.insert does so before the specified index
12462 container.insert(idx, item)
12464 # Retrieve existing item
12466 item = container[absidx]
12468 raise IndexError("Invalid %s index %s" % (kind, idx))
12470 if op == constants.DDM_REMOVE:
# remove_fn is optional; a plain removal still records the change.
12473 if remove_fn is not None:
12474 remove_fn(absidx, item, private)
12476 changes = [("%s/%s" % (kind, absidx), "remove")]
12478 assert container[absidx] == item
12479 del container[absidx]
12480 elif op == constants.DDM_MODIFY:
12481 if modify_fn is not None:
12482 changes = modify_fn(absidx, item, params, private)
12484 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
# Validate callback output against the declared type description.
12486 assert _TApplyContModsCbChanges(changes)
12488 if not (chgdesc is None or changes is None):
12489 chgdesc.extend(changes)
# Renumber the iv_name ("disk/N") of each disk, starting at base_index.
# Used after container modifications to keep names sequential.
12492 def _UpdateIvNames(base_index, disks):
12493 """Updates the C{iv_name} attribute of disks.
12495 @type disks: list of L{objects.Disk}
12498 for (idx, disk) in enumerate(disks):
12499 disk.iv_name = "disk/%s" % (base_index + idx, )
# Plain holder passed as the "private" object through PrepareContainerMods /
# ApplyContainerMods; _PrepareNicModification fills in .params and .filled.
# NOTE(review): the attribute initializations in __init__ are elided here.
12502 class _InstNicModPrivate:
12503 """Data structure for network interface modifications.
12505 Used by L{LUInstanceSetParams}.
12508 def __init__(self):
12513 class LUInstanceSetParams(LogicalUnit):
12514 """Modifies an instance's parameters.
# Hook path and object type used by the hooks environment machinery.
12517 HPATH = "instance-modify"
12518 HTYPE = constants.HTYPE_INSTANCE
# Convert the legacy 2-tuple (op, params) modification format to the new
# 3-tuple (op, index, params) format; only one add/remove is allowed in
# legacy mode, and any other op value is treated as a modify index.
# NOTE(review): the "result = []" initialization and the add/remove
# uniqueness bookkeeping lines are elided in this view.
12522 def _UpgradeDiskNicMods(kind, mods, verify_fn):
12523 assert ht.TList(mods)
12524 assert not mods or len(mods[0]) in (2, 3)
# A 2-element first entry marks the whole list as legacy-format.
12526 if mods and len(mods[0]) == 2:
12530 for op, params in mods:
12531 if op in (constants.DDM_ADD, constants.DDM_REMOVE):
# Legacy add/remove always targets the end of the container.
12532 result.append((op, -1, params))
12536 raise errors.OpPrereqError("Only one %s add or remove operation is"
12537 " supported at a time" % kind,
12538 errors.ECODE_INVAL)
# Anything else is interpreted as "modify item at index <op>".
12540 result.append((constants.DDM_MODIFY, op, params))
12542 assert verify_fn(result)
# Validate each requested modification: parameter types must match
# key_types, removals take no parameters, and add/modify entries are
# further checked by the kind-specific item_fn.
12549 def _CheckMods(kind, mods, key_types, item_fn):
12550 """Ensures requested disk/NIC modifications are valid.
12553 for (op, _, params) in mods:
12554 assert ht.TDict(params)
# Raises OpPrereqError on unknown keys or wrong value types.
12556 utils.ForceDictType(params, key_types)
12558 if op == constants.DDM_REMOVE:
12560 raise errors.OpPrereqError("No settings should be passed when"
12561 " removing a %s" % kind,
12562 errors.ECODE_INVAL)
12563 elif op in (constants.DDM_ADD, constants.DDM_MODIFY):
12564 item_fn(op, params)
# Any other op value is a programming error, not user error.
12566 raise errors.ProgrammerError("Unhandled operation '%s'" % op)
# Kind-specific validation for a single disk modification: adds need a
# valid access mode and size, modifications must not try to resize.
# NOTE(review): the size-conversion statement guarded by the try below
# is elided in this view.
12569 def _VerifyDiskModification(op, params):
12570 """Verifies a disk modification.
12573 if op == constants.DDM_ADD:
# Default the access mode to read-write if not given.
12574 mode = params.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
12575 if mode not in constants.DISK_ACCESS_SET:
12576 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
12577 errors.ECODE_INVAL)
# Size is mandatory when adding a disk.
12579 size = params.get(constants.IDISK_SIZE, None)
12581 raise errors.OpPrereqError("Required disk parameter '%s' missing" %
12582 constants.IDISK_SIZE, errors.ECODE_INVAL)
12586 except (TypeError, ValueError), err:
12587 raise errors.OpPrereqError("Invalid disk size parameter: %s" % err,
12588 errors.ECODE_INVAL)
# Store the normalized size back into the parameters.
12590 params[constants.IDISK_SIZE] = size
12592 elif op == constants.DDM_MODIFY and constants.IDISK_SIZE in params:
# Resizing goes through OpInstanceGrowDisk, not SetParams.
12593 raise errors.OpPrereqError("Disk size change not possible, use"
12594 " grow-disk", errors.ECODE_INVAL)
# Kind-specific validation for a single NIC modification: normalizes
# IP/bridge/link "none" values, validates IPs and MAC addresses, and
# defaults the MAC to "auto" on additions.
12597 def _VerifyNicModification(op, params):
12598 """Verifies a network interface modification.
12601 if op in (constants.DDM_ADD, constants.DDM_MODIFY):
12602 ip = params.get(constants.INIC_IP, None)
# The literal string "none" means "clear the IP".
12605 elif ip.lower() == constants.VALUE_NONE:
12606 params[constants.INIC_IP] = None
12607 elif not netutils.IPAddress.IsValid(ip):
12608 raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
12609 errors.ECODE_INVAL)
# "bridge" is the legacy spelling of the link parameter; the two are
# mutually exclusive.
12611 bridge = params.get("bridge", None)
12612 link = params.get(constants.INIC_LINK, None)
12613 if bridge and link:
12614 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
12615 " at the same time", errors.ECODE_INVAL)
12616 elif bridge and bridge.lower() == constants.VALUE_NONE:
12617 params["bridge"] = None
12618 elif link and link.lower() == constants.VALUE_NONE:
12619 params[constants.INIC_LINK] = None
12621 if op == constants.DDM_ADD:
# New NICs get an auto-generated MAC unless one is supplied.
12622 macaddr = params.get(constants.INIC_MAC, None)
12623 if macaddr is None:
12624 params[constants.INIC_MAC] = constants.VALUE_AUTO
12626 if constants.INIC_MAC in params:
12627 macaddr = params[constants.INIC_MAC]
12628 if macaddr not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12629 macaddr = utils.NormalizeAndValidateMac(macaddr)
12631 if op == constants.DDM_MODIFY and macaddr == constants.VALUE_AUTO:
# "auto" only makes sense at creation time, never on modify.
12632 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
12633 " modifying an existing NIC",
12634 errors.ECODE_INVAL)
# Early opcode-level validation: at least one change must be requested,
# the legacy disk/NIC formats are upgraded, and per-kind checks are run.
12636 def CheckArguments(self):
# Reject a no-op request outright.
12637 if not (self.op.nics or self.op.disks or self.op.disk_template or
12638 self.op.hvparams or self.op.beparams or self.op.os_name or
12639 self.op.offline is not None or self.op.runtime_mem):
12640 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
12642 if self.op.hvparams:
# Global hypervisor parameters may only be changed cluster-wide.
12643 _CheckGlobalHvParams(self.op.hvparams)
# Upgrade legacy 2-tuple modification lists to the 3-tuple format.
12645 self.op.disks = self._UpgradeDiskNicMods(
12646 "disk", self.op.disks, opcodes.OpInstanceSetParams.TestDiskModifications)
12647 self.op.nics = self._UpgradeDiskNicMods(
12648 "NIC", self.op.nics, opcodes.OpInstanceSetParams.TestNicModifications)
12650 # Check disk modifications
12651 self._CheckMods("disk", self.op.disks, constants.IDISK_PARAMS_TYPES,
12652 self._VerifyDiskModification)
# Template conversion and individual disk changes are exclusive.
12654 if self.op.disks and self.op.disk_template is not None:
12655 raise errors.OpPrereqError("Disk template conversion and other disk"
12656 " changes not supported at the same time",
12657 errors.ECODE_INVAL)
# Mirrored templates (e.g. DRBD) need an explicit secondary node.
12659 if (self.op.disk_template and
12660 self.op.disk_template in constants.DTS_INT_MIRROR and
12661 self.op.remote_node is None):
12662 raise errors.OpPrereqError("Changing the disk template to a mirrored"
12663 " one requires specifying a secondary node",
12664 errors.ECODE_INVAL)
12666 # Check NIC modifications
12667 self._CheckMods("NIC", self.op.nics, constants.INIC_PARAMS_TYPES,
12668 self._VerifyNicModification)
# Declare the locks this LU needs: the instance itself plus its nodes
# (exclusive, see comment below) and node-resource locks.
12670 def ExpandNames(self):
12671 self._ExpandAndLockInstance()
12672 # Can't even acquire node locks in shared mode as upcoming changes in
12673 # Ganeti 2.6 will start to modify the node object on disk conversion
12674 self.needed_locks[locking.LEVEL_NODE] = []
12675 self.needed_locks[locking.LEVEL_NODE_RES] = []
# Node locks are recalculated once the instance's nodes are known.
12676 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
# Per-level lock refinement: add the instance's nodes (and, for disk
# template conversion, the new secondary) at LEVEL_NODE; mirror the node
# lock list onto LEVEL_NODE_RES when converting templates.
12678 def DeclareLocks(self, level):
12679 # TODO: Acquire group lock in shared mode (disk parameters)
12680 if level == locking.LEVEL_NODE:
12681 self._LockInstancesNodes()
12682 if self.op.disk_template and self.op.remote_node:
# Resolve the user-supplied remote node name before locking it.
12683 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
12684 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
12685 elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
# Copy, not alias: the two lock lists are released independently.
12687 self.needed_locks[locking.LEVEL_NODE_RES] = \
12688 _CopyLockList(self.needed_locks[locking.LEVEL_NODE])
# Build the hooks environment, overriding instance attributes with the
# values computed in CheckPrereq (memory, vcpus, NICs, disk template).
# NOTE(review): the "args = {}" initialization and the final "return env"
# are elided in this view.
12690 def BuildHooksEnv(self):
12691 """Build hooks env.
12693 This runs on the master, primary and secondaries.
# Only export backend parameters that were actually computed.
12697 if constants.BE_MINMEM in self.be_new:
12698 args["minmem"] = self.be_new[constants.BE_MINMEM]
12699 if constants.BE_MAXMEM in self.be_new:
12700 args["maxmem"] = self.be_new[constants.BE_MAXMEM]
12701 if constants.BE_VCPUS in self.be_new:
12702 args["vcpus"] = self.be_new[constants.BE_VCPUS]
12703 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
12704 # information at all.
# Export the post-modification NIC list, filled with cluster defaults.
12706 if self._new_nics is not None:
12709 for nic in self._new_nics:
12710 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
12711 mode = nicparams[constants.NIC_MODE]
12712 link = nicparams[constants.NIC_LINK]
12713 nics.append((nic.ip, nic.mac, mode, link))
12715 args["nics"] = nics
12717 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
12718 if self.op.disk_template:
12719 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
12720 if self.op.runtime_mem:
12721 env["RUNTIME_MEMORY"] = self.op.runtime_mem
# Hooks run on the master node plus all of the instance's nodes.
# NOTE(review): the "return (nl, nl)" line is elided in this view.
12725 def BuildHooksNodes(self):
12726 """Build hooks nodes.
12729 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
# Validate one NIC add/modify against cluster policy: merge old and new
# nicparams, check bridge existence (bridged mode) or IP presence
# (routed mode), and validate/generate/reserve the MAC address.
# Results are stashed on the private object for the later apply phase.
12732 def _PrepareNicModification(self, params, private, old_ip, old_params,
# Only real NIC parameters are merged; IP/MAC are handled separately.
12734 update_params_dict = dict([(key, params[key])
12735 for key in constants.NICS_PARAMETERS
# Legacy "bridge" is an alias for the link parameter.
12738 if "bridge" in params:
12739 update_params_dict[constants.NIC_LINK] = params["bridge"]
12741 new_params = _GetUpdatedParams(old_params, update_params_dict)
12742 utils.ForceDictType(new_params, constants.NICS_PARAMETER_TYPES)
# Fill with cluster defaults and syntax-check the result.
12744 new_filled_params = cluster.SimpleFillNIC(new_params)
12745 objects.NIC.CheckParameterSyntax(new_filled_params)
12747 new_mode = new_filled_params[constants.NIC_MODE]
12748 if new_mode == constants.NIC_MODE_BRIDGED:
# Bridged NICs need the bridge to exist on the primary node; a failed
# check is a warning under --force, an error otherwise.
12749 bridge = new_filled_params[constants.NIC_LINK]
12750 msg = self.rpc.call_bridges_exist(pnode, [bridge]).fail_msg
12752 msg = "Error checking bridges on node '%s': %s" % (pnode, msg)
12754 self.warn.append(msg)
12756 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
12758 elif new_mode == constants.NIC_MODE_ROUTED:
# Routed NICs must keep a valid IP address.
12759 ip = params.get(constants.INIC_IP, old_ip)
12761 raise errors.OpPrereqError("Cannot set the NIC IP address to None"
12762 " on a routed NIC", errors.ECODE_INVAL)
12764 if constants.INIC_MAC in params:
12765 mac = params[constants.INIC_MAC]
12767 raise errors.OpPrereqError("Cannot unset the NIC MAC address",
12768 errors.ECODE_INVAL)
12769 elif mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
12770 # otherwise generate the MAC address
12771 params[constants.INIC_MAC] = \
12772 self.cfg.GenerateMAC(self.proc.GetECId())
12774 # or validate/reserve the current one
12776 self.cfg.ReserveMAC(mac, self.proc.GetECId())
12777 except errors.ReservationError:
12778 raise errors.OpPrereqError("MAC address '%s' already in use"
12779 " in cluster" % mac,
12780 errors.ECODE_NOTUNIQUE)
# Hand the validated parameter sets to the apply phase.
12782 private.params = new_params
12783 private.filled = new_filled_params
# Main prerequisite check for instance modification. Validates, in order:
# disk template conversion, hvparams, beparams (incl. CPU mask vs vcpus),
# osparams, memory growth feasibility, runtime memory ballooning, and the
# disk/NIC container modifications (dry-run on copies). Also pre-computes
# the new NIC list so hooks can use it.
# NOTE(review): numerous lines are elided in this view (variable
# initializations, else-branches, some continuation arguments).
12785 def CheckPrereq(self):
12786 """Check prerequisites.
12788 This only checks the instance list against the existing names.
12791 # checking the new params on the primary/secondary nodes
12793 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12794 cluster = self.cluster = self.cfg.GetClusterInfo()
12795 assert self.instance is not None, \
12796 "Cannot retrieve locked instance %s" % self.op.instance_name
12797 pnode = instance.primary_node
12798 nodelist = list(instance.all_nodes)
12799 pnode_info = self.cfg.GetNodeInfo(pnode)
12800 self.diskparams = self.cfg.GetInstanceDiskParams(instance)
12802 # Prepare disk/NIC modifications
12803 self.diskmod = PrepareContainerMods(self.op.disks, None)
12804 self.nicmod = PrepareContainerMods(self.op.nics, _InstNicModPrivate)
# OS change: verify the target OS exists on the primary (unless forced).
12807 if self.op.os_name and not self.op.force:
12808 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
12809 self.op.force_variant)
12810 instance_os = self.op.os_name
12812 instance_os = instance.os
12814 assert not (self.op.disk_template and self.op.disks), \
12815 "Can't modify disk template and apply disk changes at the same time"
# Disk template conversion checks.
12817 if self.op.disk_template:
12818 if instance.disk_template == self.op.disk_template:
12819 raise errors.OpPrereqError("Instance already has disk template %s" %
12820 instance.disk_template, errors.ECODE_INVAL)
# Only conversions listed in _DISK_CONVERSIONS are implemented.
12822 if (instance.disk_template,
12823 self.op.disk_template) not in self._DISK_CONVERSIONS:
12824 raise errors.OpPrereqError("Unsupported disk template conversion from"
12825 " %s to %s" % (instance.disk_template,
12826 self.op.disk_template),
12827 errors.ECODE_INVAL)
12828 _CheckInstanceState(self, instance, INSTANCE_DOWN,
12829 msg="cannot change disk template")
12830 if self.op.disk_template in constants.DTS_INT_MIRROR:
12831 if self.op.remote_node == pnode:
12832 raise errors.OpPrereqError("Given new secondary node %s is the same"
12833 " as the primary node of the instance" %
12834 self.op.remote_node, errors.ECODE_STATE)
12835 _CheckNodeOnline(self, self.op.remote_node)
12836 _CheckNodeNotDrained(self, self.op.remote_node)
12837 # FIXME: here we assume that the old instance type is DT_PLAIN
12838 assert instance.disk_template == constants.DT_PLAIN
# Verify the new secondary has enough free space per volume group.
12839 disks = [{constants.IDISK_SIZE: d.size,
12840 constants.IDISK_VG: d.logical_id[0]}
12841 for d in instance.disks]
12842 required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
12843 _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
# Check the secondary's group instance policy as well.
12845 snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
12846 snode_group = self.cfg.GetNodeGroup(snode_info.group)
12847 ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
12849 _CheckTargetNodeIPolicy(self, ipolicy, instance, snode_info,
12850 ignore=self.op.ignore_ipolicy)
12851 if pnode_info.group != snode_info.group:
12852 self.LogWarning("The primary and secondary nodes are in two"
12853 " different node groups; the disk parameters"
12854 " from the first disk's node group will be"
12857 # hvparams processing
12858 if self.op.hvparams:
12859 hv_type = instance.hypervisor
12860 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
12861 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
12862 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
# Local syntax check first, then verify on all involved nodes.
12865 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
12866 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
12867 self.hv_proposed = self.hv_new = hv_new # the new actual values
12868 self.hv_inst = i_hvdict # the new dict (without defaults)
12870 self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
12872 self.hv_new = self.hv_inst = {}
12874 # beparams processing
12875 if self.op.beparams:
12876 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
12878 objects.UpgradeBeParams(i_bedict)
12879 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
12880 be_new = cluster.SimpleFillBE(i_bedict)
12881 self.be_proposed = self.be_new = be_new # the new actual values
12882 self.be_inst = i_bedict # the new dict (without defaults)
12884 self.be_new = self.be_inst = {}
12885 self.be_proposed = cluster.SimpleFillBE(instance.beparams)
12886 be_old = cluster.FillBE(instance)
12888 # CPU param validation -- checking every time a parameter is
12889 # changed to cover all cases where either CPU mask or vcpus have
12891 if (constants.BE_VCPUS in self.be_proposed and
12892 constants.HV_CPU_MASK in self.hv_proposed):
12894 utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
12895 # Verify mask is consistent with number of vCPUs. Can skip this
12896 # test if only 1 entry in the CPU mask, which means same mask
12897 # is applied to all vCPUs.
12898 if (len(cpu_list) > 1 and
12899 len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
12900 raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
12902 (self.be_proposed[constants.BE_VCPUS],
12903 self.hv_proposed[constants.HV_CPU_MASK]),
12904 errors.ECODE_INVAL)
12906 # Only perform this test if a new CPU mask is given
12907 if constants.HV_CPU_MASK in self.hv_new:
12908 # Calculate the largest CPU number requested
12909 max_requested_cpu = max(map(max, cpu_list))
12910 # Check that all of the instance's nodes have enough physical CPUs to
12911 # satisfy the requested CPU mask
12912 _CheckNodesPhysicalCPUs(self, instance.all_nodes,
12913 max_requested_cpu + 1, instance.hypervisor)
12915 # osparams processing
12916 if self.op.osparams:
12917 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
12918 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
12919 self.os_inst = i_osdict # the new dict (without defaults)
# Memory growth: verify the instance could still start (and fail over,
# if auto-balanced) with the larger maximum memory.
12925 #TODO(dynmem): do the appropriate check involving MINMEM
12926 if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
12927 be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
12928 mem_check_list = [pnode]
12929 if be_new[constants.BE_AUTO_BALANCE]:
12930 # either we changed auto_balance to yes or it was from before
12931 mem_check_list.extend(instance.secondary_nodes)
12932 instance_info = self.rpc.call_instance_info(pnode, instance.name,
12933 instance.hypervisor)
12934 nodeinfo = self.rpc.call_node_info(mem_check_list, None,
12935 [instance.hypervisor])
12936 pninfo = nodeinfo[pnode]
12937 msg = pninfo.fail_msg
12939 # Assume the primary node is unreachable and go ahead
12940 self.warn.append("Can't get info from primary node %s: %s" %
12943 (_, _, (pnhvinfo, )) = pninfo.payload
12944 if not isinstance(pnhvinfo.get("memory_free", None), int):
12945 self.warn.append("Node data from primary node %s doesn't contain"
12946 " free memory information" % pnode)
12947 elif instance_info.fail_msg:
12948 self.warn.append("Can't get instance runtime information: %s" %
12949 instance_info.fail_msg)
12951 if instance_info.payload:
12952 current_mem = int(instance_info.payload["memory"])
12954 # Assume instance not running
12955 # (there is a slight race condition here, but it's not very
12956 # probable, and we have no other way to check)
12957 # TODO: Describe race condition
12959 #TODO(dynmem): do the appropriate check involving MINMEM
12960 miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
12961 pnhvinfo["memory_free"])
12963 raise errors.OpPrereqError("This change will prevent the instance"
12964 " from starting, due to %d MB of memory"
12965 " missing on its primary node" %
12966 miss_mem, errors.ECODE_NORES)
# Auto-balanced instances must also fit on every secondary node.
12968 if be_new[constants.BE_AUTO_BALANCE]:
12969 for node, nres in nodeinfo.items():
12970 if node not in instance.secondary_nodes:
12972 nres.Raise("Can't get info from secondary node %s" % node,
12973 prereq=True, ecode=errors.ECODE_STATE)
12974 (_, _, (nhvinfo, )) = nres.payload
12975 if not isinstance(nhvinfo.get("memory_free", None), int):
12976 raise errors.OpPrereqError("Secondary node %s didn't return free"
12977 " memory information" % node,
12978 errors.ECODE_STATE)
12979 #TODO(dynmem): do the appropriate check involving MINMEM
12980 elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
12981 raise errors.OpPrereqError("This change will prevent the instance"
12982 " from failover to its secondary node"
12983 " %s, due to not enough memory" % node,
12984 errors.ECODE_STATE)
# Runtime memory ballooning: instance must be running and the target
# value must stay within [minmem, maxmem] unless forced.
12986 if self.op.runtime_mem:
12987 remote_info = self.rpc.call_instance_info(instance.primary_node,
12989 instance.hypervisor)
12990 remote_info.Raise("Error checking node %s" % instance.primary_node)
12991 if not remote_info.payload: # not running already
12992 raise errors.OpPrereqError("Instance %s is not running" %
12993 instance.name, errors.ECODE_STATE)
12995 current_memory = remote_info.payload["memory"]
12996 if (not self.op.force and
12997 (self.op.runtime_mem > self.be_proposed[constants.BE_MAXMEM] or
12998 self.op.runtime_mem < self.be_proposed[constants.BE_MINMEM])):
12999 raise errors.OpPrereqError("Instance %s must have memory between %d"
13000 " and %d MB of memory unless --force is"
13003 self.be_proposed[constants.BE_MINMEM],
13004 self.be_proposed[constants.BE_MAXMEM]),
13005 errors.ECODE_INVAL)
# When growing, make sure the primary node has the extra memory free.
13007 delta = self.op.runtime_mem - current_memory
13009 _CheckNodeFreeMemory(self, instance.primary_node,
13010 "ballooning memory for instance %s" %
13011 instance.name, delta, instance.hypervisor)
13013 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
13014 raise errors.OpPrereqError("Disk operations not supported for"
13015 " diskless instances", errors.ECODE_INVAL)
# Local callbacks to dry-run the NIC checks via ApplyContainerMods.
13017 def _PrepareNicCreate(_, params, private):
13018 self._PrepareNicModification(params, private, None, {}, cluster, pnode)
13019 return (None, None)
13021 def _PrepareNicMod(_, nic, params, private):
13022 self._PrepareNicModification(params, private, nic.ip,
13023 nic.nicparams, cluster, pnode)
13026 # Verify NIC changes (operating on copy)
13027 nics = instance.nics[:]
13028 ApplyContainerMods("NIC", nics, None, self.nicmod,
13029 _PrepareNicCreate, _PrepareNicMod, None)
13030 if len(nics) > constants.MAX_NICS:
13031 raise errors.OpPrereqError("Instance has too many network interfaces"
13032 " (%d), cannot add more" % constants.MAX_NICS,
13033 errors.ECODE_STATE)
13035 # Verify disk changes (operating on a copy)
13036 disks = instance.disks[:]
13037 ApplyContainerMods("disk", disks, None, self.diskmod, None, None, None)
13038 if len(disks) > constants.MAX_DISKS:
13039 raise errors.OpPrereqError("Instance has too many disks (%d), cannot add"
13040 " more" % constants.MAX_DISKS,
13041 errors.ECODE_STATE)
13043 if self.op.offline is not None:
13044 if self.op.offline:
13045 msg = "can't change to offline"
13047 msg = "can't change to online"
13048 _CheckInstanceState(self, instance, CAN_CHANGE_INSTANCE_OFFLINE, msg=msg)
13050 # Pre-compute NIC changes (necessary to use result in hooks)
13051 self._nic_chgdesc = []
13053 # Operate on copies as this is still in prereq
13054 nics = [nic.Copy() for nic in instance.nics]
13055 ApplyContainerMods("NIC", nics, self._nic_chgdesc, self.nicmod,
13056 self._CreateNewNic, self._ApplyNicMods, None)
13057 self._new_nics = nics
13059 self._new_nics = None
# Convert an instance's disks from plain LVM to DRBD8: generate the new
# disk tree, create missing meta/secondary LVs, rename the originals into
# place as DRBD data volumes, create the DRBD devices, update the config
# and wait for the mirrors to sync.
13061 def _ConvertPlainToDrbd(self, feedback_fn):
13062 """Converts an instance from plain to drbd.
13065 feedback_fn("Converting template to drbd")
13066 instance = self.instance
13067 pnode = instance.primary_node
13068 snode = self.op.remote_node
13070 assert instance.disk_template == constants.DT_PLAIN
13072 # create a fake disk info for _GenerateDiskTemplate
13073 disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
13074 constants.IDISK_VG: d.logical_id[0]}
13075 for d in instance.disks]
13076 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
13077 instance.name, pnode, [snode],
13078 disk_info, None, None, 0, feedback_fn,
13080 anno_disks = rpc.AnnotateDiskParams(constants.DT_DRBD8, new_disks,
13082 info = _GetInstanceInfoText(instance)
13083 feedback_fn("Creating additional volumes...")
13084 # first, create the missing data and meta devices
13085 for disk in anno_disks:
13086 # unfortunately this is... not too nice
13087 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
# The secondary node gets both data and meta volumes created fresh.
13089 for child in disk.children:
13090 _CreateSingleBlockDev(self, snode, instance, child, info, True)
13091 # at this stage, all new LVs have been created, we can rename the
13093 feedback_fn("Renaming original volumes...")
# Rename the existing LVs on the primary to become the DRBD data LVs.
13094 rename_list = [(o, n.children[0].logical_id)
13095 for (o, n) in zip(instance.disks, new_disks)]
13096 result = self.rpc.call_blockdev_rename(pnode, rename_list)
13097 result.Raise("Failed to rename original LVs")
13099 feedback_fn("Initializing DRBD devices...")
13100 # all child devices are in place, we can now create the DRBD devices
13101 for disk in anno_disks:
13102 for node in [pnode, snode]:
# Only the primary side is created as "new" (f_create).
13103 f_create = node == pnode
13104 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
13106 # at this point, the instance has been modified
13107 instance.disk_template = constants.DT_DRBD8
13108 instance.disks = new_disks
13109 self.cfg.Update(instance, feedback_fn)
13111 # Release node locks while waiting for sync
13112 _ReleaseLocks(self, locking.LEVEL_NODE)
13114 # disks are created, waiting for sync
13115 disk_abort = not _WaitForSync(self, instance,
13116 oneshot=not self.op.wait_for_sync)
# A degraded mirror after the wait is a hard failure.
13118 raise errors.OpExecError("There are some degraded disks for"
13119 " this instance, please cleanup manually")
13121 # Node resource locks will be released by caller
# Convert an instance's disks from DRBD8 back to plain LVM: keep the
# data child LVs as the new disks, return the DRBD TCP ports to the
# pool, update the config, then remove the now-unused secondary volumes
# and the meta volumes on the primary (failures are only warnings).
13123 def _ConvertDrbdToPlain(self, feedback_fn):
13124 """Converts an instance from drbd to plain.
13127 instance = self.instance
# DRBD8 here always means exactly one secondary node.
13129 assert len(instance.secondary_nodes) == 1
13130 assert instance.disk_template == constants.DT_DRBD8
13132 pnode = instance.primary_node
13133 snode = instance.secondary_nodes[0]
13134 feedback_fn("Converting template to plain")
13136 old_disks = _AnnotateDiskParams(instance, instance.disks, self.cfg)
# The first child of each DRBD disk is its data LV; it becomes the disk.
13137 new_disks = [d.children[0] for d in instance.disks]
13139 # copy over size and mode
13140 for parent, child in zip(old_disks, new_disks):
13141 child.size = parent.size
13142 child.mode = parent.mode
13144 # this is a DRBD disk, return its port to the pool
13145 # NOTE: this must be done right before the call to cfg.Update!
13146 for disk in old_disks:
13147 tcp_port = disk.logical_id[2]
13148 self.cfg.AddTcpUdpPort(tcp_port)
13150 # update instance structure
13151 instance.disks = new_disks
13152 instance.disk_template = constants.DT_PLAIN
13153 self.cfg.Update(instance, feedback_fn)
13155 # Release locks in case removing disks takes a while
13156 _ReleaseLocks(self, locking.LEVEL_NODE)
13158 feedback_fn("Removing volumes on the secondary node...")
13159 for disk in old_disks:
13160 self.cfg.SetDiskID(disk, snode)
13161 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
# Cleanup failures are non-fatal; the conversion already succeeded.
13163 self.LogWarning("Could not remove block device %s on node %s,"
13164 " continuing anyway: %s", disk.iv_name, snode, msg)
13166 feedback_fn("Removing unneeded volumes on the primary node...")
13167 for idx, disk in enumerate(old_disks):
# The second child is the DRBD metadata LV, no longer needed.
13168 meta = disk.children[1]
13169 self.cfg.SetDiskID(meta, pnode)
13170 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
13172 self.LogWarning("Could not remove metadata for disk %d on node %s,"
13173 " continuing anyway: %s", idx, pnode, msg)
# ApplyContainerMods create-callback for disks: generate a single new
# disk matching the instance's template and create its volumes on all
# of the instance's nodes. Creation errors are warnings, not failures.
# NOTE(review): the return statement tail is elided in this view.
13175 def _CreateNewDisk(self, idx, params, _):
13176 """Creates a new disk.
13179 instance = self.instance
# File-based templates reuse the directory of the first existing disk.
13182 if instance.disk_template in constants.DTS_FILEBASED:
13183 (file_driver, file_path) = instance.disks[0].logical_id
13184 file_path = os.path.dirname(file_path)
13186 file_driver = file_path = None
13189 _GenerateDiskTemplate(self, instance.disk_template, instance.name,
13190 instance.primary_node, instance.secondary_nodes,
13191 [params], file_path, file_driver, idx,
13192 self.Log, self.diskparams)[0]
13194 info = _GetInstanceInfoText(instance)
13196 logging.info("Creating volume %s for instance %s",
13197 disk.iv_name, instance.name)
13198 # Note: this needs to be kept in sync with _CreateDisks
13200 for node in instance.all_nodes:
13201 f_create = (node == instance.primary_node)
13203 _CreateBlockDev(self, node, instance, disk, f_create, info, f_create)
13204 except errors.OpExecError, err:
# Best effort: log and continue with the remaining nodes.
13205 self.LogWarning("Failed to create volume %s (%s) on node '%s': %s",
13206 disk.iv_name, disk, node, err)
13209 ("disk/%d" % idx, "add:size=%s,mode=%s" % (disk.size, disk.mode)),
# ApplyContainerMods modify-callback for disks: only the access mode can
# be changed; returns the change description for the caller.
13213 def _ModifyDisk(idx, disk, params, _):
13214 """Modifies a disk.
13217 disk.mode = params[constants.IDISK_MODE]
13220 ("disk.mode/%d" % idx, disk.mode),
# ApplyContainerMods remove-callback for disks: remove the device on
# every node in its tree (warnings only on failure) and return DRBD TCP
# ports to the pool.
13223 def _RemoveDisk(self, idx, root, _):
# Annotate so node-tree computation sees the full disk parameters.
13227 (anno_disk,) = _AnnotateDiskParams(self.instance, [root], self.cfg)
13228 for node, disk in anno_disk.ComputeNodeTree(self.instance.primary_node):
13229 self.cfg.SetDiskID(disk, node)
13230 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
13232 self.LogWarning("Could not remove disk/%d on node '%s': %s,"
13233 " continuing anyway", idx, node, msg)
13235 # if this is a DRBD disk, return its port to the pool
13236 if root.dev_type in constants.LDS_DRBD:
13237 self.cfg.AddTcpUdpPort(root.logical_id[2])
# ApplyContainerMods create-callback for NICs: build the NIC object from
# the parameters prepared in CheckPrereq (private.params/filled) and
# return it together with its change description.
13240 def _CreateNewNic(idx, params, private):
13241 """Creates data structure for a new network interface.
13244 mac = params[constants.INIC_MAC]
13245 ip = params.get(constants.INIC_IP, None)
13246 nicparams = private.params
13248 return (objects.NIC(mac=mac, ip=ip, nicparams=nicparams), [
13250 "add:mac=%s,ip=%s,mode=%s,link=%s" %
13251 (mac, ip, private.filled[constants.NIC_MODE],
13252 private.filled[constants.NIC_LINK])),
# [review] Mutates an existing NIC in place (MAC/IP and nicparams) and
# accumulates change descriptions. Presumably a @staticmethod (decorator
# elided) — TODO confirm; the "changes = []" initializer and the trailing
# "return changes" also appear elided from this listing.
13256 def _ApplyNicMods(idx, nic, params, private):
13257 """Modifies a network interface.
# MAC and IP are set directly on the NIC object when present in params.
13262 for key in [constants.INIC_MAC, constants.INIC_IP]:
13264 changes.append(("nic.%s/%d" % (key, idx), params[key]))
13266 setattr(nic, key, params[key])
13268 nic.nicparams = private.params
# Record every remaining changed parameter for the result report.
13270 for (key, val) in params.items():
13271 changes.append(("nic.%s/%d" % (key, idx), val))
# [review] Exec of the instance-modification LU: applies runtime memory,
# disk, disk-template, NIC, hvparams/beparams/os/osparams and offline/online
# changes, then writes the updated instance to the configuration. Returns a
# list of (field, value) change descriptions ("result" — its initializer and
# final return appear elided from this listing).
13275 def Exec(self, feedback_fn):
13276 """Modifies an instance.
13278 All parameters take effect only at the next restart of the instance.
13281 # Process here the warnings from CheckPrereq, as we don't have a
13282 # feedback_fn there.
13283 # TODO: Replace with self.LogWarning
13284 for warn in self.warn:
13285 feedback_fn("WARNING: %s" % warn)
# Node resource locks are held exactly when a disk template conversion
# was requested (XOR invariant).
13287 assert ((self.op.disk_template is None) ^
13288 bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
13289 "Not owning any node resource locks"
13292 instance = self.instance
# Runtime memory ballooning is applied immediately on the primary node.
13295 if self.op.runtime_mem:
13296 rpcres = self.rpc.call_instance_balloon_memory(instance.primary_node,
13298 self.op.runtime_mem)
13299 rpcres.Raise("Cannot modify instance runtime memory")
13300 result.append(("runtime_memory", self.op.runtime_mem))
13302 # Apply disk changes
13303 ApplyContainerMods("disk", instance.disks, result, self.diskmod,
13304 self._CreateNewDisk, self._ModifyDisk, self._RemoveDisk)
13305 _UpdateIvNames(0, instance.disks)
# Disk template conversion: verify lock coverage, shut down disks, then
# dispatch to the registered conversion routine.
13307 if self.op.disk_template:
13309 check_nodes = set(instance.all_nodes)
13310 if self.op.remote_node:
13311 check_nodes.add(self.op.remote_node)
13312 for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
13313 owned = self.owned_locks(level)
13314 assert not (check_nodes - owned), \
13315 ("Not owning the correct locks, owning %r, expected at least %r" %
13316 (owned, check_nodes))
13318 r_shut = _ShutdownInstanceDisks(self, instance)
# (elided "if not r_shut:" guard) — abort if the disks cannot be stopped.
13320 raise errors.OpExecError("Cannot shutdown instance disks, unable to"
13321 " proceed with disk template conversion")
13322 mode = (instance.disk_template, self.op.disk_template)
# (elided try/except around the conversion, presumably releasing the
# reserved DRBD minors on failure — TODO confirm against full source)
13324 self._DISK_CONVERSIONS[mode](self, feedback_fn)
13326 self.cfg.ReleaseDRBDMinors(instance.name)
13328 result.append(("disk_template", self.op.disk_template))
13330 assert instance.disk_template == self.op.disk_template, \
13331 ("Expected disk template '%s', found '%s'" %
13332 (self.op.disk_template, instance.disk_template))
13334 # Release node and resource locks if there are any (they might already have
13335 # been released during disk conversion)
13336 _ReleaseLocks(self, locking.LEVEL_NODE)
13337 _ReleaseLocks(self, locking.LEVEL_NODE_RES)
13339 # Apply NIC changes
13340 if self._new_nics is not None:
13341 instance.nics = self._new_nics
13342 result.extend(self._nic_chgdesc)
# Hypervisor, backend, OS name and OS parameter changes only touch the
# configuration object; they take effect at next restart.
13345 if self.op.hvparams:
13346 instance.hvparams = self.hv_inst
13347 for key, val in self.op.hvparams.iteritems():
13348 result.append(("hv/%s" % key, val))
13351 if self.op.beparams:
13352 instance.beparams = self.be_inst
13353 for key, val in self.op.beparams.iteritems():
13354 result.append(("be/%s" % key, val))
13357 if self.op.os_name:
13358 instance.os = self.op.os_name
13361 if self.op.osparams:
13362 instance.osparams = self.os_inst
13363 for key, val in self.op.osparams.iteritems():
13364 result.append(("os/%s" % key, val))
# offline tri-state: None = no change, True = offline, False = online+down.
13366 if self.op.offline is None:
13369 elif self.op.offline:
13370 # Mark instance as offline
13371 self.cfg.MarkInstanceOffline(instance.name)
13372 result.append(("admin_state", constants.ADMINST_OFFLINE))
13374 # Mark instance as online, but stopped
13375 self.cfg.MarkInstanceDown(instance.name)
13376 result.append(("admin_state", constants.ADMINST_DOWN))
# Persist all accumulated changes.
13378 self.cfg.Update(instance, feedback_fn)
13380 assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
13381 self.owned_locks(locking.LEVEL_NODE)), \
13382 "All node locks should have been released by now"
# Dispatch table mapping (current template, requested template) to the
# method performing that conversion; only plain<->drbd8 is supported here.
13386 _DISK_CONVERSIONS = {
13387 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
13388 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
# [review] Logical unit that moves an instance to a different node group by
# asking the instance allocator for a solution and returning the resulting
# jobs. NOTE(review): numbered listing with some short lines ("else:",
# docstring terminators, "env = {", "return env") elided.
13392 class LUInstanceChangeGroup(LogicalUnit):
13393 HPATH = "instance-change-group"
13394 HTYPE = constants.HTYPE_INSTANCE
# Declare shared locks on the instance, its groups and nodes; resolve the
# requested target groups to UUIDs up front.
13397 def ExpandNames(self):
13398 self.share_locks = _ShareAll()
13399 self.needed_locks = {
13400 locking.LEVEL_NODEGROUP: [],
13401 locking.LEVEL_NODE: [],
13404 self._ExpandAndLockInstance()
13406 if self.op.target_groups:
13407 self.req_target_uuids = map(self.cfg.LookupNodeGroup,
13408 self.op.target_groups)
# (elided "else:") — no explicit targets requested.
13410 self.req_target_uuids = None
13412 self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
13414 def DeclareLocks(self, level):
13415 if level == locking.LEVEL_NODEGROUP:
13416 assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13418 if self.req_target_uuids:
13419 lock_groups = set(self.req_target_uuids)
13421 # Lock all groups used by instance optimistically; this requires going
13422 # via the node before it's locked, requiring verification later on
13423 instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
13424 lock_groups.update(instance_groups)
# (elided "else:")
13426 # No target groups, need to lock all of them
13427 lock_groups = locking.ALL_SET
13429 self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13431 elif level == locking.LEVEL_NODE:
13432 if self.req_target_uuids:
13433 # Lock all nodes used by instances
13434 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13435 self._LockInstancesNodes()
13437 # Lock all nodes in all potential target groups
13438 lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
13439 self.cfg.GetInstanceNodeGroups(self.op.instance_name))
13440 member_nodes = [node_name
13441 for group in lock_groups
13442 for node_name in self.cfg.GetNodeGroup(group).members]
13443 self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
# (elided "else:")
13445 # Lock all nodes as all groups are potential targets
13446 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
# Re-verify, under the acquired locks, that the optimistic assumptions
# made in DeclareLocks still hold, and compute self.target_uuids.
13448 def CheckPrereq(self):
13449 owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13450 owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13451 owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13453 assert (self.req_target_uuids is None or
13454 owned_groups.issuperset(self.req_target_uuids))
13455 assert owned_instances == set([self.op.instance_name])
13457 # Get instance information
13458 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
13460 # Check if node groups for locked instance are still correct
13461 assert owned_nodes.issuperset(self.instance.all_nodes), \
13462 ("Instance %s's nodes changed while we kept the lock" %
13463 self.op.instance_name)
13465 inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
13468 if self.req_target_uuids:
13469 # User requested specific target groups
13470 self.target_uuids = frozenset(self.req_target_uuids)
# (elided "else:")
13472 # All groups except those used by the instance are potential targets
13473 self.target_uuids = owned_groups - inst_groups
# A target group the instance already uses is an error.
13475 conflicting_groups = self.target_uuids & inst_groups
13476 if conflicting_groups:
13477 raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
13478 " used by the instance '%s'" %
13479 (utils.CommaJoin(conflicting_groups),
13480 self.op.instance_name),
13481 errors.ECODE_INVAL)
13483 if not self.target_uuids:
13484 raise errors.OpPrereqError("There are no possible target groups",
13485 errors.ECODE_INVAL)
13487 def BuildHooksEnv(self):
13488 """Build hooks env.
13491 assert self.target_uuids
13494 "TARGET_GROUPS": " ".join(self.target_uuids),
13497 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
# Hooks run only on the master node.
13501 def BuildHooksNodes(self):
13502 """Build hooks nodes.
13505 mn = self.cfg.GetMasterNode()
13506 return ([mn], [mn])
# Ask the iallocator for the group change; return the jobs it produces.
13508 def Exec(self, feedback_fn):
13509 instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13511 assert instances == [self.op.instance_name], "Instance not locked"
13513 req = iallocator.IAReqGroupChange(instances=instances,
13514 target_groups=list(self.target_uuids))
13515 ial = iallocator.IAllocator(self.cfg, self.rpc, req)
13517 ial.Run(self.op.iallocator)
13519 if not ial.success:
13520 raise errors.OpPrereqError("Can't compute solution for changing group of"
13521 " instance '%s' using iallocator '%s': %s" %
13522 (self.op.instance_name, self.op.iallocator,
13523 ial.info), errors.ECODE_NORES)
13525 jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13527 self.LogInfo("Iallocator returned %s job(s) for changing group of"
13528 " instance '%s'", len(jobs), self.op.instance_name)
13530 return ResultWithJobs(jobs)
# [review] Thin LU wrapper delegating the export-list query to _ExportQuery;
# Exec converts the (node, exportname) pairs into the old-style dict result
# (node -> list of export names, or False when the node query failed).
# NOTE(review): the "result = {}" initializer, the "else:" and the final
# "return result" appear elided from this listing.
13533 class LUBackupQuery(NoHooksLU):
13534 """Query the exports list
13539 def CheckArguments(self):
13540 self.expq = _ExportQuery(qlang.MakeSimpleFilter("node", self.op.nodes),
13541 ["node", "export"], self.op.use_locking)
13543 def ExpandNames(self):
13544 self.expq.ExpandNames(self)
13546 def DeclareLocks(self, level):
13547 self.expq.DeclareLocks(self, level)
13549 def Exec(self, feedback_fn):
13552 for (node, expname) in self.expq.OldStyleQuery(self):
# expname None marks a node whose export list could not be retrieved.
13553 if expname is None:
13554 result[node] = False
13556 result.setdefault(node, []).append(expname)
# [review] Query implementation for instance exports: optionally locks the
# wanted nodes, then RPCs each node for its export list. A failed node RPC
# is reported as (node, None).
13561 class _ExportQuery(_QueryBase):
13562 FIELDS = query.EXPORT_FIELDS
13564 #: The node name is not a unique key for this query
13565 SORT_FIELD = "node"
13567 def ExpandNames(self, lu):
13568 lu.needed_locks = {}
13570 # The following variables interact with _QueryBase._GetNames
# (elided "if self.names:" / "else:" around the two assignments)
13572 self.wanted = _GetWantedNodes(lu, self.names)
13574 self.wanted = locking.ALL_SET
13576 self.do_locking = self.use_locking
13578 if self.do_locking:
13579 lu.share_locks = _ShareAll()
13580 lu.needed_locks = {
13581 locking.LEVEL_NODE: self.wanted,
# No per-level lock refinement needed.
13584 def DeclareLocks(self, lu, level):
13587 def _GetQueryData(self, lu):
13588 """Computes the list of nodes and their attributes.
13591 # Locking is not used
13593 assert not (compat.any(lu.glm.is_owned(level)
13594 for level in locking.LEVELS
13595 if level != locking.LEVEL_CLUSTER) or
13596 self.do_locking or self.use_locking)
13598 nodes = self._GetNames(lu, lu.cfg.GetNodeList(), locking.LEVEL_NODE)
# One (node, export-name) pair per export; (node, None) on RPC failure.
# ("result = []" initializer and final return appear elided.)
13602 for (node, nres) in lu.rpc.call_export_list(nodes).items():
13604 result.append((node, None))
13606 result.extend((node, expname) for expname in nres.payload)
# [review] Prepares an instance for export. For remote exports it creates a
# temporary X509 certificate on the primary node and returns handshake and
# key material signed with the cluster domain secret; for other modes the
# (elided) return is presumably None — TODO confirm against full source.
13611 class LUBackupPrepare(NoHooksLU):
13612 """Prepares an instance for an export and returns useful information.
13617 def ExpandNames(self):
13618 self._ExpandAndLockInstance()
13620 def CheckPrereq(self):
13621 """Check prerequisites.
13624 instance_name = self.op.instance_name
13626 self.instance = self.cfg.GetInstanceInfo(instance_name)
13627 assert self.instance is not None, \
13628 "Cannot retrieve locked instance %s" % self.op.instance_name
13629 _CheckNodeOnline(self, self.instance.primary_node)
# Cluster domain secret used to HMAC-sign the generated key name/CA.
13631 self._cds = _GetClusterDomainSecret()
13633 def Exec(self, feedback_fn):
13634 """Prepares an instance for an export.
13637 instance = self.instance
13639 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13640 salt = utils.GenerateSecret(8)
13642 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
13643 result = self.rpc.call_x509_cert_create(instance.primary_node,
13644 constants.RIE_CERT_VALIDITY)
13645 result.Raise("Can't create X509 key and certificate on %s" % result.node)
13647 (name, cert_pem) = result.payload
13649 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
# Returned dict (opening "return {" elided): handshake token, HMAC-signed
# key name, and the CA signed with the cluster domain secret.
13653 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
13654 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
13656 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
# [review] Exports an instance either to another node in the cluster
# (EXPORT_MODE_LOCAL) or to a remote cluster over encrypted import/export
# daemons (EXPORT_MODE_REMOTE). NOTE(review): numbered listing; short lines
# ("try:", "else:", "if msg:", "return" etc.) are elided throughout.
13662 class LUBackupExport(LogicalUnit):
13663 """Export an instance to an image in the cluster.
13666 HPATH = "instance-export"
13667 HTYPE = constants.HTYPE_INSTANCE
# Remote mode requires both the signed X509 key name and a destination CA.
13670 def CheckArguments(self):
13671 """Check the arguments.
13674 self.x509_key_name = self.op.x509_key_name
13675 self.dest_x509_ca_pem = self.op.destination_x509_ca
13677 if self.op.mode == constants.EXPORT_MODE_REMOTE:
13678 if not self.x509_key_name:
13679 raise errors.OpPrereqError("Missing X509 key name for encryption",
13680 errors.ECODE_INVAL)
13682 if not self.dest_x509_ca_pem:
13683 raise errors.OpPrereqError("Missing destination X509 CA",
13684 errors.ECODE_INVAL)
13686 def ExpandNames(self):
13687 self._ExpandAndLockInstance()
13689 # Lock all nodes for local exports
13690 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13691 # FIXME: lock only instance primary and destination node
13693 # Sad but true, for now we have do lock all nodes, as we don't know where
13694 # the previous export might be, and in this LU we search for it and
13695 # remove it from its current node. In the future we could fix this by:
13696 # - making a tasklet to search (share-lock all), then create the
13697 # new one, then one to remove, after
13698 # - removing the removal operation altogether
13699 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13701 def DeclareLocks(self, level):
13702 """Last minute lock declaration."""
13703 # All nodes are locked anyway, so nothing to do here.
13705 def BuildHooksEnv(self):
13706 """Build hooks env.
13708 This will run on the master, primary node and target node.
# ("env = {" opener elided)
13712 "EXPORT_MODE": self.op.mode,
13713 "EXPORT_NODE": self.op.target_node,
13714 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
13715 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
13716 # TODO: Generic function for boolean env variables
13717 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
13720 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
13724 def BuildHooksNodes(self):
13725 """Build hooks nodes.
13728 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
# For local exports the destination node also runs the hooks.
13730 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13731 nl.append(self.op.target_node)
13735 def CheckPrereq(self):
13736 """Check prerequisites.
13738 This checks that the instance and node names are valid.
13741 instance_name = self.op.instance_name
13743 self.instance = self.cfg.GetInstanceInfo(instance_name)
13744 assert self.instance is not None, \
13745 "Cannot retrieve locked instance %s" % self.op.instance_name
13746 _CheckNodeOnline(self, self.instance.primary_node)
# Removing a running instance without shutting it down first is refused.
13748 if (self.op.remove_instance and
13749 self.instance.admin_state == constants.ADMINST_UP and
13750 not self.op.shutdown):
13751 raise errors.OpPrereqError("Can not remove instance without shutting it"
13752 " down before", errors.ECODE_STATE)
# Local mode: target_node is a node name; verify it is online/not drained.
13754 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13755 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
13756 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
13757 assert self.dst_node is not None
13759 _CheckNodeOnline(self, self.dst_node.name)
13760 _CheckNodeNotDrained(self, self.dst_node.name)
13763 self.dest_disk_info = None
13764 self.dest_x509_ca = None
# Remote mode: target_node is per-disk destination info, not a node name.
13766 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13767 self.dst_node = None
13769 if len(self.op.target_node) != len(self.instance.disks):
13770 raise errors.OpPrereqError(("Received destination information for %s"
13771 " disks, but instance %s has %s disks") %
13772 (len(self.op.target_node), instance_name,
13773 len(self.instance.disks)),
13774 errors.ECODE_INVAL)
13776 cds = _GetClusterDomainSecret()
13778 # Check X509 key name
# (elided "try:")
13780 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
13781 except (TypeError, ValueError), err:
13782 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
13783 errors.ECODE_INVAL)
13785 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
13786 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
13787 errors.ECODE_INVAL)
13789 # Load and verify CA
# (elided "try:")
13791 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
13792 except OpenSSL.crypto.Error, err:
13793 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
13794 (err, ), errors.ECODE_INVAL)
13796 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
13797 if errcode is not None:
13798 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
13799 (msg, ), errors.ECODE_INVAL)
13801 self.dest_x509_ca = cert
13803 # Verify target information
# ("disk_info = []" initializer elided)
13805 for idx, disk_data in enumerate(self.op.target_node):
13807 (host, port, magic) = \
13808 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
13809 except errors.GenericError, err:
13810 raise errors.OpPrereqError("Target info for disk %s: %s" %
13811 (idx, err), errors.ECODE_INVAL)
13813 disk_info.append((host, port, magic))
13815 assert len(disk_info) == len(self.op.target_node)
13816 self.dest_disk_info = disk_info
# (elided "else:") — any other mode is a programming error.
13819 raise errors.ProgrammerError("Unhandled export mode %r" %
13822 # instance disk type verification
13823 # TODO: Implement export support for file-based disks
13824 for disk in self.instance.disks:
13825 if disk.dev_type == constants.LD_FILE:
13826 raise errors.OpPrereqError("Export not supported for instances with"
13827 " file-based disks", errors.ECODE_INVAL)
13829 def _CleanupExports(self, feedback_fn):
13830 """Removes exports of current instance from all other nodes.
13832 If an instance in a cluster with nodes A..D was exported to node C, its
13833 exports will be removed from the nodes A, B and D.
13836 assert self.op.mode != constants.EXPORT_MODE_REMOTE
13838 nodelist = self.cfg.GetNodeList()
13839 nodelist.remove(self.dst_node.name)
13841 # on one-node clusters nodelist will be empty after the removal
13842 # if we proceed the backup would be removed because OpBackupQuery
13843 # substitutes an empty list with the full cluster node list.
13844 iname = self.instance.name
# (elided "if nodelist:" guard — see comment above about one-node clusters)
13846 feedback_fn("Removing old exports for instance %s" % iname)
13847 exportlist = self.rpc.call_export_list(nodelist)
13848 for node in exportlist:
# Skip nodes we could not query ("continue" elided).
13849 if exportlist[node].fail_msg:
13851 if iname in exportlist[node].payload:
13852 msg = self.rpc.call_export_remove(node, iname).fail_msg
# ("if msg:" guard elided) — removal failures are only warnings.
13854 self.LogWarning("Could not remove older export for instance %s"
13855 " on node %s: %s", iname, node, msg)
13857 def Exec(self, feedback_fn):
13858 """Export an instance to an image in the cluster.
13861 assert self.op.mode in constants.EXPORT_MODES
13863 instance = self.instance
13864 src_node = instance.primary_node
# Optionally shut the instance down first (disks are kept available).
13866 if self.op.shutdown:
13867 # shutdown the instance, but not the disks
13868 feedback_fn("Shutting down instance %s" % instance.name)
13869 result = self.rpc.call_instance_shutdown(src_node, instance,
13870 self.op.shutdown_timeout)
13871 # TODO: Maybe ignore failures if ignore_remove_failures is set
13872 result.Raise("Could not shutdown instance %s on"
13873 " node %s" % (instance.name, src_node))
13875 # set the disks ID correctly since call_instance_start needs the
13876 # correct drbd minor to create the symlinks
13877 for disk in instance.disks:
13878 self.cfg.SetDiskID(disk, src_node)
# Stopped instances need their disks activated for snapshotting.
13880 activate_disks = (instance.admin_state != constants.ADMINST_UP)
13883 # Activate the instance disks if we'exporting a stopped instance
13884 feedback_fn("Activating disks for %s" % instance.name)
13885 _StartInstanceDisks(self, instance, None)
# (elided try/finally presumably wraps the export to deactivate disks —
# see the deactivation at 13931/13932 below; TODO confirm)
13888 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
13891 helper.CreateSnapshots()
# Restart the instance once snapshots exist, if it was running and is
# not about to be removed.
13893 if (self.op.shutdown and
13894 instance.admin_state == constants.ADMINST_UP and
13895 not self.op.remove_instance):
13896 assert not activate_disks
13897 feedback_fn("Starting instance %s" % instance.name)
13898 result = self.rpc.call_instance_start(src_node,
13899 (instance, None, None), False)
13900 msg = result.fail_msg
# ("if msg:" guard elided)
13902 feedback_fn("Failed to start instance: %s" % msg)
13903 _ShutdownInstanceDisks(self, instance)
13904 raise errors.OpExecError("Could not start instance: %s" % msg)
13906 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13907 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
13908 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
13909 connect_timeout = constants.RIE_CONNECT_TIMEOUT
13910 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
13912 (key_name, _, _) = self.x509_key_name
13915 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
13918 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
13919 key_name, dest_ca_pem,
13924 # Check for backwards compatibility
13925 assert len(dresults) == len(instance.disks)
13926 assert compat.all(isinstance(i, bool) for i in dresults), \
13927 "Not all results are boolean: %r" % dresults
# (elided "finally:"/"if activate_disks:" presumably guards this — TODO)
13931 feedback_fn("Deactivating disks for %s" % instance.name)
13932 _ShutdownInstanceDisks(self, instance)
# Collect and report all partial failures in one exception.
13934 if not (compat.all(dresults) and fin_resu):
13937 failures.append("export finalization")
13938 if not compat.all(dresults):
13939 fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
13941 failures.append("disk export: disk(s) %s" % fdsk)
13943 raise errors.OpExecError("Export failed, errors in %s" %
13944 utils.CommaJoin(failures))
13946 # At this point, the export was successful, we can cleanup/finish
13948 # Remove instance if requested
13949 if self.op.remove_instance:
13950 feedback_fn("Removing instance %s" % instance.name)
13951 _RemoveInstance(self, feedback_fn, instance,
13952 self.op.ignore_remove_failures)
# Only local exports leave stale copies elsewhere in the cluster.
13954 if self.op.mode == constants.EXPORT_MODE_LOCAL:
13955 self._CleanupExports(feedback_fn)
13957 return fin_resu, dresults
# [review] Removes all exports of a (possibly already deleted) instance from
# every node in the cluster; only warns on per-node failures. NOTE(review):
# some short lines ("fqdn_warn = ..."/"found = ..." initializers, "continue",
# "if msg:") appear elided from this listing.
13960 class LUBackupRemove(NoHooksLU):
13961 """Remove exports related to the named instance.
13966 def ExpandNames(self):
13967 self.needed_locks = {}
13968 # We need all nodes to be locked in order for RemoveExport to work, but we
13969 # don't need to lock the instance itself, as nothing will happen to it (and
13970 # we can remove exports also for a removed instance)
13971 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
13973 def Exec(self, feedback_fn):
13974 """Remove any export.
13977 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
13978 # If the instance was not found we'll try with the name that was passed in.
13979 # This will only work if it was an FQDN, though.
13981 if not instance_name:
# Fall back to the raw name; the fqdn_warn flag (elided) records this.
13983 instance_name = self.op.instance_name
13985 locked_nodes = self.owned_locks(locking.LEVEL_NODE)
13986 exportlist = self.rpc.call_export_list(locked_nodes)
13988 for node in exportlist:
13989 msg = exportlist[node].fail_msg
# ("if msg:"/"continue" elided) — unreachable nodes are only warned about.
13991 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
13993 if instance_name in exportlist[node].payload:
13995 result = self.rpc.call_export_remove(node, instance_name)
13996 msg = result.fail_msg
13998 logging.error("Could not remove export for instance %s"
13999 " on node %s: %s", instance_name, node, msg)
# Warn the user if the name did not resolve and nothing was found.
14001 if fqdn_warn and not found:
14002 feedback_fn("Export not found. If trying to remove an export belonging"
14003 " to a deleted instance please use its Fully Qualified"
# [review] Creates a new node group: validates name uniqueness and all
# optional parameter dictionaries (ndparams, hv/disk state, diskparams,
# ipolicy), then adds the NodeGroup object to the configuration.
14007 class LUGroupAdd(LogicalUnit):
14008 """Logical unit for creating node groups.
14011 HPATH = "group-add"
14012 HTYPE = constants.HTYPE_GROUP
14015 def ExpandNames(self):
14016 # We need the new group's UUID here so that we can create and acquire the
14017 # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
14018 # that it should not check whether the UUID exists in the configuration.
14019 self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
14020 self.needed_locks = {}
14021 self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
14023 def CheckPrereq(self):
14024 """Check prerequisites.
14026 This checks that the given group name is not an existing node group
# (elided "try:"; the except/pass below means "name free" is the good case)
14031 existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14032 except errors.OpPrereqError:
# (elided "pass"/"else:") — reaching here means the name already exists.
14035 raise errors.OpPrereqError("Desired group name '%s' already exists as a"
14036 " node group (UUID: %s)" %
14037 (self.op.group_name, existing_uuid),
14038 errors.ECODE_EXISTS)
14040 if self.op.ndparams:
14041 utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
14043 if self.op.hv_state:
14044 self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state, None)
# (elided "else:")
14046 self.new_hv_state = None
14048 if self.op.disk_state:
14049 self.new_disk_state = _MergeAndVerifyDiskState(self.op.disk_state, None)
# (elided "else:")
14051 self.new_disk_state = None
14053 if self.op.diskparams:
14054 for templ in constants.DISK_TEMPLATES:
14055 if templ in self.op.diskparams:
14056 utils.ForceDictType(self.op.diskparams[templ],
14057 constants.DISK_DT_TYPES)
14058 self.new_diskparams = self.op.diskparams
# (elided "try:")
14060 utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
14061 except errors.OpPrereqError, err:
14062 raise errors.OpPrereqError("While verify diskparams options: %s" % err,
14063 errors.ECODE_INVAL)
# (elided "else:") — no diskparams supplied.
14065 self.new_diskparams = {}
14067 if self.op.ipolicy:
14068 cluster = self.cfg.GetClusterInfo()
14069 full_ipolicy = cluster.SimpleFillIPolicy(self.op.ipolicy)
14071 objects.InstancePolicy.CheckParameterSyntax(full_ipolicy, False)
14072 except errors.ConfigurationError, err:
14073 raise errors.OpPrereqError("Invalid instance policy: %s" % err,
14074 errors.ECODE_INVAL)
14076 def BuildHooksEnv(self):
14077 """Build hooks env.
14081 "GROUP_NAME": self.op.group_name,
# Hooks run only on the master node.
14084 def BuildHooksNodes(self):
14085 """Build hooks nodes.
14088 mn = self.cfg.GetMasterNode()
14089 return ([mn], [mn])
14091 def Exec(self, feedback_fn):
14092 """Add the node group to the cluster.
14095 group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
14096 uuid=self.group_uuid,
14097 alloc_policy=self.op.alloc_policy,
14098 ndparams=self.op.ndparams,
14099 diskparams=self.new_diskparams,
14100 ipolicy=self.op.ipolicy,
14101 hv_state_static=self.new_hv_state,
14102 disk_state_static=self.new_disk_state)
# UUID was generated locally, so skip the existence check.
14104 self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
14105 del self.remove_locks[locking.LEVEL_NODEGROUP]
# [review] Reassigns a list of nodes to a target group, refusing (unless
# forced) assignments that would split a mirrored instance across groups.
14108 class LUGroupAssignNodes(NoHooksLU):
14109 """Logical unit for assigning nodes to groups.
14114 def ExpandNames(self):
14115 # These raise errors.OpPrereqError on their own:
14116 self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
14117 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
14119 # We want to lock all the affected nodes and groups. We have readily
14120 # available the list of nodes, and the *destination* group. To gather the
14121 # list of "source" groups, we need to fetch node information later on.
14122 self.needed_locks = {
14123 locking.LEVEL_NODEGROUP: set([self.group_uuid]),
14124 locking.LEVEL_NODE: self.op.nodes,
14127 def DeclareLocks(self, level):
14128 if level == locking.LEVEL_NODEGROUP:
14129 assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
14131 # Try to get all affected nodes' groups without having the group or node
14132 # lock yet. Needs verification later in the code flow.
14133 groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
14135 self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
14137 def CheckPrereq(self):
14138 """Check prerequisites.
14141 assert self.needed_locks[locking.LEVEL_NODEGROUP]
14142 assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
14143 frozenset(self.op.nodes))
# Verify the optimistic group set from DeclareLocks is still accurate.
14145 expected_locks = (set([self.group_uuid]) |
14146 self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
14147 actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
14148 if actual_locks != expected_locks:
14149 raise errors.OpExecError("Nodes changed groups since locks were acquired,"
14150 " current groups are '%s', used to be '%s'" %
14151 (utils.CommaJoin(expected_locks),
14152 utils.CommaJoin(actual_locks)))
14154 self.node_data = self.cfg.GetAllNodesInfo()
14155 self.group = self.cfg.GetNodeGroup(self.group_uuid)
14156 instance_data = self.cfg.GetAllInstancesInfo()
14158 if self.group is None:
14159 raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
14160 (self.op.group_name, self.group_uuid))
# Determine which mirrored instances this assignment would split.
14162 (new_splits, previous_splits) = \
14163 self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
14164 for node in self.op.nodes],
14165 self.node_data, instance_data)
# (elided "if new_splits:" guard)
14168 fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
14170 if not self.op.force:
14171 raise errors.OpExecError("The following instances get split by this"
14172 " change and --force was not given: %s" %
# (elided "else:") — forced: proceed but warn.
14175 self.LogWarning("This operation will split the following instances: %s",
14178 if previous_splits:
14179 self.LogWarning("In addition, these already-split instances continue"
14180 " to be split across groups: %s",
14181 utils.CommaJoin(utils.NiceSort(previous_splits)))
14183 def Exec(self, feedback_fn):
14184 """Assign nodes to a new group.
14187 mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
14189 self.cfg.AssignGroupNodes(mods)
# Pure function over config data; presumably decorated @staticmethod (the
# decorator line is elided from this listing) — TODO confirm.
14192 def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
14193 """Check for split instances after a node assignment.
14195 This method considers a series of node assignments as an atomic operation,
14196 and returns information about split instances after applying the set of
14199 In particular, it returns information about newly split instances, and
14200 instances that were already split, and remain so after the change.
14202 Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
14205 @type changes: list of (node_name, new_group_uuid) pairs.
14206 @param changes: list of node assignments to consider.
14207 @param node_data: a dict with data for all nodes
14208 @param instance_data: a dict with all instances to consider
14209 @rtype: a two-tuple
14210 @return: a list of instances that were previously okay and result split as a
14211 consequence of this change, and a list of instances that were previously
14212 split and this change does not fix.
# Only assignments that actually move a node to a new group matter.
14215 changed_nodes = dict((node, group) for node, group in changes
14216 if node_data[node].group != group)
14218 all_split_instances = set()
14219 previously_split_instances = set()
14221 def InstanceNodes(instance):
14222 return [instance.primary_node] + list(instance.secondary_nodes)
14224 for inst in instance_data.values():
# Non-mirrored templates cannot be split ("continue" elided).
14225 if inst.disk_template not in constants.DTS_INT_MIRROR:
14228 instance_nodes = InstanceNodes(inst)
# Split before the change?
14230 if len(set(node_data[node].group for node in instance_nodes)) > 1:
14231 previously_split_instances.add(inst.name)
# Split after applying the proposed changes?
14233 if len(set(changed_nodes.get(node, node_data[node].group)
14234 for node in instance_nodes)) > 1:
14235 all_split_instances.add(inst.name)
14237 return (list(all_split_instances - previously_split_instances),
14238 list(previously_split_instances & all_split_instances))
# [review] Query implementation for node groups. Accepts names or UUIDs,
# works lock-free from configuration snapshots, and optionally computes
# group->nodes and group->instances maps for the requested fields.
14241 class _GroupQuery(_QueryBase):
14242 FIELDS = query.GROUP_FIELDS
14244 def ExpandNames(self, lu):
14245 lu.needed_locks = {}
14247 self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
14248 self._cluster = lu.cfg.GetClusterInfo()
14249 name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
# (elided "if not self.names:" branch) — no names: all groups, sorted.
14252 self.wanted = [name_to_uuid[name]
14253 for name in utils.NiceSort(name_to_uuid.keys())]
14255 # Accept names to be either names or UUIDs.
14258 all_uuid = frozenset(self._all_groups.keys())
# ("missing = []" / "self.wanted = []" initializers elided)
14260 for name in self.names:
14261 if name in all_uuid:
14262 self.wanted.append(name)
14263 elif name in name_to_uuid:
14264 self.wanted.append(name_to_uuid[name])
# (elided "else:")
14266 missing.append(name)
# (elided "if missing:" guard)
14269 raise errors.OpPrereqError("Some groups do not exist: %s" %
14270 utils.CommaJoin(missing),
14271 errors.ECODE_NOENT)
# No locks are taken for group queries.
14273 def DeclareLocks(self, lu, level):
14276 def _GetQueryData(self, lu):
14277 """Computes the list of node groups and their attributes.
14280 do_nodes = query.GQ_NODE in self.requested_data
14281 do_instances = query.GQ_INST in self.requested_data
14283 group_to_nodes = None
14284 group_to_instances = None
14286 # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
14287 # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
14288 # latter GetAllInstancesInfo() is not enough, for we have to go through
14289 # instance->node. Hence, we will need to process nodes even if we only need
14290 # instance information.
14291 if do_nodes or do_instances:
14292 all_nodes = lu.cfg.GetAllNodesInfo()
14293 group_to_nodes = dict((uuid, []) for uuid in self.wanted)
# ("node_to_group = {}" initializer elided)
14296 for node in all_nodes.values():
14297 if node.group in group_to_nodes:
14298 group_to_nodes[node.group].append(node.name)
14299 node_to_group[node.name] = node.group
# (elided "if do_instances:" guard)
14302 all_instances = lu.cfg.GetAllInstancesInfo()
14303 group_to_instances = dict((uuid, []) for uuid in self.wanted)
# An instance belongs to the group of its primary node.
14305 for instance in all_instances.values():
14306 node = instance.primary_node
14307 if node in node_to_group:
14308 group_to_instances[node_to_group[node]].append(instance.name)
# (elided "if not do_nodes:" guard)
14311 # Do not pass on node information if it was not requested.
14312 group_to_nodes = None
14314 return query.GroupQueryData(self._cluster,
14315 [self._all_groups[uuid]
14316 for uuid in self.wanted],
14317 group_to_nodes, group_to_instances,
14318 query.GQ_DISKPARAMS in self.requested_data)
class LUGroupQuery(NoHooksLU):
  """Logical unit for querying node groups.

  Thin wrapper delegating all work to a L{_GroupQuery} instance.

  """
  REQ_BGL = False

  def CheckArguments(self):
    # The helper object carries all query state between phases
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
                          self.op.output_fields, False)

  def ExpandNames(self):
    self.gq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.gq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    # Old-style (list-of-rows) result for backwards compatibility
    return self.gq.OldStyleQuery(self)
class LUGroupSetParams(LogicalUnit):
  """Modifies the parameters of a node group.

  """
  HPATH = "group-modify"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def CheckArguments(self):
    # At least one of the modifiable parameters must be given
    all_changes = [
      self.op.ndparams,
      self.op.diskparams,
      self.op.alloc_policy,
      self.op.hv_state,
      self.op.disk_state,
      self.op.ipolicy,
      ]

    if all_changes.count(None) == len(all_changes):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

    # Instance locks are only needed for ipolicy violation warnings, so
    # sharing them is enough
    self.share_locks[locking.LEVEL_INSTANCE] = 1

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once group lock has
      # been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

  @staticmethod
  def _UpdateAndVerifyDiskParams(old, new):
    """Updates and verifies disk parameters.

    @param old: existing disk parameter subdict for one disk template
    @param new: requested changes to apply on top of C{old}
    @return: the merged and type-checked parameter dict

    """
    new_params = _GetUpdatedParams(old, new)
    utils.ForceDictType(new_params, constants.DISK_DT_TYPES)
    return new_params

  def CheckPrereq(self):
    """Check prerequisites.

    """
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    cluster = self.cfg.GetClusterInfo()

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

    if self.op.diskparams:
      diskparams = self.group.diskparams
      uavdp = self._UpdateAndVerifyDiskParams
      # For each disktemplate subdict update and verify the values
      new_diskparams = dict((dt,
                             uavdp(diskparams.get(dt, {}),
                                   self.op.diskparams[dt]))
                            for dt in constants.DISK_TEMPLATES
                            if dt in self.op.diskparams)
      # As we've all subdicts of diskparams ready, lets merge the actual
      # dict with all updated subdicts
      self.new_diskparams = objects.FillDict(diskparams, new_diskparams)
      try:
        utils.VerifyDictOptions(self.new_diskparams, constants.DISK_DT_DEFAULTS)
      except errors.OpPrereqError, err:
        raise errors.OpPrereqError("While verify diskparams options: %s" % err,
                                   errors.ECODE_INVAL)

    if self.op.hv_state:
      self.new_hv_state = _MergeAndVerifyHvState(self.op.hv_state,
                                                 self.group.hv_state_static)

    if self.op.disk_state:
      self.new_disk_state = \
        _MergeAndVerifyDiskState(self.op.disk_state,
                                 self.group.disk_state_static)

    if self.op.ipolicy:
      self.new_ipolicy = _GetUpdatedIPolicy(self.group.ipolicy,
                                            self.op.ipolicy,
                                            group_policy=True)

      # Warn (but do not fail) about instances that would violate the new
      # instance policy; only the optimistically-locked instances are checked
      new_ipolicy = cluster.SimpleFillIPolicy(self.new_ipolicy)
      inst_filter = lambda inst: inst.name in owned_instances
      instances = self.cfg.GetInstancesInfoByFilter(inst_filter).values()
      gmi = ganeti.masterd.instance
      violations = \
          _ComputeNewInstanceViolations(gmi.CalculateGroupIPolicy(cluster,
                                                                  self.group),
                                        new_ipolicy, instances)

      if violations:
        self.LogWarning("After the ipolicy change the following instances"
                        " violate them: %s",
                        utils.CommaJoin(violations))

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Modifies the node group.

    @return: list of (parameter name, new value) pairs for the changed
        parameters

    """
    result = []

    if self.op.ndparams:
      self.group.ndparams = self.new_ndparams
      result.append(("ndparams", str(self.group.ndparams)))

    if self.op.diskparams:
      self.group.diskparams = self.new_diskparams
      result.append(("diskparams", str(self.group.diskparams)))

    if self.op.alloc_policy:
      self.group.alloc_policy = self.op.alloc_policy

    if self.op.hv_state:
      self.group.hv_state_static = self.new_hv_state

    if self.op.disk_state:
      self.group.disk_state_static = self.new_disk_state

    if self.op.ipolicy:
      self.group.ipolicy = self.new_ipolicy

    self.cfg.Update(self.group, feedback_fn)
    return result
class LUGroupRemove(LogicalUnit):
  """Removes an (empty) node group from the cluster.

  """
  HPATH = "group-remove"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This will raise errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name exists as a node group, that is
    empty (i.e., contains no nodes), and that is not the last group of the
    cluster.

    """
    # Verify that the group is empty.
    all_nodes = self.cfg.GetAllNodesInfo().values()
    group_nodes = [node.name for node in all_nodes
                   if node.group == self.group_uuid]

    if group_nodes:
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
                                 " nodes: %s" %
                                 (self.op.group_name,
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
                                 errors.ECODE_STATE)

    # Verify the cluster would not be left group-less.
    if len(self.cfg.GetNodeGroupList()) == 1:
      raise errors.OpPrereqError("Group '%s' is the only group, cannot be"
                                 " removed" % self.op.group_name,
                                 errors.ECODE_STATE)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    master = self.cfg.GetMasterNode()
    return ([master], [master])

  def Exec(self, feedback_fn):
    """Remove the node group.

    """
    try:
      self.cfg.RemoveNodeGroup(self.group_uuid)
    except errors.ConfigurationError:
      # Group vanished between CheckPrereq and here
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
                               (self.op.group_name, self.group_uuid))

    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
class LUGroupRename(LogicalUnit):
  """Renames a node group.

  """
  HPATH = "group-rename"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    Ensures requested new name is not yet used.

    """
    try:
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
    except errors.OpPrereqError:
      # The new name is free, which is exactly what we want
      pass
    else:
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
                                 " node group (UUID: %s)" %
                                 (self.op.new_name, new_name_uuid),
                                 errors.ECODE_EXISTS)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OLD_NAME": self.op.group_name,
      "NEW_NAME": self.op.new_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    Hooks run on the master plus all members of the renamed group.

    """
    master = self.cfg.GetMasterNode()

    all_nodes = self.cfg.GetAllNodesInfo()
    all_nodes.pop(master, None)

    run_nodes = [master]
    run_nodes.extend(node.name for node in all_nodes.values()
                     if node.group == self.group_uuid)

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    """Rename the node group.

    """
    group = self.cfg.GetNodeGroup(self.group_uuid)

    if group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    group.name = self.op.new_name
    self.cfg.Update(group, feedback_fn)

    return self.op.new_name
class LUGroupEvacuate(LogicalUnit):
  """Evacuates all instances out of a node group via the iallocator.

  """
  HPATH = "group-evacuate"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    if self.op.target_groups:
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                                  self.op.target_groups)
    else:
      self.req_target_uuids = []

    # The evacuated group can obviously not also be a target
    if self.group_uuid in self.req_target_uuids:
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
                                 " as a target group (targets are %s)" %
                                 (self.group_uuid,
                                  utils.CommaJoin(self.req_target_uuids)),
                                 errors.ECODE_INVAL)

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if self.req_target_uuids:
        lock_groups = set([self.group_uuid] + self.req_target_uuids)

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lock_groups.update(group_uuid
                           for instance_name in
                             self.owned_locks(locking.LEVEL_INSTANCE)
                           for group_uuid in
                             self.cfg.GetInstanceNodeGroups(instance_name))
      else:
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET

      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be evacuated which
      # contain actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be evacuated and target groups
      owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
      assert self.group_uuid in owned_groups
      member_nodes = [node_name
                      for group in owned_groups
                      for node_name in self.cfg.GetNodeGroup(group).members]
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)

  def CheckPrereq(self):
    """Verifies the optimistically-taken locks and computes target groups.

    """
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert owned_groups.issuperset(self.req_target_uuids)
    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))

    # Check if node groups for locked instances are still correct
    _CheckInstancesNodeGroups(self.cfg, self.instances,
                              owned_groups, owned_nodes, self.group_uuid)

    if self.req_target_uuids:
      # User requested specific target groups
      self.target_uuids = self.req_target_uuids
    else:
      # All groups except the one to be evacuated are potential targets
      self.target_uuids = [group_uuid for group_uuid in owned_groups
                           if group_uuid != self.group_uuid]

      if not self.target_uuids:
        raise errors.OpPrereqError("There are no possible target groups",
                                   errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()

    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)

    run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    """Asks the iallocator for a group-change plan and returns the jobs.

    """
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert self.group_uuid not in self.target_uuids

    req = iallocator.IAReqGroupChange(instances=instances,
                                      target_groups=self.target_uuids)
    ial = iallocator.IAllocator(self.cfg, self.rpc, req)

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute group evacuation using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)

    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
                 len(jobs), self.op.group_name)

    return ResultWithJobs(jobs)
class TagsLU(NoHooksLU): # pylint: disable=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """
  def ExpandNames(self):
    self.group_uuid = None
    self.needed_locks = {}

    # Expand the target name and choose the matching lock level; node and
    # instance names are canonicalized here as a side effect
    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      lock_level = locking.LEVEL_NODE
      lock_name = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      lock_level = locking.LEVEL_INSTANCE
      lock_name = self.op.name
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
      lock_level = locking.LEVEL_NODEGROUP
      lock_name = self.group_uuid
    else:
      # Cluster tags (and unknown kinds, rejected later) need no lock here
      lock_level = None
      lock_name = None

    if lock_level and getattr(self.op, "use_locking", True):
      self.needed_locks[lock_level] = lock_name

    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
    # not possible to acquire the BGL based on opcode parameters)

  def CheckPrereq(self):
    """Check prerequisites.

    Resolves C{self.target} to the configuration object whose tags are
    being operated on.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)
class LUTagsGet(TagsLU):
  """Returns the tags of a given object.

  """
  REQ_BGL = False

  def ExpandNames(self):
    TagsLU.ExpandNames(self)

    # Share locks as this is only a read operation
    self.share_locks = _ShareAll()

  def Exec(self, feedback_fn):
    """Return the list of tags on the target object.

    """
    return list(self.target.GetTags())
14857 class LUTagsSearch(NoHooksLU):
14858 """Searches the tags for a given pattern.
14863 def ExpandNames(self):
14864 self.needed_locks = {}
14866 def CheckPrereq(self):
14867 """Check prerequisites.
14869 This checks the pattern passed for validity by compiling it.
14873 self.re = re.compile(self.op.pattern)
14874 except re.error, err:
14875 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
14876 (self.op.pattern, err), errors.ECODE_INVAL)
14878 def Exec(self, feedback_fn):
14879 """Returns the tag list.
14883 tgts = [("/cluster", cfg.GetClusterInfo())]
14884 ilist = cfg.GetAllInstancesInfo().values()
14885 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
14886 nlist = cfg.GetAllNodesInfo().values()
14887 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
14888 tgts.extend(("/nodegroup/%s" % n.name, n)
14889 for n in cfg.GetAllNodeGroupsInfo().values())
14891 for path, target in tgts:
14892 for tag in target.GetTags():
14893 if self.re.search(tag):
14894 results.append((path, tag))
14898 class LUTagsSet(TagsLU):
14899 """Sets a tag on a given object.
14904 def CheckPrereq(self):
14905 """Check prerequisites.
14907 This checks the type and length of the tag name and value.
14910 TagsLU.CheckPrereq(self)
14911 for tag in self.op.tags:
14912 objects.TaggableObject.ValidateTag(tag)
14914 def Exec(self, feedback_fn):
14919 for tag in self.op.tags:
14920 self.target.AddTag(tag)
14921 except errors.TagError, err:
14922 raise errors.OpExecError("Error while setting tag: %s" % str(err))
14923 self.cfg.Update(self.target, feedback_fn)
class LUTagsDel(TagsLU):
  """Delete a list of tags from a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

    requested = frozenset(self.op.tags)
    current = self.target.GetTags()

    # Every requested tag must currently exist on the target
    not_found = requested - current
    if not_found:
      missing_names = ("'%s'" % name for name in sorted(not_found))
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (utils.CommaJoin(missing_names), ),
                                 errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)
class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      last_idx = self.op.repeat - 1
      for idx in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (idx, last_idx))
        self._TestDelay()
class LUTestJqueue(NoHooksLU):
  """Utility LU to test some aspects of the job queue.

  """
  REQ_BGL = False

  # Must be lower than default timeout for WaitForJobChange to see whether it
  # notices changed jobs
  _CLIENT_CONNECT_TIMEOUT = 20.0
  _CLIENT_CONFIRM_TIMEOUT = 60.0

  @classmethod
  def _NotifyUsingSocket(cls, cb, errcls):
    """Opens a Unix socket and waits for another program to connect.

    @type cb: callable
    @param cb: Callback to send socket name to client
    @type errcls: class
    @param errcls: Exception class to use for errors

    """
    # Using a temporary directory as there's no easy way to create temporary
    # sockets without writing a custom loop around tempfile.mktemp and
    # socket.bind
    tmpdir = tempfile.mkdtemp()
    try:
      tmpsock = utils.PathJoin(tmpdir, "sock")

      logging.debug("Creating temporary socket at %s", tmpsock)
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
      try:
        sock.bind(tmpsock)
        sock.listen(1)

        # Send details to client
        cb(tmpsock)

        # Wait for client to connect before continuing
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
        try:
          (conn, _) = sock.accept()
        except socket.error, err:
          raise errcls("Client didn't connect in time (%s)" % err)
      finally:
        sock.close()
    finally:
      # Remove as soon as client is connected
      shutil.rmtree(tmpdir)

    # Wait for client to close
    try:
      try:
        # pylint: disable=E1101
        # Instance of '_socketobject' has no ... member
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
        conn.recv(1)
      except socket.error, err:
        raise errcls("Client failed to confirm notification (%s)" % err)
    finally:
      conn.close()

  def _SendNotification(self, test, arg, sockname):
    """Sends a notification to the client.

    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)
    @type sockname: string
    @param sockname: Socket path

    """
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))

  def _Notify(self, prereq, test, arg):
    """Notifies the client of a test.

    @type prereq: bool
    @param prereq: Whether this is a prereq-phase test
    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)

    """
    if prereq:
      errcls = errors.OpPrereqError
    else:
      errcls = errors.OpExecError

    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
                                                  test, arg),
                                   errcls)

  def CheckArguments(self):
    # Track phase calls to verify the job queue runs them in order
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
    self.expandnames_calls = 0

  def ExpandNames(self):
    checkargs_calls = getattr(self, "checkargs_calls", 0)
    if checkargs_calls < 1:
      raise errors.ProgrammerError("CheckArguments was not called")

    self.expandnames_calls += 1

    if self.op.notify_waitlock:
      self._Notify(True, constants.JQT_EXPANDNAMES, None)

    self.LogInfo("Expanding names")

    # Get lock on master node (just to get a lock, not for a particular reason)
    self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
      }

  def Exec(self, feedback_fn):
    if self.expandnames_calls < 1:
      raise errors.ProgrammerError("ExpandNames was not called")

    if self.op.notify_exec:
      self._Notify(False, constants.JQT_EXEC, None)

    self.LogInfo("Executing")

    if self.op.log_messages:
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
        # Report how many test messages have been sent
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)

    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")

    return True
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the director and mode test.

    """
    if self.op.mode in (constants.IALLOCATOR_MODE_ALLOC,
                        constants.IALLOCATOR_MODE_MULTI_ALLOC):
      # Allocation requests describe a (not yet existing) instance
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      # Relocation needs an existing instance
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
          list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    @return: the iallocator request text ("in" direction) or the
        allocator's response text ("out" direction)

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      req = iallocator.IAReqInstanceAlloc(name=self.op.name,
                                          memory=self.op.memory,
                                          disks=self.op.disks,
                                          disk_template=self.op.disk_template,
                                          os=self.op.os,
                                          tags=self.op.tags,
                                          nics=self.op.nics,
                                          vcpus=self.op.vcpus,
                                          spindle_use=self.op.spindle_use,
                                          hypervisor=self.op.hypervisor)
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      req = iallocator.IAReqRelocate(name=self.op.name,
                                     relocate_from=list(self.relocate_from))
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      req = iallocator.IAReqGroupChange(instances=self.op.instances,
                                        target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      req = iallocator.IAReqNodeEvac(instances=self.op.instances,
                                     evac_mode=self.op.evac_mode)
    elif self.op.mode == constants.IALLOCATOR_MODE_MULTI_ALLOC:
      disk_template = self.op.disk_template
      insts = [iallocator.IAReqInstanceAlloc(name="%s%s" % (self.op.name, idx),
                                             memory=self.op.memory,
                                             disks=self.op.disks,
                                             disk_template=disk_template,
                                             os=self.op.os,
                                             tags=self.op.tags,
                                             nics=self.op.nics,
                                             vcpus=self.op.vcpus,
                                             spindle_use=self.op.spindle_use,
                                             hypervisor=self.op.hypervisor)
               for idx in range(self.op.count)]
      req = iallocator.IAReqMultiInstanceAlloc(instances=insts)
    else:
      raise errors.ProgrammerError("Uncatched mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    ial = iallocator.IAllocator(self.cfg, self.rpc, req)
    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result
#: Query type implementations
_QUERY_IMPL = {
  constants.QR_CLUSTER: _ClusterQuery,
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  constants.QR_EXPORT: _ExportQuery,
  }

# Every resource queryable via an opcode must have an implementation here
assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}
  @raise errors.OpPrereqError: if no implementation exists for C{name}

  """
  impl = _QUERY_IMPL.get(name)
  if impl is None:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
  return impl